AHCI RESEARCH GROUP
Publications
Papers published in international journals and in the
proceedings of conferences, workshops, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to filter the list to papers dealing with specific research topics.
You can expand the Abstract, Links, and BibTeX record for each paper.
2025
Tomkou, D.; Fatouros, G.; Andreou, A.; Makridis, G.; Liarokapis, F.; Dardanis, D.; Kiourtis, A.; Soldatos, J.; Kyriazis, D.
Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents Proceedings Article
In: pp. 1050–1056, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331543723.
Abstract | Links | BibTeX | Tags: Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Language Model, Large language model, large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing
@inproceedings{tomkou_bridging_2025,
title = {Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents},
author = {D. Tomkou and G. Fatouros and A. Andreou and G. Makridis and F. Liarokapis and D. Dardanis and A. Kiourtis and J. Soldatos and D. Kyriazis},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013837767&doi=10.1109%2FDCOSS-IoT65416.2025.00158&partnerID=40&md5=45e35086d8be9d3e16afeade6598d238},
doi = {10.1109/DCOSS-IoT65416.2025.00158},
isbn = {9798331543723},
year = {2025},
date = {2025-01-01},
pages = {1050–1056},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper introduces a novel integration of Retrieval-Augmented Generation (RAG) enhanced Large Language Models (LLMs) with Extended Reality (XR) technologies to address knowledge transfer challenges in industrial environments. The proposed system embeds domain-specific industrial knowledge into XR environments through a natural language interface, enabling hands-free, context-aware expert guidance for workers. We present the architecture of the proposed system, consisting of an LLM Chat Engine with dynamic tool orchestration and an XR application featuring voice-driven interaction. Performance evaluation of various chunking strategies, embedding models, and vector databases reveals that semantic chunking, balanced embedding models, and efficient vector stores deliver optimal performance for industrial knowledge retrieval. The system's potential is demonstrated through early implementation in multiple industrial use cases, including robotic assembly, smart infrastructure maintenance, and aerospace component servicing. Results indicate potential for enhancing training efficiency, remote assistance capabilities, and operational guidance in alignment with Industry 5.0's human-centric and resilient approach to industrial development.},
keywords = {Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Language Model, Large language model, large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing},
pubstate = {published},
tppubtype = {inproceedings}
}
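A minimal sketch of the retrieval pipeline this abstract describes, i.e. semantic chunking plus embedding search over a vector store. Every concrete choice below is an assumption made for illustration, not the authors' stack: sentence-transformers stands in for the embedding model, a NumPy cosine-similarity search stands in for the vector database, and the similarity-threshold chunker is one simple reading of "semantic chunking".

# Hypothetical RAG retrieval sketch (Python); model and store choices assumed.
import re
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

def semantic_chunks(text: str, threshold: float = 0.6) -> list[str]:
    """Group consecutive sentences while their embeddings stay similar."""
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    embs = model.encode(sentences, normalize_embeddings=True)
    chunks, current = [], [sentences[0]]
    for prev, cur, sent in zip(embs, embs[1:], sentences[1:]):
        if float(np.dot(prev, cur)) >= threshold:
            current.append(sent)                 # same topic: extend the chunk
        else:
            chunks.append(" ".join(current))     # topic shift: start a new chunk
            current = [sent]
    chunks.append(" ".join(current))
    return chunks

class VectorStore:
    """Tiny in-memory stand-in for the vector database."""
    def __init__(self, chunks: list[str]):
        self.chunks = chunks
        self.matrix = model.encode(chunks, normalize_embeddings=True)

    def query(self, question: str, k: int = 3) -> list[str]:
        q = model.encode([question], normalize_embeddings=True)[0]
        scores = self.matrix @ q                 # cosine similarity (normalized)
        return [self.chunks[i] for i in np.argsort(scores)[::-1][:k]]

manual = "Torque the flange bolts to 45 Nm. Inspect the seal for wear."
store = VectorStore(semantic_chunks(manual))
context = store.query("What torque do the flange bolts need?")

In the full system, the retrieved chunks would be spliced into the LLM prompt, with the answer spoken back through the XR application's voice interface.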
2024
Imamura, S.; Hiraki, H.; Rekimoto, J.
Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 588–590, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374490.
Abstract | Links | BibTeX | Tags: Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real-time, Speech recognition, Support systems, Virtual Reality
@inproceedings{imamura_serendipity_2024,
title = {Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model},
author = {S. Imamura and H. Hiraki and J. Rekimoto},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195557406&doi=10.1109%2FVRW62533.2024.00113&partnerID=40&md5=5e9dee609e4039e310de8e8c0edce8a0},
doi = {10.1109/VRW62533.2024.00113},
isbn = {9798350374490},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {588–590},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Group discussions are important for exploring new ideas. One way to support them is to present relevant keywords or images, but such methods tend not to take the context of the conversation into account. We therefore propose a system that develops group discussions by presenting information related to the ongoing discussion. As a specific example, this study addresses academic discussions among HCI researchers. During brainstorming sessions, the system continuously transcribes the dialogue and generates embedding vectors of the discussion. These vectors are matched against those of existing research articles to identify relevant studies, which the system then presents on a large display, summarized by an LLM. In a case study, the system broadened the topics of discussion and facilitated the acquisition of new knowledge. Since a larger display area is desirable in terms of information volume and size, virtual reality environments with headsets could also be suitable for this system, in addition to large displays.},
keywords = {Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real-time, Speech recognition, Support systems, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
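A compact sketch of the loop this abstract describes: each transcribed utterance is embedded together with a rolling window of recent dialogue, matched against precomputed embeddings of research articles, and the top hits are handed to an LLM for summarization on the shared display. The embedding model is assumed, and paper_embs/paper_titles are hypothetical precomputed inputs; speech recognition and the summarizer are stubbed out.

# Hypothetical discussion-matching loop (Python); inputs and model assumed.
from collections import deque
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")   # assumed embedding model
window = deque(maxlen=6)                          # rolling window of utterances

def on_utterance(text: str, paper_embs: np.ndarray,
                 paper_titles: list[str], k: int = 3) -> list[str]:
    """Called by the speech recognizer for each utterance; returns papers to
    show on the wall (paper_embs: normalized matrix, papers x dim)."""
    window.append(text)
    query = model.encode([" ".join(window)], normalize_embeddings=True)[0]
    scores = paper_embs @ query                   # cosine similarity
    top = np.argsort(scores)[::-1][:k]
    return [paper_titles[i] for i in top]         # next step: LLM summarization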
Bao, Y.; Gao, N.; Weng, D.; Chen, J.; Tian, Z.
MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 337–338, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798331506919.
Abstract | Links | BibTeX | Tags: Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides
@inproceedings{bao_musegesture_2024,
title = {MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides},
author = {Y. Bao and N. Gao and D. Weng and J. Chen and Z. Tian},
editor = {U. Eck and M. Sra and J. Stefanucci and M. Sugimoto and M. Tatzgern and I. Williams},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214385900&doi=10.1109%2FISMAR-Adjunct64951.2024.00079&partnerID=40&md5=16a1b740663a051f3611cb201211620a},
doi = {10.1109/ISMAR-Adjunct64951.2024.00079},
isbn = {9798331506919},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {337–338},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents an innovative framework named MuseGesture, designed to generate contextually adaptive gestures for virtual agents in Virtual Reality (VR) museums. The framework leverages the robust language understanding and generation capabilities of Large Language Models (LLMs) to parse tour narration texts and generate corresponding explanatory gestures. Through reinforcement learning and adversarial skill embeddings, the framework also generates guiding gestures tailored to the virtual museum environment, integrating both gesture types using conditional motion interpolation methods. Experimental results and user studies demonstrate that this approach effectively enables voice-command-controlled virtual guide gestures, offering a novel intelligent guiding system solution that enhances the interactive experience in VR museum environments.},
keywords = {Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides},
pubstate = {published},
tppubtype = {inproceedings}
}
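Of the three ingredients named here (LLM parsing of narration, adversarial skill embeddings learned with reinforcement learning, and conditional motion interpolation), the last lends itself to a toy sketch. The blend below interpolates flattened pose tracks under a smoothstep schedule; a production system would interpolate joint rotations (e.g., quaternion slerp), and none of this should be read as the authors' implementation.

# Toy conditional motion interpolation (Python/NumPy); purely illustrative.
import numpy as np

def blend_motions(explain: np.ndarray, guide: np.ndarray,
                  condition: float) -> np.ndarray:
    """explain, guide: (frames, dofs) pose tracks. condition in [0, 1] sets
    how strongly to steer from the explanatory clip toward the guiding one."""
    n = min(len(explain), len(guide))
    ramp = np.linspace(0.0, 1.0, n)
    weight = condition * (3 * ramp**2 - 2 * ramp**3)   # smoothstep ease-in
    return (1 - weight)[:, None] * explain[:n] + weight[:, None] * guide[:n]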
He, K.; Yao, K.; Zhang, Q.; Yu, J.; Liu, L.; Xu, L.
DressCode: Autoregressively Sewing and Generating Garments from Text Guidance Journal Article
In: ACM Transactions on Graphics, vol. 43, no. 4, 2024, ISSN: 0730-0301.
Abstract | Links | BibTeX | Tags: 3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On
@article{he_dresscode_2024,
title = {DressCode: Autoregressively Sewing and Generating Garments from Text Guidance},
author = {K. He and K. Yao and Q. Zhang and J. Yu and L. Liu and L. Xu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199257820&doi=10.1145%2f3658147&partnerID=40&md5=8996e62e4d9dabb5a7034f8bf4df5a43},
doi = {10.1145/3658147},
issn = {0730-0301},
year = {2024},
date = {2024-01-01},
journal = {ACM Transactions on Graphics},
volume = {43},
number = {4},
abstract = {Apparel's significant role in human appearance underscores the importance of garment digitalization for digital human creation. Recent advances in 3D content creation are pivotal for digital human creation. Nonetheless, garment generation from text guidance is still nascent. We introduce a text-driven 3D garment generation framework, DressCode, which aims to democratize design for novices and offer immense potential in fashion design, virtual try-on, and digital human creation. We first introduce SewingGPT, a GPT-based architecture integrating cross-attention with text-conditioned embedding to generate sewing patterns with text guidance. We then tailor a pre-trained Stable Diffusion to generate tile-based Physically-based Rendering (PBR) textures for the garments. By leveraging a large language model, our framework generates CG-friendly garments through natural language interaction. It also facilitates pattern completion and texture editing, streamlining the design process through user-friendly interaction. This framework fosters innovation by allowing creators to freely experiment with designs and incorporate unique elements into their work. With comprehensive evaluations and comparisons with other state-of-the-art methods, our method showcases superior quality and alignment with input prompts. User studies further validate our high-quality rendering results, highlighting its practical utility and potential in production settings. Copyright © 2024 held by the owner/author(s).},
keywords = {3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On},
pubstate = {published},
tppubtype = {article}
}
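SewingGPT is described as a GPT-style decoder whose cross-attention conditions sewing-pattern token generation on a text embedding. The PyTorch stand-in below shows that shape: a causal transformer decoder attends to a text_memory tensor that a text encoder (not shown) would supply. Vocabulary, dimensions, and layer counts are invented for illustration and do not reflect the paper's configuration.

# Hypothetical text-conditioned autoregressive decoder (PyTorch); sizes assumed.
import torch
import torch.nn as nn

VOCAB, DIM, MAXLEN = 512, 256, 128                 # assumed sizes

class PatternDecoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.tok = nn.Embedding(VOCAB, DIM)
        self.pos = nn.Embedding(MAXLEN, DIM)
        layer = nn.TransformerDecoderLayer(DIM, nhead=8, batch_first=True)
        self.decoder = nn.TransformerDecoder(layer, num_layers=4)
        self.head = nn.Linear(DIM, VOCAB)

    def forward(self, tokens, text_memory):
        # tokens: (B, T) pattern tokens; text_memory: (B, S, DIM) text features
        T = tokens.size(1)
        x = self.tok(tokens) + self.pos(torch.arange(T, device=tokens.device))
        mask = nn.Transformer.generate_square_subsequent_mask(T).to(tokens.device)
        h = self.decoder(x, text_memory, tgt_mask=mask)  # cross-attend to text
        return self.head(h)                              # next-token logits

@torch.no_grad()
def generate(model, text_memory, bos=0, steps=32):
    """Greedy autoregressive decoding from a start token."""
    seq = torch.full((text_memory.size(0), 1), bos,
                     dtype=torch.long, device=text_memory.device)
    for _ in range(steps):
        logits = model(seq, text_memory)[:, -1]
        seq = torch.cat([seq, logits.argmax(-1, keepdim=True)], dim=1)
    return seq   # tokens to be decoded into sewing-pattern geometry

The generated tokens would then be interpreted as pattern geometry and textured, per the abstract, by a tailored Stable Diffusion PBR stage.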