AHCI RESEARCH GROUP
Publications
Papers published in international journals,
conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to filter the list to papers on specific research topics.
You can expand the Abstract, Links, and BibTeX record of each paper.
2025
Fernandez, J. A. V.; Lee, J. J.; Vacca, S. A. S.; Magana, A.; Peša, R.; Benes, B.; Popescu, V.
Hands-Free VR Proceedings Article
In: Bashford-Rogers, T.; Meneveaux, D.; Ammi, M.; Ziat, M.; Jänicke, S.; Purchase, H.; Radeva, P.; Furnari, A.; Bouatouch, K.; Sousa, A. A. (Ed.): Proc. Int. Jt. Conf. Comput. Vis. Imaging Comput. Graph. Theory Appl., pp. 533–542, Science and Technology Publications, Lda, 2025, ISSN: 2184-5921.
Abstract | Links | BibTeX | Tags: Deep learning, Large language model, Retrieval-Augmented Generation, Speech-to-Text, Virtual Reality
@inproceedings{fernandez_hands-free_2025,
title = {Hands-Free VR},
author = {J. A. V. Fernandez and J. J. Lee and S. A. S. Vacca and A. Magana and R. Peša and B. Benes and V. Popescu},
editor = {T. Bashford-Rogers and D. Meneveaux and M. Ammi and M. Ziat and S. Jänicke and H. Purchase and P. Radeva and A. Furnari and K. Bouatouch and A. A. Sousa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001963646&doi=10.5220%2f0013115100003912&partnerID=40&md5=a3f2f4e16bcd5e0579b38e062c987eab},
doi = {10.5220/0013115100003912},
issn = {2184-5921},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. Int. Jt. Conf. Comput. Vis. Imaging Comput. Graph. Theory Appl.},
volume = {1},
pages = {533–542},
publisher = {Science and Technology Publications, Lda},
abstract = {We introduce Hands-Free VR, a voice-based natural-language interface for VR that allows interaction using voice alone, without additional hardware. The user's voice command is converted into text using a fine-tuned speech-to-text deep-learning model. The text is then mapped to an executable VR command using an LLM, which is robust to natural language diversity. Hands-Free VR was evaluated in a within-subjects study (N = 22) where participants arranged objects using either a conventional VR interface or Hands-Free VR. The results confirm that Hands-Free VR is: (1) significantly more efficient than conventional VR interfaces in task completion time and user motion metrics; (2) highly rated for ease of use, intuitiveness, ergonomics, reliability, and desirability; (3) robust to English accents (20 participants were non-native speakers) and phonetic similarity, accurately transcribing 96.7% of voice commands; and (4) robust to natural language diversity, mapping 97.83% of transcriptions to executable commands. © 2025 by SCITEPRESS–Science and Technology Publications, Lda.},
keywords = {Deep learning, Large language model, Retrieval-Augmented Generation, Speech-to-Text, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
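The abstract above describes a two-stage pipeline: a fine-tuned speech-to-text model transcribes the spoken command, and an LLM maps the transcript onto a fixed set of executable VR commands. The paper publishes no code, so the following Python sketch only illustrates the shape of such a pipeline; it uses openai-whisper as a stand-in for the authors' fine-tuned model and a generic chat-completion API for the mapping step. VR_COMMANDS, the prompt, and voice_to_vr_command are hypothetical names.

import whisper                    # openai-whisper; stand-in for the fine-tuned model
from openai import OpenAI

# Hypothetical command templates; the paper's actual command set is not published.
VR_COMMANDS = ["grab <object>", "release", "move <object> to <location>", "undo"]

stt = whisper.load_model("base")  # speech-to-text stage
llm = OpenAI()                    # any chat-completion LLM works here

def voice_to_vr_command(wav_path: str) -> str:
    """Transcribe a voice command, then normalize it to one executable command."""
    transcript = stt.transcribe(wav_path)["text"]
    # The LLM absorbs natural-language diversity: many phrasings, one command.
    prompt = (f"Map this request to exactly one template from {VR_COMMANDS}, "
              f"filling the slots. Reply with the command only. Request: {transcript!r}")
    reply = llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.choices[0].message.content.strip()

Under this sketch, "put the red cube on the table, please" and "move that cube onto the table" should both normalize to the same move command, which is the kind of robustness the study quantifies.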
Tomkou, D.; Fatouros, G.; Andreou, A.; Makridis, G.; Liarokapis, F.; Dardanis, D.; Kiourtis, A.; Soldatos, J.; Kyriazis, D.
Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents Proceedings Article
In: pp. 1050–1056, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331543723.
Abstract | Links | BibTeX | Tags: Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing
@inproceedings{tomkou_bridging_2025,
title = {Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents},
author = {D. Tomkou and G. Fatouros and A. Andreou and G. Makridis and F. Liarokapis and D. Dardanis and A. Kiourtis and J. Soldatos and D. Kyriazis},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013837767&doi=10.1109%2FDCOSS-IoT65416.2025.00158&partnerID=40&md5=45e35086d8be9d3e16afeade6598d238},
doi = {10.1109/DCOSS-IoT65416.2025.00158},
isbn = {9798331543723},
year = {2025},
date = {2025-01-01},
pages = {1050–1056},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper introduces a novel integration of Retrieval-Augmented Generation (RAG)-enhanced Large Language Models (LLMs) with Extended Reality (XR) technologies to address knowledge transfer challenges in industrial environments. The proposed system embeds domain-specific industrial knowledge into XR environments through a natural language interface, enabling hands-free, context-aware expert guidance for workers. We present the architecture of the proposed system, consisting of an LLM Chat Engine with dynamic tool orchestration and an XR application featuring voice-driven interaction. Performance evaluation of various chunking strategies, embedding models, and vector databases reveals that semantic chunking, balanced embedding models, and efficient vector stores deliver optimal performance for industrial knowledge retrieval. The system's potential is demonstrated through early implementation in multiple industrial use cases, including robotic assembly, smart infrastructure maintenance, and aerospace component servicing. Results indicate potential for enhancing training efficiency, remote assistance capabilities, and operational guidance, in alignment with Industry 5.0's human-centric and resilient approach to industrial development.},
keywords = {Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing},
pubstate = {published},
tppubtype = {inproceedings}
}
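The evaluation above singles out semantic chunking, balanced embedding models, and efficient vector stores as the best-performing retrieval configuration. The paper's implementation is not public; the sketch below shows one common reading of semantic chunking (group adjacent sentences while their embeddings stay similar) together with cosine-similarity retrieval. The model name and the 0.6 threshold are assumptions.

import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # a small, "balanced" embedding model

def semantic_chunks(sentences: list[str], threshold: float = 0.6) -> list[str]:
    """Merge adjacent sentences into one chunk while they stay semantically close."""
    if not sentences:
        return []
    embs = model.encode(sentences, normalize_embeddings=True)
    chunks, current = [], [sentences[0]]
    for i in range(1, len(sentences)):
        if float(embs[i] @ embs[i - 1]) >= threshold:  # cosine sim of unit vectors
            current.append(sentences[i])
        else:
            chunks.append(" ".join(current))
            current = [sentences[i]]
    chunks.append(" ".join(current))
    return chunks

def retrieve(query: str, chunks: list[str], k: int = 3) -> list[str]:
    """Rank chunks by cosine similarity to the query; top-k become LLM context."""
    index = model.encode(chunks, normalize_embeddings=True)
    q = model.encode([query], normalize_embeddings=True)[0]
    top = np.argsort(index @ q)[::-1][:k]
    return [chunks[i] for i in top]

A production system would persist the chunk vectors in a dedicated vector database rather than re-encoding them per query; comparing such stores is part of what the paper evaluates.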
Masasi de Oliveira, E. A.; Sousa, R. T.; Bastos, A. A.; Martins de Freitas Cintra, L.; Galvão Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910.
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi de Oliveira and R. T. Sousa and A. A. Bastos and L. Martins de Freitas Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2F3706370.3731643&partnerID=40&md5=47a47f3408a0e6cb35c16dd6101a15b0},
doi = {10.1145/3706370.3731643},
isbn = {9798400713910},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains.},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
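The key mechanism in SA-RAG is conditioning retrieval on the user's current visual attention. A minimal sketch of that idea follows, assuming a hypothetical Exhibit record and a gaze target supplied by the VR runtime (the paper's Unreal Engine implementation is not published): the gazed exhibit is folded into the retrieval query so that deictic questions resolve to the object being viewed.

from dataclasses import dataclass
from typing import Optional

@dataclass
class Exhibit:
    name: str
    description: str  # curator-written text indexed by the RAG backend

def spatially_aware_query(question: str, gazed: Optional[Exhibit]) -> str:
    """Fold the gaze target into the retrieval query so 'this' resolves."""
    if gazed is None:
        return question  # no fixation: fall back to plain RAG
    # Prefix the exhibit context so the retriever ranks passages about the
    # object under visual attention above globally popular passages.
    return f"[Exhibit: {gazed.name}] {gazed.description[:200]} | {question}"

With the gaze raycast reporting, say, Starry Night, the vague question "who painted this?" becomes a query grounded in that specific exhibit before it ever reaches the vector store.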