AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.
2025
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331536459 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
  title     = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in {Virtual Reality} Role-Play Scenarios},
  author    = {Li, Z. and Zhang, H. and Peng, C. and Peiris, R.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2FVR59515.2025.00025&partnerID=40&md5=1987c128f6ec4bd24011388ef9ece179},
  doi       = {10.1109/VR59515.2025.00025},
  isbn      = {9798331536459},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
  pages     = {1--11},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings discuss the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
  title     = {I Hear, See, Speak \& Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-{AI} Communication},
  author    = {Li, K. and Mostajeran, F. and Rings, S. and Kruse, L. and Schmidt, S. and Arz, M. and Wolf, E. and Steinicke, F.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2FVRW66409.2025.00469&partnerID=40&md5=bffaee22da4891b9faf2ac053efca066},
  doi       = {10.1109/VRW66409.2025.00469},
  isbn      = {9798331514846},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
  pages     = {1648--1649},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yokoyama, N.; Kimura, R.; Nakajima, T.
ViGen: Defamiliarizing Everyday Perception for Discovering Unexpected Insights Proceedings Article
In: H., Degen; S., Ntoa (Ed.): Lect. Notes Comput. Sci., pp. 397–417, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303193417-9 (ISBN).
Abstract | Links | BibTeX | Tags: Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent
@inproceedings{yokoyama_vigen_2025,
  title     = {{ViGen}: Defamiliarizing Everyday Perception for Discovering Unexpected Insights},
  author    = {Yokoyama, N. and Kimura, R. and Nakajima, T.},
  editor    = {Degen, H. and Ntoa, S.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007760030&doi=10.1007%2f978-3-031-93418-6_26&partnerID=40&md5=dee6f54688284313a45579aab5f934d6},
  doi       = {10.1007/978-3-031-93418-6_26},
  isbn      = {978-3-031-93417-9},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  volume    = {15821 LNAI},
  pages     = {397--417},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {This paper proposes ViGen, an Augmented Reality (AR) and Artificial Intelligence (AI)-enhanced vision system designed to facilitate defamiliarization in daily life. Humans rely on sight to gather information, think, and act, yet the act of seeing often becomes passive in daily life. Inspired by Victor Shklovsky’s concept of defamiliarization and the artistic technique of photomontage, ViGen seeks to disrupt habitual perceptions. It achieves this by overlaying semi-transparent, AI-generated images, created based on the user’s view, through an AR display. The system is evaluated by several structured interviews, in which participants experience ViGen in three different scenarios. Results indicate that AI-generated visuals effectively supported defamiliarization by transforming ordinary scenes into unfamiliar ones. However, the user’s familiarity with a place plays a significant role. Also, while the feature that adjusts the transparency of overlaid images enhances safety, its limitations in dynamic environments suggest the need for further research across diverse cultural and geographic contexts. This study demonstrates the potential of AI-augmented vision systems to stimulate new ways of seeing, offering insights for further development in visual augmentation technologies. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
  keywords  = {Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Mendoza, A. P.; Quiroga, K. J. Barrios; Celis, S. D. Solano; M., C. G. Quintero
NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study Journal Article
In: IEEE Access, vol. 13, pp. 141461–141483, 2025, ISSN: 21693536 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality
@article{mendoza_naia_2025,
  title     = {{NAIA}: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study},
  author    = {Mendoza, A. P. and Barrios Quiroga, K. J. and Solano Celis, S. D. and Quintero M., C. G.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013598763&doi=10.1109%2FACCESS.2025.3597565&partnerID=40&md5=7ad6b037cfedb943fc026642c4854284},
  doi       = {10.1109/ACCESS.2025.3597565},
  issn      = {2169-3536},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Access},
  volume    = {13},
  pages     = {141461--141483},
  abstract  = {Virtual assistants have become essential tools for improving productivity and efficiency in various domains. This paper presents NAIA (Nimble Artificial Intelligence Assistant), an advanced multi-role and multi-task virtual assistant enhanced with artificial intelligence, designed to serve a university community case study. The system integrates AI technologies including Large Language Models (LLM), Computer Vision, and voice processing to create an immersive and efficient interaction through animated digital avatars. NAIA features five specialized roles: researcher, receptionist, personal skills trainer, personal assistant, and university guide, each equipped with specific capabilities to support different aspects of academic life. The system’s Computer Vision capabilities enable it to comment on users’ physical appearance and environment, enriching the interaction. Through natural language processing and voice interaction, NAIA aims to improve productivity and efficiency within the university environment while providing personalized assistance through a ubiquitous platform accessible across multiple devices. NAIA is evaluated through a user experience survey involving 30 participants with different demographic characteristics, this is the most accepted way by the community to evaluate this type of solution. Participants give their feedback after using one role of NAIA after using it for 30 minutes. The experiment showed that 90% of the participants considered NAIA-assisted tasks of higher quality and, on average, NAIA has a score of 4.27 out of 5 on user satisfaction. Participants particularly appreciated the assistant’s visual recognition, natural conversation flow, and user interaction capabilities. Results demonstrate NAIA’s capabilities and effectiveness across the five roles. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
  keywords  = {Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Suzuki, R.; González-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958 (ISBN); 9798400713941 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
  title     = {Everyday {AR} through {AI}-in-the-Loop},
  author    = {Suzuki, R. and González-Franco, M. and Sra, M. and Lindlbauer, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2F3706599.3706741&partnerID=40&md5=a5369bb371ce25feca340b4f5952e6a6},
  doi       = {10.1145/3706599.3706741},
  isbn      = {9798400713958; 9798400713941},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Conf Hum Fact Comput Syst Proc},
  publisher = {Association for Computing Machinery},
  abstract  = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}