AHCI RESEARCH GROUP

Publications

Papers published in international journals,
proceedings of conferences, workshops and books.

OUR RESEARCH

Scientific Publications

How to

Here you can find the complete list of our publications.
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.

Show all

2025

Buldu, K. B.; Özdel, S.; Lau, K. H. Carrie; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.

CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article

In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578.

Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality

% Conference paper record. Names are in "Last, First" form, the ISBN is stored
% as bare digits, and the page range uses "--". The `year`/`date` pair and the
% `pubstate`/`tppubtype` fields are kept exactly as found in the record.
@inproceedings{buldu_cuify_2025,
  author    = {Buldu, K. B. and Özdel, S. and Lau, K. H. Carrie and Wang, M. and Saad, D. and Schönborn, S. and Boch, A. and Kasneci, E. and Bozkir, E.},
  title     = {{CUIfy} the {XR}: An Open-Source Package to Embed {LLM}-Powered Conversational Agents in {XR}},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {192--197},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2025},
  date      = {2025-01-01},
  doi       = {10.1109/AIxVR63409.2025.00037},
  isbn      = {9798331521578},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2FAIxVR63409.2025.00037&partnerID=40&md5=f11f49480d075aee04ec44cedc984844},
  abstract  = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop as personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Mendoza, A. P.; Barrios Quiroga, K. J.; Solano Celis, S. D.; Quintero M., C. G.

NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study Journal Article

In: IEEE Access, vol. 13, pp. 141461–141483, 2025, ISSN: 2169-3536, (Publisher: Institute of Electrical and Electronics Engineers Inc.).

Abstract | Links | BibTeX | Tags: Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality

% Journal article record. Names are in "Last, First" form so the trailing
% initial in "Quintero M." is not parsed as the whole surname.
% NOTE(review): "Barrios Quiroga" and "Solano Celis" are assumed to be compound
% surnames -- confirm against the published paper.
% The ISSN uses the NNNN-NNNN form, the page range uses "--", and the publisher
% sits in `publisher` instead of `note`. The `year`/`date` pair and the
% `pubstate`/`tppubtype` fields are kept exactly as found in the record.
@article{mendoza_naia_2025,
  author    = {Mendoza, A. P. and Barrios Quiroga, K. J. and Solano Celis, S. D. and Quintero M., C. G.},
  title     = {{NAIA}: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study},
  journal   = {IEEE Access},
  volume    = {13},
  pages     = {141461--141483},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2025},
  date      = {2025-01-01},
  doi       = {10.1109/ACCESS.2025.3597565},
  issn      = {2169-3536},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013598763&doi=10.1109%2FACCESS.2025.3597565&partnerID=40&md5=7ad6b037cfedb943fc026642c4854284},
  abstract  = {Virtual assistants have become essential tools for improving productivity and efficiency in various domains. This paper presents NAIA (Nimble Artificial Intelligence Assistant), an advanced multi-role and multi-task virtual assistant enhanced with artificial intelligence, designed to serve a university community case study. The system integrates AI technologies including Large Language Models (LLM), Computer Vision, and voice processing to create an immersive and efficient interaction through animated digital avatars. NAIA features five specialized roles: researcher, receptionist, personal skills trainer, personal assistant, and university guide, each equipped with specific capabilities to support different aspects of academic life. The system’s Computer Vision capabilities enable it to comment on users’ physical appearance and environment, enriching the interaction. Through natural language processing and voice interaction, NAIA aims to improve productivity and efficiency within the university environment while providing personalized assistance through a ubiquitous platform accessible across multiple devices. NAIA is evaluated through a user experience survey involving 30 participants with different demographic characteristics, this is the most accepted way by the community to evaluate this type of solution. Participants give their feedback after using one role of NAIA after using it for 30 minutes. The experiment showed that 90% of the participants considered NAIA-assisted tasks of higher quality and, on average, NAIA has a score of 4.27 out of 5 on user satisfaction. Participants particularly appreciated the assistant’s visual recognition, natural conversation flow, and user interaction capabilities. Results demonstrate NAIA’s capabilities and effectiveness across the five roles. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}

2024

Christiansen, F. R.; Hollensberg, L. Nø.; Jensen, N. B.; Julsgaard, K.; Jespersen, K. N.; Nikolov, I.

Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options Proceedings Article

In: Spencer, S. N. (Ed.): Proc. ACM Symp. Virtual Reality Softw. Technol. VRST, Association for Computing Machinery, 2024, ISBN: 9798400705359.

Abstract | Links | BibTeX | Tags: Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR

% Conference paper record. Author and editor names are in "Last, First" form,
% the ISBN is stored as bare digits, and acronyms in the title are
% brace-protected. The `year`/`date` pair and the `pubstate`/`tppubtype` fields
% are kept exactly as found in the record.
@inproceedings{christiansen_exploring_2024,
  author    = {Christiansen, F. R. and Hollensberg, L. Nø. and Jensen, N. B. and Julsgaard, K. and Jespersen, K. N. and Nikolov, I.},
  title     = {Exploring Presence in Interactions with {LLM}-Driven {NPCs}: A Comparative Study of Speech Recognition and Dialogue Options},
  editor    = {Spencer, S. N.},
  booktitle = {Proc. ACM Symp. Virtual Reality Softw. Technol. VRST},
  publisher = {Association for Computing Machinery},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1145/3641825.3687716},
  isbn      = {9798400705359},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212512351&doi=10.1145%2F3641825.3687716&partnerID=40&md5=96540f274db6d000f4092edc5a07e241},
  abstract  = {Combining modern technologies like large-language models (LLMs), speech-to-text, and text-to-speech can enhance immersion in virtual reality (VR) environments. However, challenges exist in effectively implementing LLMs and educating users. This paper explores implementing LLM-powered virtual social actors and facilitating user communication. We developed a murder mystery game where users interact with LLM-based non-playable characters (NPCs) through interrogation, clue-gathering, and exploration. Two versions were tested: one using speech recognition and another with traditional dialog boxes. While both provided similar social presence, users felt more immersed with speech recognition but found it overwhelming, while the dialog version was more challenging. Slow NPC response times were a source of frustration, highlighting the need for faster generation or better masking for a seamless experience. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}