AHCI RESEARCH GROUP

Publications

Papers published in international journals,
proceedings of conferences, workshops and books.

OUR RESEARCH

Scientific Publications

How to

Here you can find the complete list of our publications.
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.

Show all

2025

Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.

Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article

In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578.

Abstract | Links | BibTeX | Tags: Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method

@inproceedings{hafner_leveraging_2025,
  title     = {Leveraging Virtual Prototypes for Training Data Collection in {LLM}-Based Voice User Interface Development for Machines},
  author    = {Häfner, P. and Eisenlohr, F. and Karande, A. and Grethler, M. and Mukherjee, A. and Tran, N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2FAIxVR63409.2025.00054&partnerID=40&md5=464de1fae1a7a9dbc4362b0a984e0cd4},
  doi       = {10.1109/AIxVR63409.2025.00054},
  isbn      = {9798331521578},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {281--285},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machine. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.

Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article

In: Del Bue, A.; Canton, C.; Pont-Tuset, J.; Tommasi, T. (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-91571-0.

Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization

@inproceedings{lau_evaluating_2025,
  title     = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional {Scottish} Curling},
  author    = {Lau, K. H. C. and Bozkir, E. and Gao, H. and Kasneci, E.},
  editor    = {Del Bue, A. and Canton, C. and Pont-Tuset, J. and Tommasi, T.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
  doi       = {10.1007/978-3-031-91572-7_11},
  isbn      = {978-3-031-91571-0},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  volume    = {15628},
  pages     = {177--195},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
  keywords  = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Zhu, X. T.; Cheerman, H.; Cheng, M.; Kiami, S. R.; Chukoskie, L.; McGivney, E.

Designing VR Simulation System for Clinical Communication Training with LLMs-Based Embodied Conversational Agents Proceedings Article

In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958; 9798400713941.

Abstract | Links | BibTeX | Tags: Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems

@inproceedings{zhu_designing_2025,
  title     = {Designing {VR} Simulation System for Clinical Communication Training with {LLMs}-Based Embodied Conversational Agents},
  author    = {Zhu, X. T. and Cheerman, H. and Cheng, M. and Kiami, S. R. and Chukoskie, L. and McGivney, E.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005754066&doi=10.1145%2F3706599.3719693&partnerID=40&md5=6ad72d5adf98c2ca2437b5a3f6508a88},
  doi       = {10.1145/3706599.3719693},
  isbn      = {9798400713958; 9798400713941},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Conf Hum Fact Comput Syst Proc},
  publisher = {Association for Computing Machinery},
  abstract  = {VR simulation in Health Professions (HP) education demonstrates huge potential, but fixed learning content with little customization limits its application beyond lab environments. To address these limitations in the context of VR for patient communication training, we conducted a user-centered study involving semi-structured interviews with advanced HP students to understand their challenges in clinical communication training and perceptions of VR-based solutions. From this, we derived design insights emphasizing the importance of realistic scenarios, simple interactions, and unpredictable dialogues. Building on these insights, we developed the Virtual AI Patient Simulator (VAPS), a novel VR system powered by Large Language Models (LLMs) and Embodied Conversational Agents (ECAs), supporting dynamic and customizable patient interactions for immersive learning. We also provided an example of how clinical professors could use user-friendly design forms to create personalized scenarios that align with course objectives in VAPS and discuss future implications of integrating AI-driven technologies into VR education. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Dong, W.; Li, S.; Zheng, P.; Liu, L.; Chen, S.

A 3DGS and LLM-based physical-to-virtual approach for human-robot interactive manufacturing Journal Article

In: Manufacturing Letters, vol. 44, pp. 121–128, 2025, ISSN: 2213-8463, (Publisher: Elsevier Ltd).

Abstract | Links | BibTeX | Tags: 3D modeling, Gaussian distribution, Gaussians, High level languages, Human computer interaction, Human Robot Interaction, Human robots, Humans-robot interactions, Industrial robots, Language Model, Large language model, Man machine systems, Metaverses, Model-based OPC, Natural language processing systems, Physical-to-virtual, Robot programming, Robotic assembly, Splatting, Three dimensional computer graphics, Three-dimensional gaussian splatting

@article{dong_3dgs_2025,
  title     = {A {3DGS} and {LLM}-based physical-to-virtual approach for human-robot interactive manufacturing},
  author    = {Dong, W. and Li, S. and Zheng, P. and Liu, L. and Chen, S.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105014947667&doi=10.1016%2Fj.mfglet.2025.06.016&partnerID=40&md5=8fd8b07c1f2c71e46b396d2e244bf701},
  doi       = {10.1016/j.mfglet.2025.06.016},
  issn      = {2213-8463},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Manufacturing Letters},
  volume    = {44},
  pages     = {121--128},
  publisher = {Elsevier Ltd},
  abstract  = {With the exploration of digital transformation in the industry, the introduction of the industrial metaverse is bringing unprecedented opportunities and challenges to the manufacturing industry. In the industrial metaverse, humans can interact safely and naturally with robots in high-fidelity digital environments, enabling non-technical operators to quickly validate industrial scenarios and help optimize decision-making and production processes. However, the complexity of Three-Dimensional (3D) modeling poses a challenge to achieving this goal. Additionally, programming-based Human Robot Interaction (HRI) also presents obstacles, as operators need significant time to learn how to control robots. Therefore, this paper proposes a 3D Gaussian Splatting (3DGS) and Large Language Model (LLM)-based physical-to-virtual approach for human-robot interactive manufacturing, which further facilitates digital interaction for non-technical operators in manufacturing environments. Specifically, 3DGS is first used for rapid visualization and reconstruction of the overall scene, achieving new perspective rendering and providing a gaussian ellipsoid representation. Then mesh extraction algorithms based on gaussian representation are used to build a physical-to-virtual transfer framework. Finally, LLM is utilized for understanding natural language commands and generating virtual robot Python programming to complete robot assembly tasks. This framework is implemented in the Isaac Sim simulator, and the case study shows that the proposed framework can quickly and accurately complete physical-to-virtual transfer and accomplish robot assembly manufacturing tasks in the simulator with low code. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {3D modeling, Gaussian distribution, Gaussians, High level languages, Human computer interaction, Human Robot Interaction, Human robots, Humans-robot interactions, Industrial robots, Language Model, Large language model, Man machine systems, Metaverses, Model-based OPC, Natural language processing systems, Physical-to-virtual, Robot programming, Robotic assembly, Splatting, Three dimensional computer graphics, Three-dimensional gaussian splatting},
  pubstate  = {published},
  tppubtype = {article}
}

2024

Si, J.; Yang, S.; Song, J.; Son, S.; Lee, S.; Kim, D.; Kim, S.

Generating and Integrating Diffusion Model-Based Panoramic Views for Virtual Interview Platform Proceedings Article

In: IEEE Int. Conf. Artif. Intell. Eng. Technol., IICAIET, pp. 343–348, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350389692.

Abstract | Links | BibTeX | Tags: AI, Deep learning, Diffusion, Diffusion Model, Diffusion technology, Digital elevation model, High quality, Manual process, Model-based OPC, New approaches, Panorama, Panoramic views, Virtual environments, Virtual Interview, Virtual Reality

@inproceedings{si_generating_2024,
  title     = {Generating and Integrating Diffusion Model-Based Panoramic Views for Virtual Interview Platform},
  author    = {Si, J. and Yang, S. and Song, J. and Son, S. and Lee, S. and Kim, D. and Kim, S.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209663031&doi=10.1109%2FIICAIET62352.2024.10730450&partnerID=40&md5=a8baef1851b8ad9b37e4df4e4b1735e2},
  doi       = {10.1109/IICAIET62352.2024.10730450},
  isbn      = {9798350389692},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {IEEE Int. Conf. Artif. Intell. Eng. Technol., IICAIET},
  pages     = {343--348},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {This paper presents a new approach to improve virtual interview platforms in education, which are gaining significant attention. This study aims to simplify the complex manual process of equipment setup to enhance the realism and reliability of virtual interviews. To this end, this study proposes a method for automatically constructing 3D virtual interview environments using diffusion technology in generative AI. In this research, we exploit a diffusion model capable of generating high-quality panoramic images. We generate images of interview rooms capable of delivering immersive interview experiences via refined text prompts. The resulting imagery is then reconstituted 3D VR content utilizing the Unity engine, facilitating enhanced interaction and engagement within virtual environments. This research compares and analyzes various methods presented in related research and proposes a new process for efficiently constructing 360-degree virtual environments. When wearing Oculus Quest 2 and experiencing the virtual environment created using the proposed method, a high sense of immersion was experienced, similar to the actual interview environment. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {AI, Deep learning, Diffusion, Diffusion Model, Diffusion technology, Digital elevation model, High quality, Manual process, Model-based OPC, New approaches, Panorama, Panoramic views, Virtual environments, Virtual Interview, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Min, Y.; Jeong, J.-W.

Public Speaking Q&A Practice with LLM-Generated Personas in Virtual Reality Proceedings Article

In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 493–496, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798331506919.

Abstract | Links | BibTeX | Tags: Digital elevation model, Economic and social effects, Language Model, Large language model-based persona generation, LLM-based Persona Generation, Model-based OPC, Personnel training, Power, Practice systems, Presentation Anxiety, Public speaking, Q&A practice, user experience, Users' experiences, Virtual environments, Virtual Reality, VR training

@inproceedings{min_public_2024,
  title     = {Public Speaking {Q\&A} Practice with {LLM}-Generated Personas in Virtual Reality},
  author    = {Min, Y. and Jeong, J.-W.},
  editor    = {Eck, U. and Sra, M. and Stefanucci, J. and Sugimoto, M. and Tatzgern, M. and Williams, I.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214393734&doi=10.1109%2FISMAR-Adjunct64951.2024.00143&partnerID=40&md5=62583f87d8d870d6e44a13fe311a761d},
  doi       = {10.1109/ISMAR-Adjunct64951.2024.00143},
  isbn      = {9798331506919},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
  pages     = {493--496},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {This paper introduces a novel VR-based Q&A practice system that harnesses the power of Large Language Models (LLMs). We support Q&A practice for upcoming public speaking by providing an immersive VR training environment populated with LLM-generated audiences, each capable of posing diverse and realistic questions based on different personas. We conducted a pilot user study involving 20 participants who engaged in VR-based Q&A practice sessions. The sessions featured a variety of questions regarding presentation material provided by the participants, all of which were generated by LLM-based personas. Through post-surveys and interviews, we evaluated the effectiveness of the proposed method. The participants valued the system for engagement and focus while also identifying several areas for improvement. Our study demonstrated the potential of integrating VR and LLMs to create a powerful, immersive tool for Q&A practice. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Digital elevation model, Economic and social effects, Language Model, Large language model-based persona generation, LLM-based Persona Generation, Model-based OPC, Personnel training, Power, Practice systems, Presentation Anxiety, Public speaking, Q&A practice, user experience, Users' experiences, Virtual environments, Virtual Reality, VR training},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Constantinides, N.; Constantinides, A.; Koukopoulos, D.; Fidas, C.; Belk, M.

CulturAI: Exploring Mixed Reality Art Exhibitions with Large Language Models for Personalized Immersive Experiences Proceedings Article

In: UMAP - Adjun. Proc. ACM Conf. User Model., Adapt. Personal., pp. 102–105, Association for Computing Machinery, Inc, 2024, ISBN: 9798400704666.

Abstract | Links | BibTeX | Tags: Computational Linguistics, Immersive, Language Model, Large language model, large language models, Mixed reality, Mixed reality art, Mixed reality technologies, Model-based OPC, User Experience Evaluation, User experience evaluations, User interfaces, User study, Users' experiences

@inproceedings{constantinides_culturai_2024,
  title     = {{CulturAI}: Exploring Mixed Reality Art Exhibitions with Large Language Models for Personalized Immersive Experiences},
  author    = {Constantinides, N. and Constantinides, A. and Koukopoulos, D. and Fidas, C. and Belk, M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85198910809&doi=10.1145%2F3631700.3664874&partnerID=40&md5=8e2439a6a0dd4fdf808e5f418fbb0c01},
  doi       = {10.1145/3631700.3664874},
  isbn      = {9798400704666},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {UMAP - Adjun. Proc. ACM Conf. User Model., Adapt. Personal.},
  pages     = {102--105},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {Mixed Reality (MR) technologies have transformed the way in which we interact and engage with digital content, offering immersive experiences that blend the physical and virtual worlds. Over the past years, there has been increasing interest in employing Artificial Intelligence (AI) technologies to improve user experience and trustworthiness in cultural contexts. However, the integration of Large Language Models (LLMs) into MR applications within the Cultural Heritage (CH) domain is relatively underexplored. In this work, we present an investigation into the integration of LLMs within MR environments, focusing on the context of virtual art exhibitions. We implemented a HoloLens MR application, which enables users to explore artworks while interacting with an LLM through voice. To evaluate the user experience and perceived trustworthiness of individuals engaging with an LLM-based virtual art guide, we adopted a between-subject study design, in which participants were randomly assigned to either the LLM-based version or a control group using conventional interaction methods. The LLM-based version allows users to pose inquiries about the artwork displayed, ranging from details about the creator to information about the artwork's origin and historical significance. This paper presents the technical aspects of integrating LLMs within MR applications and evaluates the user experience and perceived trustworthiness of this approach in enhancing the exploration of virtual art exhibitions. Results of an initial evaluation provide evidence about the positive aspect of integrating LLMs in MR applications. Findings of this work contribute to the advancement of MR technologies for the development of future interactive personalized art experiences. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {Computational Linguistics, Immersive, Language Model, Large language model, large language models, Mixed reality, Mixed reality art, Mixed reality technologies, Model-based OPC, User Experience Evaluation, User experience evaluations, User interfaces, User study, Users' experiences},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Hong, J.; Lee, Y.; Kim, D. H.; Choi, D.; Yoon, Y.-J.; Lee, G.-C.; Lee, Z.; Kim, J.

A Context-Aware Onboarding Agent for Metaverse Powered by Large Language Models Proceedings Article

In: Vallgarda, A.; Jonsson, L.; Fritsch, J.; Alaoui, S. F.; Le Dantec, C. A. (Ed.): Proc. ACM Des. Interact. Syst. Conf., pp. 1857–1874, Association for Computing Machinery, Inc, 2024, ISBN: 9798400705830.

Abstract | Links | BibTeX | Tags: 'current, Computational Linguistics, Context- awareness, Context-Aware, context-awareness, conversational agent, Conversational Agents, Divergents, Language Model, Large-language model, large-language models, Metaverse, Metaverses, Model-based OPC, Onboarding, User interfaces, Virtual Reality

@inproceedings{hong_context-aware_2024,
  title     = {A Context-Aware Onboarding Agent for Metaverse Powered by Large Language Models},
  author    = {Hong, J. and Lee, Y. and Kim, D. H. and Choi, D. and Yoon, Y.-J. and Lee, G.-C. and Lee, Z. and Kim, J.},
  editor    = {Vallgarda, A. and Jonsson, L. and Fritsch, J. and Alaoui, S. F. and Le Dantec, C. A.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85200340104&doi=10.1145%2F3643834.3661579&partnerID=40&md5=1c1f06827a32d9ed7310636d42874475},
  doi       = {10.1145/3643834.3661579},
  isbn      = {9798400705830},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. ACM Des. Interact. Syst. Conf.},
  pages     = {1857--1874},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {One common asset of metaverse is that users can freely explore places and actions without linear procedures. Thus, it is hard yet important to understand the divergent challenges each user faces when onboarding metaverse. Our formative study (N = 16) shows that first-time users ask questions about metaverse that concern 1) a short-term spatiotemporal context, regarding the user’s current location, recent conversation, and actions, and 2) a long-term exploration context regarding the user’s experience history. Based on the findings, we present PICAN, a Large Language Model-based pipeline that generates context-aware answers to users when onboarding metaverse. An ablation study (N = 20) reveals that PICAN’s usage of context made responses more useful and immersive than those generated without contexts. Furthermore, a user study (N = 21) shows that the use of long-term exploration context promotes users’ learning about the locations and activities within the virtual environment. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {'current, Computational Linguistics, Context- awareness, Context-Aware, context-awareness, conversational agent, Conversational Agents, Divergents, Language Model, Large-language model, large-language models, Metaverse, Metaverses, Model-based OPC, Onboarding, User interfaces, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}