AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference proceedings, workshops, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links, and BibTeX record for each paper.
2025
Buldu, K. B.; Özdel, S.; Lau, K. H. C.; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.
CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578.
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality
@inproceedings{buldu_cuify_2025,
title = {CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR},
author = {K. B. Buldu and S. Özdel and K. H. C. Lau and M. Wang and D. Saad and S. Schönborn and A. Boch and E. Kasneci and E. Bozkir},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2FAIxVR63409.2025.00037&partnerID=40&md5=f11f49480d075aee04ec44cedc984844},
doi = {10.1109/AIxVR63409.2025.00037},
isbn = {9798331521578},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {192–197},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop into personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify},
keywords = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.
Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method
@inproceedings{hafner_leveraging_2025,
title = {Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines},
author = {P. Häfner and F. Eisenlohr and A. Karande and M. Grethler and A. Mukherjee and N. Tran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2FAIxVR63409.2025.00054&partnerID=40&md5=464de1fae1a7a9dbc4362b0a984e0cd4},
doi = {10.1109/AIxVR63409.2025.00054},
isbn = {9798331521578},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {281–285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machines. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings.},
keywords = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331536459.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality
@inproceedings{chen_analyzing_2025,
title = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
author = {J. Chen and J. Grubert and P. O. Kristensson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2FVR59515.2025.00045&partnerID=40&md5=9db6769cd401503605578c4b711152b9},
doi = {10.1109/VR59515.2025.00045},
isbn = {9798331536459},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {206–216},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems which involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments.},
keywords = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Kim, Y.; Aamir, Z.; Singh, M.; Boorboor, S.; Mueller, K.; Kaufman, A. E.
Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2756–2766, 2025, ISSN: 1077-2626 (Publisher: IEEE Computer Society).
Abstract | Links | BibTeX | Tags: adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages
@article{kim_explainable_2025,
title = {Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework},
author = {Y. Kim and Z. Aamir and M. Singh and S. Boorboor and K. Mueller and A. E. Kaufman},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003815583&doi=10.1109%2FTVCG.2025.3549537&partnerID=40&md5=bc5ac38eb19faa224282cf385f43799f},
doi = {10.1109/TVCG.2025.3549537},
issn = {1077-2626},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2756–2766},
abstract = {We present Explainable XR, an end-to-end framework for analyzing user behavior in diverse eXtended Reality (XR) environments by leveraging Large Language Models (LLMs) for data interpretation assistance. Existing XR user analytics frameworks face challenges in handling cross-virtuality - AR, VR, MR - transitions, multi-user collaborative application scenarios, and the complexity of multimodal data. Explainable XR addresses these challenges by providing a virtuality-agnostic solution for the collection, analysis, and visualization of immersive sessions. We propose three main components in our framework: (1) A novel user data recording schema, called User Action Descriptor (UAD), that can capture the users' multimodal actions, along with their intents and the contexts; (2) a platform-agnostic XR session recorder, and (3) a visual analytics interface that offers LLM-assisted insights tailored to the analysts' perspectives, facilitating the exploration and analysis of the recorded XR session data. We demonstrate the versatility of Explainable XR through five use-case scenarios, in both individual and collaborative XR applications across virtualities. Our technical evaluation and user studies show that Explainable XR provides a highly usable analytics solution for understanding user actions and delivering multifaceted, actionable insights into user behaviors in immersive environments.},
note = {Publisher: IEEE Computer Society},
keywords = {adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages},
pubstate = {published},
tppubtype = {article}
}
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 1077-2626 (Publisher: IEEE Computer Society).
Abstract | Links | BibTeX | Tags: % reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality
@article{chen_llmer_2025,
title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
author = {J. Chen and X. Wu and T. Lan and B. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2FTVCG.2025.3549549&partnerID=40&md5=50597473616678390f143a33082a13d3},
doi = {10.1109/TVCG.2025.3549549},
issn = {1077-2626},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2715–2724},
abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. This leads not only to increased costs with pay-per-use models, but also to elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization.},
note = {Publisher: IEEE Computer Society},
keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331536459.
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
title = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
author = {Z. Li and H. Zhang and C. Peng and R. Peiris},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2FVR59515.2025.00025&partnerID=40&md5=1987c128f6ec4bd24011388ef9ece179},
doi = {10.1109/VR59515.2025.00025},
isbn = {9798331536459},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {1–11},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings discuss the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios.},
keywords = {Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Sousa, R. T.; Oliveira, E. A. M.; Cintra, L. M. F.; Galvão Filho, A. R. G.
Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 168–171, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846.
Abstract | Links | BibTeX | Tags: AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training
@inproceedings{sousa_transformative_2025,
title = {Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work},
author = {R. T. Sousa and E. A. M. Oliveira and L. M. F. Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140551&doi=10.1109%2FVRW66409.2025.00042&partnerID=40&md5=a8dbe15493fd8361602d049f2b09efe3},
doi = {10.1109/VRW66409.2025.00042},
isbn = {9798331514846},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {168–171},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The reintegration of incarcerated individuals into society presents significant challenges, particularly in addressing barriers related to vocational training, social skill development, and emotional rehabilitation. Immersive technologies, such as Virtual Reality and Augmented Reality, combined with generative Artificial Intelligence (AI) and Large Language Models, offer innovative opportunities to enhance these areas. These technologies create practical, controlled environments for skill acquisition and behavioral training, while generative AI enables dynamic, personalized, and adaptive experiences. This paper explores the broader potential of these integrated technologies in supporting rehabilitation, reducing recidivism, and fostering sustainable employment opportunities. These initiatives align with the overarching equity objective of ensuring Decent Work for All, reinforcing the commitment to inclusive and equitable progress across diverse communities through the transformative potential of immersive and AI-driven systems in correctional settings.},
keywords = {AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training},
pubstate = {published},
tppubtype = {inproceedings}
}
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2FVRW66409.2025.00038&partnerID=40&md5=34311e63349697801caf849bc231e879},
doi = {10.1109/VRW66409.2025.00038},
isbn = {9798331514846},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, H.; Xie, Y.; Kasneci, E.
PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education Journal Article
In: International Journal of Human-Computer Interaction, 2025, ISSN: 1044-7318; 1532-7590 (Publisher: Taylor and Francis Ltd.).
Abstract | Links | BibTeX | Tags: Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization
@article{gao_pervrml_2025,
title = {PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education},
author = {H. Gao and Y. Xie and E. Kasneci},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005776517&doi=10.1080%2F10447318.2025.2504188&partnerID=40&md5=27accdeba3e1e2202fc1102053d54b7c},
doi = {10.1080/10447318.2025.2504188},
issn = {1044-7318; 1532-7590},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human-Computer Interaction},
abstract = {The advent of large language models (LLMs) such as ChatGPT has demonstrated significant potential for advancing educational technologies. Recently, growing interest has emerged in integrating ChatGPT with virtual reality (VR) to provide interactive and dynamic learning environments. This study explores the effectiveness of ChatGPT-driven VR in facilitating machine learning education through PerVRML. PerVRML incorporates a ChatGPT-powered avatar that provides real-time assistance and uses LLMs to personalize learning paths based on various sensor data from VR. A between-subjects design was employed to compare two learning modes: personalized and non-personalized. Quantitative data were collected from assessments, user experience surveys, and interaction metrics. The results indicate that while both learning modes supported learning effectively, ChatGPT-powered personalization significantly improved learning outcomes and had distinct impacts on user feedback. These findings underscore the potential of ChatGPT-enhanced VR to deliver adaptive and personalized educational experiences.},
note = {Publisher: Taylor and Francis Ltd.},
keywords = {Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {article}
}
Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.
Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article
In: Del Bue, A.; Canton, C.; Pont-Tuset, J.; Tommasi, T. (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-91571-0.
Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization
@inproceedings{lau_evaluating_2025,
title = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling},
author = {K. H. C. Lau and E. Bozkir and H. Gao and E. Kasneci},
editor = {A. Del Bue and C. Canton and J. Pont-Tuset and T. Tommasi},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
doi = {10.1007/978-3-031-91572-7_11},
issn = {0302-9743},
isbn = {978-3-031-91571-0},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15628 LNCS},
pages = {177–195},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: Santoro, C.; Schmidt, A.; Matera, M.; Bellucci, A. (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-95451-1.
Abstract | Links | BibTeX | Tags: Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
title = {Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation},
author = {A. Carcangiu and M. Manca and J. Mereu and C. Santoro and L. Simeoli and L. D. Spano},
editor = {C. Santoro and A. Schmidt and M. Matera and A. Bellucci},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
doi = {10.1007/978-3-031-95452-8_4},
issn = {0302-9743},
isbn = {978-3-031-95451-1},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15713 LNCS},
pages = {59–79},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction settings (keywords, difficulties in expressing conditions, task success), even though we observed different adaptations for each setting (verbal structure, event-first vs. action-first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
Tomkou, D.; Fatouros, G.; Andreou, A.; Makridis, G.; Liarokapis, F.; Dardanis, D.; Kiourtis, A.; Soldatos, J.; Kyriazis, D.
Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents Proceedings Article
In: pp. 1050–1056, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331543723.
Abstract | Links | BibTeX | Tags: Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Language Model, Large language model, large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing
@inproceedings{tomkou_bridging_2025,
title = {Bridging Industrial Expertise and XR with LLM-Powered Conversational Agents},
author = {D. Tomkou and G. Fatouros and A. Andreou and G. Makridis and F. Liarokapis and D. Dardanis and A. Kiourtis and J. Soldatos and D. Kyriazis},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013837767&doi=10.1109%2FDCOSS-IoT65416.2025.00158&partnerID=40&md5=45e35086d8be9d3e16afeade6598d238},
doi = {10.1109/DCOSS-IoT65416.2025.00158},
isbn = {9798331543723},
year = {2025},
date = {2025-01-01},
pages = {1050–1056},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper introduces a novel integration of Retrieval-Augmented Generation (RAG) enhanced Large Language Models (LLMs) with Extended Reality (XR) technologies to address knowledge transfer challenges in industrial environments. The proposed system embeds domain-specific industrial knowledge into XR environments through a natural language interface, enabling hands-free, context-aware expert guidance for workers. We present the architecture of the proposed system consisting of an LLM Chat Engine with dynamic tool orchestration and an XR application featuring voice-driven interaction. Performance evaluation of various chunking strategies, embedding models, and vector databases reveals that semantic chunking, balanced embedding models, and efficient vector stores deliver optimal performance for industrial knowledge retrieval. The system's potential is demonstrated through early implementation in multiple industrial use cases, including robotic assembly, smart infrastructure maintenance, and aerospace component servicing. Results indicate potential for enhancing training efficiency, remote assistance capabilities, and operational guidance in alignment with Industry 5.0's human-centric and resilient approach to industrial development.},
keywords = {Air navigation, Conversational Agents, Conversational AI, Embeddings, Engineering education, Extended reality, Knowledge Management, Knowledge transfer, Language Model, Large language model, large language models, Personnel training, Remote Assistance, Retrieval-Augmented Generation, Robotics, Semantics, Smart manufacturing},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Y.; Yan, Y.; Yang, G.
Bringing Microbiology to Life in Museum: Using Mobile VR and LLM-Powered Virtual Character for Children's Science Learning Proceedings Article
In: Chui, K. T.; Jaikaeo, C.; Niramitranon, J.; Kaewmanee, W.; Ng, K.-K.; Ongkunaruk, P. (Ed.): pp. 83–87, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331595500.
Abstract | Links | BibTeX | Tags: Computer aided instruction, E-Learning, Engineering education, Experimental groups, Immersive technologies, Informal learning, Language Model, Large language model, large language models, Learning systems, Microbiology, Mobile virtual reality, Museum, Museums, Science education, Science learning, Virtual addresses, Virtual character, Virtual Reality, Virtual reality system
@inproceedings{chen_bringing_2025,
title = {Bringing Microbiology to Life in Museum: Using Mobile VR and LLM-Powered Virtual Character for Children's Science Learning},
author = {Y. Chen and Y. Yan and G. Yang},
editor = {K. T. Chui and C. Jaikaeo and J. Niramitranon and W. Kaewmanee and K. -K. Ng and P. Ongkunaruk},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105015708152&doi=10.1109%2FISET65607.2025.00025&partnerID=40&md5=77ae9a4829656155010abc280a817a72},
doi = {10.1109/ISET65607.2025.00025},
isbn = {9798331595500},
year = {2025},
date = {2025-01-01},
pages = {83–87},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Despite the increasing advantages of immersive technology-enhanced informal museum learning in children's science education, the application of mobile virtual reality (MVR) technology combined with large language models (LLMs) in this environment has not yet been fully explored. Furthermore, virtual characters, as intelligent learning assistants, are capable of providing personalized guidance and instant feedback to children through natural language interactions, but their potential in museum learning has yet to be fully tapped. To address these gaps, this study investigates the effectiveness of integrating MVR with an LLM-powered virtual character in promoting children's microbiology learning during museum activities. In this paper, the technology-enhanced POE (Prediction-Observation-Explanation) learning model was studied, and the corresponding MVR system was designed and developed to carry out microbial learning activities. A quasi-experimental design was used with 60 children aged 10-12. The experimental group learned via an MVR system combining an LLM-powered virtual character, while the control group used traditional methods. Results showed the experimental group significantly outperformed the control group in both academic achievement and learning motivation, including attention, confidence, and satisfaction. This provides evidence for using immersive technologies in informal learning and offers insights into applying LLM-powered virtual characters in science education.},
keywords = {Computer aided instruction, E-Learning, Engineering education, Experimental groups, Immersive technologies, Informal learning, Language Model, Large language model, large language models, Learning systems, Microbiology, Mobile virtual reality, Museum, Museums, Science education, Science learning, Virtual addresses, Virtual character, Virtual Reality, Virtual reality system},
pubstate = {published},
tppubtype = {inproceedings}
}
Tovias, E.; Wu, L.
Leveraging Virtual Reality and AI for Enhanced Vocabulary Learning Proceedings Article
In: p. 308, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521646.
Abstract | Links | BibTeX | Tags: Avatar, Avatars, E-Learning, Immersive, Interactive computer graphics, Interactive learning, Language Model, Large language model, large language models, Learning experiences, Real time interactions, Text-based methods, user experience, Users' experiences, Virtual environments, Virtual Reality, Vocabulary learning
@inproceedings{tovias_leveraging_2025,
title = {Leveraging Virtual Reality and AI for Enhanced Vocabulary Learning},
author = {E. Tovias and L. Wu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017563813&doi=10.1109%2FICHMS65439.2025.11154184&partnerID=40&md5=7b79f93d6f8ec222b25a4bfeac408d3a},
doi = {10.1109/ICHMS65439.2025.11154184},
isbn = {9798331521646},
year = {2025},
date = {2025-01-01},
pages = {308},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This study examines the integration of virtual reality (VR) and Artificial Intelligence (AI) to create more immersive, interactive learning experiences. By combining VR's engaging user experience with AI-powered avatars, this research explores how these tools can enhance vocabulary learning compared to traditional text-based methods. Utilizing a Meta Quest 3 headset, Unity for development, and OpenAI's API & ElevenLabs for dynamic dialogues, this system offers personalized, real-time interactions (Fig. 1). The integration of these technologies fosters a bright future, driving significant advancements in the development of highly immersive and effective learning environments.},
keywords = {Avatar, Avatars, E-Learning, Immersive, Interactive computer graphics, Interactive learning, Language Model, Large language model, large language models, Learning experiences, Real time interactions, Text-based methods, user experience, Users' experiences, Virtual environments, Virtual Reality, Vocabulary learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, D.; Ge, M.; Xiang, K.; Zhang, X.; Yang, H.
Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions Journal Article
In: International Journal of Network Management, vol. 35, no. 1, 2025, ISSN: 1055-7148; 1099-1190 (Publisher: John Wiley and Sons Ltd).
Abstract | Links | BibTeX | Tags: Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers
@article{huang_privacy_2025,
title = {Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions},
author = {D. Huang and M. Ge and K. Xiang and X. Zhang and H. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199980257&doi=10.1002%2Fnem.2292&partnerID=40&md5=55662aeedfb216784f0ed398cf8bd2f0},
doi = {10.1002/nem.2292},
issn = {1055-7148; 1099-1190},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Network Management},
volume = {35},
number = {1},
publisher = {John Wiley and Sons Ltd},
abstract = {Large language models (LLMs), with their billions to trillions of parameters, excel in natural language processing, machine translation, dialog systems, and text summarization. These capabilities are increasingly pivotal in the metaverse, where they can enhance virtual interactions and environments. However, their extensive use, particularly in the metaverse's immersive platforms, raises significant privacy concerns. This paper analyzes existing privacy issues in LLMs, vital for both traditional and metaverse applications, and examines protection techniques across the entire life cycle of these models, from training to user deployment. We delve into cryptography, embedding layer encoding, differential privacy and its variants, and adversarial networks, highlighting their relevance in the metaverse context. Specifically, we explore technologies like homomorphic encryption and secure multiparty computation, which are essential for metaverse security. Our discussion on Gaussian differential privacy, Renyi differential privacy, Edgeworth accounting, and the generation of adversarial samples and loss functions emphasizes their importance in the metaverse's dynamic and interactive environments. Lastly, the paper discusses the current research status and future challenges in the security of LLMs within and beyond the metaverse, emphasizing urgent problems and potential areas for exploration.},
note = {Publisher: John Wiley and Sons Ltd},
keywords = {Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers},
pubstate = {published},
tppubtype = {article}
}
Shao, Y.; You, W.; Zheng, Z.; Lu, Y.; Yang, C.; Zhou, Z.
CONDA: Introducing Context-Aware Decision Making Assistant in Virtual Reality for Interior Renovation Journal Article
In: International Journal of Human-Computer Interaction, vol. 41, no. 20, pp. 13239–13255, 2025, ISSN: 1044-7318; 1532-7590 (Publisher: Taylor and Francis Ltd.).
Abstract | Links | BibTeX | Tags: Computing formula, Context-aware decision makings, Contextual cue, Decision making, Decision-Making, Decisions makings, Design solutions, Driving demand, Interior Design, Interior designs, Interiors (building), Language Model, Large language model, large language models, Quality of life, Virtual environments, Virtual Reality
@article{shao_conda_2025,
title = {CONDA: Introducing Context-Aware Decision Making Assistant in Virtual Reality for Interior Renovation},
author = {Y. Shao and W. You and Z. Zheng and Y. Lu and C. Yang and Z. Zhou},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-86000228595&doi=10.1080%2F10447318.2025.2470285&partnerID=40&md5=0801b1854ec172c10a0cb374623cac77},
doi = {10.1080/10447318.2025.2470285},
issn = {1044-7318; 1532-7590},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human-Computer Interaction},
volume = {41},
number = {20},
pages = {13239–13255},
abstract = {Customized interiors enhance quality of life and self-expression, driving demand for VR-based design solutions. However, scant research exists on exploiting contextual cues in VR to aid decision making. Consequently, we propose CONDA, a context-aware assistant that leverages LLMs to support interior renovation decisions. Specifically, we reconstruct users’ homes in VR and provide CONDA with stylistic details and spatial layouts, allowing it to predict furniture labels based on the decision scenario. In addition, we devise various modes to comprehensively express users’ purchasing preferences. Finally, CONDA recommends compatible items based on a label-matching algorithm and generates multi-dimensional explanations. A 30-user study reveals that contextual completeness and preference diversity critically influence recommendation quality and decision behaviors, with 90% of participants praising CONDA’s performance and all expressing daily-use intent. Overall, we validated the efficacy and practicality of CONDA, deriving universal design insights for VR decision-support systems and establishing new research directions. CCS Concepts: Human-centered computing → Virtual reality; Computing methodologies → Natural language generation; Applied computing → Computer-aided design.},
note = {Publisher: Taylor and Francis Ltd.},
keywords = {Computing formula, Context-aware decision makings, Contextual cue, Decision making, Decision-Making, Decisions makings, Design solutions, Driving demand, Interior Design, Interior designs, Interiors (building), Language Model, Large language model, large language models, Quality of life, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Nasser, M.; Gaglio, G. F.; Seidita, V.; Chella, A.
The Art of Replication: Lifelike Avatars with Personalized Conversational Style Journal Article
In: Robotics, vol. 14, no. 3, 2025, ISSN: 2218-6581 (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
Abstract | Links | BibTeX | Tags: Avatar, large language models, Metaverse
@article{nasser_art_2025,
title = {The Art of Replication: Lifelike Avatars with Personalized Conversational Style},
author = {M. Nasser and G. F. Gaglio and V. Seidita and A. Chella},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001408683&doi=10.3390%2Frobotics14030033&partnerID=40&md5=d417766e5621fe85f0f0f1d2ca916256},
doi = {10.3390/robotics14030033},
issn = {2218-6581},
year = {2025},
date = {2025-01-01},
journal = {Robotics},
volume = {14},
number = {3},
abstract = {This study presents an approach for developing digital avatars replicating individuals’ physical characteristics and communicative style, contributing to research on virtual interactions in the metaverse. The proposed method integrates large language models (LLMs) with 3D avatar creation techniques, using what we call the Tree of Style (ToS) methodology to generate stylistically consistent and contextually appropriate responses. Linguistic analysis and personalized voice synthesis enhance conversational and auditory realism. The results suggest that ToS offers a practical alternative to fine-tuning for creating stylistically accurate responses while maintaining efficiency. This study outlines potential applications and acknowledges the need for further work on adaptability and ethical considerations.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {Avatar, large language models, Metaverse},
pubstate = {published},
tppubtype = {article}
}
Xing, Y.; Liu, Q.; Wang, J.; Gómez-Zará, D.
sMoRe: Spatial Mapping and Object Rendering Environment Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 115–119, Association for Computing Machinery, 2025, ISBN: 9798400714092.
Abstract | Links | BibTeX | Tags: Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects
@inproceedings{xing_smore_2025,
title = {sMoRe: Spatial Mapping and Object Rendering Environment},
author = {Y. Xing and Q. Liu and J. Wang and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001670668&doi=10.1145%2F3708557.3716337&partnerID=40&md5=c23b3e19f42dbd8796e43f5ab71e12b6},
doi = {10.1145/3708557.3716337},
isbn = {9798400714092},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {115–119},
publisher = {Association for Computing Machinery},
abstract = {In mixed reality (MR) environments, understanding space and creating virtual objects is crucial to providing an intuitive user experience. This paper introduces sMoRe (Spatial Mapping and Object Rendering Environment), an MR application that combines Generative AI (GenAI) to assist users in creating, placing, and managing virtual objects within physical spaces. sMoRe allows users to use voice or typed text commands to create and place virtual objects using GenAI while specifying spatial constraints. The system employs Large Language Models (LLMs) to interpret users’ commands, analyze the current scene, and identify optimal locations. Additionally, sMoRe integrates a text-to-3D generative model to dynamically create 3D objects based on users’ descriptions. Our user study demonstrates the effectiveness of sMoRe in enhancing user comprehension, interaction, and organization of the MR environment.},
keywords = {Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects},
pubstate = {published},
tppubtype = {inproceedings}
}
Suzuki, R.; González-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958; 9798400713941.
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
title = {Everyday AR through AI-in-the-Loop},
author = {R. Suzuki and M. González-Franco and M. Sra and D. Lindlbauer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2F3706599.3706741&partnerID=40&md5=a5369bb371ce25feca340b4f5952e6a6},
doi = {10.1145/3706599.3706741},
isbn = {9798400713958; 9798400713941},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR.},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Fang, A.; Chhabria, H.; Maram, A.; Zhu, H.
Social Simulation for Everyday Self-Care: Design Insights from Leveraging VR, AR, and LLMs for Practicing Stress Relief Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958 (ISBN); 9798400713941 (ISBN).
Abstract | Links | BibTeX | Tags: design, Design insights, Language Model, Large language model, large language models, Mental health, Peer support, Professional supports, Self-care, Social simulations, Speed dating, Virtual environments, Virtual Reality, Well being
@inproceedings{fang_social_2025,
title = {Social Simulation for Everyday Self-Care: Design Insights from Leveraging VR, AR, and LLMs for Practicing Stress Relief},
author = {A. Fang and H. Chhabria and A. Maram and H. Zhu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005770377&doi=10.1145%2F3706598.3713115&partnerID=40&md5=828b06008a1409e9dc32425e568f4f33},
doi = {10.1145/3706598.3713115},
isbn = {9798400713958 (ISBN); 9798400713941 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Stress is an inevitable part of day-to-day life, yet many people find themselves unable to manage it, particularly when professional or peer support is not readily available. As self-care becomes increasingly vital for mental well-being, this paper explores the potential of social simulation as a safe, virtual environment for practicing in-the-moment stress relief in everyday social situations. Leveraging the immersive capabilities of VR, AR, and LLMs to create realistic interactions and environments, we developed eight interactive prototypes for various social stress-related scenarios (e.g., public speaking, interpersonal conflict) across the design dimensions of modality, interactivity, and mental health guidance, in order to conduct prototype-driven semi-structured interviews with 19 participants. Our qualitative findings reveal that people currently lack effective means to support themselves through everyday stress and perceive social simulation, even at low immersion and interaction levels, to fill a gap for practical, controlled training of mental health practices. We outline key design needs for developing social simulation for self-care, and identify important considerations including risks of trauma from hyper-realism, distrust of LLM-recommended timing for mental health recommendations, and the value of accessibility for self-care interventions. © 2025 Elsevier B.V., All rights reserved.},
keywords = {design, Design insights, Language Model, Large language model, large language models, Mental health, Peer support, Professional supports, Self-care, Social simulations, Speed dating, Virtual environments, Virtual Reality, Well being},
pubstate = {published},
tppubtype = {inproceedings}
}
de Oliveira, E. A. Masasi; Sousa, R. T.; Bastos, A. A.; de Freitas Cintra, L. Martins; Filho, A. R. G. Galvão
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi de Oliveira and R. T. Sousa and A. A. Bastos and L. Martins de Freitas Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2F3706370.3731643&partnerID=40&md5=47a47f3408a0e6cb35c16dd6101a15b0},
doi = {10.1145/3706370.3731643},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although these results are promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
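The SA-RAG loop described above (gaze selects an exhibit, retrieval fetches exhibit-specific passages, an LLM answers from them) reduces to a small pattern. The gaze tracker output, document store, and `llm` callable below are assumed interfaces; the paper's Unreal Engine implementation is not reproduced here:

```python
# Minimal sketch of spatially-aware retrieval-augmented generation (SA-RAG style).
# `gaze_target` stands in for a visual-attention tracker's current hit; the store
# maps exhibit names to curatorial passages. All interfaces here are assumptions.
def answer_about_exhibit(llm, gaze_target: str, question: str,
                         store: dict[str, list[str]], k: int = 3) -> str:
    """Retrieve passages for the exhibit the user is looking at, then generate."""
    passages = store.get(gaze_target, [])[:k]   # naive top-k; a real system ranks
    context = "\n".join(f"- {p}" for p in passages)
    prompt = (
        f"The visitor is looking at: {gaze_target}\n"
        f"Curatorial notes:\n{context}\n"
        f"Visitor question: {question}\n"
        "Answer using only the notes above."
    )
    return llm(prompt)
```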
Sabir, A.; Hussain, R.; Pedro, A.; Park, C.
Personalized construction safety training system using conversational AI in virtual reality Journal Article
In: Automation in Construction, vol. 175, 2025, ISSN: 09265805 (ISSN), (Publisher: Elsevier B.V.).
Abstract | Links | BibTeX | Tags: Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'
@article{sabir_personalized_2025,
title = {Personalized construction safety training system using conversational AI in virtual reality},
author = {A. Sabir and R. Hussain and A. Pedro and C. Park},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002741042&doi=10.1016%2Fj.autcon.2025.106207&partnerID=40&md5=b071b04c835e74758e168f5c19da8271},
doi = {10.1016/j.autcon.2025.106207},
issn = {09265805 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Automation in Construction},
volume = {175},
abstract = {Training workers in safety protocols is crucial for mitigating job site hazards, yet traditional methods often fall short. This paper explores integrating virtual reality (VR) and large language models (LLMs) into iSafeTrainer, an AI-powered safety training system. The system allows trainees to engage with trade-specific content tailored to their expertise level from a third-person perspective in a non-immersive desktop virtual environment, eliminating the need for head-mounted displays. An experimental study evaluated the system through qualitative, survey-based assessments focusing on user satisfaction, experience, engagement, guidance, and confidence. Results showed high satisfaction rates (>85%) among novice users, along with improved safety knowledge. Expert users suggested advanced scenarios, highlighting the system's potential for expansion. The modular architecture supports customization across various construction settings, ensuring adaptability for future improvements. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Elsevier B.V.},
keywords = {Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'},
pubstate = {published},
tppubtype = {article}
}
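One way to realize the expertise-tailored content the abstract mentions is to vary the conversational agent's system prompt by trainee level. The sketch below is hypothetical; the level names and wording are invented for illustration and are not drawn from iSafeTrainer:

```python
# Hedged sketch of expertise-tailored prompting for a safety-training agent.
# Level names and style rules are assumptions, not the published system's design.
LEVEL_STYLE = {
    "novice": "Define every term, move slowly, and quiz one concept at a time.",
    "intermediate": "Skip basics; focus on procedures and common violations.",
    "expert": "Present rare, compound-hazard scenarios and ask for judgement calls.",
}

def trainer_system_prompt(trade: str, level: str) -> str:
    """Compose a system prompt for a given trade and trainee expertise level."""
    return (
        f"You are a construction safety trainer for the {trade} trade. "
        f"{LEVEL_STYLE[level]} Keep answers grounded in standard safety protocols."
    )
```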
Pardo B., C. E.; Iglesias R., O. I.; León A., M. D.; Quintero M., C. G.
EverydAI: Virtual Assistant for Decision-Making in Daily Contexts, Powered by Artificial Intelligence Journal Article
In: Systems, vol. 13, no. 9, 2025, ISSN: 20798954 (ISSN), (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Behavioral Research, Decision making, Decisions makings, Digital avatar, Digital avatars, Information overloads, Informed decision, Interactive computer graphics, Language Model, Large language model, large language models, Natural language processing systems, Natural languages, Object Detection, Object recognition, Objects detection, recommendation systems, Recommender systems, Three dimensional computer graphics, Virtual assistants, Virtual Reality, web scraping, Web scrapings
@article{pardo_b_everydai_2025,
title = {EverydAI: Virtual Assistant for Decision-Making in Daily Contexts, Powered by Artificial Intelligence},
author = {C. E. Pardo B and O. I. Iglesias R and M. D. León A and C. G. Quintero M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017115803&doi=10.3390%2Fsystems13090753&partnerID=40&md5=475327fffcdc43ee3466b4a65111866a},
doi = {10.3390/systems13090753},
issn = {20798954 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Systems},
volume = {13},
number = {9},
abstract = {In an era of information overload, artificial intelligence plays a pivotal role in supporting everyday decision-making. This paper introduces EverydAI, a virtual AI-powered assistant designed to help users make informed decisions across various daily domains such as cooking, fashion, and fitness. By integrating advanced natural language processing, object detection, augmented reality, contextual understanding, digital 3D avatar models, web scraping, and image generation, EverydAI delivers personalized recommendations and insights tailored to individual needs. The proposed framework addresses challenges related to decision fatigue and information overload by combining real-time object detection and web scraping to enhance the relevance and reliability of its suggestions. EverydAI is evaluated through a two-phase survey, each phase involving 30 participants with diverse demographic backgrounds. Survey results show that, on average, 92.7% of users agreed or strongly agreed with statements reflecting the system’s usefulness, ease of use, and overall performance, indicating a high level of acceptance and perceived effectiveness. Additionally, EverydAI received an average user satisfaction score of 4.53 out of 5, underscoring its effectiveness in supporting users’ daily routines. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {Artificial intelligence, Augmented Reality, Behavioral Research, Decision making, Decisions makings, Digital avatar, Digital avatars, Information overloads, Informed decision, Interactive computer graphics, Language Model, Large language model, large language models, Natural language processing systems, Natural languages, Object Detection, Object recognition, Objects detection, recommendation systems, Recommender systems, Three dimensional computer graphics, Virtual assistants, Virtual Reality, web scraping, Web scrapings},
pubstate = {published},
tppubtype = {article}
}
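The recommendation flow EverydAI's abstract outlines, in which detected objects and a daily-life domain feed an LLM suggestion, can be hinted at with a minimal sketch. The detector output format and the `llm` callable are assumptions, not the published system:

```python
# Illustrative pipeline sketch in the spirit of EverydAI: objects recognised by a
# detector plus a domain label feed a recommendation prompt. All names assumed.
def recommend(llm, domain: str, detected: list[str], request: str) -> str:
    """Turn a detector's object list and a user request into one actionable tip."""
    inventory = ", ".join(detected) or "nothing recognised"
    prompt = (
        f"Domain: {domain}. Items visible to the user's camera: {inventory}.\n"
        f"Request: {request}\n"
        "Suggest one concrete option the user can act on with what they have."
    )
    return llm(prompt)

# e.g. recommend(llm, "cooking", ["eggs", "spinach", "feta"], "quick dinner idea")
```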
Wei, Q.; Huang, J.; Gao, Y.; Dong, W.
One Model to Fit Them All: Universal IMU-based Human Activity Recognition with LLM-assisted Cross-dataset Representation Journal Article
In: Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, vol. 9, no. 3, 2025, ISSN: 24749567 (ISSN), (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Broad application, Contrastive Learning, Cross-dataset, Data collection, Human activity recognition, Human activity recognition systems, Human computer interaction, Intelligent interactions, Language Model, Large datasets, Large language model, large language models, Learning systems, Neural-networks, Pattern recognition, Spatial relationships, Ubiquitous computing, Virtual Reality
@article{wei_one_2025,
title = {One Model to Fit Them All: Universal IMU-based Human Activity Recognition with LLM-assisted Cross-dataset Representation},
author = {Q. Wei and J. Huang and Y. Gao and W. Dong},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105015431117&doi=10.1145%2F3749509&partnerID=40&md5=2a6f26a05856c48ba3aaaf356b375dc0},
doi = {10.1145/3749509},
issn = {24749567 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies},
volume = {9},
number = {3},
abstract = {Human Activity Recognition (HAR) is essential for pervasive computing and intelligent interaction, with broad applications across various fields. However, there is still no single model capable of fitting various HAR datasets, which severely limits applicability in practical scenarios. To address this, we propose oneHAR, an LLM-assisted universal IMU-based HAR system designed to achieve "one model to fit them all" — a single model that adapts to diverse HAR datasets without any dataset-specific operations. In particular, we propose the Cross-Dataset neural network (CDNet), which models both the temporal context and spatial relationships of IMU data to capture cross-dataset representations, encompassing differences in device, participant, data collection position, and environment. Additionally, we introduce LLM-driven data synthesis, which enhances the training process by generating virtual IMU data through three carefully designed strategies. Furthermore, LLM-assisted adaptive position processing optimizes the inference process by flexibly handling a variable combination of positional inputs. Our model demonstrates strong generalization across five public IMU-based HAR datasets, outperforming the best baselines by up to 46.9% in the unseen-dataset scenario and 6.5% in the cross-dataset scenario. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Broad application, Contrastive Learning, Cross-dataset, Data collection, Human activity recognition, Human activity recognition systems, Human computer interaction, Intelligent interactions, Language Model, Large datasets, Large language model, large language models, Learning systems, Neural-networks, Pattern recognition, Spatial relationships, Ubiquitous computing, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
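The "variable combination of positional inputs" the abstract mentions suggests a fixed-slot packing scheme on the input side. The sketch below shows one plausible such scheme under assumed slot names and window shapes; it is not the paper's CDNet:

```python
# Sketch of one way to feed a variable set of sensor positions to a single model.
# The canonical slot list, window length, and 6-channel (accel+gyro) layout are
# assumptions for illustration; oneHAR's actual input handling may differ.
import numpy as np

POSITIONS = ["wrist", "waist", "ankle", "pocket"]   # canonical slot order (assumed)

def pack_windows(windows: dict[str, np.ndarray], length: int = 128) -> tuple:
    """Stack per-position IMU windows (length x 6) into fixed slots.
    Missing positions become zero blocks with mask=0, so one model fits any set."""
    x = np.zeros((len(POSITIONS), length, 6), dtype=np.float32)
    mask = np.zeros(len(POSITIONS), dtype=np.float32)
    for i, pos in enumerate(POSITIONS):
        if pos in windows:
            x[i] = windows[pos]
            mask[i] = 1.0
    return x, mask
```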
2024
Izquierdo-Domenech, J.; Linares-Pellicer, J.; Ferri-Molla, I.
Virtual Reality and Language Models, a New Frontier in Learning Journal Article
In: International Journal of Interactive Multimedia and Artificial Intelligence, vol. 8, no. 5, pp. 46–54, 2024, ISSN: 19891660 (ISSN), (Publisher: Universidad Internacional de la Rioja).
Abstract | Links | BibTeX | Tags: large language models, Retrieval-Augmented Generation, Virtual Reality
@article{izquierdo-domenech_virtual_2024,
title = {Virtual Reality and Language Models, a New Frontier in Learning},
author = {J. Izquierdo-Domenech and J. Linares-Pellicer and I. Ferri-Molla},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85186906440&doi=10.9781%2Fijimai.2024.02.007&partnerID=40&md5=14fdce4d265b7a3b1b630312b87f9949},
doi = {10.9781/ijimai.2024.02.007},
issn = {19891660 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {International Journal of Interactive Multimedia and Artificial Intelligence},
volume = {8},
number = {5},
pages = {46–54},
abstract = {The proposed research introduces an innovative Virtual Reality (VR) and Large Language Model (LLM) architecture to enhance the learning process across diverse educational contexts, ranging from school to industrial settings. Leveraging the capabilities of LLMs and Retrieval-Augmented Generation (RAG), the architecture centers around an immersive VR application. This application empowers students of all backgrounds to interactively engage with their environment by posing questions and receiving informative responses in text format and with visual hints in VR, thereby fostering a dynamic learning experience. LLMs with RAG act as the backbone of this architecture, facilitating the integration of private or domain-specific data into the learning process. By seamlessly connecting various data sources through data connectors, RAG overcomes the challenge of disparate and siloed information repositories, including APIs, PDFs, SQL databases, and more. The data indexes provided by RAG solutions further streamline this process by structuring the ingested data into formats optimized for consumption by LLMs. An empirical study was conducted to evaluate the effectiveness of this VR and LLM architecture. Twenty participants, divided into Experimental and Control groups, were selected to assess the impact on their learning process. The Experimental group utilized the immersive VR application, which allowed interactive engagement with the educational environment, while the Control group followed traditional learning methods. The study revealed significant improvements in learning outcomes for the Experimental group, demonstrating the potential of integrating VR and LLMs in enhancing comprehension and engagement in learning contexts. This study presents an innovative approach that capitalizes on the synergy between LLMs and immersive VR technology, opening avenues for a transformative learning experience that transcends traditional boundaries and empowers learners across a spectrum of educational landscapes. © 2024 Elsevier B.V., All rights reserved.},
note = {Publisher: Universidad Internacional de la Rioja},
keywords = {large language models, Retrieval-Augmented Generation, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
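The connector-and-index pattern this abstract attributes to RAG (ingest heterogeneous sources, chunk them into an index, retrieve against a query) can be illustrated minimally. Paragraph chunking and keyword-overlap scoring below stand in for real embedding-based retrieval; all names are illustrative, not from the paper's architecture:

```python
# Minimal sketch of the connector -> index -> query pattern described above.
# A production RAG stack would use embeddings and a vector store instead of
# the keyword-overlap scoring used here for illustration.
def build_index(documents: dict[str, str]) -> list[tuple[str, str]]:
    """Split each source document into paragraph chunks tagged with their origin."""
    return [(src, chunk.strip())
            for src, text in documents.items()
            for chunk in text.split("\n\n") if chunk.strip()]

def retrieve(index: list[tuple[str, str]], query: str, k: int = 3):
    """Rank chunks by query-term overlap and return the top k."""
    terms = set(query.lower().split())
    scored = sorted(index, key=lambda it: -len(terms & set(it[1].lower().split())))
    return scored[:k]

# Retrieved chunks would then be prepended to the learner's question before the
# LLM call, mirroring the abstract's description of grounding answers in
# domain-specific data.
```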