AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.
2025
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
title = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
author = {Z. Li and H. Zhang and C. Peng and R. Peiris},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2fVR59515.2025.00025&partnerID=40&md5=60f22109e054c9035a0c2210bb797039},
doi = {10.1109/VR59515.2025.00025},
isbn = {979-833153645-9 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {1–11},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings discuss the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 IEEE.},
keywords = {Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Kai, W. -H.; Xing, K. -X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 01782789 (ISSN).
Abstract | Links | BibTeX | Tags: 'current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
title = {Video-driven musical composition using large language model with memory-augmented state space},
author = {W. -H. Kai and K. -X. Xing},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2fs00371-024-03606-w&partnerID=40&md5=7ea24f13614a9a24caf418c37a10bd8c},
doi = {10.1007/s00371-024-03606-w},
issn = {01782789 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {5},
pages = {3345–3357},
abstract = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, the research work on LLms for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependency and maintaining high performance, while further decrease the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code released on https://github.com/kai211233/S2L2-V2M. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {'current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space},
pubstate = {published},
tppubtype = {article}
}
Shoa, A.; Friedman, D.
Milo: an LLM-based virtual human open-source platform for extended reality Journal Article
In: Frontiers in Virtual Reality, vol. 6, 2025, ISSN: 26734192 (ISSN).
Abstract | Links | BibTeX | Tags: Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR
@article{shoa_milo_2025,
title = {Milo: an LLM-based virtual human open-source platform for extended reality},
author = {A. Shoa and D. Friedman},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008867438&doi=10.3389%2ffrvir.2025.1555173&partnerID=40&md5=6e68c9604b5ae52671b2ff02d51c7e75},
doi = {10.3389/frvir.2025.1555173},
issn = {26734192 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Frontiers in Virtual Reality},
volume = {6},
abstract = {Large language models (LLMs) have made dramatic advancements in recent years, allowing for a new generation of dialogue agents. This allows for new types of social experiences with virtual humans, in both virtual and augmented reality. In this paper, we introduce an open-source system specifically designed for implementing LLM-based virtual humans within extended reality (XR) environments. Our system integrates into XR platforms, providing a robust framework for the creation and management of interactive virtual agents. We detail the design and architecture of the system and showcase the system’s versatility through various scenarios. In addition to a straightforward single-agent setup, we demonstrate how an LLM-based virtual human can attend a multi-user virtual reality (VR) meeting, enhance a VR self-talk session, and take part in an augmented reality (AR) live event. We provide lessons learned, with focus on the possibilities for human intervention during live events. We provide the system as open-source, inviting collaboration and innovation within the community, paving the way for new types of social experiences. Copyright © 2025 Shoa and Friedman.},
keywords = {Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR},
pubstate = {published},
tppubtype = {article}
}
Sousa, R. T.; Oliveira, E. A. M.; Cintra, L. M. F.; Filho, A. R. G.
Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 168–171, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training
@inproceedings{sousa_transformative_2025,
title = {Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work},
author = {R. T. Sousa and E. A. M. Oliveira and L. M. F. Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140551&doi=10.1109%2fVRW66409.2025.00042&partnerID=40&md5=89da6954863a272d48c0d8da3760bfb6},
doi = {10.1109/VRW66409.2025.00042},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {168–171},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The reintegration of incarcerated individuals into society presents significant challenges, particularly in addressing barriers related to vocational training, social skill development, and emotional rehabilitation. Immersive technologies, such as Virtual Reality and Augmented Reality, combined with generative Artificial Intelligence (AI) and Large Language Models, offer innovative opportunities to enhance these areas. These technologies create practical, controlled environments for skill acquisition and behavioral training, while generative AI enables dynamic, personalized, and adaptive experiences. This paper explores the broader potential of these integrated technologies in supporting rehabilitation, reducing recidivism, and fostering sustainable employment opportunities and these initiatives align with the overarching equity objective of ensuring Decent Work for All, reinforcing the commitment to inclusive and equitable progress across diverse communities, through the transformative potential of immersive and AI-driven systems in correctional systems. © 2025 IEEE.},
keywords = {AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, D.; Ge, M.; Xiang, K.; Zhang, X.; Yang, H.
Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions Proceedings Article
In: Int J Network Manage, John Wiley and Sons Ltd, 2025, ISBN: 10557148 (ISSN).
Abstract | Links | BibTeX | Tags: Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers
@inproceedings{huang_privacy_2025,
title = {Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions},
author = {D. Huang and M. Ge and K. Xiang and X. Zhang and H. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199980257&doi=10.1002%2fnem.2292&partnerID=40&md5=2dea1caa1d31aecde3d302a908fb7dd3},
doi = {10.1002/nem.2292},
isbn = {10557148 (ISSN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int J Network Manage},
volume = {35},
publisher = {John Wiley and Sons Ltd},
abstract = {Large language models (LLMs), with their billions to trillions of parameters, excel in natural language processing, machine translation, dialog systems, and text summarization. These capabilities are increasingly pivotal in the metaverse, where they can enhance virtual interactions and environments. However, their extensive use, particularly in the metaverse's immersive platforms, raises significant privacy concerns. This paper analyzes existing privacy issues in LLMs, vital for both traditional and metaverse applications, and examines protection techniques across the entire life cycle of these models, from training to user deployment. We delve into cryptography, embedding layer encoding, differential privacy and its variants, and adversarial networks, highlighting their relevance in the metaverse context. Specifically, we explore technologies like homomorphic encryption and secure multiparty computation, which are essential for metaverse security. Our discussion on Gaussian differential privacy, Renyi differential privacy, Edgeworth accounting, and the generation of adversarial samples and loss functions emphasizes their importance in the metaverse's dynamic and interactive environments. Lastly, the paper discusses the current research status and future challenges in the security of LLMs within and beyond the metaverse, emphasizing urgent problems and potential areas for exploration. © 2024 John Wiley & Sons Ltd.},
keywords = {Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers},
pubstate = {published},
tppubtype = {inproceedings}
}
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
doi = {10.1109/VRW66409.2025.00038},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
Sabir, A.; Hussain, R.; Pedro, A.; Park, C.
Personalized construction safety training system using conversational AI in virtual reality Journal Article
In: Automation in Construction, vol. 175, 2025, ISSN: 09265805 (ISSN).
Abstract | Links | BibTeX | Tags: Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'
@article{sabir_personalized_2025,
title = {Personalized construction safety training system using conversational AI in virtual reality},
author = {A. Sabir and R. Hussain and A. Pedro and C. Park},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002741042&doi=10.1016%2fj.autcon.2025.106207&partnerID=40&md5=376284339bf10fd5d799cc56c6643d36},
doi = {10.1016/j.autcon.2025.106207},
issn = {09265805 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Automation in Construction},
volume = {175},
abstract = {Training workers in safety protocols is crucial for mitigating job site hazards, yet traditional methods often fall short. This paper explores integrating virtual reality (VR) and large language models (LLMs) into iSafeTrainer, an AI-powered safety training system. The system allows trainees to engage with trade-specific content tailored to their expertise level in a third-person perspective in a non-immersive desktop virtual environment, eliminating the need for head-mounted displays. An experimental study evaluated the system through qualitative, survey-based assessments, focusing on user satisfaction, experience, engagement, guidance, and confidence. Results showed high satisfaction rates (>85 %) among novice users, with improved safety knowledge. Expert users suggested advanced scenarios, highlighting the system's potential for expansion. The modular architecture supports customization across various construction settings, ensuring adaptability for future improvements. © 2024},
keywords = {Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'},
pubstate = {published},
tppubtype = {article}
}
Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.
Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article
In: A., Del Bue; C., Canton; J., Pont-Tuset; T., Tommasi (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303191571-0 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization
@inproceedings{lau_evaluating_2025,
title = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling},
author = {K. H. C. Lau and E. Bozkir and H. Gao and E. Kasneci},
editor = {Del Bue A. and Canton C. and Pont-Tuset J. and Tommasi T.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
doi = {10.1007/978-3-031-91572-7_11},
isbn = {03029743 (ISSN); 978-303191571-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15628 LNCS},
pages = {177–195},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Lakehal, A.; Alti, A.; Annane, B.
CORES: Context-Aware Emotion-Driven Recommendation System-Based LLM to Improve Virtual Shopping Experiences Journal Article
In: Future Internet, vol. 17, no. 2, 2025, ISSN: 19995903 (ISSN).
Abstract | Links | BibTeX | Tags: Context, Context-Aware, Customisation, Decisions makings, E- commerces, e-commerce, Emotion, emotions, Language Model, Large language model, LLM, Recommendation, Virtual environments, Virtual Reality, Virtual shopping
@article{lakehal_cores_2025,
title = {CORES: Context-Aware Emotion-Driven Recommendation System-Based LLM to Improve Virtual Shopping Experiences},
author = {A. Lakehal and A. Alti and B. Annane},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85218626299&doi=10.3390%2ffi17020094&partnerID=40&md5=a0f68e273de08b2c33d03da4cb6c19bb},
doi = {10.3390/fi17020094},
issn = {19995903 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Future Internet},
volume = {17},
number = {2},
abstract = {In today’s business landscape, artificial intelligence (AI) plays a pivotal role in shopping processes and customization. As the demand for customization grows, virtual reality (VR) emerges as an innovative solution to improve users’ perception and decision making in virtual shopping experiences (VSEs). Despite its potential, limited research has explored the integration of contextual information and emotions in VR to deliver effective product recommendations. This paper presents CORES (context-aware emotion-driven recommendation system), a novel approach designed to enrich users’ experiences and to support decision making in VR. CORES combines advanced large language models (LLMs) and embedding-based context-aware recommendation strategies to provide customized products. Therefore, emotions are collected from social platforms, and relevant contextual information is matched to enable effective recommendation. Additionally, CORES leverages transformers and retrieval-augmented generation (RAG) capabilities to explain recommended items, facilitate VR visualization, and generate insights using various prompt templates. CORES is applied to a VR shop of different items. An empirical study validates the efficiency and accuracy of this approach, achieving a significant average accuracy of 97% and an acceptable response time of 0.3267s in dynamic shopping scenarios. © 2025 by the authors.},
keywords = {Context, Context-Aware, Customisation, Decisions makings, E- commerces, e-commerce, Emotion, emotions, Language Model, Large language model, LLM, Recommendation, Virtual environments, Virtual Reality, Virtual shopping},
pubstate = {published},
tppubtype = {article}
}
Gatti, E.; Giunchi, D.; Numan, N.; Steed, A.
Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 136–141, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR
@inproceedings{gatti_around_2025,
title = {Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR},
author = {E. Gatti and D. Giunchi and N. Numan and A. Steed},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000263662&doi=10.1109%2fAIxVR63409.2025.00027&partnerID=40&md5=cd804d892d45554e936d0221508b3447},
doi = {10.1109/AIxVR63409.2025.00027},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {136–141},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual Reality (VR) presents an immersive platform for storytelling, allowing narratives to unfold in highly engaging, interactive environments. Leveraging AI capabilities and image synthesis offers new possibilities for creating scalable, generative VR content. In this work, we use an LLM-driven VR storytelling platform to explore how AI-generated visuals and narrative elements impact the user experience in VR storytelling. Previously, we presented AIsop, a system to integrate LLM-generated text and images and TTS audio into a storytelling experience, where the narrative unfolds based on user input. In this paper, we present two user studies focusing on how AI-generated visuals influence narrative perception and the overall VR experience. Our findings highlight the positive impact of AI-generated pictorial content on the storytelling experience, highlighting areas for enhancement and further research in interactive narrative design. © 2025 IEEE.},
keywords = {Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR},
pubstate = {published},
tppubtype = {inproceedings}
}
Guo, H.; Liu, Z.; Tang, C.; Zhang, X.
An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning Journal Article
In: IEEE Access, vol. 13, pp. 81450–81461, 2025, ISSN: 21693536 (ISSN).
Abstract | Links | BibTeX | Tags: Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour
@article{guo_interactive_2025,
title = {An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning},
author = {H. Guo and Z. Liu and C. Tang and X. Zhang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105004059236&doi=10.1109%2fACCESS.2025.3565359&partnerID=40&md5=45d328831c5795fa31e7e033299912b5},
doi = {10.1109/ACCESS.2025.3565359},
issn = {21693536 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {81450–81461},
abstract = {With the wide application of large language models (LLMs) and the rapid growth of metaverse tourism demand, the digital tour and personalized interaction of historical sites have become the key to improving users’ digital travel experience. Creating an environment where users can access rich cultural information and enjoy personalized, immersive experiences is a crucial issue in the field of digital cultural travel. To this end, we propose a tourism information multimodal generation personalized question-answering interactive framework TIGMI (Tourism Information Generation and Multimodal Interaction) based on LLM fine-tuning, which aims to provide a richer and more in-depth experience for virtual tours of historical monuments. Taking Qutan Temple as an example, the framework combines LLM, retrieval augmented generation (RAG), and auto-prompting engineering techniques to retrieve accurate information related to the historical monument from external knowledge bases and seamlessly integrates it into the generated content. This integration mechanism ensures the accuracy and relevance of the generated answers. Through TIGMI’s LLM-driven command interaction mechanism in the 3D digital scene of Qutan Temple, users are able to interact with the building and scene environment in a personalized and real-time manner, successfully integrating historical and cultural information with modern digital technology. This integration significantly enhances the naturalness of interaction and personalizes the user experience, thereby improving user immersion and information acquisition efficiency. Evaluation results show that TIGMI excels in question-answering and multimodal interactions, significantly enhancing the depth and breadth of services provided by the personalized virtual tour. We conclude by addressing the limitations of TIGMI and briefly discuss how future research will focus on further improving the accuracy and user satisfaction of the generated content to adapt to the dynamically changing tourism environment. © 2013 IEEE.},
keywords = {Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour},
pubstate = {published},
tppubtype = {article}
}
Oliveira, E. A. Masasi De; Sousa, R. T.; Bastos, A. A.; Cintra, L. Martins De Freitas; Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi De Oliveira and R. T. Sousa and A. A. Bastos and L. Martins De Freitas Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2f3706370.3731643&partnerID=40&md5=db10b41217dd8a0b0705c3fb4a615666},
doi = {10.1145/3706370.3731643},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Copyright held by the owner/author(s).},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
Angelopoulos, J.; Manettas, C.; Alexopoulos, K.
Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR) Proceedings Article
In: K., Alexopoulos; S., Makris; P., Stavropoulos (Ed.): Lect. Notes Mech. Eng., pp. 197–205, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 21954356 (ISSN); 978-303186488-9 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants
@inproceedings{angelopoulos_industrial_2025,
title = {Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR)},
author = {J. Angelopoulos and C. Manettas and K. Alexopoulos},
editor = {Alexopoulos K. and Makris S. and Stavropoulos P.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001421726&doi=10.1007%2f978-3-031-86489-6_20&partnerID=40&md5=63be31b9f4dda4aafd6a641630506c09},
doi = {10.1007/978-3-031-86489-6_20},
isbn = {21954356 (ISSN); 978-303186488-9 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Mech. Eng.},
pages = {197–205},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Traditional maintenance procedures often rely on manual data processing and human expertise, leading to inefficiencies and potential errors. In the context of Industry 4.0 several digital technologies, such as Artificial Intelligence (AI), Big Data Analytics (BDA), and eXtended Reality (XR) have been developed and are constantly being integrated in a plethora of manufacturing activities (including industrial maintenance), in an attempt to minimize human error, facilitate shop floor technicians, reduce costs as well as reduce equipment downtimes. The latest developments in the field of AI point towards Large Language Models (LLM) which can communicate with human operators in an intuitive manner. On the other hand, Augmented Reality, as part of XR technologies, offers useful functionalities for improving user perception and interaction with modern, complex industrial equipment. Therefore, the context of this research work lies in the development and training of an LLM in order to provide suggestions and actionable items for the mitigation of unforeseen events (e.g. equipment breakdowns), in order to facilitate shop-floor technicians during their everyday tasks. Paired with AR visualizations over the physical environment, the technicians will get instructions for performing tasks and checks on the industrial equipment in a manner similar to human-to-human communication. The functionality of the proposed framework extends to the integration of modules for exchanging information with the engineering department towards the scheduling of Maintenance and Repair Operations (MRO) as well as the creation of a repository of historical data in order to constantly retrain and optimize the LLM. © The Author(s) 2025.},
keywords = {Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants},
pubstate = {published},
tppubtype = {inproceedings}
}
Xu, F.; Zhou, T.; Nguyen, T.; Bao, H.; Lin, C.; Du, J.
Integrating augmented reality and LLM for enhanced cognitive support in critical audio communications Journal Article
In: International Journal of Human Computer Studies, vol. 194, 2025, ISSN: 10715819 (ISSN).
Abstract | Links | BibTeX | Tags: Audio communications, Augmented Reality, Cognitive loads, Cognitive support, Decisions makings, Language Model, Large language model, LLM, Logic reasoning, Maintenance, Operations and maintenance, Oral communication, Situational awareness
@article{xu_integrating_2025,
title = {Integrating augmented reality and LLM for enhanced cognitive support in critical audio communications},
author = {F. Xu and T. Zhou and T. Nguyen and H. Bao and C. Lin and J. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85208467299&doi=10.1016%2fj.ijhcs.2024.103402&partnerID=40&md5=153d095b837ee1666a7da0f7ed03362c},
doi = {10.1016/j.ijhcs.2024.103402},
issn = {10715819 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human Computer Studies},
volume = {194},
abstract = {Operation and Maintenance (O&M) missions are often time-sensitive and accuracy-dependent, requiring rapid and precise information processing in noisy, chaotic environments where oral communication can lead to cognitive overload and impaired decision-making. Augmented Reality (AR) and Large Language Models (LLMs) offer potential for enhancing situational awareness and lowering cognitive load by integrating digital visualizations with the physical world and improving dialogue management. However, synthesizing these technologies into a real-time system that effectively aids operators remains a challenge. This study explores the integration of AR and GPT-4, an advanced LLM, in time-sensitive O&M tasks, aiming to enhance situational awareness and manage cognitive load during oral communications. A customized AR system, incorporating the Microsoft HoloLens2 for cognitive monitoring and GPT-4 for decision making assistance, was tested in a human subject experiment with 30 participants. The 2×2 factorial experiment evaluated the effects of AR and LLM assistance on task performance and cognitive load. Results demonstrated significant improvements in task accuracy and reductions in cognitive load, highlighting the effectiveness of AR and LLM integration in supporting O&M missions. These findings emphasize the need for further research to optimize operational strategies in mission critical environments. © 2024 Elsevier Ltd},
keywords = {Audio communications, Augmented Reality, Cognitive loads, Cognitive support, Decisions makings, Language Model, Large language model, LLM, Logic reasoning, Maintenance, Operations and maintenance, Oral communication, Situational awareness},
pubstate = {published},
tppubtype = {article}
}
Aloudat, M. Z.; Aboumadi, A.; Soliman, A.; Al-Mohammed, H. A.; Al-Ali, M.; Mahgoub, A.; Barhamgi, M.; Yaacoub, E.
Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, 6G, and Edge Learning Journal Article
In: IEEE Access, vol. 13, pp. 58302–58350, 2025, ISSN: 21693536 (ISSN).
Abstract | Links | BibTeX | Tags: 6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems
@article{aloudat_metaverse_2025,
title = {Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, 6G, and Edge Learning},
author = {M. Z. Aloudat and A. Aboumadi and A. Soliman and H. A. Al-Mohammed and M. Al-Ali and A. Mahgoub and M. Barhamgi and E. Yaacoub},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003088610&doi=10.1109%2fACCESS.2025.3555753&partnerID=40&md5=8f3f9421ce2d6be57f8154a122ee192c},
doi = {10.1109/ACCESS.2025.3555753},
issn = {21693536 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {58302–58350},
abstract = {With a focus on edge learning, blockchain, sixth generation (6G) wireless systems, semantic communication, and large language models (LLMs), this survey paper examines the revolutionary integration of cutting-edge technologies within the metaverse. This thorough examination highlights the critical role these technologies play in improving realism and user engagement on three main levels: technical, virtual, and physical. While the virtual layer focuses on building immersive experiences, the physical layer highlights improvements to the user interface through augmented reality (AR) goggles and virtual reality (VR) headsets. Blockchain-powered technical layer enables safe, decentralized communication. The survey highlights how the metaverse has the potential to drastically change how people interact in society by exploring applications in a variety of fields, such as immersive education, remote work, and entertainment. Concerns about privacy, scalability, and interoperability are raised, highlighting the necessity of continued study to realize the full potential of the metaverse. For scholars looking to broaden the reach and significance of the metaverse in the digital age, this paper is a useful tool. © 2013 IEEE.},
keywords = {6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems},
pubstate = {published},
tppubtype = {article}
}
Zhang, G.; Wang, Y.; Luo, C.; Xu, S.; Ming, Y.; Peng, J.; Zhang, M.
Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images Proceedings Article
In: Z., Lin; H., Zha; M.-M., Cheng; R., He; C.-L., Liu; K., Ubul; W., Silamu; J., Zhou (Ed.): Lect. Notes Comput. Sci., pp. 3–17, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981978507-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages
@inproceedings{zhang_visual_2025,
title = {Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images},
author = {G. Zhang and Y. Wang and C. Luo and S. Xu and Y. Ming and J. Peng and M. Zhang},
editor = {Lin Z. and Zha H. and Cheng M.-M. and He R. and Liu C.-L. and Ubul K. and Silamu W. and Zhou J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209374797&doi=10.1007%2f978-981-97-8508-7_1&partnerID=40&md5=5231ab0bce95fb3f09db80392acd58ff},
doi = {10.1007/978-981-97-8508-7_1},
isbn = {03029743 (ISSN); 978-981978507-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15036 LNCS},
pages = {3–17},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Indoor scene generation has recently attracted significant attention as it is crucial for metaverse, 3D animation, visual effects in movies, and virtual/augmented reality. Existing image-based indoor scene generation methods often produce scenes that are not realistic enough, with issues such as floating objects, incorrect object orientations, and incomplete scenes that only include the part of the scenes captured by the input image. To address these challenges, we propose Visual Harmony, a method that leverages the powerful spatial imagination capabilities of Large Language Model (LLM) to generate corresponding indoor scenes based on the input image. Specifically, we first extract information from the input image through depth estimation and panorama segmentation, reconstructing a semantic point cloud. Using this reconstructed semantic point cloud, we extract a scene graph that describes only the objects in the image. Then we leverage the strong spatial imagination capabilities of LLM to complete the scene graph, forming a representation of a complete room scene. Based on this fine scene graph, we can generate entire indoor scene that includes both the captured and not captured parts of the input image. Extensive experiments demonstrate that our method can generate realistic, plausible, and highly relevant complete indoor scenes related to the input image. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
keywords = {Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.
Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method
@inproceedings{hafner_leveraging_2025,
title = {Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines},
author = {P. Häfner and F. Eisenlohr and A. Karande and M. Grethler and A. Mukherjee and N. Tran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2fAIxVR63409.2025.00054&partnerID=40&md5=05fe014eddba395881575bec5d96ce15},
doi = {10.1109/AIxVR63409.2025.00054},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {281–285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machine. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings. © 2025 IEEE.},
keywords = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: % reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality
@article{chen_llmer_2025,
title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
author = {J. Chen and X. Wu and T. Lan and B. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2fTVCG.2025.3549549&partnerID=40&md5=da4681d0714548e3a7e0c8c3295d2348},
doi = {10.1109/TVCG.2025.3549549},
issn = {10772626 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2715–2724},
abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. It leads to not only increased costs with pay-per-use models, but also elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 1995-2012 IEEE.},
keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: C., Santoro; A., Schmidt; M., Matera; A., Bellucci (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303195451-1 (ISBN).
Abstract | Links | BibTeX | Tags: 'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
title = {Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation},
author = {A. Carcangiu and M. Manca and J. Mereu and C. Santoro and L. Simeoli and L. D. Spano},
editor = {Santoro C. and Schmidt A. and Matera M. and Bellucci A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
doi = {10.1007/978-3-031-95452-8_4},
isbn = {03029743 (ISSN); 978-303195451-1 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15713 LNCS},
pages = {59–79},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction setting (keywords, difficulties in expressing conditions, task success), even if we registered different adaptations for each setting (verbal structure, event vs action first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
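For readers unfamiliar with the event-condition-action rules that the study above asks users to verbalize, here is a minimal sketch of one possible representation; the Rule class and its fields are illustrative assumptions, not the authors' implementation.

```python
# Minimal event-condition-action (ECA) rule representation of the kind
# users articulated in the study; structure and names are illustrative.
from dataclasses import dataclass, field

@dataclass
class Rule:
    event: str                       # e.g. "player enters room"
    actions: list[str]               # e.g. ["turn on light"]
    conditions: list[str] = field(default_factory=list)  # optional guards

    def fires_on(self, event: str, facts: set[str]) -> bool:
        # A rule fires when its event occurs and all conditions hold.
        return event == self.event and all(c in facts for c in self.conditions)

rule = Rule(event="player enters room",
            actions=["turn on light"],
            conditions=["it is night"])
print(rule.fires_on("player enters room", {"it is night"}))  # True
```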
Buldu, K. B.; Özdel, S.; Lau, K. H. Carrie; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.
CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality
@inproceedings{buldu_cuify_2025,
title = {CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR},
author = {K. B. Buldu and S. Özdel and K. H. Carrie Lau and M. Wang and D. Saad and S. Schönborn and A. Boch and E. Kasneci and E. Bozkir},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2fAIxVR63409.2025.00037&partnerID=40&md5=837b0e3425d2e5a9358bbe6c8ecb5754},
doi = {10.1109/AIxVR63409.2025.00037},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {192–197},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop as personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify © 2025 IEEE.},
keywords = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
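The CUIfy abstract emphasizes streaming between STT, LLM, and TTS models to minimize latency. The sketch below shows the general idea (forwarding LLM output to TTS at sentence boundaries instead of waiting for the full reply) in Python; the real package is a Unity asset, and llm_stream/speak here are stand-ins, not the package's API.

```python
# Rough sketch of sentence-level streaming: tokens from the LLM are
# flushed to text-to-speech as soon as a sentence ends, so the NPC
# starts speaking before the full reply is generated.
from typing import Iterator

def llm_stream(prompt: str) -> Iterator[str]:
    # Stand-in for a token-streaming chat API.
    yield from "Hello there. How can I help you today?".split(" ")

def speak(sentence: str) -> None:
    # Stand-in for a TTS call; a real system would enqueue audio here.
    print(f"[TTS] {sentence}")

def npc_reply(user_utterance: str) -> None:
    buffer: list[str] = []
    for token in llm_stream(user_utterance):
        buffer.append(token)
        if token.endswith((".", "?", "!")):  # flush at sentence boundaries
            speak(" ".join(buffer))
            buffer.clear()
    if buffer:
        speak(" ".join(buffer))

npc_reply("hi")
```

Perceived latency drops to the time-to-first-sentence rather than the time for the whole completion, which is the usability point the abstract makes.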
Suzuki, R.; Gonzalez-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
title = {Everyday AR through AI-in-the-Loop},
author = {R. Suzuki and M. Gonzalez-Franco and M. Sra and D. Lindlbauer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2f3706599.3706741&partnerID=40&md5=56b5e447819dde7aa4a29f8e3899e535},
doi = {10.1145/3706599.3706741},
isbn = {979-840071395-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Alibrahim, Y.; Ibrahim, M.; Gurdayal, D.; Munshi, M.
AI speechbots and 3D segmentations in virtual reality improve radiology on-call training in resource-limited settings Journal Article
In: Intelligence-Based Medicine, vol. 11, 2025, ISSN: 26665212 (ISSN).
Abstract | Links | BibTeX | Tags: 3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition
@article{alibrahim_ai_2025,
title = {AI speechbots and 3D segmentations in virtual reality improve radiology on-call training in resource-limited settings},
author = {Y. Alibrahim and M. Ibrahim and D. Gurdayal and M. Munshi},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001472313&doi=10.1016%2fj.ibmed.2025.100245&partnerID=40&md5=623a0ceaa07e5516a296420d25c3033b},
doi = {10.1016/j.ibmed.2025.100245},
issn = {26665212 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Intelligence-Based Medicine},
volume = {11},
abstract = {Objective: Evaluate the use of large-language model (LLM) speechbot tools and deep learning-assisted generation of 3D reconstructions when integrated into a virtual reality (VR) setting to teach radiology on-call topics to radiology residents. Methods: Three first-year radiology residents in Guyana were enrolled in an 8-week radiology course that focused on preparation for on-call duties. The course was delivered via VR headsets running custom software that integrated LLM-powered speechbots trained on imaging reports and 3D reconstructions segmented with the help of a deep learning model. Each session focused on a specific radiology area, employing a didactic and case-based learning approach, enhanced with 3D reconstructions and an LLM-powered speechbot. Post-session, residents reassessed their knowledge and provided feedback on their VR and LLM-powered speechbot experiences. Results/discussion: Residents found that the 3D reconstructions segmented semi-automatically by deep learning algorithms and AI-driven self-learning via the speechbot were highly valuable. The 3D reconstructions, especially in the interventional radiology session, were helpful, and the benefit was amplified in VR, where navigating the models is seamless and depth perception is pronounced. Residents also found conversing with the AI speechbot seamless and valuable in their post-session self-learning. The major drawback of VR was motion sickness, which was mild and improved over time. Conclusion: AI-assisted VR radiology education could be used to develop new and accessible ways of teaching a variety of radiology topics in a seamless and cost-effective way. This could be especially useful in supporting radiology education remotely in regions that lack local radiology expertise. © 2025},
keywords = {3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition},
pubstate = {published},
tppubtype = {article}
}
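As a rough illustration of the kind of report-grounded speechbot the study above describes, a minimal sketch follows; ask_case_bot and call_llm are hypothetical names, and the LLM call is stubbed so the example runs offline.

```python
# Illustrative sketch (not the authors' system) of grounding a teaching
# speechbot in an imaging report: the report text is injected into the
# prompt so answers stay tied to the case under discussion.
def call_llm(system_prompt: str, question: str) -> str:
    # Stand-in for any chat-completion API.
    return "The CT shows a right MCA territory infarct."

def ask_case_bot(report: str, question: str) -> str:
    system_prompt = (
        "You are a radiology teaching assistant. Answer only from the "
        f"following report:\n{report}"
    )
    return call_llm(system_prompt, question)

report = "CT head: hypodensity in the right MCA territory, no hemorrhage."
print(ask_case_bot(report, "What is the main finding?"))
```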
Nygren, T.; Samuelsson, M.; Hansson, P. -O.; Efimova, E.; Bachelder, S.
AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues Journal Article
In: International Journal of Artificial Intelligence in Education, 2025, ISSN: 15604292 (ISSN).
Abstract | Links | BibTeX | Tags: AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation
@article{nygren_ai_2025,
title = {AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues},
author = {T. Nygren and M. Samuelsson and P. -O. Hansson and E. Efimova and S. Bachelder},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007244772&doi=10.1007%2fs40593-025-00484-8&partnerID=40&md5=d3cb14a8117045505cbbeb174b32b88d},
doi = {10.1007/s40593-025-00484-8},
issn = {15604292 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Artificial Intelligence in Education},
abstract = {This study explores the potential role of AI-generated mentoring within simulated environments designed for teacher education, specifically focused on the challenges of teaching controversial issues. Using a mixed-methods approach, we empirically investigate the potential and challenges of AI-generated feedback compared to that provided by human experts when mentoring preservice teachers in the context of mixed reality simulations. Findings reveal that human experts offered more mixed and nuanced feedback than ChatGPT-4o and Perplexity, especially when identifying missed teaching opportunities and balancing classroom discussions. The AI models evaluated were publicly available pro versions of LLMs and were tested using detailed prompts and coding schemes aligned with educational theories. AI systems were not very good at identifying aspects of general, pedagogical or content knowledge based on Shulman’s theories but were still quite effective in generating feedback in line with human experts. The study highlights the promise of AI to enhance teacher training but underscores the importance of combining AI feedback with expert insights to address the complexities of real-world teaching. This research contributes to a growing understanding of AI's potential role and limitations in education. It suggests that, while AI can be valuable to scale mixed reality simulations, it should be carefully evaluated and balanced by human expertise in teacher education. © The Author(s) 2025.},
keywords = {AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation},
pubstate = {published},
tppubtype = {article}
}
Bendarkawi, J.; Ponce, A.; Mata, S. C.; Aliu, A.; Liu, Y.; Zhang, L.; Liaqat, A.; Rao, V. N.; Monroy-Hernández, A.
ConversAR: Exploring Embodied LLM-Powered Group Conversations in Augmented Reality for Second Language Learners Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented Reality (AR), Embodied agent, Embodied Agents, Language learning, Language Model, Large language model, large language models (LLMs), Population dynamics, Second language, Second Language Acquisition, Second language learners, Social dynamics, Turn-taking
@inproceedings{bendarkawi_conversar_2025,
title = {ConversAR: Exploring Embodied LLM-Powered Group Conversations in Augmented Reality for Second Language Learners},
author = {J. Bendarkawi and A. Ponce and S. C. Mata and A. Aliu and Y. Liu and L. Zhang and A. Liaqat and V. N. Rao and A. Monroy-Hernández},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005746128&doi=10.1145%2f3706599.3720162&partnerID=40&md5=8330d3e0cb735caffa828b848ab9a110},
doi = {10.1145/3706599.3720162},
isbn = {979-840071395-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Group conversations are valuable for second language (L2) learners as they provide opportunities to practice listening and speaking, exercise complex turn-taking skills, and experience group social dynamics in a target language. However, most existing Augmented Reality (AR)-based conversational learning tools focus on dyadic interactions rather than group dialogues. Although research has shown that AR can help reduce speaking anxiety and create a comfortable space for practicing speaking skills in dyadic scenarios, especially with Large Language Model (LLM)-based conversational agents, the potential for group language practice using these technologies remains largely unexplored. We introduce ConversAR, a GPT-4o-powered AR application that enables L2 learners to practice contextualized group conversations. Our system features two embodied LLM agents with vision-based scene understanding and live captions. In a system evaluation with 10 participants, users reported reduced speaking anxiety and increased learner autonomy compared to their perceptions of in-person practice with other learners. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented Reality (AR), Embodied agent, Embodied Agents, Language learning, Language Model, Large language model, large language models (LLMs), Population dynamics, Second language, Second Language Acquisition, Second language learners, Social dynamics, Turn-taking},
pubstate = {published},
tppubtype = {inproceedings}
}
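To make the group turn-taking setting of ConversAR concrete, here is a toy sketch of a learner conversing with two agents in round-robin turns; the scheduling policy and all names are assumptions for illustration, not the paper's mechanism.

```python
# Toy sketch of group turn-taking among two LLM agents and a learner:
# each agent takes a turn conditioned on the dialogue so far.
import itertools

def agent_reply(name: str, history: list[str]) -> str:
    # Stand-in for a per-agent LLM call conditioned on the dialogue.
    return f"{name}: (responds to) {history[-1]}"

def group_turns(learner_line: str, agents: list[str], rounds: int = 2) -> list[str]:
    history = [f"learner: {learner_line}"]
    for name in itertools.islice(itertools.cycle(agents), rounds * len(agents)):
        history.append(agent_reply(name, history))
    return history

for line in group_turns("¿Dónde está la biblioteca?", ["Ana", "Luis"]):
    print(line)
```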
Mereu, J.; Artizzu, V.; Carcangiu, A.; Spano, L. D.; Simeoli, L.; Mattioli, A.; Manca, M.; Santoro, C.; Paternò, F.
Empowering End-User in Creating eXtended Reality Content with a Conversational Chatbot Proceedings Article
In: L., Zaina; J.C., Campos; D., Spano; K., Luyten; P., Palanque; G., Veer; A., Ebert; S.R., Humayoun; V., Memmesheimer (Ed.): Lect. Notes Comput. Sci., pp. 126–137, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303191759-2 (ISBN).
Abstract | Links | BibTeX | Tags: Context, End-User Development, End-Users, Event condition action rules, Event-condition-action rules, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Virtualization
@inproceedings{mereu_empowering_2025,
title = {Empowering End-User in Creating eXtended Reality Content with a Conversational Chatbot},
author = {J. Mereu and V. Artizzu and A. Carcangiu and L. D. Spano and L. Simeoli and A. Mattioli and M. Manca and C. Santoro and F. Paternò},
editor = {Zaina L. and Campos J.C. and Spano D. and Luyten K. and Palanque P. and Veer G. and Ebert A. and Humayoun S.R. and Memmesheimer V.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007719800&doi=10.1007%2f978-3-031-91760-8_9&partnerID=40&md5=280b33b96bf2b250e515922072f92204},
doi = {10.1007/978-3-031-91760-8_9},
isbn = {03029743 (ISSN); 978-303191759-2 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15518 LNCS},
pages = {126–137},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Recent advancements in eXtended Reality (XR) technologies have found application across diverse domains. However, creating complex interactions within XR environments remains challenging for non-technical users. In this work, we present EUD4XR, a project that aims to: i) empower end-user developers (EUDevs) to customize XR environments spanning virtual objects and physical devices; and ii) provide an intelligent conversational agent that assists the user in defining behaviours. The agent can handle multimodal input to guide the EUDev through the rule-authoring process, using contextual knowledge of the virtual environment and its elements. By integrating conversational assistance, EUD4XR seeks to further lower the barriers for end-users to personalize XR experiences according to their needs. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Context, End-User Development, End-Users, Event condition action rules, Event-condition-action rules, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
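To ground the rule-authoring flow the EUD4XR abstract describes, a hedged sketch follows: a user utterance plus scene context goes to an LLM that returns a structured event-condition-action rule, which is then validated against the scene. All names and the JSON shape are assumptions, not EUD4XR's actual design.

```python
# Hedged sketch of conversational ECA rule authoring: the agent combines
# the utterance with scene context, asks an LLM for a structured rule,
# and rejects rules referencing objects absent from the scene.
import json

def call_llm(prompt: str) -> str:
    # Stand-in for a chat-completion API; canned reply keeps this runnable.
    return '{"event": "lamp is touched", "conditions": [], "actions": ["turn on lamp"]}'

def author_rule(utterance: str, scene_objects: list[str]) -> dict | None:
    prompt = (
        "Scene objects: " + ", ".join(scene_objects) + "\n"
        f"User said: {utterance!r}\n"
        'Reply with JSON: {"event": ..., "conditions": [...], "actions": [...]}'
    )
    try:
        rule = json.loads(call_llm(prompt))
    except json.JSONDecodeError:
        return None
    # Only accept rules that mention objects actually present in the scene.
    if not any(obj in rule["event"] for obj in scene_objects):
        return None
    return rule

print(author_rule("when I touch the lamp, switch it on", ["lamp", "door"]))
```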