AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Buldu, K. B.; Özdel, S.; Lau, K. H. Carrie; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.
CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality
@inproceedings{buldu_cuify_2025,
  title     = {CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR},
  author    = {K. B. Buldu and S. Özdel and K. H. Carrie Lau and M. Wang and D. Saad and S. Schönborn and A. Boch and E. Kasneci and E. Bozkir},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2FAIxVR63409.2025.00037&partnerID=40&md5=f11f49480d075aee04ec44cedc984844},
  doi       = {10.1109/AIxVR63409.2025.00037},
  isbn      = {9798331521578},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {192--197},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop as personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gatti, E.; Giunchi, D.; Numan, N.; Steed, A.
Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 136–141, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578 (ISBN).
Abstract | Links | BibTeX | Tags: Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR
@inproceedings{gatti_around_2025,
  title     = {Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR},
  author    = {E. Gatti and D. Giunchi and N. Numan and A. Steed},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000263662&doi=10.1109%2FAIxVR63409.2025.00027&partnerID=40&md5=ab95e803af14233db6ed307222632542},
  doi       = {10.1109/AIxVR63409.2025.00027},
  isbn      = {9798331521578},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {136--141},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Virtual Reality (VR) presents an immersive platform for storytelling, allowing narratives to unfold in highly engaging, interactive environments. Leveraging AI capabilities and image synthesis offers new possibilities for creating scalable, generative VR content. In this work, we use an LLM-driven VR storytelling platform to explore how AI-generated visuals and narrative elements impact the user experience in VR storytelling. Previously, we presented AIsop, a system to integrate LLM-generated text and images and TTS audio into a storytelling experience, where the narrative unfolds based on user input. In this paper, we present two user studies focusing on how AI-generated visuals influence narrative perception and the overall VR experience. Our findings highlight the positive impact of AI-generated pictorial content on the storytelling experience, highlighting areas for enhancement and further research in interactive narrative design. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.
Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521578 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method
@inproceedings{hafner_leveraging_2025,
  title     = {Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines},
  author    = {P. Häfner and F. Eisenlohr and A. Karande and M. Grethler and A. Mukherjee and N. Tran},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2FAIxVR63409.2025.00054&partnerID=40&md5=464de1fae1a7a9dbc4362b0a984e0cd4},
  doi       = {10.1109/AIxVR63409.2025.00054},
  isbn      = {9798331521578},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {281--285},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machine. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Angelopoulos, J.; Manettas, C.; Alexopoulos, K.
Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR) Proceedings Article
In: K., Alexopoulos; S., Makris; P., Stavropoulos (Ed.): Lect. Notes Mech. Eng., pp. 197–205, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 21954356 (ISSN); 978-303186488-9 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants
@inproceedings{angelopoulos_industrial_2025,
  title     = {Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR)},
  author    = {J. Angelopoulos and C. Manettas and K. Alexopoulos},
  editor    = {Alexopoulos, K. and Makris, S. and Stavropoulos, P.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001421726&doi=10.1007%2f978-3-031-86489-6_20&partnerID=40&md5=63be31b9f4dda4aafd6a641630506c09},
  doi       = {10.1007/978-3-031-86489-6_20},
  issn      = {21954356},
  isbn      = {978-303186488-9},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Mech. Eng.},
  pages     = {197--205},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {Traditional maintenance procedures often rely on manual data processing and human expertise, leading to inefficiencies and potential errors. In the context of Industry 4.0 several digital technologies, such as Artificial Intelligence (AI), Big Data Analytics (BDA), and eXtended Reality (XR) have been developed and are constantly being integrated in a plethora of manufacturing activities (including industrial maintenance), in an attempt to minimize human error, facilitate shop floor technicians, reduce costs as well as reduce equipment downtimes. The latest developments in the field of AI point towards Large Language Models (LLM) which can communicate with human operators in an intuitive manner. On the other hand, Augmented Reality, as part of XR technologies, offers useful functionalities for improving user perception and interaction with modern, complex industrial equipment. Therefore, the context of this research work lies in the development and training of an LLM in order to provide suggestions and actionable items for the mitigation of unforeseen events (e.g. equipment breakdowns), in order to facilitate shop-floor technicians during their everyday tasks. Paired with AR visualizations over the physical environment, the technicians will get instructions for performing tasks and checks on the industrial equipment in a manner similar to human-to-human communication. The functionality of the proposed framework extends to the integration of modules for exchanging information with the engineering department towards the scheduling of Maintenance and Repair Operations (MRO) as well as the creation of a repository of historical data in order to constantly retrain and optimize the LLM. © The Author(s) 2025.},
  keywords  = {Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Fernandez, J. A. V.; Lee, J. J.; Vacca, S. A. S.; Magana, A.; Peša, R.; Benes, B.; Popescu, V.
Hands-Free VR Proceedings Article
In: T., Bashford-Rogers; D., Meneveaux; M., Ammi; M., Ziat; S., Jänicke; H., Purchase; P., Radeva; A., Furnari; K., Bouatouch; A.A., Sousa (Ed.): Proc. Int. Jt. Conf. Comput. Vis. Imaging Comput. Graph. Theory Appl., pp. 533–542, Science and Technology Publications, Lda, 2025, ISBN: 21845921 (ISSN).
Abstract | Links | BibTeX | Tags: Deep learning, Large language model, Retrieval-Augmented Generation, Speech-to-Text, Virtual Reality
@inproceedings{fernandez_hands-free_2025,
  title     = {Hands-Free VR},
  author    = {J. A. V. Fernandez and J. J. Lee and S. A. S. Vacca and A. Magana and R. Peša and B. Benes and V. Popescu},
  editor    = {Bashford-Rogers, T. and Meneveaux, D. and Ammi, M. and Ziat, M. and Jänicke, S. and Purchase, H. and Radeva, P. and Furnari, A. and Bouatouch, K. and Sousa, A. A.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001963646&doi=10.5220%2f0013115100003912&partnerID=40&md5=a3f2f4e16bcd5e0579b38e062c987eab},
  doi       = {10.5220/0013115100003912},
  issn      = {21845921},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. Int. Jt. Conf. Comput. Vis. Imaging Comput. Graph. Theory Appl.},
  volume    = {1},
  pages     = {533--542},
  publisher = {Science and Technology Publications, Lda},
  abstract  = {We introduce Hands-Free VR, a voice-based natural-language interface for VR that allows interaction without additional hardware just using voice. The user voice command is converted into text using a fine-tuned speech-to-text deep-learning model. Then, the text is mapped to an executable VR command using an LLM, which is robust to natural language diversity. Hands-Free VR was evaluated in a within-subjects study (N = 22) where participants arranged objects using either a conventional VR interface or Hands-Free VR. The results confirm that Hands-Free VR is: (1) significantly more efficient than conventional VR interfaces in task completion time and user motion metrics; (2) highly rated for ease of use, intuitiveness, ergonomics, reliability, and desirability; (3) robust to English accents (20 participants were non-native speakers) and phonetic similarity, accurately transcribing 96.7\% of voice commands, and (3) robust to natural language diversity, mapping 97.83\% of transcriptions to executable commands. © 2025 by SCITEPRESS–Science and Technology Publications, Lda.},
  keywords  = {Deep learning, Large language model, Retrieval-Augmented Generation, Speech-to-Text, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alibrahim, Y.; Ibrahim, M.; Gurdayal, D.; Munshi, M.
AI speechbots and 3D segmentations in virtual reality improve radiology on-call training in resource-limited settings Journal Article
In: Intelligence-Based Medicine, vol. 11, 2025, ISSN: 26665212 (ISSN), (Publisher: Elsevier B.V.).
Abstract | Links | BibTeX | Tags: 3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition
@article{alibrahim_ai_2025,
  title     = {AI speechbots and 3D segmentations in virtual reality improve radiology on-call training in resource-limited settings},
  author    = {Y. Alibrahim and M. Ibrahim and D. Gurdayal and M. Munshi},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001472313&doi=10.1016%2Fj.ibmed.2025.100245&partnerID=40&md5=981139e173e781b67dba5a46be64de31},
  doi       = {10.1016/j.ibmed.2025.100245},
  issn      = {26665212},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Intelligence-Based Medicine},
  volume    = {11},
  abstract  = {Objective: Evaluate the use of large-language model (LLM) speechbot tools and deep learning-assisted generation of 3D reconstructions when integrated in a virtual reality (VR) setting to teach radiology on-call topics to radiology residents. Methods: Three first year radiology residents in Guyana were enrolled in an 8-week radiology course that focused on preparation for on-call duties. The course, delivered via VR headsets with custom software integrating LLM-powered speechbots trained on imaging reports and 3D reconstructions segmented with the help of a deep learning model. Each session focused on a specific radiology area, employing a didactic and case-based learning approach, enhanced with 3D reconstructions and an LLM-powered speechbot. Post-session, residents reassessed their knowledge and provided feedback on their VR and LLM-powered speechbot experiences. Results/discussion: Residents found that the 3D reconstructions segmented semi-automatically by deep learning algorithms and AI-driven self-learning via speechbot was highly valuable. The 3D reconstructions, especially in the interventional radiology session, were helpful and the benefit is augmented by VR where navigating the models is seamless and perception of depth is pronounced. Residents also found conversing with the AI-speechbot seamless and was valuable in their post session self-learning. The major drawback of VR was motion sickness, which was mild and improved over time. Conclusion: AI-assisted VR radiology education could be used to develop new and accessible ways of teaching a variety of radiology topics in a seamless and cost-effective way. This could be especially useful in supporting radiology education remotely in regions which lack local radiology expertise. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Elsevier B.V.},
  keywords  = {3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition},
  pubstate  = {published},
  tppubtype = {article}
}
Gaglio, G. F.; Vinanzi, S.; Cangelosi, A.; Chella, A.
Intention Reading Architecture for Virtual Agents Proceedings Article
In: O., Palinko; L., Bodenhagen; J.-J., Cabibihan; K., Fischer; S., Šabanović; K., Winkle; L., Behera; S.S., Ge; D., Chrysostomou; W., Jiang; H., He (Ed.): Lect. Notes Comput. Sci., pp. 488–497, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981963521-4 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine
@inproceedings{gaglio_intention_2025,
  title     = {Intention Reading Architecture for Virtual Agents},
  author    = {G. F. Gaglio and S. Vinanzi and A. Cangelosi and A. Chella},
  editor    = {Palinko, O. and Bodenhagen, L. and Cabibihan, J.-J. and Fischer, K. and Šabanović, S. and Winkle, K. and Behera, L. and Ge, S. S. and Chrysostomou, D. and Jiang, W. and He, H.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002042645&doi=10.1007%2f978-981-96-3522-1_41&partnerID=40&md5=70ccc7039785bb4ca4d45752f1d3587f},
  doi       = {10.1007/978-981-96-3522-1_41},
  issn      = {03029743},
  isbn      = {978-981963521-4},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  volume    = {15561 LNAI},
  pages     = {488--497},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {This work presents the development of a virtual agent designed specifically for use in the Metaverse, video games, and other virtual environments, capable of performing intention reading on a human-controlled avatar through a cognitive architecture that endows it with contextual awareness. The paper explores the adaptation of a cognitive architecture, originally developed for physical robots, to a fully virtual context, where it is integrated with a Large Language Model to create highly communicative virtual assistants. Although this work primarily focuses on virtual applications, integrating cognitive architectures with LLMs marks a significant step toward creating collaborative artificial agents capable of providing meaningful support by deeply understanding context and user intentions in digital environments. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
  keywords  = {Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331536459 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality
@inproceedings{chen_analyzing_2025,
  title     = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
  author    = {J. Chen and J. Grubert and P. O. Kristensson},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2FVR59515.2025.00045&partnerID=40&md5=9db6769cd401503605578c4b711152b9},
  doi       = {10.1109/VR59515.2025.00045},
  isbn      = {9798331536459},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
  pages     = {206--216},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems which involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kim, Y.; Aamir, Z.; Singh, M.; Boorboor, S.; Mueller, K.; Kaufman, A. E.
Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2756–2766, 2025, ISSN: 10772626 (ISSN), (Publisher: IEEE Computer Society).
Abstract | Links | BibTeX | Tags: adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages
@article{kim_explainable_2025,
  title     = {Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework},
  author    = {Y. Kim and Z. Aamir and M. Singh and S. Boorboor and K. Mueller and A. E. Kaufman},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003815583&doi=10.1109%2FTVCG.2025.3549537&partnerID=40&md5=bc5ac38eb19faa224282cf385f43799f},
  doi       = {10.1109/TVCG.2025.3549537},
  issn      = {10772626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {2756--2766},
  abstract  = {We present Explainable XR, an end-to-end framework for analyzing user behavior in diverse eXtended Reality (XR) environments by leveraging Large Language Models (LLMs) for data interpretation assistance. Existing XR user analytics frameworks face challenges in handling cross-virtuality - AR, VR, MR - transitions, multi-user collaborative application scenarios, and the complexity of multimodal data. Explainable XR addresses these challenges by providing a virtuality-agnostic solution for the collection, analysis, and visualization of immersive sessions. We propose three main components in our framework: (1) A novel user data recording schema, called User Action Descriptor (UAD), that can capture the users' multimodal actions, along with their intents and the contexts; (2) a platform-agnostic XR session recorder, and (3) a visual analytics interface that offers LLM-assisted insights tailored to the analysts' perspectives, facilitating the exploration and analysis of the recorded XR session data. We demonstrate the versatility of Explainable XR by demonstrating five use-case scenarios, in both individual and collaborative XR applications across virtualities. Our technical evaluation and user studies show that Explainable XR provides a highly usable analytics solution for understanding user actions and delivering multifaceted, actionable insights into user behaviors in immersive environments. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: IEEE Computer Society},
  keywords  = {adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages},
  pubstate  = {published},
  tppubtype = {article}
}
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 10772626 (ISSN), (Publisher: IEEE Computer Society).
Abstract | Links | BibTeX | Tags: % reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality
@article{chen_llmer_2025,
  title     = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
  author    = {J. Chen and X. Wu and T. Lan and B. Li},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2FTVCG.2025.3549549&partnerID=40&md5=50597473616678390f143a33082a13d3},
  doi       = {10.1109/TVCG.2025.3549549},
  issn      = {10772626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {2715--2724},
  abstract  = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. It leads to not only increased costs with pay-per-use models, but also elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80\% reduction in consumed tokens and around 60\% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: IEEE Computer Society},
  keywords  = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331536459 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
  title     = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
  author    = {Li, Z. and Zhang, H. and Peng, C. and Peiris, R.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2FVR59515.2025.00025&partnerID=40&md5=1987c128f6ec4bd24011388ef9ece179},
  doi       = {10.1109/VR59515.2025.00025},
  isbn      = {9798331536459},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
  pages     = {1--11},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings discuss the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Aloudat, M. Z.; Aboumadi, A.; Soliman, A.; Al-Mohammed, H. A.; al-Ali, M.; Mahgoub, A.; Barhamgi, M.; Yaacoub, E.
Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, 6G, and Edge Learning Journal Article
In: IEEE Access, vol. 13, pp. 58302–58350, 2025, ISSN: 21693536 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: 6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems
@article{aloudat_metaverse_2025,
  title     = {Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, {6G}, and Edge Learning},
  author    = {Aloudat, M. Z. and Aboumadi, A. and Soliman, A. and Al-Mohammed, H. A. and al-Ali, M. and Mahgoub, A. and Barhamgi, M. and Yaacoub, E.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003088610&doi=10.1109%2FACCESS.2025.3555753&partnerID=40&md5=c84a85efab6a29ee6916f5698922f720},
  doi       = {10.1109/ACCESS.2025.3555753},
  issn      = {2169-3536},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Access},
  volume    = {13},
  pages     = {58302--58350},
  abstract  = {With a focus on edge learning, blockchain, sixth generation (6G) wireless systems, semantic communication, and large language models (LLMs), this survey paper examines the revolutionary integration of cutting-edge technologies within the metaverse. This thorough examination highlights the critical role these technologies play in improving realism and user engagement on three main levels: technical, virtual, and physical. While the virtual layer focuses on building immersive experiences, the physical layer highlights improvements to the user interface through augmented reality (AR) goggles and virtual reality (VR) headsets. Blockchain-powered technical layer enables safe, decentralized communication. The survey highlights how the metaverse has the potential to drastically change how people interact in society by exploring applications in a variety of fields, such as immersive education, remote work, and entertainment. Concerns about privacy, scalability, and interoperability are raised, highlighting the necessity of continued study to realize the full potential of the metaverse. For scholars looking to broaden the reach and significance of the metaverse in the digital age, this paper is a useful tool. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
  keywords  = {6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems},
  pubstate  = {published},
  tppubtype = {article}
}
Afzal, M. Z.; Ali, S. K. A.; Stricker, D.; Eisert, P.; Hilsmann, A.; Pérez-Marcos, D.; Bianchi, M.; Crottaz-Herbette, S.; Ioris, R.; Mangina, E.; Sanguineti, M.; Salaberria, A.; de Lacalle, O. Lopez; García-Pablos, A.; Cuadros, M.
Next Generation XR Systems - Large Language Models Meet Augmented and Virtual Reality Journal Article
In: IEEE Computer Graphics and Applications, vol. 45, no. 1, pp. 43–55, 2025, ISSN: 02721716 (ISSN); 15581756 (ISSN), (Publisher: IEEE Computer Society).
Abstract | Links | BibTeX | Tags: adult, Article, Augmented and virtual realities, Augmented Reality, Awareness, Context-Aware, human, Information Retrieval, Knowledge model, Knowledge reasoning, Knowledge retrieval, Language Model, Large language model, Mixed reality, neurorehabilitation, Position papers, privacy, Real- time, Reasoning, Situational awareness, Virtual environments, Virtual Reality
@article{afzal_next_2025,
  title     = {Next Generation {XR} Systems - Large Language Models Meet Augmented and Virtual Reality},
  author    = {Afzal, M. Z. and Ali, S. K. A. and Stricker, D. and Eisert, P. and Hilsmann, A. and Pérez-Marcos, D. and Bianchi, M. and Crottaz-Herbette, S. and Ioris, R. and Mangina, E. and Sanguineti, M. and Salaberria, A. and {Lopez de Lacalle}, O. and García-Pablos, A. and Cuadros, M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003598602&doi=10.1109%2FMCG.2025.3548554&partnerID=40&md5=94e7efe987708afc9f066b906ce232b1},
  doi       = {10.1109/MCG.2025.3548554},
  issn      = {0272-1716, 1558-1756},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Computer Graphics and Applications},
  volume    = {45},
  number    = {1},
  pages     = {43--55},
  abstract  = {Extended reality (XR) is evolving rapidly, offering new paradigms for human-computer interaction. This position paper argues that integrating large language models (LLMs) with XR systems represents a fundamental shift toward more intelligent, context-aware, and adaptive mixed-reality experiences. We propose a structured framework built on three key pillars: first, perception and situational awareness, second, knowledge modeling and reasoning, and third, visualization and interaction. We believe leveraging LLMs within XR environments enables enhanced situational awareness, real-time knowledge retrieval, and dynamic user interaction, surpassing traditional XR capabilities. We highlight the potential of this integration in neurorehabilitation, safety training, and architectural design while underscoring ethical considerations, such as privacy, transparency, and inclusivity. This vision aims to spark discussion and drive research toward more intelligent, human-centric XR systems. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: IEEE Computer Society},
  keywords  = {adult, Article, Augmented and virtual realities, Augmented Reality, Awareness, Context-Aware, human, Information Retrieval, Knowledge model, Knowledge reasoning, Knowledge retrieval, Language Model, Large language model, Mixed reality, neurorehabilitation, Position papers, privacy, Real- time, Reasoning, Situational awareness, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Guo, H.; Liu, Z.; Tang, C.; Zhang, X.
An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning Journal Article
In: IEEE Access, vol. 13, pp. 81450–81461, 2025, ISSN: 21693536 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour
@article{guo_interactive_2025,
  title     = {An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning},
  author    = {Guo, H. and Liu, Z. and Tang, C. and Zhang, X.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105004059236&doi=10.1109%2FACCESS.2025.3565359&partnerID=40&md5=d7c978d103f69395f1a4ab99b3cee5e9},
  doi       = {10.1109/ACCESS.2025.3565359},
  issn      = {2169-3536},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Access},
  volume    = {13},
  pages     = {81450--81461},
  abstract  = {With the wide application of large language models (LLMs) and the rapid growth of metaverse tourism demand, the digital tour and personalized interaction of historical sites have become the key to improving users’ digital travel experience. Creating an environment where users can access rich cultural information and enjoy personalized, immersive experiences is a crucial issue in the field of digital cultural travel. To this end, we propose a tourism information multimodal generation personalized question-answering interactive framework TIGMI (Tourism Information Generation and Multimodal Interaction) based on LLM fine-tuning, which aims to provide a richer and more in-depth experience for virtual tours of historical monuments. Taking Qutan Temple as an example, the framework combines LLM, retrieval augmented generation (RAG), and auto-prompting engineering techniques to retrieve accurate information related to the historical monument from external knowledge bases and seamlessly integrates it into the generated content. This integration mechanism ensures the accuracy and relevance of the generated answers. Through TIGMI’s LLM-driven command interaction mechanism in the 3D digital scene of Qutan Temple, users are able to interact with the building and scene environment in a personalized and real-time manner, successfully integrating historical and cultural information with modern digital technology. This integration significantly enhances the naturalness of interaction and personalizes the user experience, thereby improving user immersion and information acquisition efficiency. Evaluation results show that TIGMI excels in question-answering and multimodal interactions, significantly enhancing the depth and breadth of services provided by the personalized virtual tour. We conclude by addressing the limitations of TIGMI and briefly discuss how future research will focus on further improving the accuracy and user satisfaction of the generated content to adapt to the dynamically changing tourism environment. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
  keywords  = {Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour},
  pubstate  = {published},
  tppubtype = {article}
}
Sousa, R. T.; Oliveira, E. A. M.; Cintra, L. M. F.; Filho, A. R. G. Galvão
Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 168–171, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
Abstract | Links | BibTeX | Tags: AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training
@inproceedings{sousa_transformative_2025,
  title     = {Transformative Technologies for Rehabilitation: Leveraging Immersive and {AI-Driven} Solutions to Reduce Recidivism and Promote Decent Work},
  author    = {Sousa, R. T. and Oliveira, E. A. M. and Cintra, L. M. F. and Galvão Filho, A. R. G.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140551&doi=10.1109%2FVRW66409.2025.00042&partnerID=40&md5=a8dbe15493fd8361602d049f2b09efe3},
  doi       = {10.1109/VRW66409.2025.00042},
  isbn      = {9798331514846},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
  pages     = {168--171},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {The reintegration of incarcerated individuals into society presents significant challenges, particularly in addressing barriers related to vocational training, social skill development, and emotional rehabilitation. Immersive technologies, such as Virtual Reality and Augmented Reality, combined with generative Artificial Intelligence (AI) and Large Language Models, offer innovative opportunities to enhance these areas. These technologies create practical, controlled environments for skill acquisition and behavioral training, while generative AI enables dynamic, personalized, and adaptive experiences. This paper explores the broader potential of these integrated technologies in supporting rehabilitation, reducing recidivism, and fostering sustainable employment opportunities and these initiatives align with the overarching equity objective of ensuring Decent Work for All, reinforcing the commitment to inclusive and equitable progress across diverse communities, through the transformative potential of immersive and AI-driven systems in correctional systems. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {AI- Driven Rehabilitation, Artificial intelligence- driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Behravan, M.; Grǎcanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
  title     = {From Voices to Worlds: Developing an {AI-Powered} Framework for {3D} Object Generation in Augmented Reality},
  author    = {Behravan, M. and Grǎcanin, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2FVRW66409.2025.00038&partnerID=40&md5=34311e63349697801caf849bc231e879},
  doi       = {10.1109/VRW66409.2025.00038},
  isbn      = {9798331514846},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
  pages     = {150--155},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gao, H.; Xie, Y.; Kasneci, E.
PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education Journal Article
In: International Journal of Human-Computer Interaction, 2025, ISSN: 10447318 (ISSN); 15327590 (ISSN), (Publisher: Taylor and Francis Ltd.).
Abstract | Links | BibTeX | Tags: Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization
@article{gao_pervrml_2025,
  title     = {{PerVRML}: {ChatGPT}-Driven Personalized {VR} Environments for Machine Learning Education},
  author    = {Gao, H. and Xie, Y. and Kasneci, E.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005776517&doi=10.1080%2F10447318.2025.2504188&partnerID=40&md5=27accdeba3e1e2202fc1102053d54b7c},
  doi       = {10.1080/10447318.2025.2504188},
  issn      = {1044-7318, 1532-7590},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {International Journal of Human-Computer Interaction},
  abstract  = {The advent of large language models (LLMs) such as ChatGPT has demonstrated significant potential for advancing educational technologies. Recently, growing interest has emerged in integrating ChatGPT with virtual reality (VR) to provide interactive and dynamic learning environments. This study explores the effectiveness of ChatGPT-driven VR in facilitating machine learning education through PerVRML. PerVRML incorporates a ChatGPT-powered avatar that provides real-time assistance and uses LLMs to personalize learning paths based on various sensor data from VR. A between-subjects design was employed to compare two learning modes: personalized and non-personalized. Quantitative data were collected from assessments, user experience surveys, and interaction metrics. The results indicate that while both learning modes supported learning effectively, ChatGPT-powered personalization significantly improved learning outcomes and had distinct impacts on user feedback. These findings underscore the potential of ChatGPT-enhanced VR to deliver adaptive and personalized educational experiences. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Taylor and Francis Ltd.},
  keywords  = {Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization},
  pubstate  = {published},
  tppubtype = {article}
}
Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.
Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article
In: A., Del Bue; C., Canton; J., Pont-Tuset; T., Tommasi (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303191571-0 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization
@inproceedings{lau_evaluating_2025,
  title     = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional {Scottish} Curling},
  author    = {Lau, K. H. C. and Bozkir, E. and Gao, H. and Kasneci, E.},
  editor    = {Del Bue, A. and Canton, C. and Pont-Tuset, J. and Tommasi, T.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
  doi       = {10.1007/978-3-031-91572-7_11},
  isbn      = {978-3-031-91571-0},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {15628},
  pages     = {177--195},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
  keywords  = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Nygren, T.; Samuelsson, M.; Hansson, P. -O.; Efimova, E.; Bachelder, S.
AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues Journal Article
In: International Journal of Artificial Intelligence in Education, 2025, ISSN: 15604306 (ISSN); 15604292 (ISSN), (Publisher: Springer).
Abstract | Links | BibTeX | Tags: AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation
@article{nygren_ai_2025,
  title     = {{AI} Versus Human Feedback in Mixed Reality Simulations: Comparing {LLM} and Expert Mentoring in Preservice Teacher Education on Controversial Issues},
  author    = {Nygren, T. and Samuelsson, M. and Hansson, P.-O. and Efimova, E. and Bachelder, S.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007244772&doi=10.1007%2Fs40593-025-00484-8&partnerID=40&md5=3404a614af6fe4d4d2cb284060600e3c},
  doi       = {10.1007/s40593-025-00484-8},
  issn      = {1560-4306, 1560-4292},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {International Journal of Artificial Intelligence in Education},
  abstract  = {This study explores the potential role of AI-generated mentoring within simulated environments designed for teacher education, specifically focused on the challenges of teaching controversial issues. Using a mixed-methods approach, we empirically investigate the potential and challenges of AI-generated feedback compared to that provided by human experts when mentoring preservice teachers in the context of mixed reality simulations. Findings reveal that human experts offered more mixed and nuanced feedback than ChatGPT-4o and Perplexity, especially when identifying missed teaching opportunities and balancing classroom discussions. The AI models evaluated were publicly available pro versions of LLMs and were tested using detailed prompts and coding schemes aligned with educational theories. AI systems were not very good at identifying aspects of general, pedagogical or content knowledge based on Shulman’s theories but were still quite effective in generating feedback in line with human experts. The study highlights the promise of AI to enhance teacher training but underscores the importance of combining AI feedback with expert insights to address the complexities of real-world teaching. This research contributes to a growing understanding of AI's potential role and limitations in education. It suggests that, while AI can be valuable to scale mixed reality simulations, it should be carefully evaluated and balanced by human expertise in teacher education. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Springer},
  keywords  = {AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation},
  pubstate  = {published},
  tppubtype = {article}
}
Mereu, J.; Artizzu, V.; Carcangiu, A.; Spano, L. D.; Simeoli, L.; Mattioli, A.; Manca, M.; Santoro, C.; Paternò, F.
Empowering End-User in Creating eXtended Reality Content with a Conversational Chatbot Proceedings Article
In: L., Zaina; J.C., Campos; D., Spano; K., Luyten; P., Palanque; G., Veer; A., Ebert; S.R., Humayoun; V., Memmesheimer (Ed.): Lect. Notes Comput. Sci., pp. 126–137, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303191759-2 (ISBN).
Abstract | Links | BibTeX | Tags: Context, End-User Development, End-Users, Event condition action rules, Event-condition-action rules, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Virtualization
@inproceedings{mereu_empowering_2025,
  title     = {Empowering End-User in Creating {eXtended} Reality Content with a Conversational Chatbot},
  author    = {Mereu, J. and Artizzu, V. and Carcangiu, A. and Spano, L. D. and Simeoli, L. and Mattioli, A. and Manca, M. and Santoro, C. and Paternò, F.},
  editor    = {Zaina, L. and Campos, J. C. and Spano, D. and Luyten, K. and Palanque, P. and Veer, G. and Ebert, A. and Humayoun, S. R. and Memmesheimer, V.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007719800&doi=10.1007%2f978-3-031-91760-8_9&partnerID=40&md5=280b33b96bf2b250e515922072f92204},
  doi       = {10.1007/978-3-031-91760-8_9},
  isbn      = {978-3-031-91759-2},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {15518},
  pages     = {126--137},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {Recent advancements in eXtended Reality (XR) technologies have found application across diverse domains. However, creating complex interactions within XR environments remains challenging for non-technical users. In this work, we present EUD4XR, a project aiming to: i) empower end-user developers (EUDevs) to customize XR environments by supporting virtual objects and physical devices; ii) involve an intelligent conversational agent which assists the user in defining behaviours. The agent can handle multimodal input, to drive the EUDev during the rule authoring process, using contextual knowledge of the virtual environment and its elements. By integrating conversational assistance, EUD4XR seeks to lower further the usage barriers for end-users to personalize XR experiences according to their needs. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
  keywords  = {Context, End-User Development, End-Users, Event condition action rules, Event-condition-action rules, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Shoa, A.; Friedman, D.
Milo: an LLM-based virtual human open-source platform for extended reality Journal Article
In: Frontiers in Virtual Reality, vol. 6, 2025, ISSN: 26734192 (ISSN), (Publisher: Frontiers Media SA).
Abstract | Links | BibTeX | Tags: Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR
@article{shoa_milo_2025,
  title     = {Milo: an {LLM}-based virtual human open-source platform for extended reality},
  author    = {Shoa, A. and Friedman, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008867438&doi=10.3389%2Ffrvir.2025.1555173&partnerID=40&md5=e9e59ae0063a978f2637f04f87352fe3},
  doi       = {10.3389/frvir.2025.1555173},
  issn      = {2673-4192},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Frontiers in Virtual Reality},
  volume    = {6},
  abstract  = {Large language models (LLMs) have made dramatic advancements in recent years, allowing for a new generation of dialogue agents. This allows for new types of social experiences with virtual humans, in both virtual and augmented reality. In this paper, we introduce an open-source system specifically designed for implementing LLM-based virtual humans within extended reality (XR) environments. Our system integrates into XR platforms, providing a robust framework for the creation and management of interactive virtual agents. We detail the design and architecture of the system and showcase the system’s versatility through various scenarios. In addition to a straightforward single-agent setup, we demonstrate how an LLM-based virtual human can attend a multi-user virtual reality (VR) meeting, enhance a VR self-talk session, and take part in an augmented reality (AR) live event. We provide lessons learned, with focus on the possibilities for human intervention during live events. We provide the system as open-source, inviting collaboration and innovation within the community, paving the way for new types of social experiences. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Frontiers Media SA},
  keywords  = {Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR},
  pubstate  = {published},
  tppubtype = {article}
}
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: C., Santoro; A., Schmidt; M., Matera; A., Bellucci (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303195451-1 (ISBN).
Abstract | Links | BibTeX | Tags: 'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
  title     = {Conversational Rule Creation in {XR}: User's Strategies in {VR} and {AR} Automation},
  author    = {Carcangiu, A. and Manca, M. and Mereu, J. and Santoro, C. and Simeoli, L. and Spano, L. D.},
  editor    = {Santoro, C. and Schmidt, A. and Matera, M. and Bellucci, A.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
  doi       = {10.1007/978-3-031-95452-8_4},
  issn      = {0302-9743},
  isbn      = {978-3-031-95451-1},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {15713},
  pages     = {59--79},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction setting (keywords, difficulties in expressing conditions, task success), even if we registered different adaptations for each setting (verbal structure, event vs action first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
  keywords  = {'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ozeki, R.; Yonekura, H.; Rizk, H.; Yamaguchi, H.
Cellular-based Indoor Localization with Adapted LLM and Label-aware Contrastive Learning Proceedings Article
In: pp. 138–145, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331586461 (ISBN).
Abstract | Links | BibTeX | Tags: Cellular Network, Cellulars, Computer interaction, Contrastive Learning, Deep learning, Human computer interaction, Indoor Localization, Indoor Navigation, Indoor positioning, Indoor positioning systems, Language Model, Large language model, Learning systems, Mobile computing, Mobile-computing, Signal processing, Smart Environment, Wireless networks
@inproceedings{ozeki_cellular-based_2025,
  title     = {Cellular-based Indoor Localization with Adapted {LLM} and Label-aware Contrastive Learning},
  author    = {Ozeki, R. and Yonekura, H. and Rizk, H. and Yamaguchi, H.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010820397&doi=10.1109%2FSMARTCOMP65954.2025.00070&partnerID=40&md5=9e15d9f4225f00cd57bedc511aad27d9},
  doi       = {10.1109/SMARTCOMP65954.2025.00070},
  isbn      = {9798331586461},
  year      = {2025},
  date      = {2025-01-01},
  pages     = {138--145},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  internal-note = {review: required booktitle is missing for @inproceedings; the DOI suggests IEEE SMARTCOMP 2025 -- confirm against the source record},
  abstract  = {Accurate indoor positioning is essential for mobile computing, human-computer interaction, and next-generation smart environments, enabling applications in indoor navigation, augmented reality, personalized services, healthcare, and emergency response. Cellular signal fingerprinting has emerged as a widely adopted solution, with deep learning models achieving state-of-the-art performance. However, existing approaches face critical deployment challenges, including labor-intensive fingerprinting, sparse reference points, and missing RSS values caused by environmental interference, hardware variability, and dynamic signal fluctuations. These limitations hinder their scalability, adaptability, and real-world usability in complex indoor environments. To address these challenges, we present GPT2Loc a novel indoor localization framework that integrates LLM with label-aware contrastive learning, improving accuracy while reducing reliance on extensive fingerprinting. LLMs effectively extract meaningful spatial features from incomplete and noisy RSS data, enabling robust localization even in sparsely finger-printed areas. Our label-aware contrastive learning approach further enhances generalization by aligning latent representations with spatial relationships, allowing GPT2Loc to interpolate user locations in unseen areas and mitigate signal inconsistencies. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Cellular Network, Cellulars, Computer interaction, Contrastive Learning, Deep learning, Human computer interaction, Indoor Localization, Indoor Navigation, Indoor positioning, Indoor positioning systems, Language Model, Large language model, Learning systems, Mobile computing, Mobile-computing, Signal processing, Smart Environment, Wireless networks},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Su, Z.
Integrating digital twin and large language models for advanced tower crane monitoring Proceedings Article
In: pp. 1133–1137, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331532598 (ISBN).
Abstract | Links | BibTeX | Tags: Alarm systems, Anomaly detection, Behavioral Research, Cognitive Systems, Computer architecture, digital twin, Language Model, Large language model, Manual inspection, Micro services, Micro-service, Monitoring approach, Multi-modal, Operational safety, Real- time, Risk perception, Safety engineering, Three dimensional computer graphics, Tower Crane Monitoring, Tower cranes, Virtual Reality, Visualization
@inproceedings{su_integrating_2025,
  title     = {Integrating digital twin and large language models for advanced tower crane monitoring},
  author    = {Su, Z.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010831175&doi=10.1109%2FEEICE65049.2025.11033896&partnerID=40&md5=cba8e7e255ee4c394b6c47a996977fc9},
  doi       = {10.1109/EEICE65049.2025.11033896},
  isbn      = {9798331532598},
  year      = {2025},
  date      = {2025-01-01},
  pages     = {1133--1137},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  internal-note = {review: required booktitle is missing for @inproceedings; the DOI suggests IEEE EEICE 2025 -- confirm against the source record},
  abstract  = {Traditional monitoring approaches for tower crane operational safety primarily rely on manual inspections and univariate sensor threshold alarms, which exhibit significant limitations including delayed dynamic response and insufficient risk prediction capabilities, failing to meet real-time safety requirements in complex construction scenarios. To address these challenges, this study proposes an innovative intelligent monitoring system that integrates digital twin technology with multimodal large language models (MLLMs). The system first constructs a 3D digital twin of the crane using IoT-enabled digital twin technology, establishing multidimensional dynamic mapping between physical entities and virtual models to create a comprehensive digital representation encompassing mechanical structures, motion trajectories, and environmental parameters. Building upon this foundation, a multimodal MLLM-based analytical framework is designed to intelligently process surveillance video streams and identify potential safety hazards. The system employs a microservices architecture to develop a web-based visualization platform that integrates real-time situational awareness, abnormal behavior detection, operational status monitoring, and early warning functionalities. Experimental results demonstrate the system's capability to monitor crane operations in real time while effectively identifying potential risks and anomalies. The research contributes novel methodologies for digital twin construction, multimodal cognitive model architectures, and virtual-physical fusion warning mechanisms, providing both theoretical foundations and practical solutions for advancing safety management systems in construction sites. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Alarm systems, Anomaly detection, Behavioral Research, Cognitive Systems, Computer architecture, digital twin, Language Model, Large language model, Manual inspection, Micro services, Micro-service, Monitoring approach, Multi-modal, Operational safety, Real- time, Risk perception, Safety engineering, Three dimensional computer graphics, Tower Crane Monitoring, Tower cranes, Virtual Reality, Visualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yang, G.; Sun, Z.; Wang, Y.
ShellBox: Adversarially Enhanced LLM-Interactive Honeypot Framework Journal Article
In: IEEE Access, vol. 13, pp. 143618–143630, 2025, ISSN: 21693536 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Complex networks, Dynamic error, Dynamic error simulation, Dynamics, Error simulation, Errors, honeypot, Honeypots, Interaction history, Interaction history pruning algorithm, Language Model, Large language model, Multi-turn, Network attack, Network attack and multi-turn, Network Security, Pruning algorithms, Systems analysis, Virtual Reality
@article{yang_shellbox_2025,
  title     = {{ShellBox}: Adversarially Enhanced {LLM}-Interactive Honeypot Framework},
  author    = {Yang, G. and Sun, Z. and Wang, Y.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013277136&doi=10.1109%2FACCESS.2025.3598779&partnerID=40&md5=f776206ec2788fc77e3766f209bc82f1},
  doi       = {10.1109/ACCESS.2025.3598779},
  issn      = {2169-3536},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Access},
  volume    = {13},
  pages     = {143618--143630},
  abstract  = {Honeypot technology is an active defence strategy designed to mitigate the asymmetry inherent in network attacks and defence dynamics. In recent years, honeypot systems powered by large language models (LLMs) have become a focal point of research owing to their ability to simulate complex network environments and generate highly deceptive virtual assets. However, response inconsistency in multi-turn dialogues and prompt injection vulnerabilities inherent to LLMs significantly reduce the deceptive capability of honeypots. This study first defines the threat model, and then introduces a relevance-based interaction history pruning algorithm and dynamic error simulation strategy to mitigate these challenges. Considering practical issues such as response latency and network instability, our experiments were conducted using multiple locally deployed open-source LLMs. The experimental results demonstrated that the proposed dynamic error simulation mechanism achieved a maximum accuracy of 81.63% for the DeepSeek-R1 model. Furthermore, applying the interaction history pruning algorithm improved the turn-level coherence score (TCS) by 34.5% compared with the baseline. Finally, this paper outlines potential future research directions for LLM-based honeypot technologies in active multi-turn mechanisms. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
  keywords  = {Complex networks, Dynamic error, Dynamic error simulation, Dynamics, Error simulation, Errors, honeypot, Honeypots, Interaction history, Interaction history pruning algorithm, Language Model, Large language model, Multi-turn, Network attack, Network attack and multi-turn, Network Security, Pruning algorithms, Systems analysis, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}