AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Tortora, A.; Amaro, I.; Della Greca, A.; Barra, P.
Exploring the Role of Generative Artificial Intelligence in Virtual Reality: Opportunities and Future Perspectives Proceedings Article
In: Chen, J. Y. C.; Fragomeni, G. (Ed.): Lect. Notes Comput. Sci., pp. 125–142, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-93699-9.
Abstract | Links | BibTeX | Tags: Ethical technology, Future perspectives, Generative AI, Image modeling, Immersive, immersive experience, Immersive Experiences, Information Management, Language Model, Personnel training, Professional training, Real-time, Sensitive data, Training design, Users' experiences, Virtual Reality
@inproceedings{tortora_exploring_2025,
title = {Exploring the Role of Generative Artificial Intelligence in Virtual Reality: Opportunities and Future Perspectives},
author = {A. Tortora and I. Amaro and A. Della Greca and P. Barra},
editor = {Chen, J. Y. C. and Fragomeni, G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007788684&doi=10.1007%2f978-3-031-93700-2_9&partnerID=40&md5=7b69183bbf8172f9595f939254fb6831},
doi = {10.1007/978-3-031-93700-2_9},
issn = {0302-9743},
isbn = {978-3-031-93699-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15788 LNCS},
pages = {125–142},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In recent years, generative AI models, such as language and image models, have started to revolutionize virtual reality (VR) by offering new opportunities for immersive and personalized interaction. This paper explores the potential of these Intelligent Augmentation technologies in the context of VR, analyzing how the generation of text and images in real time can enhance the user experience through dynamic and personalized environments and contents. The integration of generative AI in VR scenarios holds promise in multiple fields, including education, professional training, design, and healthcare. However, their implementation involves significant challenges, such as privacy management, data security, and ethical issues related to cognitive manipulation and representation of reality. Through an overview of current applications and future prospects, this paper highlights the crucial role of generative AI in enhancing VR, helping to outline a path for the ethical and sustainable development of these immersive technologies. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Ethical technology, Future perspectives, Generative AI, Image modeling, Immersive, immersive experience, Immersive Experiences, Information Management, Language Model, Personnel training, Professional training, Real-time, Sensitive data, Training design, Users' experiences, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
title = {I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication},
author = {K. Li and F. Mostajeran and S. Rings and L. Kruse and S. Schmidt and M. Arz and E. Wolf and F. Steinicke},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2fVRW66409.2025.00469&partnerID=40&md5=77e755f6a059f81e81c18987f58d00cc},
doi = {10.1109/VRW66409.2025.00469},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1648–1649},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 IEEE.},
keywords = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9.
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
title = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
author = {Z. Li and H. Zhang and C. Peng and R. Peiris},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2fVR59515.2025.00025&partnerID=40&md5=60f22109e054c9035a0c2210bb797039},
doi = {10.1109/VR59515.2025.00025},
isbn = {979-833153645-9},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {1–11},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings highlight the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 IEEE.},
keywords = {Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
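The central mechanism in Li et al. above is a text schema that serializes the VR scene and its interaction affordances into the LLM prompt. The paper's actual schema is not reproduced in this listing, so the minimal Python sketch below is an invented approximation: the scene fields, the ACTION(...) convention, and the call_llm stub are all illustrative assumptions.

import json

# Hypothetical scene state; the authors' real schema is not shown in this
# listing, so these field names are illustrative assumptions.
scene = {
    "room": "apothecary shop",
    "objects": [
        {"id": "mortar_01", "type": "mortar", "position": [0.4, 0.9, -1.2],
         "affordances": ["grab", "grind"]},
        {"id": "herb_03", "type": "dried sage", "position": [0.1, 0.9, -1.0],
         "affordances": ["grab", "combine"]},
    ],
    "recent_user_action": "picked up herb_03",
}

def build_agent_prompt(scene: dict, user_utterance: str) -> str:
    """Serialize the scene to text so the agent can ground its dialogue
    and spatial actions in the environment."""
    return (
        "You are a role-play character in a VR scene.\n"
        f"Scene state (JSON):\n{json.dumps(scene, indent=2)}\n"
        f"User says: {user_utterance!r}\n"
        "Reply in character; if appropriate, emit one action as "
        "ACTION(<object_id>, <affordance>)."
    )

def call_llm(prompt: str) -> str:
    """Placeholder for a real chat-completion call."""
    return "Fresh sage, excellent. ACTION(mortar_01, grind)"

print(call_llm(build_agent_prompt(scene, "Can you grind this for me?")))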
Song, T.; Liu, Z.; Zhao, R.; Fu, J.
ElderEase AR: Enhancing Elderly Daily Living with the Multimodal Large Language Model and Augmented Reality Proceedings Article
In: ICVRT - Proc. Int. Conf. Virtual Real. Technol., pp. 60–67, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071018-6.
Abstract | Links | BibTeX | Tags: Age-related, Assisted living, Augmented Reality, Augmented reality technology, Daily Life Support, Daily living, Daily-life supports, Elderly, Elderly users, Independent living, Independent living systems, Language Model, Modeling languages, Multi agent systems, Multi-modal, Multimodal large language model
@inproceedings{song_elderease_2025,
title = {ElderEase AR: Enhancing Elderly Daily Living with the Multimodal Large Language Model and Augmented Reality},
author = {T. Song and Z. Liu and R. Zhao and J. Fu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001924899&doi=10.1145%2f3711496.3711505&partnerID=40&md5=4df693735547b505172657a73359f3ca},
doi = {10.1145/3711496.3711505},
isbn = {979-840071018-6},
year = {2025},
date = {2025-01-01},
booktitle = {ICVRT - Proc. Int. Conf. Virtual Real. Technol.},
pages = {60–67},
publisher = {Association for Computing Machinery, Inc},
abstract = {Elderly individuals often face challenges in independent living due to age-related cognitive and physical decline. To address these issues, we propose an innovative Augmented Reality (AR) system, “ElderEase AR”, designed to assist elderly users in their daily lives by leveraging a Multimodal Large Language Model (MLLM). This system enables elderly users to capture images of their surroundings and ask related questions, providing context-aware feedback. We evaluated the system’s perceived ease-of-use and feasibility through a pilot study involving 30 elderly users, aiming to enhance their independence and quality of life. Our system integrates advanced AR technology with an intelligent agent trained on multimodal datasets. Through prompt engineering, the agent is tailored to respond in a manner that aligns with the speaking style of elderly users. Experimental results demonstrate high accuracy in object recognition and question answering, with positive feedback from user trials. Specifically, the system accurately identified objects in various environments and provided relevant answers to user queries. This study highlights the powerful potential of AR and AI technologies in creating support tools for the elderly. It suggests directions for future improvements and applications, such as enhancing the system’s adaptability to different user needs and expanding its functionality to cover more aspects of daily living. © 2024 Copyright held by the owner/author(s).},
keywords = {Age-related, Assisted living, Augmented Reality, Augmented reality technology, Daily Life Support, Daily living, Daily-life supports, Elderly, Elderly users, Independent living, Independent living systems, Language Model, Modeling languages, Multi agent systems, Multi-modal, Multimodal large language model},
pubstate = {published},
tppubtype = {inproceedings}
}
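ElderEase AR's interaction loop (capture a frame, ask a question, receive context-aware feedback in an elder-friendly style) can be summarized in a short sketch. The persona prompt wording and the query_mllm stand-in are assumptions, not taken from the paper.

import base64
from pathlib import Path

# Illustrative persona prompt; the paper's prompt-engineered wording is
# not reproduced in this listing.
SYSTEM_PROMPT = (
    "You are a patient assistant for an elderly user. Answer briefly, in "
    "plain, friendly sentences, and avoid technical jargon."
)

def ask_about_surroundings(image_path: str, question: str) -> str:
    """Send one AR camera frame plus the user's spoken question to a
    multimodal LLM; query_mllm stands in for the real model call."""
    image_b64 = base64.b64encode(Path(image_path).read_bytes()).decode()
    return query_mllm(SYSTEM_PROMPT, image_b64, question)

def query_mllm(system: str, image_b64: str, text: str) -> str:
    # Placeholder: a production system would call a hosted MLLM here.
    return "That bottle on the table is your blood-pressure medicine."

if __name__ == "__main__":
    Path("frame.jpg").write_bytes(b"\xff\xd8\xff")  # stand-in camera frame
    print(ask_about_surroundings("frame.jpg", "What is this bottle for?"))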
Song, T.; Pabst, F.; Eck, U.; Navab, N.
Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2901–2911, 2025, ISSN: 1077-2626.
Abstract | Links | BibTeX | Tags: 3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult
@article{song_enhancing_2025,
title = {Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations},
author = {T. Song and F. Pabst and U. Eck and N. Navab},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003687673&doi=10.1109%2fTVCG.2025.3549181&partnerID=40&md5=1d46569933582ecf5e967f0794aafc07},
doi = {10.1109/TVCG.2025.3549181},
issn = {1077-2626},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2901–2911},
abstract = {Robotic ultrasound systems have the potential to improve medical diagnostics, but patient acceptance remains a key challenge. To address this, we propose a novel system that combines an AI-based virtual agent, powered by a large language model (LLM), with three mixed reality visualizations aimed at enhancing patient comfort and trust. The LLM enables the virtual assistant to engage in natural, conversational dialogue with patients, answering questions in any format and offering real-time reassurance, creating a more intelligent and reliable interaction. The virtual assistant is animated as controlling the ultrasound probe, giving the impression that the robot is guided by the assistant. The first visualization employs augmented reality (AR), allowing patients to see the real world and the robot with the virtual avatar superimposed. The second visualization is an augmented virtuality (AV) environment, where the real-world body part being scanned is visible, while a 3D Gaussian Splatting reconstruction of the room, excluding the robot, forms the virtual environment. The third is a fully immersive virtual reality (VR) experience, featuring the same 3D reconstruction but entirely virtual, where the patient sees a virtual representation of their body being scanned in a robot-free environment. In this case, the virtual ultrasound probe mirrors the movement of the probe controlled by the robot, creating a synchronized experience as it touches and moves over the patient's virtual body. We conducted a comprehensive agent-guided robotic ultrasound study with all participants, comparing these visualizations against a standard robotic ultrasound procedure. Results showed significant improvements in patient trust, acceptance, and comfort. Based on these findings, we offer insights into designing future mixed reality visualizations and virtual agents to further enhance patient comfort and acceptance in autonomous medical procedures. © 1995-2012 IEEE.},
keywords = {3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult},
pubstate = {published},
tppubtype = {article}
}
Coronado, A.; Carvalho, S. T.; Berretta, L.
See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 451–457, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0.
Abstract | Links | BibTeX | Tags: Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people
@inproceedings{coronado_see_2025,
title = {See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People},
author = {A. Coronado and S. T. Carvalho and L. Berretta},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007991842&doi=10.1145%2f3706370.3731641&partnerID=40&md5=2f7cb1535d39d5e59b1f43f773de3272},
doi = {10.1145/3706370.3731641},
isbn = {979-840071391-0},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {451–457},
publisher = {Association for Computing Machinery, Inc},
abstract = {Extended Reality (XR) is quickly expanding "as the next major technology wave in personal computing". Nevertheless, this expansion and adoption could also exclude certain disabled users, particularly people with visual impairment (VIP). According to the World Health Organization (WHO) in their 2019 publication, there were at least 2.2 billion people with visual impairment, a number that is also estimated to have increased in recent years. Therefore, it is important to include disabled users, especially visually impaired people, in the design of Head-Mounted Displays and Extended Reality environments. Indeed, this objective can be pursued by incorporating Multimodal Large Language Model (MLLM) technology, which can assist visually impaired people. As a case study, this study employs different prompts that result in environment descriptions from an MLLM integrated into a virtual reality (VR) escape room. Therefore, six potential prompts were engineered to generate valuable outputs for visually impaired users inside a VR environment. These outputs were evaluated using the G-Eval and VIEScore metrics. Although the results show that the prompt patterns provided a description that aligns with the user's point of view, it is highly recommended to evaluate these outputs against "expected outputs" from Orientation and Mobility Specialists and Sighted Guides. Furthermore, the subsequent step in the process is to evaluate these outputs by visually impaired people themselves to identify the most effective prompt pattern. © 2025 Copyright held by the owner/author(s).},
keywords = {Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people},
pubstate = {published},
tppubtype = {inproceedings}
}
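The contribution of Coronado et al. is six engineered prompt patterns that turn a rendered VR view into a description useful to a blind player; the patterns themselves are not reproduced here. A hypothetical pattern of that kind, with a stubbed model call, might look like:

# Hypothetical prompt pattern in the spirit of the paper; the six actual
# patterns are not public in this listing.
PROMPT_PATTERN = (
    "You are a sighted guide describing a rendered VR room to a blind "
    "player. In at most three sentences, give: room size, obstacles "
    "within two steps of the player, and one landmark for orientation."
)

def describe_view(render_summary: str) -> str:
    """render_summary stands in for the rendered frame a real system
    would pass to a multimodal LLM along with the pattern."""
    prompt = f"{PROMPT_PATTERN}\n\nScene: {render_summary}"
    return call_mllm(prompt)  # placeholder for the actual MLLM call

def call_mllm(prompt: str) -> str:
    return ("You are in a small square room. A locked chest is one step "
            "ahead; the exit door is to your right.")

print(describe_view("escape room: chest center, door east, player at west wall"))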
Kai, W. -H.; Xing, K. -X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 0178-2789.
Abstract | Links | BibTeX | Tags: Current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
title = {Video-driven musical composition using large language model with memory-augmented state space},
author = {W. -H. Kai and K. -X. Xing},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2fs00371-024-03606-w&partnerID=40&md5=7ea24f13614a9a24caf418c37a10bd8c},
doi = {10.1007/s00371-024-03606-w},
issn = {0178-2789},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {5},
pages = {3345–3357},
abstract = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, the research work on LLMs for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependencies and maintain high performance while further decreasing the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code is released at https://github.com/kai211233/S2L2-V2M. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {Current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space},
pubstate = {published},
tppubtype = {article}
}
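The abstract's two key ingredients, continuous moving-window partitioning and a long-term memory bank, can be made concrete with a toy sketch. This is not the published architecture: the Mamba encoder is replaced by mean-pooling, and the dimensions and aggregation rule are invented.

from collections import deque

class LongTermMemoryBank:
    """Bounded store of per-window summaries, aggregated into one context
    vector so cost stays near-linear in video length (toy version)."""

    def __init__(self, capacity: int = 64):
        self.bank = deque(maxlen=capacity)

    def write(self, summary: list[float]) -> None:
        self.bank.append(summary)

    def read(self) -> list[float]:
        dim = len(self.bank[0])
        n = len(self.bank)
        return [sum(v[i] for v in self.bank) / n for i in range(dim)]

def process_video(frames: list[list[float]], window: int = 8) -> list[float]:
    memory = LongTermMemoryBank()
    for start in range(0, len(frames), window):   # moving windows
        chunk = frames[start:start + window]
        dim = len(chunk[0])
        # stand-in for the Enhanced Video Mamba encoder: mean-pool the chunk
        memory.write([sum(f[i] for f in chunk) / len(chunk) for i in range(dim)])
    return memory.read()  # context handed to the music-generating LLM

print(process_video([[float(i), 1.0] for i in range(32)]))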
Zhu, X. T.; Cheerman, H.; Cheng, M.; Kiami, S. R.; Chukoskie, L.; McGivney, E.
Designing VR Simulation System for Clinical Communication Training with LLMs-Based Embodied Conversational Agents Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8.
Abstract | Links | BibTeX | Tags: Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems
@inproceedings{zhu_designing_2025,
title = {Designing VR Simulation System for Clinical Communication Training with LLMs-Based Embodied Conversational Agents},
author = {X. T. Zhu and H. Cheerman and M. Cheng and S. R. Kiami and L. Chukoskie and E. McGivney},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005754066&doi=10.1145%2f3706599.3719693&partnerID=40&md5=4468fbd54b43d6779259300afd08632e},
doi = {10.1145/3706599.3719693},
isbn = {979-840071395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {VR simulation in Health Professions (HP) education demonstrates huge potential, but fixed learning content with little customization limits its application beyond lab environments. To address these limitations in the context of VR for patient communication training, we conducted a user-centered study involving semi-structured interviews with advanced HP students to understand their challenges in clinical communication training and perceptions of VR-based solutions. From this, we derived design insights emphasizing the importance of realistic scenarios, simple interactions, and unpredictable dialogues. Building on these insights, we developed the Virtual AI Patient Simulator (VAPS), a novel VR system powered by Large Language Models (LLMs) and Embodied Conversational Agents (ECAs), supporting dynamic and customizable patient interactions for immersive learning. We also provided an example of how clinical professors could use user-friendly design forms to create personalized scenarios that align with course objectives in VAPS and discuss future implications of integrating AI-driven technologies into VR education. © 2025 Copyright held by the owner/author(s).},
keywords = {Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems},
pubstate = {published},
tppubtype = {inproceedings}
}
Sousa, R. T.; Oliveira, E. A. M.; Cintra, L. M. F.; Filho, A. R. G.
Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 168–171, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: AI-Driven Rehabilitation, Artificial intelligence-driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training
@inproceedings{sousa_transformative_2025,
title = {Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work},
author = {R. T. Sousa and E. A. M. Oliveira and L. M. F. Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140551&doi=10.1109%2fVRW66409.2025.00042&partnerID=40&md5=89da6954863a272d48c0d8da3760bfb6},
doi = {10.1109/VRW66409.2025.00042},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {168–171},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The reintegration of incarcerated individuals into society presents significant challenges, particularly in addressing barriers related to vocational training, social skill development, and emotional rehabilitation. Immersive technologies, such as Virtual Reality and Augmented Reality, combined with generative Artificial Intelligence (AI) and Large Language Models, offer innovative opportunities to enhance these areas. These technologies create practical, controlled environments for skill acquisition and behavioral training, while generative AI enables dynamic, personalized, and adaptive experiences. This paper explores the broader potential of these integrated technologies in supporting rehabilitation, reducing recidivism, and fostering sustainable employment opportunities. These initiatives align with the overarching equity objective of ensuring Decent Work for All, reinforcing the commitment to inclusive and equitable progress across diverse communities through the transformative potential of immersive and AI-driven systems in correctional settings. © 2025 IEEE.},
keywords = {AI-Driven Rehabilitation, Artificial intelligence-driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, D.; Ge, M.; Xiang, K.; Zhang, X.; Yang, H.
Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions Journal Article
In: International Journal of Network Management, vol. 35, 2025, ISSN: 1055-7148.
Abstract | Links | BibTeX | Tags: Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers
@article{huang_privacy_2025,
title = {Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions},
author = {D. Huang and M. Ge and K. Xiang and X. Zhang and H. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199980257&doi=10.1002%2fnem.2292&partnerID=40&md5=2dea1caa1d31aecde3d302a908fb7dd3},
doi = {10.1002/nem.2292},
issn = {1055-7148},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Network Management},
volume = {35},
publisher = {John Wiley and Sons Ltd},
abstract = {Large language models (LLMs), with their billions to trillions of parameters, excel in natural language processing, machine translation, dialog systems, and text summarization. These capabilities are increasingly pivotal in the metaverse, where they can enhance virtual interactions and environments. However, their extensive use, particularly in the metaverse's immersive platforms, raises significant privacy concerns. This paper analyzes existing privacy issues in LLMs, vital for both traditional and metaverse applications, and examines protection techniques across the entire life cycle of these models, from training to user deployment. We delve into cryptography, embedding layer encoding, differential privacy and its variants, and adversarial networks, highlighting their relevance in the metaverse context. Specifically, we explore technologies like homomorphic encryption and secure multiparty computation, which are essential for metaverse security. Our discussion on Gaussian differential privacy, Renyi differential privacy, Edgeworth accounting, and the generation of adversarial samples and loss functions emphasizes their importance in the metaverse's dynamic and interactive environments. Lastly, the paper discusses the current research status and future challenges in the security of LLMs within and beyond the metaverse, emphasizing urgent problems and potential areas for exploration. © 2024 John Wiley & Sons Ltd.},
keywords = {Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers},
pubstate = {published},
tppubtype = {article}
}
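Among the protection techniques this survey compares, Gaussian differential privacy is the most compact to state. As a concrete reference point, here is a minimal sketch of the standard Gaussian mechanism; this is the textbook formula, not code from the paper.

import math
import random

def gaussian_mechanism(value: float, sensitivity: float,
                       epsilon: float, delta: float) -> float:
    """Add noise calibrated for (epsilon, delta)-DP to a query with the
    given L2 sensitivity; this calibration is valid for epsilon in (0, 1)."""
    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)

# e.g., privatize one gradient coordinate before it leaves the client
print(gaussian_mechanism(0.42, sensitivity=1.0, epsilon=0.9, delta=1e-5))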
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real-time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
doi = {10.1109/VRW66409.2025.00038},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real-time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
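Matrix's pipeline, multilingual speech to text, text to an object request via an LLM, text-to-3D generation, then mesh reduction for headset budgets, can be outlined with stubs. Every function body below is a placeholder; only the pipeline shape follows the abstract.

def speech_to_text(audio: bytes, language: str) -> str:
    return "put a small oak table here"              # placeholder STT

def extract_object_request(utterance: str) -> str:
    return "small oak table"                         # placeholder LLM step

def text_to_3d(prompt: str) -> dict:
    return {"prompt": prompt, "vertices": 120_000}   # stand-in mesh record

def decimate(mesh: dict, target_vertices: int = 20_000) -> dict:
    """Mesh-simplification step the paper highlights: shrink the model so
    file size and render cost fit a resource-constrained AR device."""
    mesh["vertices"] = min(mesh["vertices"], target_vertices)
    return mesh

text = speech_to_text(b"", language="any")
mesh = decimate(text_to_3d(extract_object_request(text)))
print(mesh)   # {'prompt': 'small oak table', 'vertices': 20000}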
Sabir, A.; Hussain, R.; Pedro, A.; Park, C.
Personalized construction safety training system using conversational AI in virtual reality Journal Article
In: Automation in Construction, vol. 175, 2025, ISSN: 0926-5805.
Abstract | Links | BibTeX | Tags: Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'
@article{sabir_personalized_2025,
title = {Personalized construction safety training system using conversational AI in virtual reality},
author = {A. Sabir and R. Hussain and A. Pedro and C. Park},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002741042&doi=10.1016%2fj.autcon.2025.106207&partnerID=40&md5=376284339bf10fd5d799cc56c6643d36},
doi = {10.1016/j.autcon.2025.106207},
issn = {0926-5805},
year = {2025},
date = {2025-01-01},
journal = {Automation in Construction},
volume = {175},
abstract = {Training workers in safety protocols is crucial for mitigating job site hazards, yet traditional methods often fall short. This paper explores integrating virtual reality (VR) and large language models (LLMs) into iSafeTrainer, an AI-powered safety training system. The system allows trainees to engage with trade-specific content tailored to their expertise level in a third-person perspective in a non-immersive desktop virtual environment, eliminating the need for head-mounted displays. An experimental study evaluated the system through qualitative, survey-based assessments, focusing on user satisfaction, experience, engagement, guidance, and confidence. Results showed high satisfaction rates (>85 %) among novice users, with improved safety knowledge. Expert users suggested advanced scenarios, highlighting the system's potential for expansion. The modular architecture supports customization across various construction settings, ensuring adaptability for future improvements. © 2024},
keywords = {Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'},
pubstate = {published},
tppubtype = {article}
}
Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.
Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article
In: Del Bue, A.; Canton, C.; Pont-Tuset, J.; Tommasi, T. (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-91571-0.
Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization
@inproceedings{lau_evaluating_2025,
title = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling},
author = {K. H. C. Lau and E. Bozkir and H. Gao and E. Kasneci},
editor = {Del Bue, A. and Canton, C. and Pont-Tuset, J. and Tommasi, T.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
doi = {10.1007/978-3-031-91572-7_11},
issn = {0302-9743},
isbn = {978-3-031-91571-0},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15628 LNCS},
pages = {177–195},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Arevalo Espinal, W. Y.; Jimenez, J.; Corneo, L.
An eXtended Reality Data Transformation Framework for Internet of Things Devices Integration Proceedings Article
In: IoT - Proc. Int. Conf. Internet Things, pp. 10–18, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071285-2.
Abstract | Links | BibTeX | Tags: Application programs, Comprehensive evaluation, Data integration, Data Transformation, Device and Data Integration, Devices integration, Extended reality, Generative AI, Interactive objects, Internet of Things, Language Model, Software runtime, Time-consuming tasks
@inproceedings{arevalo_espinal_extended_2025,
title = {An eXtended Reality Data Transformation Framework for Internet of Things Devices Integration},
author = {W. Y. Arevalo Espinal and J. Jimenez and L. Corneo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002862430&doi=10.1145%2f3703790.3703792&partnerID=40&md5=6ba7d70e00e3b0803149854b5744e55e},
doi = {10.1145/3703790.3703792},
isbn = {979-840071285-2},
year = {2025},
date = {2025-01-01},
booktitle = {IoT - Proc. Int. Conf. Internet Things},
pages = {10–18},
publisher = {Association for Computing Machinery, Inc},
abstract = {The multidisciplinary nature of XR applications makes device and data integration a resource-intensive and time-consuming task, especially in the context of the Internet of Things (IoT). This paper presents Visualize Interactive Objects, VIO for short, a data transformation framework aimed at simplifying visualization and interaction of IoT devices and their data into XR applications. VIO comprises a software runtime (VRT) running on XR headsets, and a JSON-based syntax for defining VIO Descriptions (VDs). The VRT interprets VDs to facilitate visualization and interaction within the application. By raising the level of abstraction, VIO enhances interoperability among XR experiences and enables developers to integrate IoT data with minimal coding effort. A comprehensive evaluation demonstrated that VIO is lightweight, incurring negligible overhead compared to native implementations. Ten Large Language Models (LLMs) were used to generate VDs and native source-code from user intents. The results showed that LLMs have superior syntactical and semantical accuracy in generating VDs compared to native XR application development code, thus indicating that the task of creating VDs can be effectively automated using LLMs. Additionally, a user study with 12 participants found that VIO is developer-friendly and easily extensible. © 2024 Copyright held by the owner/author(s).},
keywords = {Application programs, Comprehensive evaluation, Data integration, Data Transformation, Device and Data Integration, Devices integration, Extended reality, Generative AI, Interactive objects, Internet of Things, Language Model, Software runtime, Time-consuming tasks},
pubstate = {published},
tppubtype = {inproceedings}
}
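VIO's key artifact is the JSON-based VIO Description (VD) that the VRT interprets on the headset. The concrete schema below is invented for illustration; the paper's actual syntax is not reproduced in this listing.

import json

# Hypothetical VD for one IoT device; all field names are assumptions.
VD = json.loads("""
{
  "device": "kitchen-thermometer",
  "source": {"protocol": "mqtt", "topic": "home/kitchen/temp"},
  "visualization": {"widget": "gauge", "unit": "celsius",
                    "anchor": [0.0, 1.5, -2.0]},
  "interactions": [{"gesture": "tap", "action": "toggle_history_panel"}]
}
""")

def interpret_vd(vd: dict) -> str:
    """Toy stand-in for the VIO runtime (VRT): turn a declarative VD into
    a render instruction instead of hand-written XR integration code."""
    viz = vd["visualization"]
    return (f"render {viz['widget']} for {vd['device']} at {viz['anchor']}, "
            f"unit={viz['unit']}, on_tap={vd['interactions'][0]['action']}")

print(interpret_vd(VD))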
Lakehal, A.; Alti, A.; Annane, B.
CORES: Context-Aware Emotion-Driven Recommendation System-Based LLM to Improve Virtual Shopping Experiences Journal Article
In: Future Internet, vol. 17, no. 2, 2025, ISSN: 1999-5903.
Abstract | Links | BibTeX | Tags: Context, Context-Aware, Customisation, Decisions makings, E-commerces, e-commerce, Emotion, emotions, Language Model, Large language model, LLM, Recommendation, Virtual environments, Virtual Reality, Virtual shopping
@article{lakehal_cores_2025,
title = {CORES: Context-Aware Emotion-Driven Recommendation System-Based LLM to Improve Virtual Shopping Experiences},
author = {A. Lakehal and A. Alti and B. Annane},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85218626299&doi=10.3390%2ffi17020094&partnerID=40&md5=a0f68e273de08b2c33d03da4cb6c19bb},
doi = {10.3390/fi17020094},
issn = {1999-5903},
year = {2025},
date = {2025-01-01},
journal = {Future Internet},
volume = {17},
number = {2},
abstract = {In today’s business landscape, artificial intelligence (AI) plays a pivotal role in shopping processes and customization. As the demand for customization grows, virtual reality (VR) emerges as an innovative solution to improve users’ perception and decision making in virtual shopping experiences (VSEs). Despite its potential, limited research has explored the integration of contextual information and emotions in VR to deliver effective product recommendations. This paper presents CORES (context-aware emotion-driven recommendation system), a novel approach designed to enrich users’ experiences and to support decision making in VR. CORES combines advanced large language models (LLMs) and embedding-based context-aware recommendation strategies to provide customized products. Therefore, emotions are collected from social platforms, and relevant contextual information is matched to enable effective recommendation. Additionally, CORES leverages transformers and retrieval-augmented generation (RAG) capabilities to explain recommended items, facilitate VR visualization, and generate insights using various prompt templates. CORES is applied to a VR shop of different items. An empirical study validates the efficiency and accuracy of this approach, achieving a significant average accuracy of 97% and an acceptable response time of 0.3267s in dynamic shopping scenarios. © 2025 by the authors.},
keywords = {Context, Context-Aware, Customisation, Decisions makings, E-commerces, e-commerce, Emotion, emotions, Language Model, Large language model, LLM, Recommendation, Virtual environments, Virtual Reality, Virtual shopping},
pubstate = {published},
tppubtype = {article}
}
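CORES matches collected emotions and contextual information against products via LLM embeddings and RAG. A toy version of that scoring step, using 3-component placeholder vectors instead of real embeddings:

# Toy emotion/context-to-product scoring; real CORES uses LLM embeddings
# and retrieval-augmented generation, and these vectors are placeholders.
CATALOG = {
    "cozy blanket":   [0.9, 0.1, 0.3],   # [comfort, energy, utility]
    "running shoes":  [0.1, 0.9, 0.6],
    "desk organizer": [0.2, 0.2, 0.9],
}

def embed(emotion: str, context: str) -> list[float]:
    # Placeholder encoder; a real system would embed the text with an LLM.
    table = {("sad", "home evening"): [0.9, 0.1, 0.2],
             ("excited", "morning"):  [0.1, 0.9, 0.5]}
    return table.get((emotion, context), [0.3, 0.3, 0.3])

def recommend(emotion: str, context: str) -> str:
    query = embed(emotion, context)
    return max(CATALOG, key=lambda item: sum(
        q * v for q, v in zip(query, CATALOG[item])))

print(recommend("sad", "home evening"))   # -> cozy blanket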
Graziano, M.; Colucci Cante, L.; Di Martino, B.
Deploying Large Language Model on Cloud-Edge Architectures: A Case Study for Conversational Historical Characters Book Section
In: Lecture Notes on Data Engineering and Communications Technologies, vol. 250, pp. 196–205, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 2367-4512.
Abstract | Links | BibTeX | Tags: Agent based, Augmented Reality, Case-studies, Chatbots, Cloud computing architecture, Conversational Agents, EDGE architectures, Historical characters, Language Model, Modeling languages, Real time performance, WEB application, Web applications, Work analysis
@incollection{graziano_deploying_2025,
title = {Deploying Large Language Model on Cloud-Edge Architectures: A Case Study for Conversational Historical Characters},
author = {M. Graziano and L. Colucci Cante and B. Di Martino},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002995405&doi=10.1007%2f978-3-031-87778-0_19&partnerID=40&md5=c54e9ce66901050a05de68602e4a8266},
doi = {10.1007/978-3-031-87778-0_19},
issn = {2367-4512},
year = {2025},
date = {2025-01-01},
booktitle = {Lecture Notes on Data Engineering and Communications Technologies},
volume = {250},
pages = {196–205},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This work analyzes the deployment of conversational agents based on large language models (LLMs) in cloud-edge architectures, placing emphasis on scalability, efficiency and real-time performance. Through a case study, we present a web application that allows users to interact with an augmented reality avatar that impersonates a historical character. The agent, powered by an LLM, delivers immersive and contextually coherent dialogues. We discuss the solutions adopted to manage latency and distribute the computational load between the cloud, which takes care of language processing, and the edge nodes, ensuring a smooth user experience. The results obtained demonstrate how accurate design can optimize the use of LLMs in distributed environments, offering advanced and high-performance interactions even in applications with high reactivity and customization requirements. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Agent based, Augmented Reality, Case-studies, Chatbots, Cloud computing architecture, Conversational Agents, EDGE architectures, Historical characters, Language Model, Modeling languages, Real time performance, WEB application, Web applications, Work analysis},
pubstate = {published},
tppubtype = {incollection}
}
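The latency-splitting idea in this chapter, language processing in the cloud with responsiveness handled at the edge, can be caricatured in a few lines. The caching policy below is an assumption for illustration, not the authors' design.

import time

CACHE: dict[str, str] = {}   # edge-side cache for frequent visitor questions

def edge_node(question: str) -> str:
    """Runs near the user: serve cached replies instantly, forward cache
    misses to the cloud-hosted LLM (assumed split, for illustration)."""
    if question not in CACHE:
        CACHE[question] = cloud_llm(question)
    return CACHE[question]

def cloud_llm(question: str) -> str:
    time.sleep(0.2)   # stand-in for LLM inference latency
    return "I crossed the Rubicon in 49 BC."   # historical-character persona

print(edge_node("When did you cross the Rubicon?"))   # slow path, via cloud
print(edge_node("When did you cross the Rubicon?"))   # served at the edge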
Gatti, E.; Giunchi, D.; Numan, N.; Steed, A.
Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 136–141, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8.
Abstract | Links | BibTeX | Tags: Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR
@inproceedings{gatti_around_2025,
title = {Around the Virtual Campfire: Early UX Insights into AI-Generated Stories in VR},
author = {E. Gatti and D. Giunchi and N. Numan and A. Steed},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000263662&doi=10.1109%2fAIxVR63409.2025.00027&partnerID=40&md5=cd804d892d45554e936d0221508b3447},
doi = {10.1109/AIxVR63409.2025.00027},
isbn = {979-833152157-8},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {136–141},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual Reality (VR) presents an immersive platform for storytelling, allowing narratives to unfold in highly engaging, interactive environments. Leveraging AI capabilities and image synthesis offers new possibilities for creating scalable, generative VR content. In this work, we use an LLM-driven VR storytelling platform to explore how AI-generated visuals and narrative elements impact the user experience in VR storytelling. Previously, we presented AIsop, a system to integrate LLM-generated text and images and TTS audio into a storytelling experience, where the narrative unfolds based on user input. In this paper, we present two user studies focusing on how AI-generated visuals influence narrative perception and the overall VR experience. Our findings highlight the positive impact of AI-generated pictorial content on the storytelling experience and identify areas for enhancement and further research in interactive narrative design. © 2025 IEEE.},
keywords = {Generative AI, Images synthesis, Immersive, Interactive Environments, Language Model, Large language model, Storytelling, User input, User study, Users' experiences, Virtual environments, VR},
pubstate = {published},
tppubtype = {inproceedings}
}
Behravan, M.; Matković, K.; Gračanin, D.
Generative AI for Context-Aware 3D Object Creation Using Vision-Language Models in Augmented Reality Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 73–81, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8.
Abstract | Links | BibTeX | Tags: 3D object, 3D Object Generation, Artificial intelligence systems, Augmented Reality, Capture images, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Generative model, Language Model, Object creation, Vision language model, vision language models, Visual languages
@inproceedings{behravan_generative_2025,
title = {Generative AI for Context-Aware 3D Object Creation Using Vision-Language Models in Augmented Reality},
author = {M. Behravan and K. Matković and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000292700&doi=10.1109%2fAIxVR63409.2025.00018&partnerID=40&md5=b40fa769a6b427918c3fcd86f7c52a75},
doi = {10.1109/AIxVR63409.2025.00018},
isbn = {979-833152157-8},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {73–81},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present a novel Artificial Intelligence (AI) system that functions as a designer assistant in augmented reality (AR) environments. Leveraging Vision Language Models (VLMs) like LLaVA and advanced text-to-3D generative models, users can capture images of their surroundings with an Augmented Reality (AR) headset. The system analyzes these images to recommend contextually relevant objects that enhance both functionality and visual appeal. The recommended objects are generated as 3D models and seamlessly integrated into the AR environment for interactive use. Our system utilizes open-source AI models running on local systems to enhance data security and reduce operational costs. Key features include context-aware object suggestions, optimal placement guidance, aesthetic matching, and an intuitive user interface for real-time interaction. Evaluations using the COCO 2017 dataset and real-world AR testing demonstrated high accuracy in object detection and contextual fit rating of 4.1 out of 5. By addressing the challenge of providing context-aware object recommendations in AR, our system expands the capabilities of AI applications in this domain. It enables users to create personalized digital spaces efficiently, leveraging AI for contextually relevant suggestions. © 2025 IEEE.},
keywords = {3D object, 3D Object Generation, Artificial intelligence systems, Augmented Reality, Capture images, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Generative model, Language Model, Object creation, Vision language model, vision language models, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
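The loop this paper describes, capture a view, have a VLM propose contextually fitting objects, then generate them as 3D models, reduces to three stubs. The suggestion table is invented; the paper uses open-source VLMs such as LLaVA for the analysis step.

def analyze_scene(image: bytes) -> list[str]:
    return ["desk", "monitor", "bare wall"]   # stand-in for a VLM (e.g., LLaVA)

def recommend_objects(scene_items: list[str]) -> list[str]:
    # A real pipeline would prompt the VLM for suggestions; this lookup
    # is purely illustrative.
    suggestions = {"bare wall": "framed print", "desk": "desk lamp"}
    return [suggestions[i] for i in scene_items if i in suggestions]

def generate_3d(name: str) -> str:
    return f"{name}.glb"                      # placeholder text-to-3D output

for obj in recommend_objects(analyze_scene(b"")):
    print("place", generate_3d(obj))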
Guo, H.; Liu, Z.; Tang, C.; Zhang, X.
An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning Journal Article
In: IEEE Access, vol. 13, pp. 81450–81461, 2025, ISSN: 2169-3536.
Abstract | Links | BibTeX | Tags: Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour
@article{guo_interactive_2025,
title = {An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning},
author = {H. Guo and Z. Liu and C. Tang and X. Zhang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105004059236&doi=10.1109%2fACCESS.2025.3565359&partnerID=40&md5=45d328831c5795fa31e7e033299912b5},
doi = {10.1109/ACCESS.2025.3565359},
issn = {2169-3536},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {81450–81461},
abstract = {With the wide application of large language models (LLMs) and the rapid growth of metaverse tourism demand, the digital tour and personalized interaction of historical sites have become the key to improving users’ digital travel experience. Creating an environment where users can access rich cultural information and enjoy personalized, immersive experiences is a crucial issue in the field of digital cultural travel. To this end, we propose a tourism information multimodal generation personalized question-answering interactive framework TIGMI (Tourism Information Generation and Multimodal Interaction) based on LLM fine-tuning, which aims to provide a richer and more in-depth experience for virtual tours of historical monuments. Taking Qutan Temple as an example, the framework combines LLM, retrieval augmented generation (RAG), and auto-prompting engineering techniques to retrieve accurate information related to the historical monument from external knowledge bases and seamlessly integrates it into the generated content. This integration mechanism ensures the accuracy and relevance of the generated answers. Through TIGMI’s LLM-driven command interaction mechanism in the 3D digital scene of Qutan Temple, users are able to interact with the building and scene environment in a personalized and real-time manner, successfully integrating historical and cultural information with modern digital technology. This integration significantly enhances the naturalness of interaction and personalizes the user experience, thereby improving user immersion and information acquisition efficiency. Evaluation results show that TIGMI excels in question-answering and multimodal interactions, significantly enhancing the depth and breadth of services provided by the personalized virtual tour. We conclude by addressing the limitations of TIGMI and briefly discuss how future research will focus on further improving the accuracy and user satisfaction of the generated content to adapt to the dynamically changing tourism environment. © 2013 IEEE.},
keywords = {Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour},
pubstate = {published},
tppubtype = {article}
}
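As a hedged illustration of the retrieval-augmented answering that TIGMI's abstract describes, the sketch below ranks knowledge-base snippets about the monument by naive word overlap and splices the top hits into the prompt. The call_llm stub and the lexical retriever are placeholder assumptions; the paper itself uses a fine-tuned LLM with auto-prompting, which is not reproduced here.

def call_llm(prompt: str) -> str:
    """Hypothetical stand-in for the fine-tuned tourism LLM."""
    raise NotImplementedError

def retrieve(query: str, kb: list[str], k: int = 2) -> list[str]:
    # Naive lexical retriever: rank snippets by words shared with the query.
    q = set(query.lower().split())
    ranked = sorted(kb, key=lambda doc: len(q & set(doc.lower().split())), reverse=True)
    return ranked[:k]

def answer(query: str, kb: list[str]) -> str:
    context = "\n".join(retrieve(query, kb))
    return call_llm(
        "Using only the facts below about Qutan Temple, answer the visitor.\n"
        f"Facts:\n{context}\n\nQuestion: {query}"
    )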
Oliveira, E. A. Masasi De; Sousa, R. T.; Bastos, A. A.; Cintra, L. Martins De Freitas; Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi De Oliveira and R. T. Sousa and A. A. Bastos and L. Martins De Freitas Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2f3706370.3731643&partnerID=40&md5=db10b41217dd8a0b0705c3fb4a615666},
doi = {10.1145/3706370.3731643},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Copyright held by the owner/author(s).},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
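One plausible reading of the "spatially aware" retrieval in SA-RAG is that the exhibits most recently under the user's gaze become the retrieval keys. The sketch below keeps a short attention history and folds the matching curated descriptions into the prompt; the gaze callback, history length, and prompt shape are assumptions for illustration only.

from collections import deque

def call_llm(prompt: str) -> str:
    """Hypothetical LLM backend."""
    raise NotImplementedError

class SpatialRAG:
    def __init__(self, kb: dict[str, str], history: int = 3):
        self.kb = kb                          # exhibit id -> curated description
        self.recent = deque(maxlen=history)   # most recently gazed-at exhibits

    def on_gaze(self, exhibit_id: str) -> None:
        # Called by the VR layer whenever the gaze ray dwells on an exhibit.
        if exhibit_id in self.kb:
            self.recent.append(exhibit_id)

    def ask(self, question: str) -> str:
        context = "\n".join(self.kb[e] for e in self.recent)
        return call_llm(
            f"Context from what the visitor just looked at:\n{context}\n\n"
            f"Visitor asks: {question}"
        )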
Angelopoulos, J.; Manettas, C.; Alexopoulos, K.
Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR) Proceedings Article
In: K., Alexopoulos; S., Makris; P., Stavropoulos (Ed.): Lect. Notes Mech. Eng., pp. 197–205, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 21954356 (ISSN); 978-303186488-9 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants
@inproceedings{angelopoulos_industrial_2025,
title = {Industrial Maintenance Optimization Based on the Integration of Large Language Models (LLM) and Augmented Reality (AR)},
author = {J. Angelopoulos and C. Manettas and K. Alexopoulos},
editor = {Alexopoulos K. and Makris S. and Stavropoulos P.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001421726&doi=10.1007%2f978-3-031-86489-6_20&partnerID=40&md5=63be31b9f4dda4aafd6a641630506c09},
doi = {10.1007/978-3-031-86489-6_20},
isbn = {21954356 (ISSN); 978-303186488-9 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Mech. Eng.},
pages = {197–205},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Traditional maintenance procedures often rely on manual data processing and human expertise, leading to inefficiencies and potential errors. In the context of Industry 4.0, several digital technologies, such as Artificial Intelligence (AI), Big Data Analytics (BDA), and eXtended Reality (XR), have been developed and are constantly being integrated into a plethora of manufacturing activities (including industrial maintenance) in an attempt to minimize human error, support shop-floor technicians, reduce costs, and reduce equipment downtime. The latest developments in the field of AI point towards Large Language Models (LLMs), which can communicate with human operators in an intuitive manner. On the other hand, Augmented Reality, as part of XR technologies, offers useful functionalities for improving user perception of, and interaction with, modern, complex industrial equipment. Therefore, this research work develops and trains an LLM to provide suggestions and actionable items for mitigating unforeseen events (e.g., equipment breakdowns) and to support shop-floor technicians in their everyday tasks. Paired with AR visualizations over the physical environment, technicians receive instructions for performing tasks and checks on the industrial equipment in a manner similar to human-to-human communication. The proposed framework also integrates modules for exchanging information with the engineering department towards scheduling Maintenance and Repair Operations (MRO), as well as for building a repository of historical data used to continually retrain and optimize the LLM. © The Author(s) 2025.},
keywords = {Augmented Reality, Competition, Cost reduction, Critical path analysis, Crushed stone plants, Generative AI, generative artificial intelligence, Human expertise, Industrial equipment, Industrial maintenance, Language Model, Large language model, Maintenance, Maintenance optimization, Maintenance procedures, Manufacturing data processing, Potential errors, Problem oriented languages, Scheduled maintenance, Shopfloors, Solar power plants},
pubstate = {published},
tppubtype = {inproceedings}
}
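A minimal sketch of the LLM-to-AR hand-off the abstract above outlines, assuming the model is asked for a numbered list of mitigation steps and each step becomes one AR instruction panel (the call_llm stub and prompt wording are not from the paper):

import re

def call_llm(prompt: str) -> str:
    """Hypothetical stand-in for the maintenance-trained LLM."""
    raise NotImplementedError

def mitigation_steps(fault: str, equipment: str) -> list[str]:
    reply = call_llm(
        f"Equipment: {equipment}\nFault: {fault}\n"
        "List the mitigation steps a technician should perform, one per line, numbered."
    )
    # Strip leading "1.", "2)" markers so each step can feed one AR panel.
    steps = [re.sub(r"^\s*\d+[.)]\s*", "", line) for line in reply.splitlines()]
    return [s for s in steps if s]

Completed steps could then be logged back into the historical-data repository the authors mention, supporting the periodic retraining loop.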
Xu, F.; Zhou, T.; Nguyen, T.; Bao, H.; Lin, C.; Du, J.
Integrating augmented reality and LLM for enhanced cognitive support in critical audio communications Journal Article
In: International Journal of Human Computer Studies, vol. 194, 2025, ISSN: 10715819 (ISSN).
Abstract | Links | BibTeX | Tags: Audio communications, Augmented Reality, Cognitive loads, Cognitive support, Decisions makings, Language Model, Large language model, LLM, Logic reasoning, Maintenance, Operations and maintenance, Oral communication, Situational awareness
@article{xu_integrating_2025,
title = {Integrating augmented reality and LLM for enhanced cognitive support in critical audio communications},
author = {F. Xu and T. Zhou and T. Nguyen and H. Bao and C. Lin and J. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85208467299&doi=10.1016%2fj.ijhcs.2024.103402&partnerID=40&md5=153d095b837ee1666a7da0f7ed03362c},
doi = {10.1016/j.ijhcs.2024.103402},
issn = {10715819 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human Computer Studies},
volume = {194},
abstract = {Operation and Maintenance (O&M) missions are often time-sensitive and accuracy-dependent, requiring rapid and precise information processing in noisy, chaotic environments where oral communication can lead to cognitive overload and impaired decision-making. Augmented Reality (AR) and Large Language Models (LLMs) offer potential for enhancing situational awareness and lowering cognitive load by integrating digital visualizations with the physical world and improving dialogue management. However, synthesizing these technologies into a real-time system that effectively aids operators remains a challenge. This study explores the integration of AR and GPT-4, an advanced LLM, in time-sensitive O&M tasks, aiming to enhance situational awareness and manage cognitive load during oral communications. A customized AR system, incorporating the Microsoft HoloLens 2 for cognitive monitoring and GPT-4 for decision-making assistance, was tested in a human-subject experiment with 30 participants. The 2×2 factorial experiment evaluated the effects of AR and LLM assistance on task performance and cognitive load. Results demonstrated significant improvements in task accuracy and reductions in cognitive load, highlighting the effectiveness of AR and LLM integration in supporting O&M missions. These findings emphasize the need for further research to optimize operational strategies in mission-critical environments. © 2024 Elsevier Ltd},
keywords = {Audio communications, Augmented Reality, Cognitive loads, Cognitive support, Decisions makings, Language Model, Large language model, LLM, Logic reasoning, Maintenance, Operations and maintenance, Oral communication, Situational awareness},
pubstate = {published},
tppubtype = {article}
}
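As a speculative sketch of how a measured cognitive-load signal could gate the verbosity of the model's reply in a system like the one above (the load signal, threshold, and prompt are assumptions, not the paper's protocol):

def call_llm(prompt: str) -> str:
    """Hypothetical GPT-4-style backend."""
    raise NotImplementedError

def assist(transcript: str, cognitive_load: float, threshold: float = 0.7) -> str:
    # Under high measured load, request a terse, checklist-style answer so the
    # AR display adds as little reading burden as possible.
    style = ("Answer in at most two short bullet points."
             if cognitive_load > threshold
             else "Answer with a brief explanation and a recommendation.")
    return call_llm(
        f"Radio traffic so far:\n{transcript}\n\n"
        f"{style} What should the operator do next?"
    )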
Mao, H.; Xu, Z.; Wei, S.; Quan, Y.; Deng, N.; Yang, X.
LLM-powered Gaussian Splatting in VR interactions Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1654–1655, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D Gaussian Splatting, 3D reconstruction, Content creation, Digital elevation model, Gaussians, High quality, Language Model, material analysis, Materials analysis, Physical simulation, Quality rendering, Rendering (computer graphics), Splatting, Virtual Reality, Volume Rendering, VR systems
@inproceedings{mao_llm-powered_2025,
title = {LLM-powered Gaussian Splatting in VR interactions},
author = {H. Mao and Z. Xu and S. Wei and Y. Quan and N. Deng and X. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005148017&doi=10.1109%2fVRW66409.2025.00472&partnerID=40&md5=ee725f655a37251ff335ad2098d15f22},
doi = {10.1109/VRW66409.2025.00472},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1654–1655},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in radiance field rendering, particularly 3D Gaussian Splatting (3DGS), have demonstrated significant potential for VR content creation, offering both high-quality rendering and an efficient production pipeline. However, current physics-based interaction systems for 3DGS either are limited to simplistic, unrealistic simulations or require substantial user input for complex scenes, largely due to the lack of scene comprehension. In this demonstration, we present a highly realistic interactive VR system powered by large language models (LLMs). After object-aware GS reconstruction, we prompt GPT-4o to analyze the physical properties of objects in the scene, which then guide physical simulations that adhere to real-world phenomena. Additionally, we design a GPT-assisted GS inpainting module to complete the areas occluded by manipulated objects. To facilitate rich interaction, we introduce a computationally efficient physical simulation framework through a PBD-based unified interpolation method, which supports various forms of physical interactions. In our research demonstrations, we reconstruct a variety of scenes enhanced by the LLM's understanding, showcasing how our VR system can support complex, realistic interactions without additional manual design or annotation. © 2025 IEEE.},
keywords = {3D Gaussian Splatting, 3D reconstruction, Content creation, Digital elevation model, Gaussians, High quality, Language Model, material analysis, Materials analysis, Physical simulation, Quality rendering, Rendering (computer graphics), Splatting, Virtual Reality, Volume Rendering, VR systems},
pubstate = {published},
tppubtype = {inproceedings}
}
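The material-analysis step above, prompting GPT-4o for per-object physical properties that parameterize the PBD simulation, might look roughly like this; the JSON schema, default values, and call_llm stub are illustrative assumptions:

import json

def call_llm(prompt: str) -> str:
    """Hypothetical GPT-4o-style backend."""
    raise NotImplementedError

DEFAULTS = {"mass_kg": 1.0, "friction": 0.5, "stiffness": 0.5}

def material_properties(label: str) -> dict:
    # Ask the model for coarse physical parameters to seed the PBD solver.
    reply = call_llm(
        f'Estimate the physical properties of a typical "{label}". '
        "Reply only with JSON containing mass_kg, friction (0-1), stiffness (0-1)."
    )
    try:
        raw = json.loads(reply)
    except json.JSONDecodeError:
        return dict(DEFAULTS)  # fall back to neutral defaults
    if not isinstance(raw, dict):
        return dict(DEFAULTS)
    return {k: float(raw.get(k, v)) for k, v in DEFAULTS.items()}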
Aloudat, M. Z.; Aboumadi, A.; Soliman, A.; Al-Mohammed, H. A.; Al-Ali, M.; Mahgoub, A.; Barhamgi, M.; Yaacoub, E.
Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, 6G, and Edge Learning Journal Article
In: IEEE Access, vol. 13, pp. 58302–58350, 2025, ISSN: 21693536 (ISSN).
Abstract | Links | BibTeX | Tags: 6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems
@article{aloudat_metaverse_2025,
title = {Metaverse Unbound: A Survey on Synergistic Integration Between Semantic Communication, 6G, and Edge Learning},
author = {M. Z. Aloudat and A. Aboumadi and A. Soliman and H. A. Al-Mohammed and M. Al-Ali and A. Mahgoub and M. Barhamgi and E. Yaacoub},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003088610&doi=10.1109%2fACCESS.2025.3555753&partnerID=40&md5=8f3f9421ce2d6be57f8154a122ee192c},
doi = {10.1109/ACCESS.2025.3555753},
issn = {21693536 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {58302–58350},
abstract = {With a focus on edge learning, blockchain, sixth generation (6G) wireless systems, semantic communication, and large language models (LLMs), this survey paper examines the revolutionary integration of cutting-edge technologies within the metaverse. This thorough examination highlights the critical role these technologies play in improving realism and user engagement on three main levels: technical, virtual, and physical. While the virtual layer focuses on building immersive experiences, the physical layer highlights improvements to the user interface through augmented reality (AR) goggles and virtual reality (VR) headsets, and the blockchain-powered technical layer enables safe, decentralized communication. By exploring applications in a variety of fields, such as immersive education, remote work, and entertainment, the survey highlights how the metaverse has the potential to drastically change how people interact in society. Concerns about privacy, scalability, and interoperability are raised, highlighting the necessity of continued study to realize the full potential of the metaverse. For scholars looking to broaden the reach and significance of the metaverse in the digital age, this paper is a useful tool. © 2013 IEEE.},
keywords = {6g wireless system, 6G wireless systems, Augmented Reality, Block-chain, Blockchain, Blockchain technology, Digital Twin Technology, Edge learning, Extended reality (XR), Language Model, Large language model, large language models (LLMs), Metaverse, Metaverses, Semantic communication, Virtual environments, Wireless systems},
pubstate = {published},
tppubtype = {article}
}
Zhang, G.; Wang, Y.; Luo, C.; Xu, S.; Ming, Y.; Peng, J.; Zhang, M.
Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images Proceedings Article
In: Z., Lin; H., Zha; M.-M., Cheng; R., He; C.-L., Liu; K., Ubul; W., Silamu; J., Zhou (Ed.): Lect. Notes Comput. Sci., pp. 3–17, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981978507-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages
@inproceedings{zhang_visual_2025,
title = {Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images},
author = {G. Zhang and Y. Wang and C. Luo and S. Xu and Y. Ming and J. Peng and M. Zhang},
editor = {Lin Z. and Zha H. and Cheng M.-M. and He R. and Liu C.-L. and Ubul K. and Silamu W. and Zhou J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209374797&doi=10.1007%2f978-981-97-8508-7_1&partnerID=40&md5=5231ab0bce95fb3f09db80392acd58ff},
doi = {10.1007/978-981-97-8508-7_1},
isbn = {03029743 (ISSN); 978-981978507-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15036 LNCS},
pages = {3–17},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Indoor scene generation has recently attracted significant attention, as it is crucial for the metaverse, 3D animation, visual effects in movies, and virtual/augmented reality. Existing image-based indoor scene generation methods often produce scenes that are not realistic enough, with issues such as floating objects, incorrect object orientations, and incomplete scenes that include only the part of the scene captured by the input image. To address these challenges, we propose Visual Harmony, a method that leverages the powerful spatial imagination capabilities of a Large Language Model (LLM) to generate corresponding indoor scenes based on the input image. Specifically, we first extract information from the input image through depth estimation and panorama segmentation, reconstructing a semantic point cloud. Using this reconstructed semantic point cloud, we extract a scene graph that describes only the objects in the image. We then leverage the strong spatial imagination capabilities of the LLM to complete the scene graph, forming a representation of a complete room scene. Based on this completed scene graph, we can generate an entire indoor scene that includes both the parts captured by the input image and those beyond it. Extensive experiments demonstrate that our method can generate realistic, plausible, and highly relevant complete indoor scenes related to the input image. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
keywords = {Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
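The core move in Visual Harmony, handing a partial scene graph to an LLM and asking it to imagine the unseen remainder of the room, can be sketched as follows; the graph encoding, reply format, and call_llm stub are assumptions rather than the authors' pipeline:

import json

def call_llm(prompt: str) -> str:
    """Hypothetical LLM backend with spatial common sense."""
    raise NotImplementedError

def complete_scene_graph(partial: dict) -> dict:
    # partial holds the objects visible in the image plus pairwise relations,
    # e.g. {"objects": ["bed", "nightstand"],
    #       "relations": [["nightstand", "left_of", "bed"]]}
    reply = call_llm(
        "This scene graph covers only the part of a room visible in one photo:\n"
        f"{json.dumps(partial)}\n"
        "Add plausible objects and relations for the unseen parts of the room. "
        "Reply with the full graph as JSON using the same keys."
    )
    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        return partial  # fall back to what was actually observed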