AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links, and BibTeX record for each paper.
2025
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
title = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
author = {Z. Li and H. Zhang and C. Peng and R. Peiris},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2fVR59515.2025.00025&partnerID=40&md5=60f22109e054c9035a0c2210bb797039},
doi = {10.1109/VR59515.2025.00025},
isbn = {979-833153645-9 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {1–11},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings highlight the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 IEEE.},
keywords = {Chatbots, Computer simulation languages, Context-awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Nasser, M.; Gaglio, G. F.; Seidita, V.; Chella, A.
The Art of Replication: Lifelike Avatars with Personalized Conversational Style Journal Article
In: Robotics, vol. 14, no. 3, 2025, ISSN: 22186581 (ISSN).
Abstract | Links | BibTeX | Tags: Avatar, large language models, Metaverse
@article{nasser_art_2025,
title = {The Art of Replication: Lifelike Avatars with Personalized Conversational Style},
author = {M. Nasser and G. F. Gaglio and V. Seidita and A. Chella},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001408683&doi=10.3390%2frobotics14030033&partnerID=40&md5=126c37258886f5c7ac65dc828f41b747},
doi = {10.3390/robotics14030033},
issn = {22186581 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Robotics},
volume = {14},
number = {3},
abstract = {This study presents an approach for developing digital avatars replicating individuals’ physical characteristics and communicative style, contributing to research on virtual interactions in the metaverse. The proposed method integrates large language models (LLMs) with 3D avatar creation techniques, using what we call the Tree of Style (ToS) methodology to generate stylistically consistent and contextually appropriate responses. Linguistic analysis and personalized voice synthesis enhance conversational and auditory realism. The results suggest that ToS offers a practical alternative to fine-tuning for creating stylistically accurate responses while maintaining efficiency. This study outlines potential applications and acknowledges the need for further work on adaptability and ethical considerations. © 2025 by the authors.},
keywords = {Avatar, large language models, Metaverse},
pubstate = {published},
tppubtype = {article}
}
Sousa, R. T.; Oliveira, E. A. M.; Cintra, L. M. F.; Filho, A. R. G.
Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 168–171, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: AI-Driven Rehabilitation, Artificial intelligence-driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training
@inproceedings{sousa_transformative_2025,
title = {Transformative Technologies for Rehabilitation: Leveraging Immersive and AI-Driven Solutions to Reduce Recidivism and Promote Decent Work},
author = {R. T. Sousa and E. A. M. Oliveira and L. M. F. Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140551&doi=10.1109%2fVRW66409.2025.00042&partnerID=40&md5=89da6954863a272d48c0d8da3760bfb6},
doi = {10.1109/VRW66409.2025.00042},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {168–171},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The reintegration of incarcerated individuals into society presents significant challenges, particularly in addressing barriers related to vocational training, social skill development, and emotional rehabilitation. Immersive technologies, such as Virtual Reality and Augmented Reality, combined with generative Artificial Intelligence (AI) and Large Language Models, offer innovative opportunities to enhance these areas. These technologies create practical, controlled environments for skill acquisition and behavioral training, while generative AI enables dynamic, personalized, and adaptive experiences. This paper explores the broader potential of these integrated technologies in supporting rehabilitation, reducing recidivism, and fostering sustainable employment opportunities. These initiatives align with the overarching equity objective of ensuring Decent Work for All, reinforcing the commitment to inclusive and equitable progress across diverse communities through the transformative potential of immersive and AI-driven systems in correctional settings. © 2025 IEEE.},
keywords = {AI-Driven Rehabilitation, Artificial intelligence-driven rehabilitation, Emotional intelligence, Engineering education, Generative AI, generative artificial intelligence, Immersive, Immersive technologies, Immersive Technology, Language Model, Large language model, large language models, Skills development, Social Reintegration, Social skills, Sociology, Vocational training},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, D.; Ge, M.; Xiang, K.; Zhang, X.; Yang, H.
Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions Journal Article
In: Int J Network Manage, vol. 35, John Wiley and Sons Ltd, 2025, ISSN: 10557148 (ISSN).
Abstract | Links | BibTeX | Tags: Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers
@article{huang_privacy_2025,
title = {Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions},
author = {D. Huang and M. Ge and K. Xiang and X. Zhang and H. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199980257&doi=10.1002%2fnem.2292&partnerID=40&md5=2dea1caa1d31aecde3d302a908fb7dd3},
doi = {10.1002/nem.2292},
issn = {10557148 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Int J Network Manage},
volume = {35},
publisher = {John Wiley and Sons Ltd},
abstract = {Large language models (LLMs), with their billions to trillions of parameters, excel in natural language processing, machine translation, dialog systems, and text summarization. These capabilities are increasingly pivotal in the metaverse, where they can enhance virtual interactions and environments. However, their extensive use, particularly in the metaverse's immersive platforms, raises significant privacy concerns. This paper analyzes existing privacy issues in LLMs, vital for both traditional and metaverse applications, and examines protection techniques across the entire life cycle of these models, from training to user deployment. We delve into cryptography, embedding layer encoding, differential privacy and its variants, and adversarial networks, highlighting their relevance in the metaverse context. Specifically, we explore technologies like homomorphic encryption and secure multiparty computation, which are essential for metaverse security. Our discussion on Gaussian differential privacy, Renyi differential privacy, Edgeworth accounting, and the generation of adversarial samples and loss functions emphasizes their importance in the metaverse's dynamic and interactive environments. Lastly, the paper discusses the current research status and future challenges in the security of LLMs within and beyond the metaverse, emphasizing urgent problems and potential areas for exploration. © 2024 John Wiley & Sons Ltd.},
keywords = {Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers},
pubstate = {published},
tppubtype = {article}
}
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real-time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
doi = {10.1109/VRW66409.2025.00038},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real-time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
Sabir, A.; Hussain, R.; Pedro, A.; Park, C.
Personalized construction safety training system using conversational AI in virtual reality Journal Article
In: Automation in Construction, vol. 175, 2025, ISSN: 09265805 (ISSN).
Abstract | Links | BibTeX | Tags: Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'
@article{sabir_personalized_2025,
title = {Personalized construction safety training system using conversational AI in virtual reality},
author = {A. Sabir and R. Hussain and A. Pedro and C. Park},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002741042&doi=10.1016%2fj.autcon.2025.106207&partnerID=40&md5=376284339bf10fd5d799cc56c6643d36},
doi = {10.1016/j.autcon.2025.106207},
issn = {09265805 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Automation in Construction},
volume = {175},
abstract = {Training workers in safety protocols is crucial for mitigating job site hazards, yet traditional methods often fall short. This paper explores integrating virtual reality (VR) and large language models (LLMs) into iSafeTrainer, an AI-powered safety training system. The system allows trainees to engage with trade-specific content tailored to their expertise level in a third-person perspective in a non-immersive desktop virtual environment, eliminating the need for head-mounted displays. An experimental study evaluated the system through qualitative, survey-based assessments, focusing on user satisfaction, experience, engagement, guidance, and confidence. Results showed high satisfaction rates (>85 %) among novice users, with improved safety knowledge. Expert users suggested advanced scenarios, highlighting the system's potential for expansion. The modular architecture supports customization across various construction settings, ensuring adaptability for future improvements. © 2024},
keywords = {Construction safety, Construction safety training, Conversational AI, Digital elevation model, Helmet mounted displays, Language Model, Large language model, large language models, Personalized safety training, Personnel training, Safety training, Training Systems, Virtual environments, Virtual Reality, Workers'},
pubstate = {published},
tppubtype = {article}
}
Lau, K. H. C.; Bozkir, E.; Gao, H.; Kasneci, E.
Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling Proceedings Article
In: Del Bue, A.; Canton, C.; Pont-Tuset, J.; Tommasi, T. (Ed.): Lect. Notes Comput. Sci., pp. 177–195, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 03029743 (ISSN); ISBN: 978-303191571-0 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization
@inproceedings{lau_evaluating_2025,
title = {Evaluating Usability and Engagement of Large Language Models in Virtual Reality for Traditional Scottish Curling},
author = {K. H. C. Lau and E. Bozkir and H. Gao and E. Kasneci},
editor = {Del Bue, A. and Canton, C. and Pont-Tuset, J. and Tommasi, T.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006905979&doi=10.1007%2f978-3-031-91572-7_11&partnerID=40&md5=8a81fb09ff54e57b9429660a8898149a},
doi = {10.1007/978-3-031-91572-7_11},
issn = {03029743 (ISSN)},
isbn = {978-303191571-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15628 LNCS},
pages = {177–195},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper explores the innovative application of Large Language Models (LLMs) in Virtual Reality (VR) environments to promote heritage education, focusing on traditional Scottish curling presented in the game “Scottish Bonspiel VR”. Our study compares the effectiveness of LLM-based chatbots with pre-defined scripted chatbots, evaluating key criteria such as usability, user engagement, and learning outcomes. The results show that LLM-based chatbots significantly improve interactivity and engagement, creating a more dynamic and immersive learning environment. This integration helps document and preserve cultural heritage and enhances dissemination processes, which are crucial for safeguarding intangible cultural heritage (ICH) amid environmental changes. Furthermore, the study highlights the potential of novel technologies in education to provide immersive experiences that foster a deeper appreciation of cultural heritage. These findings support the wider application of LLMs and VR in cultural education to address global challenges and promote sustainable practices to preserve and enhance cultural heritage. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Chatbots, Cultural heritages, Digital Cultural Heritage, Digital cultural heritages, Educational robots, Engineering education, Heritage education, Historic Preservation, Language Model, Large language model, large language models, Learning outcome, Model-based OPC, Usability engineering, User Engagement, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Oliveira, E. A. Masasi De; Sousa, R. T.; Bastos, A. A.; Cintra, L. Martins De Freitas; Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi De Oliveira and R. T. Sousa and A. A. Bastos and L. Martins De Freitas Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2f3706370.3731643&partnerID=40&md5=db10b41217dd8a0b0705c3fb4a615666},
doi = {10.1145/3706370.3731643},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Copyright held by the owner/author(s).},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.
Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method
@inproceedings{hafner_leveraging_2025,
title = {Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines},
author = {P. Häfner and F. Eisenlohr and A. Karande and M. Grethler and A. Mukherjee and N. Tran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2fAIxVR63409.2025.00054&partnerID=40&md5=05fe014eddba395881575bec5d96ce15},
doi = {10.1109/AIxVR63409.2025.00054},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {281–285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machines. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings. © 2025 IEEE.},
keywords = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: % reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality
@article{chen_llmer_2025,
title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
author = {J. Chen and X. Wu and T. Lan and B. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2fTVCG.2025.3549549&partnerID=40&md5=da4681d0714548e3a7e0c8c3295d2348},
doi = {10.1109/TVCG.2025.3549549},
issn = {10772626 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2715–2724},
abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. This leads not only to increased costs under pay-per-use models, but also to elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 1995-2012 IEEE.},
keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: Santoro, C.; Schmidt, A.; Matera, M.; Bellucci, A. (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 03029743 (ISSN); ISBN: 978-303195451-1 (ISBN).
Abstract | Links | BibTeX | Tags: Current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
title = {Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation},
author = {A. Carcangiu and M. Manca and J. Mereu and C. Santoro and L. Simeoli and L. D. Spano},
editor = {Santoro, C. and Schmidt, A. and Matera, M. and Bellucci, A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
doi = {10.1007/978-3-031-95452-8_4},
issn = {03029743 (ISSN)},
isbn = {978-303195451-1 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15713 LNCS},
pages = {59–79},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction setting (keywords, difficulties in expressing conditions, task success), even though we observed different adaptations for each setting (verbal structure, event vs action first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
Buldu, K. B.; Özdel, S.; Lau, K. H. Carrie; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.
CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality
@inproceedings{buldu_cuify_2025,
title = {CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR},
author = {K. B. Buldu and S. Özdel and K. H. Carrie Lau and M. Wang and D. Saad and S. Schönborn and A. Boch and E. Kasneci and E. Bozkir},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2fAIxVR63409.2025.00037&partnerID=40&md5=837b0e3425d2e5a9358bbe6c8ecb5754},
doi = {10.1109/AIxVR63409.2025.00037},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {192–197},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop as personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify © 2025 IEEE.},
keywords = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Suzuki, R.; Gonzalez-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
title = {Everyday AR through AI-in-the-Loop},
author = {R. Suzuki and M. Gonzalez-Franco and M. Sra and D. Lindlbauer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2f3706599.3706741&partnerID=40&md5=56b5e447819dde7aa4a29f8e3899e535},
doi = {10.1145/3706599.3706741},
isbn = {979-840071395-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality
@inproceedings{chen_analyzing_2025,
title = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
author = {J. Chen and J. Grubert and P. O. Kristensson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2fVR59515.2025.00045&partnerID=40&md5=306aa7fbb3dad0aa9d43545f3c7eb9ea},
doi = {10.1109/VR59515.2025.00045},
isbn = {979-833153645-9 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {206–216},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems which involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments. © 2025 IEEE.},
keywords = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Kim, Y.; Aamir, Z.; Singh, M.; Boorboor, S.; Mueller, K.; Kaufman, A. E.
Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2756–2766, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages
@article{kim_explainable_2025,
title = {Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework},
author = {Y. Kim and Z. Aamir and M. Singh and S. Boorboor and K. Mueller and A. E. Kaufman},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003815583&doi=10.1109%2fTVCG.2025.3549537&partnerID=40&md5=1085b698db06656985f80418cb37b773},
doi = {10.1109/TVCG.2025.3549537},
issn = {10772626 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2756–2766},
abstract = {We present Explainable XR, an end-to-end framework for analyzing user behavior in diverse eXtended Reality (XR) environments by leveraging Large Language Models (LLMs) for data interpretation assistance. Existing XR user analytics frameworks face challenges in handling cross-virtuality - AR, VR, MR - transitions, multi-user collaborative application scenarios, and the complexity of multimodal data. Explainable XR addresses these challenges by providing a virtuality-agnostic solution for the collection, analysis, and visualization of immersive sessions. We propose three main components in our framework: (1) A novel user data recording schema, called User Action Descriptor (UAD), that can capture the users' multimodal actions, along with their intents and the contexts; (2) a platform-agnostic XR session recorder, and (3) a visual analytics interface that offers LLM-assisted insights tailored to the analysts' perspectives, facilitating the exploration and analysis of the recorded XR session data. We demonstrate the versatility of Explainable XR through five use-case scenarios, in both individual and collaborative XR applications across virtualities. Our technical evaluation and user studies show that Explainable XR provides a highly usable analytics solution for understanding user actions and delivering multifaceted, actionable insights into user behaviors in immersive environments. © 1995-2012 IEEE.},
keywords = {adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages},
pubstate = {published},
tppubtype = {article}
}
Xing, Y.; Liu, Q.; Wang, J.; Gómez-Zará, D.
sMoRe: Spatial Mapping and Object Rendering Environment Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 115–119, Association for Computing Machinery, 2025, ISBN: 979-840071409-2 (ISBN).
Abstract | Links | BibTeX | Tags: Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects
@inproceedings{xing_smore_2025,
title = {sMoRe: Spatial Mapping and Object Rendering Environment},
author = {Y. Xing and Q. Liu and J. Wang and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001670668&doi=10.1145%2f3708557.3716337&partnerID=40&md5=8ef4c5c4ef2b3ee30d00e4b8d19d19b8},
doi = {10.1145/3708557.3716337},
isbn = {979-840071409-2 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {115–119},
publisher = {Association for Computing Machinery},
abstract = {In mixed reality (MR) environments, understanding space and creating virtual objects is crucial to providing an intuitive user experience. This paper introduces sMoRe (Spatial Mapping and Object Rendering Environment), an MR application that combines Generative AI (GenAI) to assist users in creating, placing, and managing virtual objects within physical spaces. sMoRe allows users to use voice or typed text commands to create and place virtual objects using GenAI while specifying spatial constraints. The system employs Large Language Models (LLMs) to interpret users’ commands, analyze the current scene, and identify optimal locations. Additionally, sMoRe integrates a text-to-3D generative model to dynamically create 3D objects based on users’ descriptions. Our user study demonstrates the effectiveness of sMoRe in enhancing user comprehension, interaction, and organization of the MR environment. © 2025 Copyright held by the owner/author(s).},
keywords = {Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, H.; Xie, Y.; Kasneci, E.
PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education Journal Article
In: International Journal of Human-Computer Interaction, 2025, ISSN: 10447318 (ISSN).
Abstract | Links | BibTeX | Tags: Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization
@article{gao_pervrml_2025,
title = {PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education},
author = {H. Gao and Y. Xie and E. Kasneci},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005776517&doi=10.1080%2f10447318.2025.2504188&partnerID=40&md5=c2c59be3d20d02c6df7750c2330c8f6d},
doi = {10.1080/10447318.2025.2504188},
issn = {10447318 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human-Computer Interaction},
abstract = {The advent of large language models (LLMs) such as ChatGPT has demonstrated significant potential for advancing educational technologies. Recently, growing interest has emerged in integrating ChatGPT with virtual reality (VR) to provide interactive and dynamic learning environments. This study explores the effectiveness of ChatGPT-driven VR in facilitating machine learning education through PerVRML. PerVRML incorporates a ChatGPT-powered avatar that provides real-time assistance and uses LLMs to personalize learning paths based on various sensor data from VR. A between-subjects design was employed to compare two learning modes: personalized and non-personalized. Quantitative data were collected from assessments, user experience surveys, and interaction metrics. The results indicate that while both learning modes supported learning effectively, ChatGPT-powered personalization significantly improved learning outcomes and had distinct impacts on user feedback. These findings underscore the potential of ChatGPT-enhanced VR to deliver adaptive and personalized educational experiences. © 2025 Taylor & Francis Group, LLC.},
keywords = {Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {article}
}
Fang, A.; Chhabria, H.; Maram, A.; Zhu, H.
Social Simulation for Everyday Self-Care: Design Insights from Leveraging VR, AR, and LLMs for Practicing Stress Relief Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1 (ISBN).
Abstract | Links | BibTeX | Tags: design, Design insights, Language Model, Large language model, large language models, Mental health, Peer support, Professional supports, Self-care, Social simulations, Speed dating, Virtual environments, Virtual Reality, Well being
@inproceedings{fang_social_2025,
title = {Social Simulation for Everyday Self-Care: Design Insights from Leveraging VR, AR, and LLMs for Practicing Stress Relief},
author = {A. Fang and H. Chhabria and A. Maram and H. Zhu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005770377&doi=10.1145%2f3706598.3713115&partnerID=40&md5=87d43f04dfd3231cb189fa89570824c5},
doi = {10.1145/3706598.3713115},
isbn = {979-840071394-1 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Stress is an inevitable part of day-to-day life, yet many find themselves unable to manage it on their own, particularly when professional or peer support is not readily available. As self-care becomes increasingly vital for mental well-being, this paper explores the potential of social simulation as a safe, virtual environment for practicing in-the-moment stress relief for everyday social situations. Leveraging the immersive capabilities of VR, AR, and LLMs to create realistic interactions and environments, we developed eight interactive prototypes for various social stress related scenarios (e.g. public speaking, interpersonal conflict) across design dimensions of modality, interactivity, and mental health guidance in order to conduct prototype-driven semi-structured interviews with 19 participants. Our qualitative findings reveal that people currently lack effective means to support themselves through everyday stress and perceive social simulation - even at low immersion and interaction levels - to fill a gap for practical, controlled training of mental health practices. We outline key design needs for developing social simulation for self-care needs, and identify important considerations including risks of trauma from hyper-realism, distrust of LLM-recommended timing for mental health recommendations, and the value of accessibility for self-care interventions. © 2025 Copyright held by the owner/author(s).},
keywords = {design, Design insights, Language Model, Large language model, large language models, Mental health, Peer support, Professional supports, Self-care, Social simulations, Speed dating, Virtual environments, Virtual Reality, Well being},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Gkournelos, C.; Konstantinou, C.; Angelakis, P.; Michalos, G.; Makris, S.
Enabling Seamless Human-Robot Collaboration in Manufacturing Using LLMs Proceedings Article
In: Wagner, A.; Alexopoulos, K.; Makris, S. (Ed.): Lect. Notes Mech. Eng., pp. 81–89, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 21954356 (ISSN); ISBN: 978-303157495-5 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Collaboration capabilities, Computational Linguistics, Human operator, Human-Robot Collaboration, Industrial research, Industrial robots, Intelligent robots, Language Model, Large language model, large language models, Manufacturing environments, Programming robots, Reality interface, Research papers, Robot programming, User friendly
@inproceedings{gkournelos_enabling_2024,
title = {Enabling Seamless Human-Robot Collaboration in Manufacturing Using LLMs},
author = {C. Gkournelos and C. Konstantinou and P. Angelakis and G. Michalos and S. Makris},
editor = {Wagner, A. and Alexopoulos, K. and Makris, S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199196139&doi=10.1007%2f978-3-031-57496-2_9&partnerID=40&md5=cd0b33b3c9e9f9e53f1e99882945e134},
doi = {10.1007/978-3-031-57496-2_9},
issn = {21954356 (ISSN)},
isbn = {978-303157495-5 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Mech. Eng.},
pages = {81–89},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In the era of Industry 5.0, there is a growing interest in harnessing the potential of human-robot collaboration (HRC) in manufacturing environments. This research paper focuses on the integration of Large Language Models (LLMs) to augment HRC capabilities, particularly in addressing configuration issues when programming robots to collaborate with human operators. By harnessing the capabilities of LLMs in combination with a user-friendly augmented reality (AR) interface, the proposed approach empowers human operators to seamlessly collaborate with robots, facilitating smooth and efficient assembly processes. This research introduces CollabAI, an AI assistant for task management and natural communication based on a fine-tuned GPT model focusing on collaborative manufacturing. Real-world experiments were conducted in two manufacturing settings from the automotive and machinery industries. The findings have implications for various industries seeking to increase productivity and foster a new era of efficient and effective collaboration in manufacturing environments. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence, Augmented Reality, Collaboration capabilities, Computational Linguistics, Human operator, Human-Robot Collaboration, Industrial research, Industrial robots, Intelligent robots, Language Model, Large language model, large language models, Manufacturing environments, Programming robots, Reality interface, Research papers, Robot programming, User friendly},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, X. B.; Li, J. N.; Kim, D.; Chen, X.; Du, R.
Human I/O: Towards a Unified Approach to Detecting Situational Impairments Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Context awareness, Context-awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording
@inproceedings{liu_human_2024,
title = {Human I/O: Towards a Unified Approach to Detecting Situational Impairments},
author = {X. B. Liu and J. N. Li and D. Kim and X. Chen and R. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194891045&doi=10.1145%2f3613904.3642065&partnerID=40&md5=01b3ece7c1bc2a758126fce88a15d14e},
doi = {10.1145/3613904.3642065},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Situationally Induced Impairments and Disabilities (SIIDs) can significantly hinder user experience in contexts such as poor lighting, noise, and multi-tasking. While prior research has introduced algorithms and systems to address these impairments, they predominantly cater to specific tasks or environments and fail to accommodate the diverse and dynamic nature of SIIDs. We introduce Human I/O, a unified approach to detecting a wide range of SIIDs by gauging the availability of human input/output channels. Leveraging egocentric vision, multimodal sensing, and reasoning with large language models, Human I/O achieves a 0.22 mean absolute error and an 82% accuracy in availability prediction across 60 in-the-wild egocentric video recordings in 32 different scenarios. Furthermore, while the core focus of our work is on the detection of SIIDs rather than the creation of adaptive user interfaces, we showcase the efficacy of our prototype via a user study with 10 participants. Findings suggest that Human I/O significantly reduces effort and improves user experience in the presence of SIIDs, paving the way for more adaptive and accessible interactive systems in the future. © 2024 Copyright held by the owner/author(s)},
keywords = {Augmented Reality, Computational Linguistics, Context awareness, Context- awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording},
pubstate = {published},
tppubtype = {inproceedings}
}
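Human I/O frames SIID detection as predicting the availability of human input/output channels. The toy sketch below illustrates only that abstraction, with hand-written rules standing in for the paper's egocentric-vision, multimodal-sensing, and LLM-reasoning pipeline; the channel names and thresholds are assumptions:

    # Minimal sketch of the "human I/O channel availability" abstraction.
    # The paper infers availability with vision, sensing, and LLM reasoning;
    # the toy rules here only show the shape of the output.
    from dataclasses import dataclass

    @dataclass
    class Context:
        lux: float            # ambient light level
        noise_db: float       # ambient noise level
        hands_occupied: bool  # e.g. carrying groceries
        socially_quiet: bool  # e.g. in a library

    def availability(ctx: Context) -> dict[str, float]:
        """Score each input/output channel in [0, 1]; higher = more available."""
        return {
            "vision": 0.1 if ctx.lux < 10 else 1.0,
            "hearing": 0.2 if ctx.noise_db > 80 else 1.0,
            "vocal": 0.1 if ctx.socially_quiet else 1.0,
            "hands": 0.0 if ctx.hands_occupied else 1.0,
        }

    # A downstream UI could, e.g., fall back to audio output when
    # availability(ctx)["vision"] drops below some threshold.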
Vasic, I.; Fill, H. -G.; Quattrini, R.; Pierdicca, R.
LLM-Aided Museum Guide: Personalized Tours Based on User Preferences Proceedings Article
In: L.T., De Paolis; P., Arpaia; M., Sacco (Ed.): Lect. Notes Comput. Sci., pp. 249–262, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 03029743 (ISSN); ISBN: 978-303171709-3 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence techniques, Automated process, Cultural heritages, Extended reality, Language Model, Large language model, large language models, Modeling languages, Museum guide, User's preferences, Virtual environments, Virtual museum, Virtual museums, Virtual tour
@inproceedings{vasic_llm-aided_2024,
title = {LLM-Aided Museum Guide: Personalized Tours Based on User Preferences},
author = {I. Vasic and H. -G. Fill and R. Quattrini and R. Pierdicca},
editor = {De Paolis L.T. and Arpaia P. and Sacco M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85205127699&doi=10.1007%2f978-3-031-71710-9_18&partnerID=40&md5=fba73e38a432e0749b8e79197ef85310},
doi = {10.1007/978-3-031-71710-9_18},
issn = {03029743 (ISSN)},
isbn = {978-303171709-3 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15029 LNCS},
pages = {249–262},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {The rapid development of generative artificial intelligence (GenAI) techniques is a promising step toward automated processes in the field of cultural heritage (CH). The recent rise of powerful Large Language Models (LLMs) like ChatGPT has made them a commonly utilized tool for a wide range of tasks across various fields. In this paper, we introduce LLMs as a guide in the three-dimensional (3D) panoramic virtual tour of the Civic Art Gallery of Ascoli, enabling visitors to express their interests and be shown the requested content. The input to our algorithm is a user request in natural language. The processing tasks are performed with OpenAI’s Generative Pre-trained Transformer (GPT) 4o model, and requests are handled through OpenAI’s API. We demonstrate all the functionalities within a developed local web-based application. This novel approach solves the problem of generic guided tours in museums and offers a solution for more automated and personalized ones. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence techniques, Automated process, Cultural heritages, Extended reality, Language Model, Large language model, large language models, Modeling languages, Museum guide, User's preferences, Virtual environments, Virtual museum, Virtual museums, Virtual tour},
pubstate = {published},
tppubtype = {inproceedings}
}
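The abstract states that visitor requests are handled by OpenAI's GPT-4o through the OpenAI API. A minimal sketch of such a routing step, with a hypothetical exhibit catalogue and prompt (not the authors' actual implementation):

    # Hypothetical sketch: match a visitor's free-form request to an
    # exhibit ID. Catalogue, prompt, and IDs are illustrative assumptions.
    from openai import OpenAI

    client = OpenAI()

    CATALOGUE = {
        "ven01": "Venetian painting, 16th century",
        "sculpt02": "Marble sculpture, neoclassical",
        "mod03": "Modern local artists",
    }

    def route_request(request: str) -> str:
        """Ask the model which exhibit best matches the visitor's interest."""
        listing = "\n".join(f"{k}: {v}" for k, v in CATALOGUE.items())
        resp = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system",
                 "content": "Pick the single best exhibit ID from:\n"
                            + listing + "\nReply with the ID only."},
                {"role": "user", "content": request},
            ],
        )
        return resp.choices[0].message.content.strip()

    # route_request("I love Renaissance art") might return "ven01";
    # the panoramic tour would then navigate to that exhibit.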
Bao, Y.; Gao, N.; Weng, D.; Chen, J.; Tian, Z.
MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides Proceedings Article
In: U., Eck; M., Sra; J., Stefanucci; M., Sugimoto; M., Tatzgern; I., Williams (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 337–338, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9 (ISBN).
Abstract | Links | BibTeX | Tags: Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides
@inproceedings{bao_musegesture_2024,
title = {MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides},
author = {Y. Bao and N. Gao and D. Weng and J. Chen and Z. Tian},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214385900&doi=10.1109%2fISMAR-Adjunct64951.2024.00079&partnerID=40&md5=e71ffc28e299597557034259aab50641},
doi = {10.1109/ISMAR-Adjunct64951.2024.00079},
isbn = {979-833150691-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {337–338},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents an innovative framework named MuseGesture, designed to generate contextually adaptive gestures for virtual agents in Virtual Reality (VR) museums. The framework leverages the robust language understanding and generation capabilities of Large Language Models (LLMs) to parse tour narration texts and generate corresponding explanatory gestures. Through reinforcement learning and adversarial skill embeddings, the framework also generates guiding gestures tailored to the virtual museum environment, integrating both gesture types using conditional motion interpolation methods. Experimental results and user studies demonstrate that this approach effectively enables voice-command-controlled virtual guide gestures, offering a novel intelligent guiding system solution that enhances the interactive experience in VR museum environments. © 2024 IEEE.},
keywords = {Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides},
pubstate = {published},
tppubtype = {inproceedings}
}
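MuseGesture integrates explanatory and guiding gestures via conditional motion interpolation. The snippet below sketches only a linear blend of two equal-length gesture clips under an assumed (frames x joints x xyz) array layout; the paper's reinforcement learning and adversarial skill embeddings are well beyond a short example:

    # Toy sketch of blending an explanatory gesture with a guiding gesture.
    # The clip format and the linear blend are assumptions, not the paper's
    # actual conditional motion interpolation method.
    import numpy as np

    def blend_gestures(explain: np.ndarray, guide: np.ndarray,
                       weight: float) -> np.ndarray:
        """Linearly interpolate two equal-shape gesture clips.

        weight = 0.0 -> purely explanatory, 1.0 -> purely guiding.
        """
        assert explain.shape == guide.shape
        return (1.0 - weight) * explain + weight * guide

    # 60 frames, 24 joints, xyz positions
    explain = np.zeros((60, 24, 3))
    guide = np.ones((60, 24, 3))
    mixed = blend_gestures(explain, guide, 0.3)  # mostly explanatory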
Bojić, L.; Ðapić, V.
The Interplay of Social and Robotics Theories in AGI Alignment: Navigating the Digital City Through Simulation-based Multi-Agent Systems Proceedings Article
In: N., Zdravkovic; D., Domazet; S., Lopez-Pernas; M.A., Conde; P., Vijayakumar (Ed.): CEUR Workshop Proc., pp. 58–63, CEUR-WS, 2024, ISSN: 16130073 (ISSN).
Abstract | Links | BibTeX | Tags: Artificial general intelligence, Artificial general intelligences, Autonomous agents, Decision making, Decision theory, Decisions makings, Human values, Intelligent Agents, Language Model, Large language model, large language models, Multi agent systems, Philosophical aspects, Robotic theory, Robotics, Robotics Theories, Simulation based approaches, Simulation platform, Simulation-Based Approach, Smart city, Social Theories, Social theory, Theoretical framework, Virtual cities, Virtual Reality
@inproceedings{bojic_interplay_2024,
title = {The Interplay of Social and Robotics Theories in AGI Alignment: Navigating the Digital City Through Simulation-based Multi-Agent Systems},
author = {L. Bojić and V. Ðapić},
editor = {Zdravkovic N. and Domazet D. and Lopez-Pernas S. and Conde M.A. and Vijayakumar P.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85193478708&partnerID=40&md5=6a9fd04d5bbf8b876ba508bef1c09076},
issn = {16130073 (ISSN)},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proc.},
volume = {3676},
pages = {58–63},
publisher = {CEUR-WS},
abstract = {This study delves into the task of aligning Artificial General Intelligence (AGI) and Large Language Models (LLMs) with societal and ethical norms by using theoretical frameworks derived from social science and robotics. The expansive adoption of AGI technologies magnifies the importance of aligning AGI with human values and ethical boundaries. This paper presents an innovative simulation-based approach, engaging autonomous 'digital citizens' within a multi-agent system simulation in a virtual city environment. The virtual city serves as a platform to examine systematic interactions and decision-making, leveraging various theories, notably Social Simulation Theory, Theory of Reasoned Action, Multi-Agent System Theory, and Situated Action Theory. The aim of establishing this digital landscape is to create a fluid platform that enables our AI agents to engage in interactions and enact independent decisions, thereby recreating life-like situations. The LLMs, embodying the personas in this digital city, operate as the leading agents demonstrating substantial levels of autonomy. Despite the promising advantages of this approach, limitations primarily lie in the unpredictability of real-world social structures. This work aims to promote a deeper understanding of AGI dynamics and contribute to its future development, prioritizing the integration of diverse societal perspectives in the process. © 2024 Copyright for this paper by its authors.},
keywords = {Artificial general intelligence, Artificial general intelligences, Autonomous agents, Decision making, Decision theory, Decisions makings, Human values, Intelligent Agents, Language Model, Large language model, large language models, Multi agent systems, Philosophical aspects, Robotic theory, Robotics, Robotics Theories, Simulation based approaches, Simulation platform, Simulation-Based Approach, Smart city, Social Theories, Social theory, Theoretical framework, Virtual cities, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
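The simulation the abstract describes amounts to an observe-decide-act loop over LLM-driven personas. A minimal offline sketch of that loop, with a random stub where the paper would place an LLM call (all class and method names are illustrative):

    # Minimal sketch of a virtual-city multi-agent loop. The Agent/City
    # classes and the stubbed decide() are assumptions; the paper drives
    # each 'digital citizen' with an LLM conditioned on its persona.
    import random

    class Agent:
        def __init__(self, name: str, persona: str):
            self.name, self.persona = name, persona

        def decide(self, observation: str) -> str:
            # In the paper this would be an LLM call conditioned on the
            # persona and observation; a random stub keeps the sketch offline.
            return random.choice(["greet a neighbour", "go to work", "idle"])

    class City:
        def __init__(self, agents: list[Agent]):
            self.agents, self.log = agents, []

        def step(self) -> None:
            for agent in self.agents:
                obs = f"{agent.name} sees recent events: {self.log[-3:]}"
                self.log.append((agent.name, agent.decide(obs)))

    city = City([Agent("Ana", "teacher"), Agent("Luka", "engineer")])
    for _ in range(5):
        city.step()
    print(city.log)  # the interaction trace a researcher would analyze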
Guo, Y.; Hou, K.; Yan, Z.; Chen, H.; Xing, G.; Jiang, X.
Sensor2Scene: Foundation Model-Driven Interactive Realities Proceedings Article
In: Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys, pp. 13–19, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036345-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization
@inproceedings{guo_sensor2scene_2024,
title = {Sensor2Scene: Foundation Model-Driven Interactive Realities},
author = {Y. Guo and K. Hou and Z. Yan and H. Chen and G. Xing and X. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199893762&doi=10.1109%2fFMSys62467.2024.00007&partnerID=40&md5=c3bf1739e8c1dc6227d61609ddc66910},
doi = {10.1109/FMSys62467.2024.00007},
isbn = {979-835036345-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys},
pages = {13–19},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) is acclaimed for its potential to bridge the physical and virtual worlds. Yet, current integration between these realms often lacks a deep understanding of the physical environment and the subsequent scene generation that reflects this understanding. This research introduces Sensor2Scene, a novel system framework designed to enhance user interactions with sensor data through AR. At its core, an AI agent leverages large language models (LLMs) to decode subtle information from sensor data, constructing detailed scene descriptions for visualization. To enable these scenes to be rendered in AR, we decompose the scene creation process into tasks of text-to-3D model generation and spatial composition, allowing new AR scenes to be sketched from the descriptions. We evaluated our framework using an LLM evaluator based on five metrics on various datasets to examine the correlation between sensor readings and corresponding visualizations, and demonstrated the system's effectiveness with scenes generated end-to-end. The results highlight the potential of LLMs to understand IoT sensor data. Furthermore, generative models can aid in transforming these interpretations into visual formats, thereby enhancing user interaction. This work not only displays the capabilities of Sensor2Scene but also lays a foundation for advancing AR with the goal of creating more immersive and contextually rich experiences. © 2024 IEEE.},
keywords = {3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
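Sensor2Scene's first stage converts sensor readings into a scene description that a text-to-3D model can consume. A hypothetical sketch of that stage, assuming the openai-python SDK; the reading names, prompt, and model choice are illustrative, not the authors':

    # Hypothetical sketch: turn raw IoT sensor readings into a one-sentence
    # scene description for a text-to-3D generator.
    from openai import OpenAI

    client = OpenAI()

    def describe_scene(readings: dict[str, float]) -> str:
        """Summarize sensor readings as a scene description."""
        facts = ", ".join(f"{k}={v}" for k, v in readings.items())
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{
                "role": "user",
                "content": "IoT sensor readings: " + facts +
                           ". Write one sentence describing the scene "
                           "they imply, suitable for a text-to-3D generator.",
            }],
        )
        return resp.choices[0].message.content

    # describe_scene({"temperature_c": 31.0, "co2_ppm": 1400, "lux": 40})
    # might return "A dim, stuffy meeting room crowded with people."; the
    # description is then passed to text-to-3D generation and composed in AR.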
Izquierdo-Domenech, J.; Linares-Pellicer, J.; Ferri-Molla, I.
Virtual Reality and Language Models, a New Frontier in Learning Journal Article
In: International Journal of Interactive Multimedia and Artificial Intelligence, vol. 8, no. 5, pp. 46–54, 2024, ISSN: 19891660 (ISSN).
Abstract | Links | BibTeX | Tags: large language models, Retrieval-Augmented Generation, Virtual Reality
@article{izquierdo-domenech_virtual_2024,
title = {Virtual Reality and Language Models, a New Frontier in Learning},
author = {J. Izquierdo-Domenech and J. Linares-Pellicer and I. Ferri-Molla},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85186906440&doi=10.9781%2fijimai.2024.02.007&partnerID=40&md5=338e80de2837a9d173f432f1467b2dc9},
doi = {10.9781/ijimai.2024.02.007},
issn = {19891660 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {International Journal of Interactive Multimedia and Artificial Intelligence},
volume = {8},
number = {5},
pages = {46–54},
abstract = {The proposed research introduces an innovative Virtual Reality (VR) and Large Language Model (LLM) architecture to enhance the learning process across diverse educational contexts, ranging from school to industrial settings. Leveraging the capabilities of LLMs and Retrieval-Augmented Generation (RAG), the architecture centers around an immersive VR application. This application empowers students of all backgrounds to interactively engage with their environment by posing questions and receiving informative responses in text format and with visual hints in VR, thereby fostering a dynamic learning experience. LLMs with RAG act as the backbones of this architecture, facilitating the integration of private or domain-specific data into the learning process. By seamlessly connecting various data sources through data connectors, RAG overcomes the challenge of disparate and siloed information repositories, including APIs, PDFs, SQL databases, and more. The data indexes provided by RAG solutions further streamline this process by structuring the ingested data into formats optimized for consumption by LLMs. An empirical study was conducted to evaluate the effectiveness of this VR and LLM architecture. Twenty participants, divided into Experimental and Control groups, were selected to assess the impact on their learning process. The Experimental group utilized the immersive VR application, which allowed interactive engagement with the educational environment, while the Control group followed traditional learning methods. The study revealed significant improvements in learning outcomes for the Experimental group, demonstrating the potential of integrating VR and LLMs in enhancing comprehension and engagement in learning contexts. This study presents an innovative approach that capitalizes on the synergy between LLMs and immersive VR technology, opening avenues for a transformative learning experience that transcends traditional boundaries and empowers learners across a spectrum of educational landscapes. © 2024, Universidad Internacional de la Rioja. All rights reserved.},
keywords = {large language models, Retrieval-Augmented Generation, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
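The architecture pairs an immersive VR front end with a RAG backbone that grounds LLM answers in domain documents. The offline sketch below illustrates only the retrieve-then-answer pattern, with a toy bag-of-words retriever standing in for the paper's data connectors and indexes:

    # Offline sketch of the retrieval step in a RAG pipeline. The toy
    # bag-of-words similarity is an assumption; a real system would use
    # learned embeddings and then prompt the LLM with the retrieved text.
    from collections import Counter
    import math

    DOCS = {
        "lathe_safety.pdf": "always wear goggles when operating the lathe",
        "cnc_manual.pdf": "the cnc spindle speed is set via the control panel",
    }

    def _vec(text: str) -> Counter:
        return Counter(text.lower().split())

    def _cosine(a: Counter, b: Counter) -> float:
        dot = sum(a[t] * b[t] for t in a)
        na = math.sqrt(sum(v * v for v in a.values()))
        nb = math.sqrt(sum(v * v for v in b.values()))
        return dot / (na * nb) if na and nb else 0.0

    def retrieve(question: str) -> str:
        """Return the name of the document most similar to the question."""
        q = _vec(question)
        return max(DOCS, key=lambda d: _cosine(q, _vec(DOCS[d])))

    best = retrieve("how do I change the spindle speed on the cnc")
    # The VR app would now prompt the LLM with DOCS[best] as grounding
    # context and render the answer (plus visual hints) in the headset.
    print(best)  # cnc_manual.pdf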