AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Huang, D.; Ge, M.; Xiang, K.; Zhang, X.; Yang, H.
Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions Journal Article
In: Int J Network Manage, vol. 35, John Wiley and Sons Ltd, 2025, ISSN: 10557148 (ISSN).
Abstract | Links | BibTeX | Tags: Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers
@article{huang_privacy_2025,
title = {Privacy Preservation of Large Language Models in the Metaverse Era: Research Frontiers, Categorical Comparisons, and Future Directions},
author = {D. Huang and M. Ge and K. Xiang and X. Zhang and H. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199980257&doi=10.1002%2fnem.2292&partnerID=40&md5=2dea1caa1d31aecde3d302a908fb7dd3},
doi = {10.1002/nem.2292},
issn = {10557148 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Int J Network Manage},
volume = {35},
publisher = {John Wiley and Sons Ltd},
abstract = {Large language models (LLMs), with their billions to trillions of parameters, excel in natural language processing, machine translation, dialog systems, and text summarization. These capabilities are increasingly pivotal in the metaverse, where they can enhance virtual interactions and environments. However, their extensive use, particularly in the metaverse's immersive platforms, raises significant privacy concerns. This paper analyzes existing privacy issues in LLMs, vital for both traditional and metaverse applications, and examines protection techniques across the entire life cycle of these models, from training to user deployment. We delve into cryptography, embedding layer encoding, differential privacy and its variants, and adversarial networks, highlighting their relevance in the metaverse context. Specifically, we explore technologies like homomorphic encryption and secure multiparty computation, which are essential for metaverse security. Our discussion on Gaussian differential privacy, Renyi differential privacy, Edgeworth accounting, and the generation of adversarial samples and loss functions emphasizes their importance in the metaverse's dynamic and interactive environments. Lastly, the paper discusses the current research status and future challenges in the security of LLMs within and beyond the metaverse, emphasizing urgent problems and potential areas for exploration. © 2024 John Wiley & Sons Ltd.},
keywords = {Adversarial networks, Computational Linguistics, Cryptography, Differential privacies, Excel, Language Model, Large language model, large language models, Life cycle, Metaverse, Metaverses, Natural language processing systems, Natural languages, Privacy preservation, Privacy protection, Research frontiers},
pubstate = {published},
tppubtype = {article}
}
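A minimal sketch of the Gaussian mechanism underlying the Gaussian and Rényi differential-privacy variants compared in this survey; the statistic, sensitivity, and privacy-budget values below are illustrative, not taken from the paper.

import math
import random

def gaussian_mechanism(value: float, sensitivity: float,
                       epsilon: float, delta: float) -> float:
    # Release `value` with (epsilon, delta)-differential privacy using the
    # classical calibration sigma = sensitivity * sqrt(2 ln(1.25/delta)) / epsilon,
    # valid for epsilon < 1 (Dwork & Roth, 2014).
    sigma = sensitivity * math.sqrt(2.0 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)

# Example: privately release an aggregate statistic from LLM training.
print(gaussian_mechanism(0.42, sensitivity=0.1, epsilon=0.5, delta=1e-5))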
Buldu, K. B.; Özdel, S.; Lau, K. H. Carrie; Wang, M.; Saad, D.; Schönborn, S.; Boch, A.; Kasneci, E.; Bozkir, E.
CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 192–197, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality
@inproceedings{buldu_cuify_2025,
title = {CUIfy the XR: An Open-Source Package to Embed LLM-Powered Conversational Agents in XR},
author = {K. B. Buldu and S. Özdel and K. H. Carrie Lau and M. Wang and D. Saad and S. Schönborn and A. Boch and E. Kasneci and E. Bozkir},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000229165&doi=10.1109%2fAIxVR63409.2025.00037&partnerID=40&md5=837b0e3425d2e5a9358bbe6c8ecb5754},
doi = {10.1109/AIxVR63409.2025.00037},
isbn = {979-833152157-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {192–197},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent developments in computer graphics, machine learning, and sensor technologies enable numerous opportunities for extended reality (XR) setups for everyday life, from skills training to entertainment. With large corporations offering affordable consumer-grade head-mounted displays (HMDs), XR will likely become pervasive, and HMDs will develop as personal devices like smartphones and tablets. However, having intelligent spaces and naturalistic interactions in XR is as important as technological advances so that users grow their engagement in virtual and augmented spaces. To this end, large language model (LLM)-powered non-player characters (NPCs) with speech-to-text (STT) and text-to-speech (TTS) models bring significant advantages over conventional or pre-scripted NPCs for facilitating more natural conversational user interfaces (CUIs) in XR. This paper provides the community with an open-source, customizable, extendable, and privacy-aware Unity package, CUIfy, that facilitates speech-based NPC-user interaction with widely used LLMs, STT, and TTS models. Our package also supports multiple LLM-powered NPCs per environment and minimizes latency between different computational models through streaming to achieve usable interactions between users and NPCs. We publish our source code in the following repository: https://gitlab.lrz.de/hctl/cuify © 2025 IEEE.},
keywords = {Augmented Reality, Computational Linguistics, Conversational user interface, conversational user interfaces, Extended reality, Head-mounted-displays, Helmet mounted displays, Language Model, Large language model, large language models, Non-player character, non-player characters, Open source software, Personnel training, Problem oriented languages, Speech models, Speech-based interaction, Text to speech, Unity, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
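The STT → LLM → TTS round trip that CUIfy packages for Unity, reduced to a minimal Python sketch; every function here is an illustrative stub, not the package's API (which is C#), and streaming each stage is what keeps perceived latency usable.

from dataclasses import dataclass, field

@dataclass
class NPC:
    # One LLM-backed non-player character; CUIfy supports several per scene.
    name: str
    persona: str                        # system prompt defining the character
    history: list = field(default_factory=list)

def transcribe(audio: bytes) -> str:        # stand-in for an STT model
    return "Where can I find the training room?"

def chat(npc: NPC, user_text: str) -> str:  # stand-in for a (streamed) LLM call
    npc.history.append(("user", user_text))
    reply = f"{npc.name}: Follow the corridor to your left."
    npc.history.append(("assistant", reply))
    return reply

def synthesize(text: str) -> bytes:         # stand-in for a TTS model
    return text.encode()

def interaction_step(npc: NPC, mic_audio: bytes) -> bytes:
    # Full speech round trip for one user utterance.
    return synthesize(chat(npc, transcribe(mic_audio)))

guide = NPC("Guide", persona="A friendly assistant in the XR scene.")
interaction_step(guide, b"<pcm audio>")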
Ding, S.; Chen, Y.
RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 131–136, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: Ambient intelligence, Computational Linguistics, Computer interaction, Computing methodologies, Computing methodologies-Artificial intelligence-Natural language processing-Natural language generation, Computing methodology-artificial intelligence-natural language processing-natural language generation, Data handling, Formal languages, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Language Model, Language processing, Natural language generation, Natural language processing systems, Natural languages, Virtual Reality, Word processing
@inproceedings{ding_rag-vr_2025,
title = {RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments},
author = {S. Ding and Y. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140593&doi=10.1109%2fVRW66409.2025.00034&partnerID=40&md5=36dc5fef97aeea4d6e183c83ce9fcd89},
doi = {10.1109/VRW66409.2025.00034},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {131–136},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in large language models (LLMs) provide new opportunities for context understanding in virtual reality (VR). However, VR contexts are often highly localized and personalized, limiting the effectiveness of general-purpose LLMs. To address this challenge, we present RAG-VR, the first 3D question-answering system for VR that incorporates retrieval-augmented generation (RAG), which augments an LLM with external knowledge retrieved from a localized knowledge database to improve the answer quality. RAG-VR includes a pipeline for extracting comprehensive knowledge about virtual environments and user conditions for accurate answer generation. To ensure efficient retrieval, RAG-VR offloads the retrieval process to a nearby edge server and uses only essential information during retrieval. Moreover, we train the retriever to effectively distinguish among relevant, irrelevant, and hard-to-differentiate information in relation to questions. RAG-VR improves answer accuracy by 17.9%-41.8% and reduces end-to-end latency by 34.5%-47.3% compared with two baseline systems. © 2025 IEEE.},
keywords = {Ambient intelligence, Computational Linguistics, Computer interaction, Computing methodologies, Computing methodologies-Artificial intelligence-Natural language processing-Natural language generation, Computing methodology-artificial intelligence-natural language processing-natural language generation, Data handling, Formal languages, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Language Model, Language processing, Natural language generation, Natural language processing systems, Natural languages, Virtual Reality, Word processing},
pubstate = {published},
tppubtype = {inproceedings}
}
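The retrieval-augmented prompting pattern RAG-VR builds on, sketched with a toy token-overlap scorer; the paper instead trains a retriever to separate relevant, irrelevant, and hard-to-differentiate knowledge and offloads retrieval to an edge server. The function names and sample knowledge base are invented for illustration.

def score(question: str, passage: str) -> float:
    # Toy relevance: fraction of question tokens found in the passage.
    q, p = set(question.lower().split()), set(passage.lower().split())
    return len(q & p) / (len(q) or 1)

def retrieve(question: str, knowledge: list[str], k: int = 2) -> list[str]:
    return sorted(knowledge, key=lambda p: score(question, p), reverse=True)[:k]

def build_prompt(question: str, knowledge: list[str]) -> str:
    # Prepend only the essential retrieved facts to the LLM prompt.
    context = "\n".join(retrieve(question, knowledge))
    return f"Answer using only this VR scene context:\n{context}\n\nQ: {question}\nA:"

kb = [
    "The red lever in the engine room opens the coolant valve.",
    "The player's inventory holds a wrench and a keycard.",
    "The exit door on deck two requires the keycard.",
]
print(build_prompt("Which door needs the keycard?", kb))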
2024
Gkournelos, C.; Konstantinou, C.; Angelakis, P.; Michalos, G.; Makris, S.
Enabling Seamless Human-Robot Collaboration in Manufacturing Using LLMs Proceedings Article
In: A., Wagner; K., Alexopoulos; S., Makris (Ed.): Lect. Notes Mech. Eng., pp. 81–89, Springer Science and Business Media Deutschland GmbH, 2024, ISBN: 21954356 (ISSN); 978-303157495-5 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Collaboration capabilities, Computational Linguistics, Human operator, Human-Robot Collaboration, Industrial research, Industrial robots, Intelligent robots, Language Model, Large language model, large language models, Manufacturing environments, Programming robots, Reality interface, Research papers, Robot programming, User friendly
@inproceedings{gkournelos_enabling_2024,
title = {Enabling Seamless Human-Robot Collaboration in Manufacturing Using LLMs},
author = {C. Gkournelos and C. Konstantinou and P. Angelakis and G. Michalos and S. Makris},
editor = {Wagner A. and Alexopoulos K. and Makris S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199196139&doi=10.1007%2f978-3-031-57496-2_9&partnerID=40&md5=cd0b33b3c9e9f9e53f1e99882945e134},
doi = {10.1007/978-3-031-57496-2_9},
isbn = {21954356 (ISSN); 978-303157495-5 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Mech. Eng.},
pages = {81–89},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In the era of Industry 5.0, there is a growing interest in harnessing the potential of human-robot collaboration (HRC) in manufacturing environments. This research paper focuses on the integration of Large Language Models (LLMs) to augment HRC capabilities, particularly in addressing configuration issues when programming robots to collaborate with human operators. By harnessing the capabilities of LLMs in combination with a user-friendly augmented reality (AR) interface, the proposed approach empowers human operators to seamlessly collaborate with robots, facilitating smooth and efficient assembly processes. This research introduces CollabAI, an AI assistant for task management and natural communication based on a fine-tuned GPT model focusing on collaborative manufacturing. Real-world experiments were conducted in two manufacturing settings, from the automotive and machinery industries. The findings have implications for various industries seeking to increase productivity and foster a new era of efficient and effective collaboration in manufacturing environments. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence, Augmented Reality, Collaboration capabilities, Computational Linguistics, Human operator, Human-Robot Collaboration, Industrial research, Industrial robots, Intelligent robots, Language Model, Large language model, large language models, Manufacturing environments, Programming robots, Reality interface, Research papers, Robot programming, User friendly},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, X. B.; Li, J. N.; Kim, D.; Chen, X.; Du, R.
Human I/O: Towards a Unified Approach to Detecting Situational Impairments Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Context awareness, Context-awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording
@inproceedings{liu_human_2024,
title = {Human I/O: Towards a Unified Approach to Detecting Situational Impairments},
author = {X. B. Liu and J. N. Li and D. Kim and X. Chen and R. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194891045&doi=10.1145%2f3613904.3642065&partnerID=40&md5=01b3ece7c1bc2a758126fce88a15d14e},
doi = {10.1145/3613904.3642065},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Situationally Induced Impairments and Disabilities (SIIDs) can significantly hinder user experience in contexts such as poor lighting, noise, and multi-tasking. While prior research has introduced algorithms and systems to address these impairments, they predominantly cater to specific tasks or environments and fail to accommodate the diverse and dynamic nature of SIIDs. We introduce Human I/O, a unified approach to detecting a wide range of SIIDs by gauging the availability of human input/output channels. Leveraging egocentric vision, multimodal sensing and reasoning with large language models, Human I/O achieves a 0.22 mean absolute error and an 82% accuracy in availability prediction across 60 in-the-wild egocentric video recordings in 32 different scenarios. Furthermore, while the core focus of our work is on the detection of SIIDs rather than the creation of adaptive user interfaces, we showcase the efficacy of our prototype via a user study with 10 participants. Findings suggest that Human I/O significantly reduces effort and improves user experience in the presence of SIIDs, paving the way for more adaptive and accessible interactive systems in the future. © 2024 Copyright held by the owner/author(s)},
keywords = {Augmented Reality, Computational Linguistics, Context awareness, Context-awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording},
pubstate = {published},
tppubtype = {inproceedings}
}
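The paper's core abstraction, gauging the availability of human input/output channels, can be pictured as a small data structure plus an adaptation rule; the fields, scores, and routing rule below are a hypothetical simplification of what Human I/O predicts from egocentric video, audio, and LLM reasoning.

from dataclasses import dataclass

@dataclass
class ChannelAvailability:
    # Availability of each human I/O channel in [0, 1].
    vision: float
    hearing: float
    hands: float
    speech: float

def pick_output_modality(ch: ChannelAvailability) -> str:
    # Toy adaptation rule: route system output to the more available channel.
    return "visual notification" if ch.vision >= ch.hearing else "audio notification"

# E.g. a user carrying boxes in a noisy workshop:
busy = ChannelAvailability(vision=0.8, hearing=0.2, hands=0.1, speech=0.9)
print(pick_output_modality(busy))  # -> visual notification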
Liu, Z.; Zhu, Z.; Zhu, L.; Jiang, E.; Hu, X.; Peppler, K.; Ramani, K.
ClassMeta: Designing Interactive Virtual Classmate to Promote VR Classroom Participation Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3D Avatars, Behavioral Research, Classroom learning, Collaborative learning, Computational Linguistics, Condition, E-Learning, Human behaviors, Language Model, Large language model, Learning experiences, Learning systems, pedagogical agent, Pedagogical agents, Students, Three dimensional computer graphics, Virtual Reality, VR classroom
@inproceedings{liu_classmeta_2024,
title = {ClassMeta: Designing Interactive Virtual Classmate to Promote VR Classroom Participation},
author = {Z. Liu and Z. Zhu and L. Zhu and E. Jiang and X. Hu and K. Peppler and K. Ramani},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194868458&doi=10.1145%2f3613904.3642947&partnerID=40&md5=0592b2f977a2ad2e6366c6fa05808a6a},
doi = {10.1145/3613904.3642947},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Peer influence plays a crucial role in promoting classroom participation, where behaviors from active students can contribute to a collective classroom learning experience. However, the presence of these active students depends on several conditions and is not consistently available across all circumstances. Recently, Large Language Models (LLMs) such as GPT have demonstrated the ability to simulate diverse human behaviors convincingly due to their capacity to generate contextually coherent responses based on their role settings. Inspired by this advancement in technology, we designed ClassMeta, a GPT-4 powered agent to help promote classroom participation by playing the role of an active student. These agents, which are embodied as 3D avatars in virtual reality, interact with actual instructors and students with both spoken language and body gestures. We conducted a comparative study to investigate the potential of ClassMeta for improving the overall learning experience of the class. © 2024 Copyright held by the owner/author(s)},
keywords = {3D Avatars, Behavioral Research, Classroom learning, Collaborative learning, Computational Linguistics, Condition, E-Learning, Human behaviors, Language Model, Large language model, Learning experiences, Learning systems, pedagogical agent, Pedagogical agents, Students, Three dimensional computer graphics, Virtual Reality, VR classroom},
pubstate = {published},
tppubtype = {inproceedings}
}
Pester, A.; Tammaa, A.; Gütl, C.; Steinmaurer, A.; El-Seoud, S. A.
Conversational Agents, Virtual Worlds, and Beyond: A Review of Large Language Models Enabling Immersive Learning Proceedings Article
In: IEEE Global Eng. Edu. Conf., EDUCON, IEEE Computer Society, 2024, ISBN: 21659559 (ISSN); 979-835039402-3 (ISBN).
Abstract | Links | BibTeX | Tags: Computational Linguistics, Computer aided instruction, Conversational Agents, Education, Immersive learning, Language Model, Large language model, Learning systems, Literature reviews, LLM, Metaverse, Metaverses, Natural language processing systems, Pedagogy, Survey literature review, Virtual Reality, Virtual worlds
@inproceedings{pester_conversational_2024,
title = {Conversational Agents, Virtual Worlds, and Beyond: A Review of Large Language Models Enabling Immersive Learning},
author = {A. Pester and A. Tammaa and C. Gütl and A. Steinmaurer and S. A. El-Seoud},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199068668&doi=10.1109%2fEDUCON60312.2024.10578895&partnerID=40&md5=1b904fd8a5e06d7ced42a328c028bbb7},
doi = {10.1109/EDUCON60312.2024.10578895},
isbn = {21659559 (ISSN); 979-835039402-3 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Global Eng. Edu. Conf., EDUCON},
publisher = {IEEE Computer Society},
abstract = {Large Language Models represent a significant breakthrough in Natural Language Processing research and opened a wide range of application domains. This paper demonstrates the successful integration of Large Language Models into immersive learning environments. The review highlights how this emerging technology aligns with pedagogical principles, enhancing the effectiveness of current educational systems. It also reflects recent advancements in integrating Large Language Models, including fine-tuning, hallucination reduction, fact-checking, and human evaluation of generated results. © 2024 IEEE.},
keywords = {Computational Linguistics, Computer aided instruction, Conversational Agents, Education, Immersive learning, Language Model, Large language model, Learning systems, Literature reviews, LLM, Metaverse, Metaverses, Natural language processing systems, Pedagogy, Survey literature review, Virtual Reality, Virtual worlds},
pubstate = {published},
tppubtype = {inproceedings}
}
Clocchiatti, A.; Fumero, N.; Soccini, A. M.
Character Animation Pipeline based on Latent Diffusion and Large Language Models Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 398–405, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037202-1 (ISBN).
Abstract | Links | BibTeX | Tags: Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality
@inproceedings{clocchiatti_character_2024,
title = {Character Animation Pipeline based on Latent Diffusion and Large Language Models},
author = {A. Clocchiatti and N. Fumero and A. M. Soccini},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187217072&doi=10.1109%2fAIxVR59861.2024.00067&partnerID=40&md5=d88b9ba7c80d49b60fd0d7acd5e7c4f0},
doi = {10.1109/AIxVR59861.2024.00067},
isbn = {979-835037202-1 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {398–405},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Artificial intelligence and deep learning techniques are revolutionizing the film production pipeline. The majority of the current screenplay-to-animation pipelines focus on understanding the screenplay through natural language processing techniques, and on the generation of the animation through custom engines, missing the possibility to customize the characters. To address these issues, we propose a high-level pipeline for generating 2D characters and animations starting from screenplays, through a combination of Latent Diffusion Models and Large Language Models. Our approach uses ChatGPT to generate character descriptions starting from the screenplay. Then, using that data, it generates images of custom characters with Stable Diffusion and animates them according to their actions in different scenes. The proposed approach avoids well-known problems in generative AI tools such as temporal inconsistency and lack of control on the outcome. The results suggest that the pipeline is consistent and reliable, benefiting industries ranging from film production to virtual, augmented and extended reality content creation. © 2024 IEEE.},
keywords = {Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Weerasinghe, K.; Janapati, S.; Ge, X.; Kim, S.; Iyer, S.; Stankovic, J. A.; Alemzadeh, H.
Real-Time Multimodal Cognitive Assistant for Emergency Medical Services Proceedings Article
In: Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI, pp. 85–96, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037025-6 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, machine learning, Machine-learning, Multi-modal, Real-time, Service protocols, Smart Health, Speech recognition, State of the art
@inproceedings{weerasinghe_real-time_2024,
title = {Real-Time Multimodal Cognitive Assistant for Emergency Medical Services},
author = {K. Weerasinghe and S. Janapati and X. Ge and S. Kim and S. Iyer and J. A. Stankovic and H. Alemzadeh},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197769304&doi=10.1109%2fIoTDI61053.2024.00012&partnerID=40&md5=a3b7cf14e46ecb2d4e49905fb845f2c9},
doi = {10.1109/IoTDI61053.2024.00012},
isbn = {979-835037025-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI},
pages = {85–96},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Emergency Medical Services (EMS) responders often operate under time-sensitive conditions, facing cognitive overload and inherent risks, requiring essential skills in critical thinking and rapid decision-making. This paper presents CognitiveEMS, an end-to-end wearable cognitive assistant system that can act as a collaborative virtual partner engaging in the real-time acquisition and analysis of multimodal data from an emergency scene and interacting with EMS responders through Augmented Reality (AR) smart glasses. CognitiveEMS processes the continuous streams of data in real-time and leverages edge computing to provide assistance in EMS protocol selection and intervention recognition. We address key technical challenges in real-time cognitive assistance by introducing three novel components: (i) a Speech Recognition model that is fine-tuned for real-world medical emergency conversations using simulated EMS audio recordings, augmented with synthetic data generated by large language models (LLMs); (ii) an EMS Protocol Prediction model that combines state-of-the-art (SOTA) tiny language models with EMS domain knowledge using graph-based attention mechanisms; (iii) an EMS Action Recognition module which leverages multimodal audio and video data and protocol predictions to infer the intervention/treatment actions taken by the responders at the incident scene. Our results show that for speech recognition we achieve superior performance compared to SOTA (WER of 0.290 vs. 0.618) on conversational data. Our protocol prediction component also significantly outperforms SOTA (top-3 accuracy of 0.800 vs. 0.200) and the action recognition achieves an accuracy of 0.727, while maintaining an end-to-end latency of 3.78s for protocol prediction on the edge and 0.31s on the server. © 2024 IEEE.},
keywords = {Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, machine learning, Machine-learning, Multi-modal, Real-time, Service protocols, Smart Health, Speech recognition, State of the art},
pubstate = {published},
tppubtype = {inproceedings}
}
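The word error rate the paper reports (0.290 vs. 0.618 for the baseline) is the standard Levenshtein distance over words divided by reference length; a self-contained reference implementation, with an invented EMS-style utterance as the example.

def word_error_rate(reference: str, hypothesis: str) -> float:
    # WER = (substitutions + insertions + deletions) / reference word count.
    ref, hyp = reference.split(), hypothesis.split()
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution
    return d[len(ref)][len(hyp)] / max(len(ref), 1)

print(word_error_rate("give one milligram epinephrine",
                      "give one milligram of epinephrine"))  # 0.25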
Guo, Y.; Hou, K.; Yan, Z.; Chen, H.; Xing, G.; Jiang, X.
Sensor2Scene: Foundation Model-Driven Interactive Realities Proceedings Article
In: Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys, pp. 13–19, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036345-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization
@inproceedings{guo_sensor2scene_2024,
title = {Sensor2Scene: Foundation Model-Driven Interactive Realities},
author = {Y. Guo and K. Hou and Z. Yan and H. Chen and G. Xing and X. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199893762&doi=10.1109%2fFMSys62467.2024.00007&partnerID=40&md5=c3bf1739e8c1dc6227d61609ddc66910},
doi = {10.1109/FMSys62467.2024.00007},
isbn = {979-835036345-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys},
pages = {13–19},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) is acclaimed for its potential to bridge the physical and virtual worlds. Yet, current integration between these realms often lacks a deep understanding of the physical environment and the subsequent scene generation that reflects this understanding. This research introduces Sensor2Scene, a novel system framework designed to enhance user interactions with sensor data through AR. At its core, an AI agent leverages large language models (LLMs) to decode subtle information from sensor data, constructing detailed scene descriptions for visualization. To enable these scenes to be rendered in AR, we decompose the scene creation process into tasks of text-to-3D model generation and spatial composition, allowing new AR scenes to be sketched from the descriptions. We evaluated our framework using an LLM evaluator based on five metrics on various datasets to examine the correlation between sensor readings and corresponding visualizations, and demonstrated the system's effectiveness with scenes generated from end-to-end. The results highlight the potential of LLMs to understand IoT sensor data. Furthermore, generative models can aid in transforming these interpretations into visual formats, thereby enhancing user interaction. This work not only displays the capabilities of Sensor2Scene but also lays a foundation for advancing AR with the goal of creating more immersive and contextually rich experiences. © 2024 IEEE.},
keywords = {3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
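Sensor2Scene's decomposition, sensor readings → LLM scene description → text-to-3D asset prompts plus spatial composition, in outline; both functions are stand-in stubs with invented fields and outputs, not the system's actual models.

def describe(sensor: dict) -> str:
    # Stand-in for the LLM step that decodes readings into a scene description.
    return (f"A living room at {sensor['temp_c']} °C with CO2 at "
            f"{sensor['co2_ppm']} ppm; a window should be shown open.")

def decompose(description: str) -> tuple[list[str], list[str]]:
    # Stand-in for splitting a description into text-to-3D generation prompts
    # and spatial-composition directives for the AR renderer.
    assets = ["a cozy sofa", "an open window with curtains"]
    layout = ["place window on north wall", "place sofa facing window"]
    return assets, layout

assets, layout = decompose(describe({"temp_c": 24, "co2_ppm": 900}))
print(assets, layout)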
Liu, M.; M'Hiri, F.
Beyond Traditional Teaching: Large Language Models as Simulated Teaching Assistants in Computer Science Proceedings Article
In: SIGCSE - Proc. ACM Tech. Symp. Comput. Sci. Educ., pp. 743–749, Association for Computing Machinery, Inc, 2024, ISBN: 979-840070423-9 (ISBN).
Abstract | Links | BibTeX | Tags: Adaptive teaching, ChatGPT, Computational Linguistics, CS education, E-Learning, Education computing, Engineering education, GPT, Language Model, LLM, machine learning, Machine-learning, Novice programmer, novice programmers, Openai, Programming, Python, Students, Teaching, Virtual Reality
@inproceedings{liu_beyond_2024,
title = {Beyond Traditional Teaching: Large Language Models as Simulated Teaching Assistants in Computer Science},
author = {M. Liu and F. M'Hiri},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85189289344&doi=10.1145%2f3626252.3630789&partnerID=40&md5=44ec79c8f005f4551c820c61f5b5d435},
doi = {10.1145/3626252.3630789},
isbn = {979-840070423-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {SIGCSE - Proc. ACM Tech. Symp. Comput. Sci. Educ.},
volume = {1},
pages = {743–749},
publisher = {Association for Computing Machinery, Inc},
abstract = {As the prominence of Large Language Models (LLMs) grows in various sectors, their potential in education warrants exploration. In this study, we investigate the feasibility of employing GPT-3.5 from OpenAI, as an LLM teaching assistant (TA) or a virtual TA in computer science (CS) courses. The objective is to enhance the accessibility of CS education while maintaining academic integrity by refraining from providing direct solutions to current-semester assignments. Targeting Foundations of Programming (COMP202), an undergraduate course that introduces students to programming with Python, we have developed a virtual TA using the LangChain framework, known for integrating language models with diverse data sources and environments. The virtual TA assists students with their code and clarifies complex concepts. For homework questions, it is designed to guide students with hints rather than giving out direct solutions. We assessed its performance first through a qualitative evaluation, then a survey-based comparative analysis, using a mix of questions commonly asked on the COMP202 discussion board and questions created by the authors. Our preliminary results indicate that the virtual TA outperforms human TAs on clarity and engagement, matching them on accuracy when the question is non-assignment-specific, for which human TAs still proved more reliable. These findings suggest that while virtual TAs, leveraging the capabilities of LLMs, hold great promise towards making CS education experience more accessible and engaging, their optimal use necessitates human supervision. We conclude by identifying several directions that could be explored in future implementations. © 2024 ACM.},
keywords = {Adaptive teaching, ChatGPT, Computational Linguistics, CS education, E-Learning, Education computing, Engineering education, GPT, Language Model, LLM, machine learning, Machine-learning, Novice programmer, novice programmers, Openai, Programming, Python, Students, Teaching, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
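The hint-not-solution behavior described above is, at bottom, a system-prompt policy switched on for assignment questions; a sketch in which the prompt wording and helper names are illustrative, not the authors' LangChain implementation.

HINT_ONLY_SYSTEM_PROMPT = """You are a teaching assistant for COMP202 (Python).
For questions about current-semester assignments, never give a full solution.
Offer at most three guiding hints, each pointing to a concept or documentation
section, and end by asking the student what they would try next."""

def build_ta_messages(student_question: str, is_assignment: bool) -> list[dict]:
    # Assignment questions get the hint-only policy; others are answered directly.
    system = (HINT_ONLY_SYSTEM_PROMPT if is_assignment
              else "You are a helpful TA for COMP202. Answer clearly and concisely.")
    return [{"role": "system", "content": system},
            {"role": "user", "content": student_question}]

msgs = build_ta_messages("My loop in question 2 never terminates. Why?", True)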
Torre, F. De La; Fang, C. M.; Huang, H.; Banburski-Fahey, A.; Fernandez, J. A.; Lanier, J.
LLMR: Real-time Prompting of Interactive Worlds using Large Language Models Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Computational Linguistics, Design goal, Interactive computer graphics, Interactive worlds, Internal dynamics, Language Model, Large language model, Mixed reality, Novel strategies, Real-time, Spatial Reasoning, Training data
@inproceedings{de_la_torre_llmr_2024,
title = {LLMR: Real-time Prompting of Interactive Worlds using Large Language Models},
author = {F. De La Torre and C. M. Fang and H. Huang and A. Banburski-Fahey and J. A. Fernandez and J. Lanier},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194848276&doi=10.1145%2f3613904.3642579&partnerID=40&md5=14969e96507a1f0110262021e5b1172d},
doi = {10.1145/3613904.3642579},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {We present Large Language Model for Mixed Reality (LLMR), a framework for the real-time creation and modification of interactive Mixed Reality experiences using LLMs. LLMR leverages novel strategies to tackle difficult cases where ideal training data is scarce, or where the design goal requires the synthesis of internal dynamics, intuitive analysis, or advanced interactivity. Our framework relies on text interaction and the Unity game engine. By incorporating techniques for scene understanding, task planning, self-debugging, and memory management, LLMR outperforms the standard GPT-4 by 4x in average error rate. We demonstrate LLMR's cross-platform interoperability with several example worlds, and evaluate it on a variety of creation and modification tasks to show that it can produce and edit diverse objects, tools, and scenes. Finally, we conducted a usability study (N=11) with a diverse set of participants, which revealed that they had positive experiences with the system and would use it again. © 2024 Copyright held by the owner/author(s)},
keywords = {Artificial intelligence, Computational Linguistics, Design goal, Interactive computer graphics, Interactive worlds, Internal dynamics, Language Model, Large language model, Mixed reality, Novel strategies, Real-time, Spatial Reasoning, Training data},
pubstate = {published},
tppubtype = {inproceedings}
}
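LLMR's self-debugging component amounts to a generate-compile-retry loop in which compiler errors are fed back into the next prompt; a minimal sketch with stubbed generation and compilation steps (the real framework targets the Unity engine and adds scene understanding, task planning, and memory management).

def generate_scene_code(request: str, error: str | None = None) -> str:
    # Stand-in for the LLM call that emits engine code for the requested scene;
    # on retries, the previous error message is appended to the prompt.
    prompt = request if error is None else f"{request}\nFix this error:\n{error}"
    return f"// generated for: {prompt!r}"

def try_compile(code: str) -> str | None:
    # Stand-in for the engine's compile step; returns an error string or None.
    return None

def self_debugging_loop(request: str, max_attempts: int = 3) -> str:
    # Generate, compile, and feed errors back until the code builds.
    error = None
    for _ in range(max_attempts):
        code = generate_scene_code(request, error)
        error = try_compile(code)
        if error is None:
            return code
    raise RuntimeError("could not produce compiling code")

print(self_debugging_loop("spawn a table with three chairs"))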
Krauss, C.; Bassbouss, L.; Upravitelev, M.; An, T. -S.; Altun, D.; Reray, L.; Balitzki, E.; Tamimi, T. El; Karagülle, M.
Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse Proceedings Article
In: R.A., Sottilare; J., Schwarz (Ed.): Lect. Notes Comput. Sci., pp. 219–238, Springer Science and Business Media Deutschland GmbH, 2024, ISBN: 03029743 (ISSN); 978-303160608-3 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, AI-assistant, AI-Assistants, Computational Linguistics, Computer aided instruction, Concept-based, E-Learning, Education, Interoperability, Language Model, Large language model, large language models, Learning Environments, Learning systems, Learning Technologies, Learning technology, LLM, Metaverse, Metaverses, Natural language processing systems, Proof of concept, User interfaces, Virtual assistants, Virtual Reality
@inproceedings{krauss_opportunities_2024,
title = {Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse},
author = {C. Krauss and L. Bassbouss and M. Upravitelev and T. -S. An and D. Altun and L. Reray and E. Balitzki and T. El Tamimi and M. Karagülle},
editor = {Sottilare R.A. and Schwarz J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196214138&doi=10.1007%2f978-3-031-60609-0_16&partnerID=40&md5=9a66876cb30e9e5d287a86e6cfa66e05},
doi = {10.1007/978-3-031-60609-0_16},
isbn = {03029743 (ISSN); 978-303160608-3 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14727 LNCS},
pages = {219–238},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {The paper explores the opportunities and challenges for metaverse learning environments with AI-Assistants based on Large Language Models. A proof of concept based on popular but proprietary technologies is presented that enables a natural language exchange between the user and an AI-based medical expert in a highly immersive environment based on the Unreal Engine. The answers generated by ChatGPT are not only played back lip-synchronously, but also visualized in the VR environment using a 3D model of a skeleton. Usability and user experience play a particularly important role in the development of the highly immersive AI-Assistant. The proof of concept serves to illustrate the opportunities and challenges that lie in the merging of large language models, metaverse applications and educational ecosystems, which are self-contained research areas. Development strategies, tools and interoperability standards will be presented to facilitate future developments in this triangle of tension. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {3D modeling, AI-assistant, AI-Assistants, Computational Linguistics, Computer aided instruction, Concept-based, E-Learning, Education, Interoperability, Language Model, Large language model, large language models, Learning Environments, Learning systems, Learning Technologies, Learning technology, LLM, Metaverse, Metaverses, Natural language processing systems, Proof of concept, User interfaces, Virtual assistants, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Omirgaliyev, R.; Kenzhe, D.; Mirambekov, S.
Simulating life: the application of generative agents in virtual environments Proceedings Article
In: IEEE AITU: Digit. Gener., Conf. Proc. - AITU, pp. 181–187, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036437-8 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Artificial intelligence agent, Artificial Intelligence Agents, Autonomous agents, Behavioral Research, Behaviour models, Computational Linguistics, Decision making, Dynamics, Dynamics simulation, Economic and social effects, Game Development, Game environment, Language Model, Large language model, large language models, Modeling languages, Social dynamic simulation, Social dynamics, Social Dynamics Simulation, Software design, Virtual Reality, Virtual Societies
@inproceedings{omirgaliyev_simulating_2024,
title = {Simulating life: the application of generative agents in virtual environments},
author = {R. Omirgaliyev and D. Kenzhe and S. Mirambekov},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199876250&doi=10.1109%2fIEEECONF61558.2024.10585387&partnerID=40&md5=70f8b598d10bec13c39d3506a15534a1},
doi = {10.1109/IEEECONF61558.2024.10585387},
isbn = {979-835036437-8 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE AITU: Digit. Gener., Conf. Proc. - AITU},
pages = {181–187},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This research explores the innovative integration of Large Language Models (LLMs) in game development, focusing on the autonomous creation, development, and governance of a virtual village by AI agents within a 2D game environment. The core of this study lies in observing and analyzing the interactions and societal development among AI agents, utilizing advanced algorithms for generative behavior modeling and dynamic skill tree learning. These AI agents are endowed with human-like decision-making capabilities, enabled by LLMs, allowing them to engage in complex social interactions and contribute to emergent societal structures within the game. The uniqueness of this project stems from its approach to simulating lifelike social dynamics in a virtual setting, thus addressing a gap in existing research and marking a significant contribution to the interdisciplinary fields of artificial intelligence and game development. By comparing AI-generated societal behaviors with human social interactions, the study delves into the potential of AI to mirror or enhance human social structures, offering a fresh perspective on the capabilities of AI in game development. This research not only aims to push the boundaries of AI applications in game development but also seeks to provide valuable insights into the potential for AI-driven simulations in studying complex social and behavioral dynamics. ©2024 IEEE.},
keywords = {Artificial intelligence, Artificial intelligence agent, Artificial Intelligence Agents, Autonomous agents, Behavioral Research, Behaviour models, Computational Linguistics, Decision making, Dynamics, Dynamics simulation, Economic and social effects, Game Development, Game environment, Language Model, Large language model, large language models, Modeling languages, Social dynamic simulation, Social dynamics, Social Dynamics Simulation, Software design, Virtual Reality, Virtual Societies},
pubstate = {published},
tppubtype = {inproceedings}
}
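The generative-agent cycle the study builds on (observe, remember, decide) in miniature; the three-item memory window and canned reply are illustrative stand-ins for the LLM call and the paper's dynamic skill-tree learning.

from dataclasses import dataclass, field

@dataclass
class Agent:
    name: str
    memories: list = field(default_factory=list)

    def observe(self, event: str) -> None:
        self.memories.append(event)

    def decide(self) -> str:
        # Stand-in for an LLM call conditioned on recent memories and a persona.
        recent = "; ".join(self.memories[-3:])
        return f"{self.name} reacts to: {recent or 'nothing yet'}"

villager = Agent("Aru")
villager.observe("the granary is empty")
villager.observe("rain started")
print(villager.decide())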
Saddik, A. E.; Ghaboura, S.
The Integration of ChatGPT With the Metaverse for Medical Consultations Journal Article
In: IEEE Consumer Electronics Magazine, vol. 13, no. 3, pp. 6–15, 2024, ISSN: 21622248 (ISSN).
Abstract | Links | BibTeX | Tags: Chatbots, Computational Linguistics, Cutting edges, Diagnosis, Health care, Healthcare delivery, Healthcare environments, Human like, Immersive, Language Model, Medical diagnostic imaging, Medical Imaging, Medical services, Metaverses
@article{saddik_integration_2024,
title = {The Integration of ChatGPT With the Metaverse for Medical Consultations},
author = {A. E. Saddik and S. Ghaboura},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85174844304&doi=10.1109%2fMCE.2023.3324978&partnerID=40&md5=ce0da4988d06258a1bc695e2d4ac4677},
doi = {10.1109/MCE.2023.3324978},
issn = {21622248 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {IEEE Consumer Electronics Magazine},
volume = {13},
number = {3},
pages = {6–15},
abstract = {Recent years witnessed a promising synergy between healthcare and the Metaverse leading to the development of virtual healthcare environments. This convergence offers accessible and immersive healthcare experiences and holds the potential for transforming the delivery of medical services and enhancing patient outcomes. However, the reliance on specialist presence in the metaverse for medical support remains a challenge. On the other hand, the newly launched large language model chatbot, the ChatGPT of OpenAI, has emerged as a game-changer, providing human-like responses and facilitating interactive conversations. By integrating this cutting-edge language model with the Metaverse for medical purposes, we can potentially revolutionize healthcare delivery, enhance access to care, and increase patient engagement. This study proposes a new medical Metaverse model utilizing GPT-4 as a content creator, highlighting its potential, addressing challenges and limitations, and exploring various application fields. We conclude by outlining our ongoing efforts to transform this concept into a practical reality. © 2012 IEEE.},
keywords = {Chatbots, Computational Linguistics, Cutting edges, Diagnosis, Health care, Healthcare delivery, Healthcare environments, Human like, Immersive, Language Model, Medical diagnostic imaging, Medical Imaging, Medical services, Metaverses},
pubstate = {published},
tppubtype = {article}
}
Kapadia, N.; Gokhale, S.; Nepomuceno, A.; Cheng, W.; Bothwell, S.; Mathews, M.; Shallat, J. S.; Schultz, C.; Gupta, A.
Evaluation of Large Language Model Generated Dialogues for an AI Based VR Nurse Training Simulator Proceedings Article
In: J.Y.C., Chen; G., Fragomeni (Ed.): Lect. Notes Comput. Sci., pp. 200–212, Springer Science and Business Media Deutschland GmbH, 2024, ISBN: 03029743 (ISSN); 978-303161040-0 (ISBN).
Abstract | Links | BibTeX | Tags: Bard, ChatGPT, ClaudeAI, Clinical research, Computational Linguistics, Dialogue Generation, Dialogue generations, Education computing, Extended reality, Health care education, Healthcare Education, Language Model, Language processing, Large language model, large language models, Natural Language Processing, Natural language processing systems, Natural languages, Nurse Training Simulation, Nursing, Patient avatar, Patient Avatars, Semantics, Students, Training simulation, Virtual Reality
@inproceedings{kapadia_evaluation_2024,
title = {Evaluation of Large Language Model Generated Dialogues for an AI Based VR Nurse Training Simulator},
author = {N. Kapadia and S. Gokhale and A. Nepomuceno and W. Cheng and S. Bothwell and M. Mathews and J. S. Shallat and C. Schultz and A. Gupta},
editor = {Chen J.Y.C. and Fragomeni G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196200653&doi=10.1007%2f978-3-031-61041-7_13&partnerID=40&md5=8890a8d0c289fdf6e7ab82e105249097},
doi = {10.1007/978-3-031-61041-7_13},
isbn = {03029743 (ISSN); 978-303161040-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14706 LNCS},
pages = {200–212},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper explores the efficacy of Large Language Models (LLMs) in generating dialogues for patient avatars in Virtual Reality (VR) nurse training simulators. With the integration of technology in healthcare education evolving rapidly, the potential of NLP to enhance nurse training through realistic patient interactions presents a significant opportunity. Our study introduces a novel LLM-based dialogue generation system, leveraging models such as ChatGPT, GoogleBard, and ClaudeAI. We detail the development of our script generation system, which was a collaborative endeavor involving nurses, technical artists, and developers. The system, tested on the Meta Quest 2 VR headset, integrates complex dialogues created through a synthesis of clinical expertise and advanced NLP, aimed at simulating real-world nursing scenarios. Through a comprehensive evaluation involving lexical and semantic similarity tests compared to clinical expert-generated scripts, we assess the potential of LLMs as suitable alternatives for script generation. The findings aim to contribute to the development of a more interactive and effective VR nurse training simulator, enhancing communication skills among nursing students for improved patient care outcomes. This research underscores the importance of advanced NLP applications in healthcare education, offering insights into the practicality and limitations of employing LLMs in clinical training environments. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Bard, ChatGPT, ClaudeAI, Clinical research, Computational Linguistics, Dialogue Generation, Dialogue generations, Education computing, Extended reality, Health care education, Healthcare Education, Language Model, Language processing, Large language model, large language models, Natural Language Processing, Natural language processing systems, Natural languages, Nurse Training Simulation, Nursing, Patient avatar, Patient Avatars, Semantics, Students, Training simulation, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
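The lexical and semantic similarity testing described above can be approximated with two simple measures; this sketch uses difflib for lexical overlap and a bag-of-words cosine as a crude stand-in for the semantic comparison (the study's exact metrics and example lines are not given in the abstract).

from collections import Counter
from difflib import SequenceMatcher
import math

def lexical_similarity(a: str, b: str) -> float:
    # Character-level match ratio, one common lexical measure.
    return SequenceMatcher(None, a, b).ratio()

def bow_cosine(a: str, b: str) -> float:
    # Cosine over word counts; a real semantic test would use sentence embeddings.
    ca, cb = Counter(a.lower().split()), Counter(b.lower().split())
    dot = sum(ca[w] * cb[w] for w in ca)
    na = math.sqrt(sum(v * v for v in ca.values()))
    nb = math.sqrt(sum(v * v for v in cb.values()))
    return dot / (na * nb) if na and nb else 0.0

expert = "On a scale of zero to ten, how bad is your pain right now?"
generated = "Could you rate your pain from zero to ten at this moment?"
print(lexical_similarity(expert, generated), bow_cosine(expert, generated))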
Patel, P.; Goiri, Í.; Choukse, E.; Warrier, B.; Bianchini, R.; Zhang, C.; Mahalingam, N.
Characterizing Power Management Opportunities for LLMs in the Cloud Proceedings Article
In: Int Conf Archit Support Program Lang Oper Syst ASPLOS, pp. 207–222, Association for Computing Machinery, 2024, ISBN: 979-840070386-7 (ISBN).
Abstract | Links | BibTeX | Tags: Cloud, Cloud providers, Computational Linguistics, Computing power, Consumption patterns, Datacenter, datacenters, Electric power utilization, GPUs, Language Model, Large language model, large language models, Model inference, Power, Power management, Power oversubscription, Power usage, Profiling, Program processors, Virtual Reality
@inproceedings{patel_characterizing_2024,
title = {Characterizing Power Management Opportunities for LLMs in the Cloud},
author = {P. Patel and Í. Goiri and E. Choukse and B. Warrier and R. Bianchini and C. Zhang and N. Mahalingam},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85192199791&doi=10.1145%2f3620666.3651329&partnerID=40&md5=6102cbb096a789e297711420d4b8427a},
doi = {10.1145/3620666.3651329},
isbn = {979-840070386-7 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Int Conf Archit Support Program Lang Oper Syst ASPLOS},
volume = {3},
pages = {207–222},
publisher = {Association for Computing Machinery},
abstract = {Recent innovation in large language models (LLMs), and their myriad use cases have rapidly driven up the compute demand for datacenter GPUs. Several cloud providers and other enterprises plan to substantially grow their datacenter capacity to support these new workloads. A key bottleneck resource in datacenters is power, which LLMs are quickly saturating due to their rapidly increasing model sizes. We extensively characterize the power consumption patterns of a variety of LLMs and their configurations. We identify the differences between the training and inference power consumption patterns. Based on our analysis, we claim that the average and peak power utilization in LLM inference clusters should not be very high. Our deductions align with data from production LLM clusters, revealing that inference workloads offer substantial headroom for power oversubscription. However, the stringent set of telemetry and controls that GPUs offer in a virtualized environment make it challenging to build a reliable and robust power management framework. We leverage the insights from our characterization to identify opportunities for better power management. As a detailed use case, we propose a new framework called POLCA, which enables power oversubscription in LLM inference clouds. POLCA is robust, reliable, and readily deployable. Using open-source models to replicate the power patterns observed in production, we simulate POLCA and demonstrate that we can deploy 30% more servers in existing clusters with minimal performance loss. © 2024 Copyright held by the owner/author(s).},
keywords = {Cloud, Cloud providers, Computational Linguistics, Computing power, Consumption patterns, Datacenter, datacenters, Electric power utilization, GPUs, Language Model, Large language model, large language models, Model inference, Power, Power management, Power oversubscription, Power usage, Profiling, Program processors, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
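The headroom argument reduces to simple arithmetic: admit servers against observed inference peaks rather than provisioned capacity, minus a safety margin. The numbers below are invented to echo, not reproduce, the paper's ~30% figure.

def oversubscription_headroom(provisioned_kw: float,
                              observed_peaks_kw: list[float],
                              safety_margin: float = 0.05) -> float:
    # Fraction of additional servers a cluster could host if admission is
    # based on observed peak draw instead of nameplate power.
    peak = max(observed_peaks_kw)
    usable = provisioned_kw * (1.0 - safety_margin)
    return usable / peak - 1.0

# 1 MW provisioned, inference peaks near 700 kW -> roughly 36% headroom.
print(f"{oversubscription_headroom(1000.0, [640.0, 700.0, 655.0]):.0%}")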
Xu, F.; Nguyen, T.; Du, J.
Augmented Reality for Maintenance Tasks with ChatGPT for Automated Text-To-Action Journal Article
In: Journal of Construction Engineering and Management, vol. 150, no. 4, 2024, ISSN: 07339364 (ISSN).
Abstract | Links | BibTeX | Tags: Artificial intelligence systems, Augmented Reality, Augmented Reality (AR), ChatGPT, Complex sequences, Computational Linguistics, Diverse fields, Human like, Language Model, Maintenance, Maintenance tasks, Operations and maintenance, Optical character recognition, Sensor technologies, Virtual Reality
@article{xu_augmented_2024,
title = {Augmented Reality for Maintenance Tasks with ChatGPT for Automated Text-To-Action},
author = {F. Xu and T. Nguyen and J. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85183669638&doi=10.1061%2fJCEMD4.COENG-14142&partnerID=40&md5=6b02d2f4f6e74a8152adf2eb30ee2d88},
doi = {10.1061/JCEMD4.COENG-14142},
issn = {07339364 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {Journal of Construction Engineering and Management},
volume = {150},
number = {4},
abstract = {Advancements in sensor technology, artificial intelligence (AI), and augmented reality (AR) have unlocked opportunities across various domains. AR and large language models like GPT have witnessed substantial progress and increasingly are being employed in diverse fields. One such promising application is in operations and maintenance (OM). OM tasks often involve complex procedures and sequences that can be challenging to memorize and execute correctly, particularly for novices or in high-stress situations. By combining the advantages of superimposing virtual objects onto the physical world and generating human-like text using GPT, we can revolutionize OM operations. This study introduces a system that combines AR, optical character recognition (OCR), and the GPT language model to optimize user performance while offering trustworthy interactions and alleviating workload in OM tasks. This system provides an interactive virtual environment controlled by the Unity game engine, facilitating a seamless interaction between virtual and physical realities. A case study (N=30) was conducted to illustrate the findings and answer the research questions. The Multidimensional Measurement of Trust (MDMT) was applied to understand the complexity of trust engagement with such a human-like system. The results indicate that users can complete similarly challenging tasks in less time using our proposed AR and AI system. Moreover, the collected data also suggest a reduction in cognitive load when executing the same operations using the AR and AI system. A divergence of trust was observed concerning capability and ethical dimensions. © 2024 American Society of Civil Engineers.},
keywords = {Artificial intelligence systems, Augmented Reality, Augmented Reality (AR), ChatGPT, Complex sequences, Computational Linguistics, Diverse fields, Human like, Language Model, Maintenance, Maintenance tasks, Operations and maintenance, Optical character recognition, Sensor technologies, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Hong, J.; Lee, Y.; Kim, D. H.; Choi, D.; Yoon, Y. -J.; Lee, G. -C.; Lee, Z.; Kim, J.
A Context-Aware Onboarding Agent for Metaverse Powered by Large Language Models Proceedings Article
In: A., Vallgarda; L., Jonsson; J., Fritsch; S.F., Alaoui; C.A., Le Dantec (Ed.): Proc. ACM Des. Interact. Syst. Conf., pp. 1857–1874, Association for Computing Machinery, Inc, 2024, ISBN: 979-840070583-0 (ISBN).
Abstract | Links | BibTeX | Tags: Computational Linguistics, Context-awareness, Context-Aware, context-awareness, conversational agent, Conversational Agents, Divergents, Language Model, Large-language model, large-language models, Metaverse, Metaverses, Model-based OPC, Onboarding, User interfaces, Virtual Reality
@inproceedings{hong_context-aware_2024,
title = {A Context-Aware Onboarding Agent for Metaverse Powered by Large Language Models},
author = {J. Hong and Y. Lee and D. H. Kim and D. Choi and Y. -J. Yoon and G. -C. Lee and Z. Lee and J. Kim},
editor = {Vallgarda A. and Jonsson L. and Fritsch J. and Alaoui S.F. and Le Dantec C.A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85200340104&doi=10.1145%2f3643834.3661579&partnerID=40&md5=5fe96b5155ca45c6d7a0d239b68f2b30},
doi = {10.1145/3643834.3661579},
isbn = {979-840070583-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. ACM Des. Interact. Syst. Conf.},
pages = {1857–1874},
publisher = {Association for Computing Machinery, Inc},
abstract = {One common asset of metaverse is that users can freely explore places and actions without linear procedures. Thus, it is hard yet important to understand the divergent challenges each user faces when onboarding metaverse. Our formative study (N = 16) shows that first-time users ask questions about metaverse that concern 1) a short-term spatiotemporal context, regarding the user’s current location, recent conversation, and actions, and 2) a long-term exploration context regarding the user’s experience history. Based on the findings, we present PICAN, a Large Language Model-based pipeline that generates context-aware answers to users when onboarding metaverse. An ablation study (N = 20) reveals that PICAN’s usage of context made responses more useful and immersive than those generated without contexts. Furthermore, a user study (N = 21) shows that the use of long-term exploration context promotes users’ learning about the locations and activities within the virtual environment. © 2024 Copyright held by the owner/author(s).},
keywords = {'current, Computational Linguistics, Context- awareness, Context-Aware, context-awareness, conversational agent, Conversational Agents, Divergents, Language Model, Large-language model, large-language models, Metaverse, Metaverses, Model-based OPC, Onboarding, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
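The two context horizons the PICAN abstract identifies (short-term spatiotemporal context and long-term exploration history) suggest a simple prompt-assembly pattern. The sketch below is illustrative only, not the authors' pipeline; all field names are hypothetical.

# Illustrative context-to-prompt assembly under assumed field names.
from dataclasses import dataclass, field

@dataclass
class OnboardingContext:
    location: str                                          # current place
    recent_chat: list[str] = field(default_factory=list)   # short-term
    recent_actions: list[str] = field(default_factory=list)
    visited: list[str] = field(default_factory=list)       # long-term history

def build_prompt(ctx: OnboardingContext, question: str) -> str:
    # Fold both context horizons in ahead of the user's question so a
    # generic chat model can answer in a context-aware way.
    return (
        f"The user is at {ctx.location}.\n"
        f"Recent conversation: {'; '.join(ctx.recent_chat) or 'none'}\n"
        f"Recent actions: {'; '.join(ctx.recent_actions) or 'none'}\n"
        f"Places already explored: {', '.join(ctx.visited) or 'none'}\n"
        f"Question: {question}\nAnswer as an in-world onboarding guide."
    )

ctx = OnboardingContext("plaza", ["asked about avatars"], ["opened map"],
                        ["tutorial island"])
print(build_prompt(ctx, "Where should I go next?"))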
Scott, A. J. S.; McCuaig, F.; Lim, V.; Watkins, W.; Wang, J.; Strachan, G.
Revolutionizing Nurse Practitioner Training: Integrating Virtual Reality and Large Language Models for Enhanced Clinical Education Proceedings Article
In: G., Strudwick; N.R., Hardiker; G., Rees; R., Cook; R., Cook; Y.J., Lee (Ed.): Stud. Health Technol. Informatics, pp. 671–672, IOS Press BV, 2024, ISBN: 09269630 (ISSN); 978-164368527-4 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3d-modeling, adult, anamnesis, clinical decision making, clinical education, Clinical Simulation, Computational Linguistics, computer interface, Computer-Assisted Instruction, conference paper, Curriculum, Decision making, E-Learning, Education, Health care education, Healthcare Education, human, Humans, Language Model, Large language model, large language models, Mesh generation, Model animations, Modeling languages, nurse practitioner, Nurse Practitioners, Nursing, nursing education, nursing student, OSCE preparation, procedures, simulation, Teaching, therapy, Training, Training program, User-Computer Interface, Virtual Reality, Virtual reality training
@inproceedings{scott_revolutionizing_2024,
title = {Revolutionizing Nurse Practitioner Training: Integrating Virtual Reality and Large Language Models for Enhanced Clinical Education},
author = {A. J. S. Scott and F. McCuaig and V. Lim and W. Watkins and J. Wang and G. Strachan},
editor = {Strudwick G. and Hardiker N.R. and Rees G. and Cook R. and Cook R. and Lee Y.J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199593781&doi=10.3233%2fSHTI240272&partnerID=40&md5=90c7bd43ba978f942723e6cf1983ffb3},
doi = {10.3233/SHTI240272},
isbn = {09269630 (ISSN); 978-164368527-4 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Stud. Health Technol. Informatics},
volume = {315},
pages = {671–672},
publisher = {IOS Press BV},
abstract = {This project introduces an innovative virtual reality (VR) training program for student Nurse Practitioners, incorporating advanced 3D modeling, animation, and Large Language Models (LLMs). Designed to simulate realistic patient interactions, the program aims to improve communication, history taking, and clinical decision-making skills in a controlled, authentic setting. This abstract outlines the methods, results, and potential impact of this cutting-edge educational tool on nursing education. © 2024 The Authors.},
keywords = {3D modeling, 3D models, 3d-modeling, adult, anamnesis, clinical decision making, clinical education, Clinical Simulation, Computational Linguistics, computer interface, Computer-Assisted Instruction, conference paper, Curriculum, Decision making, E-Learning, Education, Health care education, Healthcare Education, human, Humans, Language Model, Large language model, large language models, Mesh generation, Model animations, Modeling languages, nurse practitioner, Nurse Practitioners, Nursing, nursing education, nursing student, OSCE preparation, procedures, simulation, Teaching, therapy, Training, Training program, User-Computer Interface, Virtual Reality, Virtual reality training},
pubstate = {published},
tppubtype = {inproceedings}
}
Xu, S.; Wei, Y.; Zheng, P.; Zhang, J.; Yu, C.
LLM enabled generative collaborative design in a mixed reality environment Journal Article
In: Journal of Manufacturing Systems, vol. 74, pp. 703–715, 2024, ISSN: 02786125 (ISSN).
Abstract | Links | BibTeX | Tags: Collaborative design, Collaborative design process, Communication barriers, Computational Linguistics, design, Design frameworks, generative artificial intelligence, Iterative methods, Language Model, Large language model, Mixed reality, Mixed-reality environment, Multi-modal, Visual languages
@article{xu_llm_2024,
title = {LLM enabled generative collaborative design in a mixed reality environment},
author = {S. Xu and Y. Wei and P. Zheng and J. Zhang and C. Yu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85192244873&doi=10.1016%2fj.jmsy.2024.04.030&partnerID=40&md5=3f050c429cf5a4120d10a432311f46cb},
doi = {10.1016/j.jmsy.2024.04.030},
issn = {02786125 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {Journal of Manufacturing Systems},
volume = {74},
pages = {703–715},
abstract = {In the collaborative design process, diverse stakeholder backgrounds often introduce inefficiencies in collaboration, such as delays in design delivery and decreased creativity, primarily due to misunderstandings and communication barriers caused by this diversity. In response, this study proposes an AI-augmented Multimodal Collaborative Design (AI-MCD) framework. This framework utilizes Large Language Models (LLM) to establish an iterative prompting mechanism that provides professional design prompts for Generative AI (GAI) to generate precise visual schemes. On this basis, the GAI cooperates with Mixed Reality (MR) technology to form an interactive and immersive environment enabling full participation in the design process. By integrating these technologies, the study aims to help stakeholders form a unified cognition and optimize the traditional collaborative design process. Through a case study involving the development of heart education products for children, the effectiveness and practical applicability of the proposed framework are demonstrated. © 2024 The Society of Manufacturing Engineers},
keywords = {Collaborative design, Collaborative design process, Communication barriers, Computational Linguistics, design, Design frameworks, generative artificial intelligence, Iterative methods, Language Model, Large language model, Mixed reality, Mixed-reality environment, Multi-modal, Visual languages},
pubstate = {published},
tppubtype = {article}
}
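The iterative prompting mechanism in this abstract, where an LLM turns a rough stakeholder brief into a professional prompt for a generative image model and then revises it per feedback round, can be sketched as a short loop. The OpenAI call stands in for whichever LLM/GAI pair the authors used; the model name, prompt wording, and feedback strings are assumptions.

# Minimal sketch of an iterative prompt-refinement loop, not AI-MCD itself.
from openai import OpenAI

client = OpenAI()

def refine_prompt(brief: str, feedback: str | None = None) -> str:
    messages = [
        {"role": "system",
         "content": "You write precise, professional prompts for a "
                    "text-to-image model from rough design briefs."},
        {"role": "user", "content": brief},
    ]
    if feedback:
        messages.append({"role": "user",
                         "content": f"Revise for this feedback: {feedback}"})
    out = client.chat.completions.create(model="gpt-4o-mini",
                                         messages=messages)
    return out.choices[0].message.content

prompt = refine_prompt("a friendly toy that teaches children how the heart works")
for round_feedback in ["softer colours", "show the four chambers"]:
    prompt = refine_prompt(prompt, round_feedback)  # one feedback iteration
print(prompt)  # final prompt would be handed to the image generator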
Constantinides, N.; Constantinides, A.; Koukopoulos, D.; Fidas, C.; Belk, M.
CulturAI: Exploring Mixed Reality Art Exhibitions with Large Language Models for Personalized Immersive Experiences Proceedings Article
In: UMAP - Adjun. Proc. ACM Conf. User Model., Adapt. Personal., pp. 102–105, Association for Computing Machinery, Inc, 2024, ISBN: 979-840070466-6 (ISBN).
Abstract | Links | BibTeX | Tags: Computational Linguistics, Immersive, Language Model, Large language model, large language models, Mixed reality, Mixed reality art, Mixed reality technologies, Model-based OPC, User Experience Evaluation, User experience evaluations, User interfaces, User study, Users' experiences
@inproceedings{constantinides_culturai_2024,
title = {CulturAI: Exploring Mixed Reality Art Exhibitions with Large Language Models for Personalized Immersive Experiences},
author = {N. Constantinides and A. Constantinides and D. Koukopoulos and C. Fidas and M. Belk},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85198910809&doi=10.1145%2f3631700.3664874&partnerID=40&md5=952d82629a3fcdc6e2a960dd532b2c09},
doi = {10.1145/3631700.3664874},
isbn = {979-840070466-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {UMAP - Adjun. Proc. ACM Conf. User Model., Adapt. Personal.},
pages = {102–105},
publisher = {Association for Computing Machinery, Inc},
abstract = {Mixed Reality (MR) technologies have transformed the way in which we interact and engage with digital content, offering immersive experiences that blend the physical and virtual worlds. Over the past years, there has been increasing interest in employing Artificial Intelligence (AI) technologies to improve user experience and trustworthiness in cultural contexts. However, the integration of Large Language Models (LLMs) into MR applications within the Cultural Heritage (CH) domain is relatively underexplored. In this work, we present an investigation into the integration of LLMs within MR environments, focusing on the context of virtual art exhibitions. We implemented a HoloLens MR application, which enables users to explore artworks while interacting with an LLM through voice. To evaluate the user experience and perceived trustworthiness of individuals engaging with an LLM-based virtual art guide, we adopted a between-subject study design, in which participants were randomly assigned to either the LLM-based version or a control group using conventional interaction methods. The LLM-based version allows users to pose inquiries about the artwork displayed, ranging from details about the creator to information about the artwork's origin and historical significance. This paper presents the technical aspects of integrating LLMs within MR applications and evaluates the user experience and perceived trustworthiness of this approach in enhancing the exploration of virtual art exhibitions. Results of an initial evaluation provide evidence for the positive aspects of integrating LLMs in MR applications. Findings of this work contribute to the advancement of MR technologies for the development of future interactive personalized art experiences. © 2024 Owner/Author.},
keywords = {Computational Linguistics, Immersive, Language Model, Large language model, large language models, Mixed reality, Mixed reality art, Mixed reality technologies, Model-based OPC, User Experience Evaluation, User experience evaluations, User interfaces, User study, Users' experiences},
pubstate = {published},
tppubtype = {inproceedings}
}
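The interaction pattern this abstract describes, an LLM guide answering voice questions about the artwork currently in view, amounts to injecting artwork metadata into the model's context. The sketch below shows that pattern only; HoloLens speech capture is out of scope here, and the metadata fields, model name, and prompt are assumptions rather than the authors' code.

# Illustrative artwork-context injection for an LLM art guide.
from openai import OpenAI

client = OpenAI()

def ask_art_guide(artwork: dict, question: str) -> str:
    # Build a context string from the artwork the visitor is looking at.
    context = (f"Artwork: {artwork['title']} by {artwork['artist']} "
               f"({artwork['year']}). {artwork['notes']}")
    out = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system",
             "content": "You are a museum guide. Use only this context: "
                        + context},
            {"role": "user", "content": question},
        ],
    )
    return out.choices[0].message.content

piece = {"title": "Starry Night", "artist": "Vincent van Gogh",
         "year": 1889, "notes": "Painted at Saint-Rémy-de-Provence."}
print(ask_art_guide(piece, "What inspired this painting?"))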
2023
Marín-Morales, J.; Llanes-Jurado, J.; Minissi, M. E.; Gómez-Zaragozá, L.; Altozano, A.; Alcaniz, M.
Gaze and Head Movement Patterns of Depressive Symptoms During Conversations with Emotional Virtual Humans Proceedings Article
In: Int. Conf. Affect. Comput. Intell. Interact., ACII, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 979-835032743-4 (ISBN).
Abstract | Links | BibTeX | Tags: Biomarkers, Clustering, Clusterings, Computational Linguistics, Depressive disorder, Depressive symptom, E-Learning, Emotion elicitation, Eye movements, Gaze movements, K-means clustering, Language Model, Large language model, large language models, Learning systems, Mental health, Multivariant analysis, Signal processing, Statistical learning, virtual human, Virtual humans, Virtual Reality
@inproceedings{marin-morales_gaze_2023,
title = {Gaze and Head Movement Patterns of Depressive Symptoms During Conversations with Emotional Virtual Humans},
author = {J. Marín-Morales and J. Llanes-Jurado and M. E. Minissi and L. Gómez-Zaragozá and A. Altozano and M. Alcaniz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85184656388&doi=10.1109%2fACII59096.2023.10388134&partnerID=40&md5=143cdd8530e17a7b64bdf88f3a0496ab},
doi = {10.1109/ACII59096.2023.10388134},
isbn = {979-835032743-4 (ISBN)},
year = {2023},
date = {2023-01-01},
booktitle = {Int. Conf. Affect. Comput. Intell. Interact., ACII},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Depressive symptoms involve dysfunctional social attitudes and heightened negative emotional states. Identifying biomarkers requires data collection in realistic environments that activate depression-specific phenomena. However, no previous research analysed biomarkers in combination with AI-powered conversational virtual humans (VH) for mental health assessment. This study aims to explore gaze and head movement patterns related to depressive symptoms during conversations with emotional VH. A total of 105 participants were evenly divided into a control group and a group of subjects with depressive symptoms (SDS). They completed six semi-guided conversations designed to evoke basic emotions. The VHs were developed using a cognitive-inspired framework, enabling real-time voice-based conversational interactions powered by a Large Language Model, and including emotional facial expressions and lip synchronization. They have embedded life-history, context, attitudes, emotions and motivations. Signal processing techniques were applied to obtain gaze and head movement features, and heatmaps were generated. Then, parametric and non-parametric statistical tests were applied to evaluate differences between groups. Additionally, a two-dimensional t-SNE embedding was created and combined with k-means clustering. Results indicate that SDS exhibited shorter blinks and longer saccades. The control group showed affiliative lateral head gyros and accelerations, while the SDS demonstrated stress-related back-and-forth movements. SDS also displayed avoidance of eye contact. The exploratory unsupervised multivariate analysis achieved 72.3% accuracy. The present study analyses biomarkers in affective processes with multiple social contextual factors and information modalities in ecological environments, and enhances our understanding of gaze and head movement patterns in individuals with depressive symptoms, ultimately contributing to the development of more effective assessments and intervention strategies. © 2023 IEEE.},
keywords = {Biomarkers, Clustering, Clusterings, Computational Linguistics, Depressive disorder, Depressive symptom, E-Learning, Emotion elicitation, Eye movements, Gaze movements, K-means clustering, Language Model, Large language model, large language models, Learning systems, Mental health, Multivariant analysis, Signal processing, Statistical learning, virtual human, Virtual humans, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
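The unsupervised analysis step this abstract reports, a two-dimensional t-SNE embedding combined with k-means, is straightforward to sketch with scikit-learn. The feature matrix below is a random placeholder standing in for the study's gaze and head-movement features, and the feature count is an assumption.

# Sketch of t-SNE + k-means on placeholder data, not the study's data.
import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
features = rng.normal(size=(105, 12))  # 105 participants x 12 features (assumed)

# Embed the features in 2-D, then split into two clusters
# (control vs. depressive symptoms) as the paper describes.
embedding = TSNE(n_components=2, random_state=0).fit_transform(features)
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(embedding)
print(labels[:10])  # cluster assignment per participant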
Leng, Z.; Kwon, H.; Ploetz, T.
Generating Virtual On-body Accelerometer Data from Virtual Textual Descriptions for Human Activity Recognition Proceedings Article
In: ISWC - Proc. Int. Symp. Wearable Comput., pp. 39–43, Association for Computing Machinery, Inc, 2023, ISBN: 979-840070199-3 (ISBN).
Abstract | Links | BibTeX | Tags: Activity recognition, Computational Linguistics, E-Learning, Human activity recognition, Language Model, Large language model, large language models, Motion estimation, Motion Synthesis, On-body, Pattern recognition, Recognition models, Textual description, Training data, Virtual IMU Data, Virtual Reality, Wearable Sensors
@inproceedings{leng_generating_2023,
title = {Generating Virtual On-body Accelerometer Data from Virtual Textual Descriptions for Human Activity Recognition},
author = {Z. Leng and H. Kwon and T. Ploetz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85175788497&doi=10.1145%2f3594738.3611361&partnerID=40&md5=ddecaf6d81f71511c8152ca14f33cd7f},
doi = {10.1145/3594738.3611361},
isbn = {979-840070199-3 (ISBN)},
year = {2023},
date = {2023-01-01},
booktitle = {ISWC - Proc. Int. Symp. Wearable Comput.},
pages = {39–43},
publisher = {Association for Computing Machinery, Inc},
abstract = {The development of robust, generalized models for human activity recognition (HAR) has been hindered by the scarcity of large-scale, labeled data sets. Recent work has shown that virtual IMU data extracted from videos using computer vision techniques can lead to substantial performance improvements when training HAR models combined with small portions of real IMU data. Inspired by recent advances in motion synthesis from textual descriptions and connecting Large Language Models (LLMs) to various AI models, we introduce an automated pipeline that first uses ChatGPT to generate diverse textual descriptions of activities. These textual descriptions are then used to generate 3D human motion sequences via a motion synthesis model, T2M-GPT, and later converted to streams of virtual IMU data. We benchmarked our approach on three HAR datasets (RealWorld, PAMAP2, and USC-HAD) and demonstrate that the use of virtual IMU training data generated using our new approach leads to significantly improved HAR model performance compared to only using real IMU data. Our approach contributes to the growing field of cross-modality transfer methods and illustrates how HAR models can be improved through the generation of virtual training data that do not require any manual effort. © 2023 Owner/Author.},
keywords = {Activity recognition, Computational Linguistics, E-Learning, Human activity recognition, Language Model, Large language model, large language models, Motion estimation, Motion Synthesis, On-body, Pattern recognition, Recognition models, Textual description, Training data, Virtual IMU Data, Virtual Reality, Wearable Sensors},
pubstate = {published},
tppubtype = {inproceedings}
}
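The final stage of the pipeline this abstract describes, turning synthesized motion into virtual IMU streams, can be sketched as numerical differentiation of a joint trajectory. The trajectory below is a random placeholder rather than T2M-GPT output, the frame rate is an assumption, and a real pipeline would also rotate acceleration into the sensor's local frame.

# Sketch: virtual accelerometer data from a (placeholder) 3-D wrist trajectory.
import numpy as np

fs = 30.0  # motion frame rate in Hz (assumed)
wrist_pos = np.cumsum(np.random.default_rng(1).normal(
    scale=0.01, size=(300, 3)), axis=0)  # placeholder trajectory in metres

velocity = np.gradient(wrist_pos, 1.0 / fs, axis=0)       # m/s
acceleration = np.gradient(velocity, 1.0 / fs, axis=0)    # m/s^2
virtual_imu = acceleration + np.array([0.0, -9.81, 0.0])  # add gravity
print(virtual_imu.shape)  # (300, 3) stream of virtual IMU samples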
Bottega, J. A.; Kich, V. A.; Jesus, J. C.; Steinmetz, R.; Kolling, A. H.; Grando, R. B.; Guerra, R. S.; Gamarra, D. F. T.
Jubileo: An Immersive Simulation Framework for Social Robot Design Journal Article
In: Journal of Intelligent and Robotic Systems: Theory and Applications, vol. 109, no. 4, 2023, ISSN: 09210296 (ISSN).
Abstract | Links | BibTeX | Tags: Anthropomorphic Robots, Computational Linguistics, Cost effectiveness, E-Learning, English language learning, English languages, Human Robot Interaction, Human-robot interaction, Humanoid robot, Humans-robot interactions, Immersive, Language learning, Language Model, Large language model, large language models, Learning game, Machine design, Man machine systems, Open systems, Robot Operating System, Simulation framework, Simulation platform, Virtual Reality
@article{bottega_jubileo_2023,
title = {Jubileo: An Immersive Simulation Framework for Social Robot Design},
author = {J. A. Bottega and V. A. Kich and J. C. Jesus and R. Steinmetz and A. H. Kolling and R. B. Grando and R. S. Guerra and D. F. T. Gamarra},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85178895874&doi=10.1007%2fs10846-023-01991-3&partnerID=40&md5=6392af1e9a500ef51c3e215bd9709ce5},
doi = {10.1007/s10846-023-01991-3},
issn = {09210296 (ISSN)},
year = {2023},
date = {2023-01-01},
journal = {Journal of Intelligent and Robotic Systems: Theory and Applications},
volume = {109},
number = {4},
abstract = {This paper introduces Jubileo, an open-source simulated humanoid robot as a framework for the development of human-robot interaction applications. By leveraging the power of the Robot Operating System (ROS) and Unity in a virtual reality environment, this simulation establishes a strong connection to real robotics, faithfully replicating the robot’s physical components down to its motors and enabling communication with servo-actuators to control both the animatronic face and the joints of a real humanoid robot. To validate the capabilities of the framework, we propose English teaching games that integrate Virtual Reality (VR), game-based Human-Robot Interaction (HRI), and advanced large language models such as Generative Pre-trained Transformer (GPT). These games aim to foster linguistic competence within dynamic and interactive virtual environments. The incorporation of large language models bolsters the robot’s capability to generate human-like responses, thus facilitating a more realistic conversational experience. Moreover, the simulation framework reduces real-world testing risks and offers a cost-effective, efficient, and scalable platform for developing new HRI applications. The paper underscores the transformative potential of converging VR, large language models, and HRI, particularly in educational applications. © 2023, The Author(s), under exclusive licence to Springer Nature B.V.},
keywords = {Anthropomorphic Robots, Computational Linguistics, Cost effectiveness, E-Learning, English language learning, English languages, Human Robot Interaction, Human-robot interaction, Humanoid robot, Humans-robot interactions, Immersive, Language learning, Language Model, Large language model, large language models, Learning game, Machine design, Man machine systems, Open systems, Robot Operating System, Simulation framework, Simulation platform, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
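The dialogue loop a ROS-and-Unity framework like the one in this abstract could use is easy to sketch: recognized speech arrives on one topic, an LLM drafts a human-like reply, and the reply is published for the robot's speech and animatronic face. This is a hypothetical node, not the Jubileo code; the topic names and the model call are assumptions.

# Hypothetical ROS (rospy) dialogue node sketch.
import rospy
from std_msgs.msg import String
from openai import OpenAI

client = OpenAI()
reply_pub = None  # set in main before callbacks can fire

def on_speech(msg: String) -> None:
    # Draft a conversational reply to the recognized utterance.
    out = client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model choice
        messages=[
            {"role": "system",
             "content": "You are a friendly humanoid English tutor."},
            {"role": "user", "content": msg.data},
        ],
    )
    # Publish the reply for the robot's TTS / face animation pipeline.
    reply_pub.publish(String(data=out.choices[0].message.content))

if __name__ == "__main__":
    rospy.init_node("humanoid_dialog")
    reply_pub = rospy.Publisher("/robot/say", String, queue_size=1)
    rospy.Subscriber("/speech/recognized", String, on_speech)
    rospy.spin()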