AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to filter the papers by research topic.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
title = {I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication},
author = {K. Li and F. Mostajeran and S. Rings and L. Kruse and S. Schmidt and M. Arz and E. Wolf and F. Steinicke},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2fVRW66409.2025.00469&partnerID=40&md5=77e755f6a059f81e81c18987f58d00cc},
doi = {10.1109/VRW66409.2025.00469},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1648–1649},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 IEEE.},
keywords = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
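The framework demonstrated above wires speech, vision, language, and animation services into a single hear-see-speak-do loop for an embodied IVA. The following is a minimal Python sketch of such a perceive-reason-act turn, not the authors' implementation; every function below is a hypothetical stub standing in for a commercial speech-to-text, vision, LLM, or animation back end.

from dataclasses import dataclass

@dataclass
class AgentTurn:
    user_speech: str       # transcript from a speech-to-text service
    scene_caption: str     # description returned by a vision model
    reply_text: str = ""   # LLM response to be spoken by the avatar
    gesture: str = "idle"  # animation cue for the virtual 3D human model

def transcribe(audio: bytes) -> str:      # stand-in for a cloud STT service
    return "What do you see on the table?"

def describe_scene(frame: bytes) -> str:  # stand-in for a vision model
    return "A red mug and a notebook."

def ask_llm(prompt: str) -> str:          # stand-in for a hosted LLM endpoint
    return "I can see a red mug next to a notebook. Shall I point at it?"

def next_turn(audio: bytes, frame: bytes) -> AgentTurn:
    # Fuse the multimodal inputs into one prompt, then derive speech and gesture.
    turn = AgentTurn(user_speech=transcribe(audio),
                     scene_caption=describe_scene(frame))
    prompt = (f"Scene: {turn.scene_caption}\n"
              f"User: {turn.user_speech}\n"
              "Reply briefly and suggest one gesture.")
    turn.reply_text = ask_llm(prompt)
    turn.gesture = "point" if "point" in turn.reply_text.lower() else "talk"
    return turn

print(next_turn(b"", b""))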
Song, T.; Pabst, F.; Eck, U.; Navab, N.
Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2901–2911, 2025, ISSN: 1077-2626.
Abstract | Links | BibTeX | Tags: 3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult
@article{song_enhancing_2025,
title = {Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations},
author = {T. Song and F. Pabst and U. Eck and N. Navab},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003687673&doi=10.1109%2fTVCG.2025.3549181&partnerID=40&md5=1d46569933582ecf5e967f0794aafc07},
doi = {10.1109/TVCG.2025.3549181},
issn = {10772626 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2901–2911},
abstract = {Robotic ultrasound systems have the potential to improve medical diagnostics, but patient acceptance remains a key challenge. To address this, we propose a novel system that combines an AI-based virtual agent, powered by a large language model (LLM), with three mixed reality visualizations aimed at enhancing patient comfort and trust. The LLM enables the virtual assistant to engage in natural, conversational dialogue with patients, answering questions in any format and offering real-time reassurance, creating a more intelligent and reliable interaction. The virtual assistant is animated as controlling the ultrasound probe, giving the impression that the robot is guided by the assistant. The first visualization employs augmented reality (AR), allowing patients to see the real world and the robot with the virtual avatar superimposed. The second visualization is an augmented virtuality (AV) environment, where the real-world body part being scanned is visible, while a 3D Gaussian Splatting reconstruction of the room, excluding the robot, forms the virtual environment. The third is a fully immersive virtual reality (VR) experience, featuring the same 3D reconstruction but entirely virtual, where the patient sees a virtual representation of their body being scanned in a robot-free environment. In this case, the virtual ultrasound probe mirrors the movement of the probe controlled by the robot, creating a synchronized experience as it touches and moves over the patient's virtual body. We conducted a comprehensive agent-guided robotic ultrasound study with all participants, comparing these visualizations against a standard robotic ultrasound procedure. Results showed significant improvements in patient trust, acceptance, and comfort. Based on these findings, we offer insights into designing future mixed reality visualizations and virtual agents to further enhance patient comfort and acceptance in autonomous medical procedures. © 1995-2012 IEEE.},
keywords = {3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult},
pubstate = {published},
tppubtype = {article}
}
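The study above compares three immersive visualizations: AR passthrough with a superimposed avatar, augmented virtuality with a Gaussian-splat room and the robot hidden, and fully virtual VR with a mirrored probe. Purely as an illustration, and not the authors' system, the three conditions could be expressed as a scene configuration like the Python sketch below; all field names are assumptions.

from dataclasses import dataclass
from enum import Enum, auto

class Mode(Enum):
    AR = auto()   # real world + superimposed virtual avatar
    AV = auto()   # real body part visible + Gaussian-splat room, robot hidden
    VR = auto()   # fully virtual body, robot-free, mirrored virtual probe

@dataclass
class SceneConfig:
    show_real_world: bool
    show_robot: bool
    use_splat_reconstruction: bool
    mirror_virtual_probe: bool

def configure(mode: Mode) -> SceneConfig:
    # Map each experimental condition to what gets rendered.
    if mode is Mode.AR:
        return SceneConfig(True, True, False, False)
    if mode is Mode.AV:
        return SceneConfig(True, False, True, False)
    return SceneConfig(False, False, True, True)  # Mode.VR

print(configure(Mode.AV))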
Zhu, X. T.; Cheerman, H.; Cheng, M.; Kiami, S. R.; Chukoskie, L.; McGivney, E.
Designing VR Simulation System for Clinical Communication Training with LLMs-Based Embodied Conversational Agents Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8.
Abstract | Links | BibTeX | Tags: Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems
@inproceedings{zhu_designing_2025,
title = {Designing VR Simulation System for Clinical Communication Training with LLMs-Based Embodied Conversational Agents},
author = {X. T. Zhu and H. Cheerman and M. Cheng and S. R. Kiami and L. Chukoskie and E. McGivney},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005754066&doi=10.1145%2f3706599.3719693&partnerID=40&md5=4468fbd54b43d6779259300afd08632e},
doi = {10.1145/3706599.3719693},
isbn = {979-840071395-8 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {VR simulation in Health Professions (HP) education demonstrates huge potential, but fixed learning content with little customization limits its application beyond lab environments. To address these limitations in the context of VR for patient communication training, we conducted a user-centered study involving semi-structured interviews with advanced HP students to understand their challenges in clinical communication training and perceptions of VR-based solutions. From this, we derived design insights emphasizing the importance of realistic scenarios, simple interactions, and unpredictable dialogues. Building on these insights, we developed the Virtual AI Patient Simulator (VAPS), a novel VR system powered by Large Language Models (LLMs) and Embodied Conversational Agents (ECAs), supporting dynamic and customizable patient interactions for immersive learning. We also provided an example of how clinical professors could use user-friendly design forms to create personalized scenarios that align with course objectives in VAPS and discuss future implications of integrating AI-driven technologies into VR education. © 2025 Copyright held by the owner/author(s).},
keywords = {Clinical communications, Clinical Simulation, Communications training, Curricula, Embodied conversational agent, Embodied Conversational Agents, Health professions, Intelligent virtual agents, Language Model, Medical education, Model-based OPC, Patient simulators, Personnel training, Students, Teaching, User centered design, Virtual environments, Virtual Reality, VR simulation, VR simulation systems},
pubstate = {published},
tppubtype = {inproceedings}
}
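VAPS, as described above, lets clinical professors author personalized scenarios through user-friendly design forms that drive an LLM-based virtual patient. A minimal sketch of how such a form might be turned into a system prompt follows; the form fields and prompt wording are hypothetical and are not taken from the VAPS codebase.

from dataclasses import dataclass

@dataclass
class ScenarioForm:
    patient_name: str
    age: int
    chief_complaint: str
    personality: str          # e.g. "anxious", "withdrawn"
    learning_objective: str   # course objective the scenario targets

def build_patient_prompt(form: ScenarioForm) -> str:
    # Assemble the instructor's form into instructions for the virtual patient.
    return (
        f"You are {form.patient_name}, a {form.age}-year-old patient presenting "
        f"with {form.chief_complaint}. Your manner is {form.personality}. "
        "Answer only as the patient, keep replies short, and never state the "
        f"diagnosis outright. The session targets: {form.learning_objective}."
    )

form = ScenarioForm("Alex Rivera", 58, "chest tightness on exertion",
                    "anxious", "breaking difficult news with empathy")
print(build_patient_prompt(form))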
Volkova, S.; Nguyen, D.; Penafiel, L.; Kao, H. -T.; Cohen, M.; Engberson, G.; Cassani, L.; Almutairi, M.; Chiang, C.; Banerjee, N.; Belcher, M.; Ford, T. W.; Yankoski, M. G.; Weninger, T.; Gomez-Zara, D.; Rebensky, S.
VirTLab: Augmented Intelligence for Modeling and Evaluating Human-AI Teaming Through Agent Interactions Proceedings Article
In: Sottilare, R.A.; Schwarz, J. (Eds.): Lect. Notes Comput. Sci., pp. 279–301, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-303192969-4.
Abstract | Links | BibTeX | Tags: Agent based simulation, agent-based simulation, Augmented Reality, Causal analysis, HAT processes and states, Human digital twin, human digital twins, Human-AI team process and state, Human-AI teaming, Intelligent virtual agents, Operational readiness, Personnel training, Team performance, Team process, Virtual teaming, Visual analytics
@inproceedings{volkova_virtlab_2025,
title = {VirTLab: Augmented Intelligence for Modeling and Evaluating Human-AI Teaming Through Agent Interactions},
author = {S. Volkova and D. Nguyen and L. Penafiel and H. -T. Kao and M. Cohen and G. Engberson and L. Cassani and M. Almutairi and C. Chiang and N. Banerjee and M. Belcher and T. W. Ford and M. G. Yankoski and T. Weninger and D. Gomez-Zara and S. Rebensky},
editor = {Sottilare, R.A. and Schwarz, J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007830752&doi=10.1007%2f978-3-031-92970-0_20&partnerID=40&md5=c578dc95176a617f6de2a1c6f998f73f},
doi = {10.1007/978-3-031-92970-0_20},
isbn = {03029743 (ISSN); 978-303192969-4 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15813 LNCS},
pages = {279–301},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper introduces VirTLab (Virtual Teaming Laboratory), a novel augmented intelligence platform designed to simulate and analyze interactions between human-AI teams (HATs) through the use of human digital twins (HDTs) and AI agents. VirTLab enhances operational readiness by systematically analyzing HAT dynamics, fostering trust development, and providing actionable recommendations to improve team performance outcomes. VirTLab combines agents driven by large language models (LLM) interacting in a simulated environment with integrated HAT performance measures obtained using interactive visual analytics. VirTLab integrates four key components: (1) HDTs with configurable profiles, (2) operational AI teammates, (3) a simulation engine that enforces temporal and spatial environment constraints, ensures situational awareness, and coordinates events between HDT and AI agents to deliver high-fidelity simulations, and (4) an evaluation platform that validates simulations against ground truth and enables exploration of how HDTs and AI attributes influence HAT functioning. We demonstrate VirTLab’s capabilities through focused experiments examining how variations in HDT openness, agreeableness, propensity to trust, and AI reliability and transparency influence HAT performance. Our HAT performance evaluation framework incorporates both objective measures such as communication patterns and mission completion, and subjective measures to include perceived trust and team coordination. Results on search and rescue missions reveal that AI teammate reliability significantly impacts communication dynamics and team assistance behaviors, whereas HDT personality traits influence trust development and team coordination -insights that directly inform the design of HAT training programs. VirTLab enables instructional designers to explore interventions in HAT behaviors through controlled experiments and causal analysis, leading to improved HAT performance. Visual analytics support the examination of HAT functioning across different conditions, allowing for real-time assessment and adaptation of scenarios. VirTLab contributes to operational readiness by preparing human operators to work seamlessly with AI counterparts in real-world situations. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Agent based simulation, agent-based simulation, Augmented Reality, Causal analysis, HAT processes and states, Human digital twin, human digital twins, Human-AI team process and state, Human-AI teaming, Intelligent virtual agents, Operational readiness, Personnel training, Team performance, Team process, Virtual teaming, Visual analytics},
pubstate = {published},
tppubtype = {inproceedings}
}
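VirTLab, per the abstract above, varies HDT personality attributes and AI-teammate reliability and transparency, then measures their effect on trust and coordination. The Python sketch below only illustrates that kind of parameterization: the profile fields mirror the abstract, but the post-mission trust rule is invented for illustration and is not VirTLab's model.

from dataclasses import dataclass

@dataclass
class HDTProfile:
    openness: float             # 0..1
    agreeableness: float        # 0..1
    propensity_to_trust: float  # 0..1

@dataclass
class AITeammate:
    reliability: float          # probability a tasking is completed correctly
    transparency: float         # how much rationale it communicates, 0..1

def trust_after_mission(hdt: HDTProfile, ai: AITeammate,
                        successes: int, failures: int) -> float:
    """Toy post-mission trust score in [0, 1] (illustrative only)."""
    base = hdt.propensity_to_trust
    delta = 0.05 * successes * (0.5 + ai.transparency) - 0.10 * failures
    return max(0.0, min(1.0, base + delta))

hdt = HDTProfile(openness=0.7, agreeableness=0.6, propensity_to_trust=0.5)
ai = AITeammate(reliability=0.9, transparency=0.8)
print(trust_after_mission(hdt, ai, successes=6, failures=1))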
Gaglio, G. F.; Vinanzi, S.; Cangelosi, A.; Chella, A.
Intention Reading Architecture for Virtual Agents Proceedings Article
In: Palinko, O.; Bodenhagen, L.; Cabibihan, J.-J.; Fischer, K.; Šabanović, S.; Winkle, K.; Behera, L.; Ge, S.S.; Chrysostomou, D.; Jiang, W.; He, H. (Eds.): Lect. Notes Comput. Sci., pp. 488–497, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-981963521-4.
Abstract | Links | BibTeX | Tags: Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine
@inproceedings{gaglio_intention_2025,
title = {Intention Reading Architecture for Virtual Agents},
author = {G. F. Gaglio and S. Vinanzi and A. Cangelosi and A. Chella},
editor = {Palinko, O. and Bodenhagen, L. and Cabibihan, J.-J. and Fischer, K. and Šabanović, S. and Winkle, K. and Behera, L. and Ge, S.S. and Chrysostomou, D. and Jiang, W. and He, H.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002042645&doi=10.1007%2f978-981-96-3522-1_41&partnerID=40&md5=70ccc7039785bb4ca4d45752f1d3587f},
doi = {10.1007/978-981-96-3522-1_41},
isbn = {03029743 (ISSN); 978-981963521-4 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15561 LNAI},
pages = {488–497},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This work presents the development of a virtual agent designed specifically for use in the Metaverse, video games, and other virtual environments, capable of performing intention reading on a human-controlled avatar through a cognitive architecture that endows it with contextual awareness. The paper explores the adaptation of a cognitive architecture, originally developed for physical robots, to a fully virtual context, where it is integrated with a Large Language Model to create highly communicative virtual assistants. Although this work primarily focuses on virtual applications, integrating cognitive architectures with LLMs marks a significant step toward creating collaborative artificial agents capable of providing meaningful support by deeply understanding context and user intentions in digital environments. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
keywords = {Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine},
pubstate = {published},
tppubtype = {inproceedings}
}
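The architecture above couples intention reading (inferring the goal behind an avatar's observed actions) with an LLM that phrases contextually aware assistance. A toy Python sketch of that two-step flow follows; the intention labels, the rules, and the LLM stub are illustrative assumptions, not the paper's cognitive architecture.

from typing import Callable

INTENTIONS = ("pick_up_object", "navigate_to_exit", "request_help")

def read_intention(observed_actions: list[str]) -> str:
    """Naive placeholder for the architecture's intention-reading stage."""
    if "reach" in observed_actions or "grasp" in observed_actions:
        return "pick_up_object"
    if "walk_to_door" in observed_actions:
        return "navigate_to_exit"
    return "request_help"

def assist(observed_actions: list[str], ask_llm: Callable[[str], str]) -> str:
    # Hand the inferred goal to the language model for a context-aware reply.
    goal = read_intention(observed_actions)
    prompt = (f"The user appears to be trying to {goal.replace('_', ' ')}. "
              "Offer one short, helpful suggestion as their virtual assistant.")
    return ask_llm(prompt)

print(assist(["look_around", "reach"], lambda p: f"[LLM reply to: {p}]"))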
Dongye, X.; Weng, D.; Jiang, H.; Tian, Z.; Bao, Y.; Chen, P.
Personalized decision-making for agents in face-to-face interaction in virtual reality Journal Article
In: Multimedia Systems, vol. 31, no. 1, 2025, ISSN: 0942-4962.
Abstract | Links | BibTeX | Tags: Decision making, Decision-making process, Decisions makings, Design frameworks, Face-to-face interaction, Feed-back based, Fine tuning, Human-agent interaction, Human–agent interaction, Integrated circuit design, Intelligent virtual agents, Language Model, Large language model, Multi agent systems, Multimodal Interaction, Virtual environments, Virtual Reality
@article{dongye_personalized_2025,
title = {Personalized decision-making for agents in face-to-face interaction in virtual reality},
author = {X. Dongye and D. Weng and H. Jiang and Z. Tian and Y. Bao and P. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212947825&doi=10.1007%2fs00530-024-01591-7&partnerID=40&md5=d969cd926fdfd241399f2f96dbf42907},
doi = {10.1007/s00530-024-01591-7},
issn = {09424962 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Multimedia Systems},
volume = {31},
number = {1},
abstract = {Intelligent agents for face-to-face interaction in virtual reality are expected to make decisions and provide appropriate feedback based on the user’s multimodal interaction inputs. Designing the agent’s decision-making process poses a significant challenge owing to the limited availability of multimodal interaction decision-making datasets and the complexities associated with providing personalized interaction feedback to diverse users. To overcome these challenges, we propose a novel design framework that involves generating and labeling symbolic interaction data, pre-training a small-scale real-time decision-making network, collecting personalized interaction data within interactions, and fine-tuning the network using personalized data. We develop a prototype system to demonstrate our design framework, which utilizes interaction distances, head orientations, and hand postures as inputs in virtual reality. The agent is capable of delivering personalized feedback from different users. We evaluate the proposed design framework by demonstrating the utilization of large language models for data labeling, emphasizing reliability and robustness. Furthermore, we evaluate the incorporation of personalized data fine-tuning for decision-making networks within our design framework, highlighting its importance in improving the user interaction experience. The design principles of this framework can be further explored and applied to various domains involving virtual agents. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {Decision making, Decision-making process, Decisions makings, Design frameworks, Face-to-face interaction, Feed-back based, Fine tuning, Human-agent interaction, Human–agent interaction, Integrated circuit design, Intelligent virtual agents, Language Model, Large language model, Multi agent systems, Multimodal Interaction, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
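The abstract above describes a four-stage pipeline: generate and label symbolic interaction data (with LLM assistance), pre-train a small real-time decision network, collect personalized interaction data, and fine-tune on it. The sketch below mimics only the shape of that pipeline; the features, labels, and the trivial nearest-centroid "model" are stand-ins so the example stays self-contained, not the authors' network.

from collections import defaultdict

Feature = tuple[float, float, float]  # (distance, head_yaw, hand_openness)

def centroid_model(samples: list[tuple[Feature, str]]) -> dict[str, Feature]:
    # Average the feature vectors per decision label.
    sums: dict[str, list[float]] = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])
    for feats, label in samples:
        acc = sums[label]
        for i, v in enumerate(feats):
            acc[i] += v
        acc[3] += 1
    return {lbl: (s[0] / s[3], s[1] / s[3], s[2] / s[3]) for lbl, s in sums.items()}

def decide(model: dict[str, Feature], feats: Feature) -> str:
    # Pick the label whose centroid is closest to the observed interaction.
    return min(model, key=lambda lbl: sum((a - b) ** 2
                                          for a, b in zip(model[lbl], feats)))

# 1) pre-train on generated, LLM-labelled symbolic interaction data
pretrain = [((0.6, 0.0, 0.9), "wave_back"), ((2.5, 0.4, 0.1), "keep_distance")]
model = centroid_model(pretrain)

# 2) "fine-tune": fold in personalized samples collected during interactions
personal = [((0.8, 0.1, 0.8), "wave_back")]
model = centroid_model(pretrain + personal)

print(decide(model, (0.7, 0.05, 0.85)))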
2024
Harinee, S.; Raja, R. Vimal; Mugila, E.; Govindharaj, I.; Sanjaykumar, V.; Ragavendhiran, T.
Elevating Medical Training: A Synergistic Fusion of AI and VR for Immersive Anatomy Learning and Practical Procedure Mastery Proceedings Article
In: Int. Conf. Syst., Comput., Autom. Netw., ICSCAN, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833151002-2.
Abstract | Links | BibTeX | Tags: 'current, Anatomy education, Anatomy educations, Computer interaction, Curricula, Embodied virtual assistant, Embodied virtual assistants, Generative AI, Human- Computer Interaction, Immersive, Intelligent virtual agents, Medical computing, Medical education, Medical procedure practice, Medical procedures, Medical training, Personnel training, Students, Teaching, Three dimensional computer graphics, Usability engineering, Virtual assistants, Virtual environments, Virtual Reality, Visualization
@inproceedings{harinee_elevating_2024,
title = {Elevating Medical Training: A Synergistic Fusion of AI and VR for Immersive Anatomy Learning and Practical Procedure Mastery},
author = {S. Harinee and R. Vimal Raja and E. Mugila and I. Govindharaj and V. Sanjaykumar and T. Ragavendhiran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000334626&doi=10.1109%2fICSCAN62807.2024.10894451&partnerID=40&md5=100899b489c00335e0a652f2efd33e23},
doi = {10.1109/ICSCAN62807.2024.10894451},
isbn = {979-833151002-2 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Int. Conf. Syst., Comput., Autom. Netw., ICSCAN},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality, with its 3D visualization, has brought an overwhelming change to medical education, especially for courses like human anatomy. The proposed virtual reality system brings substantial improvements to the education received by medical students studying for their degree courses. The project puts forward a text-to-speech and speech-to-text aligned system that simplifies the use of a chatbot empowered by OpenAI GPT-4 and allows students to speak vocally with an avatar, the system's virtual assistant. In contrast to current methodologies, the virtual reality setup is powered by avatars and thus provides an enhanced virtual assistant environment. Avatars offer students repeated practice of medical procedures, which is the real uniqueness of the proposed product. The developed virtual reality environment improves on other current training techniques by letting students interact with and immerse themselves in three-dimensional human organs, gaining deeper knowledge of the subject. A virtual assistant guides the whole process, giving insights and support to help the student bridge the gap from theory to practice. The system is essentially a knowledge-based and analysis-based approach. The combination of generative AI with embodied virtual agents has great potential for customized virtual conversation assistants across a much wider range of applications. The study brings out the value of acquiring hands-on skills through simulated medical procedures and opens new frontiers of research and development in AI, VR, and medical education. In addition to assessing the effectiveness of these novel functionalities, the study also explores user-experience dimensions such as usability, task load, and the sense of presence in the proposed virtual medical environment. © 2024 IEEE.},
keywords = {'current, Anatomy education, Anatomy educations, Computer interaction, Curricula, Embodied virtual assistant, Embodied virtual assistants, Generative AI, Human- Computer Interaction, Immersive, Intelligent virtual agents, Medical computing, Medical education, Medical procedure practice, Medical procedures, Medical training, Personnel training, Students, Teaching, Three dimensional computer graphics, Usability engineering, Virtual assistants, Virtual environments, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
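One claim above is that the avatar lets students repeatedly practise medical procedures with guidance from the virtual assistant. Purely as an illustration (not the system described in the paper), a procedure-practice checker might compare the student's steps against a reference sequence, as in this sketch; the step names are invented.

REFERENCE = ["sanitize_hands", "position_patient", "locate_landmark",
             "insert_needle", "dispose_sharps"]

def feedback(performed: list[str], reference: list[str] = REFERENCE) -> str:
    # Walk the reference sequence and report the first deviation.
    for i, expected in enumerate(reference):
        if i >= len(performed):
            return f"Stopped early: next step should be '{expected}'."
        if performed[i] != expected:
            return f"Step {i + 1} should be '{expected}', not '{performed[i]}'."
    return "All steps correct, well done."

print(feedback(["sanitize_hands", "locate_landmark"]))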
Bao, Y.; Gao, N.; Weng, D.; Chen, J.; Tian, Z.
MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Eds.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 337–338, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9.
Abstract | Links | BibTeX | Tags: Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides
@inproceedings{bao_musegesture_2024,
title = {MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides},
author = {Y. Bao and N. Gao and D. Weng and J. Chen and Z. Tian},
editor = {Eck, U. and Sra, M. and Stefanucci, J. and Sugimoto, M. and Tatzgern, M. and Williams, I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214385900&doi=10.1109%2fISMAR-Adjunct64951.2024.00079&partnerID=40&md5=e71ffc28e299597557034259aab50641},
doi = {10.1109/ISMAR-Adjunct64951.2024.00079},
isbn = {979-833150691-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {337–338},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents an innovative framework named MuseGesture, designed to generate contextually adaptive gestures for virtual agents in Virtual Reality (VR) museums. The framework leverages the robust language understanding and generation capabilities of Large Language Models (LLMs) to parse tour narration texts and generate corresponding explanatory gestures. Through reinforcement learning and adversarial skill embeddings, the framework also generates guiding gestures tailored to the virtual museum environment, integrating both gesture types using conditional motion interpolation methods. Experimental results and user studies demonstrate that this approach effectively enables voice-command-controlled virtual guide gestures, offering a novel intelligent guiding system solution that enhances the interactive experience in VR museum environments. © 2024 IEEE.},
keywords = {Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides},
pubstate = {published},
tppubtype = {inproceedings}
}
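MuseGesture, as summarized above, combines explanatory gestures parsed from tour narration with guiding gestures learned via reinforcement learning and adversarial skill embeddings, merged through conditional motion interpolation. The sketch below shows only the simplest conceivable form of such blending, a linear interpolation with a made-up condition; it is not the paper's method.

def blend_pose(explain: list[float], guide: list[float], w: float) -> list[float]:
    """Linear interpolation between poses; w=0 is explanatory, w=1 is guiding."""
    w = max(0.0, min(1.0, w))
    return [(1.0 - w) * e + w * g for e, g in zip(explain, guide)]

def blend_weight(narration_active: bool, distance_to_exhibit: float) -> float:
    # Toy condition: guide more strongly when narration pauses or the visitor
    # is far from the exhibit being discussed.
    if not narration_active:
        return 1.0
    return min(1.0, distance_to_exhibit / 5.0)

explain = [0.1, 0.4, -0.2]   # joint angles of an explanatory gesture frame
guide = [0.9, 0.0, 0.3]      # joint angles of a guiding gesture frame
print(blend_pose(explain, guide, blend_weight(True, 2.0)))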