AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
  title     = {I Hear, See, Speak \& Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-{AI} Communication},
  author    = {Li, K. and Mostajeran, F. and Rings, S. and Kruse, L. and Schmidt, S. and Arz, M. and Wolf, E. and Steinicke, F.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2fVRW66409.2025.00469&partnerID=40&md5=77e755f6a059f81e81c18987f58d00cc},
  doi       = {10.1109/VRW66409.2025.00469},
  isbn      = {979-833151484-6},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
  pages     = {1648--1649},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 IEEE.},
  keywords  = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Li, H.; Wang, Z.; Liang, W.; Wang, Y.
X’s Day: Personality-Driven Virtual Human Behavior Generation Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 3514–3524, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: adult, Augmented Reality, Behavior Generation, Chatbots, Computer graphics, computer interface, Contextual Scene, female, human, Human behaviors, Humans, Long-term behavior, male, Novel task, Personality, Personality traits, Personality-driven Behavior, physiology, Social behavior, User-Computer Interface, Users' experiences, Virtual agent, Virtual environments, Virtual humans, Virtual Reality, Young Adult
@article{li_xs_2025,
  title     = {X's Day: Personality-Driven Virtual Human Behavior Generation},
  author    = {Li, H. and Wang, Z. and Liang, W. and Wang, Y.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003864932&doi=10.1109%2fTVCG.2025.3549574&partnerID=40&md5=a865bbd2b0fa964a4f0f4190955dc787},
  doi       = {10.1109/TVCG.2025.3549574},
  issn      = {1077-2626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {3514--3524},
  abstract  = {Developing convincing and realistic virtual human behavior is essential for enhancing user experiences in virtual reality (VR) and augmented reality (AR) settings. This paper introduces a novel task focused on generating long-term behaviors for virtual agents, guided by specific personality traits and contextual elements within 3D environments. We present a comprehensive framework capable of autonomously producing daily activities autoregressively. By modeling the intricate connections between personality characteristics and observable activities, we establish a hierarchical structure of Needs, Task, and Activity levels. Integrating a Behavior Planner and a World State module allows for the dynamic sampling of behaviors using large language models (LLMs), ensuring that generated activities remain relevant and responsive to environmental changes. Extensive experiments validate the effectiveness and adaptability of our approach across diverse scenarios. This research makes a significant contribution to the field by establishing a new paradigm for personalized and context-aware interactions with virtual humans, ultimately enhancing user engagement in immersive applications. Our project website is at: https://behavior.agent-x.cn/. © 2025 IEEE. All rights reserved,},
  keywords  = {adult, Augmented Reality, Behavior Generation, Chatbots, Computer graphics, computer interface, Contextual Scene, female, human, Human behaviors, Humans, Long-term behavior, male, Novel task, Personality, Personality traits, Personality-driven Behavior, physiology, Social behavior, User-Computer Interface, Users' experiences, Virtual agent, Virtual environments, Virtual humans, Virtual Reality, Young Adult},
  pubstate  = {published},
  tppubtype = {article}
}
Song, T.; Pabst, F.; Eck, U.; Navab, N.
Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2901–2911, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: 3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult
@article{song_enhancing_2025,
  title     = {Enhancing Patient Acceptance of Robotic Ultrasound through Conversational Virtual Agent and Immersive Visualizations},
  author    = {Song, T. and Pabst, F. and Eck, U. and Navab, N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003687673&doi=10.1109%2fTVCG.2025.3549181&partnerID=40&md5=1d46569933582ecf5e967f0794aafc07},
  doi       = {10.1109/TVCG.2025.3549181},
  issn      = {1077-2626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {2901--2911},
  abstract  = {Robotic ultrasound systems have the potential to improve medical diagnostics, but patient acceptance remains a key challenge. To address this, we propose a novel system that combines an AI-based virtual agent, powered by a large language model (LLM), with three mixed reality visualizations aimed at enhancing patient comfort and trust. The LLM enables the virtual assistant to engage in natural, conversational dialogue with patients, answering questions in any format and offering real-time reassurance, creating a more intelligent and reliable interaction. The virtual assistant is animated as controlling the ultrasound probe, giving the impression that the robot is guided by the assistant. The first visualization employs augmented reality (AR), allowing patients to see the real world and the robot with the virtual avatar superimposed. The second visualization is an augmented virtuality (AV) environment, where the real-world body part being scanned is visible, while a 3D Gaussian Splatting reconstruction of the room, excluding the robot, forms the virtual environment. The third is a fully immersive virtual reality (VR) experience, featuring the same 3D reconstruction but entirely virtual, where the patient sees a virtual representation of their body being scanned in a robot-free environment. In this case, the virtual ultrasound probe, mirrors the movement of the probe controlled by the robot, creating a synchronized experience as it touches and moves over the patient's virtual body. We conducted a comprehensive agent-guided robotic ultrasound study with all participants, comparing these visualizations against a standard robotic ultrasound procedure. Results showed significant improvements in patient trust, acceptance, and comfort. Based on these findings, we offer insights into designing future mixed reality visualizations and virtual agents to further enhance patient comfort and acceptance in autonomous medical procedures. © 1995-2012 IEEE.},
  keywords  = {3D reconstruction, adult, Augmented Reality, Computer graphics, computer interface, echography, female, human, Humans, Imaging, Intelligent robots, Intelligent virtual agents, Language Model, male, Medical robotics, Middle Aged, Mixed reality, Patient Acceptance of Health Care, patient attitude, Patient comfort, procedures, Real-world, Reality visualization, Robotic Ultrasound, Robotics, Three-Dimensional, three-dimensional imaging, Trust and Acceptance, Ultrasonic applications, Ultrasonic equipment, Ultrasonography, Ultrasound probes, User-Computer Interface, Virtual agent, Virtual assistants, Virtual environments, Virtual Reality, Visual languages, Visualization, Young Adult},
  pubstate  = {published},
  tppubtype = {article}
}
Shoa, A.; Friedman, D.
Milo: an LLM-based virtual human open-source platform for extended reality Journal Article
In: Frontiers in Virtual Reality, vol. 6, 2025, ISSN: 26734192 (ISSN).
Abstract | Links | BibTeX | Tags: Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR
@article{shoa_milo_2025,
  title     = {Milo: an {LLM}-based virtual human open-source platform for extended reality},
  author    = {Shoa, A. and Friedman, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008867438&doi=10.3389%2ffrvir.2025.1555173&partnerID=40&md5=6e68c9604b5ae52671b2ff02d51c7e75},
  doi       = {10.3389/frvir.2025.1555173},
  issn      = {2673-4192},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Frontiers in Virtual Reality},
  volume    = {6},
  abstract  = {Large language models (LLMs) have made dramatic advancements in recent years, allowing for a new generation of dialogue agents. This allows for new types of social experiences with virtual humans, in both virtual and augmented reality. In this paper, we introduce an open-source system specifically designed for implementing LLM-based virtual humans within extended reality (XR) environments. Our system integrates into XR platforms, providing a robust framework for the creation and management of interactive virtual agents. We detail the design and architecture of the system and showcase the system’s versatility through various scenarios. In addition to a straightforward single-agent setup, we demonstrate how an LLM-based virtual human can attend a multi-user virtual reality (VR) meeting, enhance a VR self-talk session, and take part in an augmented reality (AR) live event. We provide lessons learned, with focus on the possibilities for human intervention during live events. We provide the system as open-source, inviting collaboration and innovation within the community, paving the way for new types of social experiences. Copyright © 2025 Shoa and Friedman.},
  keywords  = {Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR},
  pubstate  = {published},
  tppubtype = {article}
}
Gaglio, G. F.; Vinanzi, S.; Cangelosi, A.; Chella, A.
Intention Reading Architecture for Virtual Agents Proceedings Article
In: O., Palinko; L., Bodenhagen; J.-J., Cabibihan; K., Fischer; S., Šabanović; K., Winkle; L., Behera; S.S., Ge; D., Chrysostomou; W., Jiang; H., He (Ed.): Lect. Notes Comput. Sci., pp. 488–497, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981963521-4 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine
@inproceedings{gaglio_intention_2025,
  title     = {Intention Reading Architecture for Virtual Agents},
  author    = {Gaglio, G. F. and Vinanzi, S. and Cangelosi, A. and Chella, A.},
  editor    = {Palinko, O. and Bodenhagen, L. and Cabibihan, J.-J. and Fischer, K. and Šabanović, S. and Winkle, K. and Behera, L. and Ge, S. S. and Chrysostomou, D. and Jiang, W. and He, H.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002042645&doi=10.1007%2f978-981-96-3522-1_41&partnerID=40&md5=70ccc7039785bb4ca4d45752f1d3587f},
  doi       = {10.1007/978-981-96-3522-1_41},
  issn      = {0302-9743},
  isbn      = {978-981963521-4},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  volume    = {15561 LNAI},
  pages     = {488--497},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {This work presents the development of a virtual agent designed specifically for use in the Metaverse, video games, and other virtual environments, capable of performing intention reading on a human-controlled avatar through a cognitive architecture that endows it with contextual awareness. The paper explores the adaptation of a cognitive architecture, originally developed for physical robots, to a fully virtual context, where it is integrated with a Large Language Model to create highly communicative virtual assistants. Although this work primarily focuses on virtual applications, integrating cognitive architectures with LLMs marks a significant step toward creating collaborative artificial agents capable of providing meaningful support by deeply understanding context and user intentions in digital environments. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
  keywords  = {Chatbots, Cognitive Architecture, Cognitive Architectures, Computer simulation languages, Intelligent virtual agents, Intention Reading, Intention readings, Language Model, Large language model, Metaverse, Metaverses, Physical robots, Video-games, Virtual agent, Virtual assistants, Virtual contexts, Virtual environments, Virtual machine},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2024
Bao, Y.; Gao, N.; Weng, D.; Chen, J.; Tian, Z.
MuseGesture: A Framework for Gesture Synthesis by Virtual Agents in VR Museum Guides Proceedings Article
In: U., Eck; M., Sra; J., Stefanucci; M., Sugimoto; M., Tatzgern; I., Williams (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 337–338, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9 (ISBN).
Abstract | Links | BibTeX | Tags: Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides
@inproceedings{bao_musegesture_2024,
  title     = {{MuseGesture}: A Framework for Gesture Synthesis by Virtual Agents in {VR} Museum Guides},
  author    = {Bao, Y. and Gao, N. and Weng, D. and Chen, J. and Tian, Z.},
  editor    = {Eck, U. and Sra, M. and Stefanucci, J. and Sugimoto, M. and Tatzgern, M. and Williams, I.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214385900&doi=10.1109%2fISMAR-Adjunct64951.2024.00079&partnerID=40&md5=e71ffc28e299597557034259aab50641},
  doi       = {10.1109/ISMAR-Adjunct64951.2024.00079},
  isbn      = {979-833150691-9},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
  pages     = {337--338},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {This paper presents an innovative framework named MuseGesture, designed to generate contextually adaptive gestures for virtual agents in Virtual Reality (VR) museums. The framework leverages the robust language understanding and generation capabilities of Large Language Models (LLMs) to parse tour narration texts and generate corresponding explanatory gestures. Through reinforcement learning and adversarial skill embeddings, the framework also generates guiding gestures tailored to the virtual museum environment, integrating both gesture types using conditional motion interpolation methods. Experimental results and user studies demonstrate that this approach effectively enables voice-command-controlled virtual guide gestures, offering a novel intelligent guiding system solution that enhances the interactive experience in VR museum environments. © 2024 IEEE.},
  keywords  = {Adversarial machine learning, Embeddings, Gesture Generation, Intelligent Agents, Intelligent systems, Intelligent virtual agents, Language generation, Language Model, Large language model, large language models, Museum guide, Reinforcement Learning, Reinforcement learnings, Robust language understanding, Virtual agent, Virtual Agents, Virtual environments, Virtual reality museum guide, VR Museum Guides},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Amato, N.; Carolis, B. De; Gioia, F.; Venezia, M. N.; Palestra, G.; Loglisci, C.
Can an AI-driven VTuber engage People? The KawAIi Case Study Proceedings Article
In: A., Soto; E., Zangerle (Ed.): CEUR Workshop Proc., CEUR-WS, 2024, ISBN: 16130073 (ISSN).
Abstract | Links | BibTeX | Tags: 3D Avatars, Case-studies, Conversational Agents, Facial Expressions, Language Model, Live streaming, LLM, LLMs, Real- time, Three dimensional computer graphics, Virtual agent, Virtual Reality, YouTube
@inproceedings{amato_can_2024,
  title     = {Can an {AI}-driven {VTuber} engage People? The {KawAIi} Case Study},
  author    = {Amato, N. and De Carolis, B. and Gioia, F. and Venezia, M. N. and Palestra, G. and Loglisci, C.},
  editor    = {Soto, A. and Zangerle, E.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190754935&partnerID=40&md5=bd76d56b13e328027aa1b458849cf73f},
  issn      = {1613-0073},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {CEUR Workshop Proc.},
  volume    = {3660},
  publisher = {CEUR-WS},
  abstract  = {Live streaming has become increasingly popular, with most streamers presenting their real-life appearance. However, Virtual YouTubers (VTubers), virtual 2D or 3D avatars that are voiced by humans, are emerging as live streamers and attracting a growing viewership. This paper presents the development of a conversational agent, named KawAIi, embodied in a 2D character that, while accurately and promptly responding to user requests, provides an entertaining experience in streaming chat platforms such as YouTube while providing adequate real-time support. The agent relies on the Vicuna 7B GPTQ 4-bit Large Language Model (LLM). In addition, KawAIi uses a BERT-based model for analyzing the sentence generated by the model in terms of conveyed emotion and shows self-emotion awareness through facial expressions. Tested with users, the system has demonstrated a good ability to handle the interaction with the user while maintaining a pleasant user experience. In particular, KawAIi has been evaluated positively in terms of engagement and competence on various topics. The results show the potential of this technology to enrich interactivity in streaming platforms and offer a promising model for future online assistance contexts. © 2024 Copyright for this paper by its authors.},
  keywords  = {3D Avatars, Case-studies, Conversational Agents, Facial Expressions, Language Model, Live streaming, LLM, LLMs, Real- time, Three dimensional computer graphics, Virtual agent, Virtual Reality, YouTube},
  pubstate  = {published},
  tppubtype = {inproceedings}
}