AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Shoa, A.; Friedman, D.
Milo: an LLM-based virtual human open-source platform for extended reality Journal Article
In: Frontiers in Virtual Reality, vol. 6, 2025, ISSN: 2673-4192.
Abstract | Links | BibTeX | Tags: Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR
@article{shoa_milo_2025,
  title     = {Milo: an {LLM}-based virtual human open-source platform for extended reality},
  author    = {Shoa, A. and Friedman, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008867438&doi=10.3389%2ffrvir.2025.1555173&partnerID=40&md5=6e68c9604b5ae52671b2ff02d51c7e75},
  doi       = {10.3389/frvir.2025.1555173},
  issn      = {2673-4192},
  year      = {2025},
  journal   = {Frontiers in Virtual Reality},
  volume    = {6},
  abstract  = {Large language models (LLMs) have made dramatic advancements in recent years, allowing for a new generation of dialogue agents. This allows for new types of social experiences with virtual humans, in both virtual and augmented reality. In this paper, we introduce an open-source system specifically designed for implementing LLM-based virtual humans within extended reality (XR) environments. Our system integrates into XR platforms, providing a robust framework for the creation and management of interactive virtual agents. We detail the design and architecture of the system and showcase the system’s versatility through various scenarios. In addition to a straightforward single-agent setup, we demonstrate how an LLM-based virtual human can attend a multi-user virtual reality (VR) meeting, enhance a VR self-talk session, and take part in an augmented reality (AR) live event. We provide lessons learned, with focus on the possibilities for human intervention during live events. We provide the system as open-source, inviting collaboration and innovation within the community, paving the way for new types of social experiences. Copyright © 2025 Shoa and Friedman.},
  keywords  = {Large language model, open-source, Virtual agent, virtual human, Virtual Reality, XR},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Peretti, A.; Mazzola, M.; Capra, L.; Piazzola, M.; Carlevaro, C.
Seamless Human-Robot Interaction Through a Distributed Zero-Trust Architecture and Advanced User Interfaces Proceedings Article
In: C., Secchi; L., Marconi (Ed.): Springer. Proc. Adv. Robot., pp. 92–95, Springer Nature, 2024, ISBN: 25111256 (ISSN); 978-303176427-1 (ISBN).
Abstract | Links | BibTeX | Tags: Advanced user interfaces, Digital Twins, HRC, Human Robot Interaction, Human-Robot Collaboration, Humans-robot interactions, Industrial robots, Industry 4.0, Intelligent robots, Interaction platform, Language Model, Large language model, LLM, Problem oriented languages, Robot Operating System, Robot operating system 2, Robot-robot collaboration, ROS2, RRC, Wages, XR, ZTA
@inproceedings{peretti_seamless_2024,
  title     = {Seamless Human-Robot Interaction Through a Distributed Zero-Trust Architecture and Advanced User Interfaces},
  author    = {Peretti, A. and Mazzola, M. and Capra, L. and Piazzola, M. and Carlevaro, C.},
  editor    = {Secchi, C. and Marconi, L.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216090556&doi=10.1007%2f978-3-031-76428-8_18&partnerID=40&md5=9f58281f8a8c034fb45fed610ce64bd2},
  doi       = {10.1007/978-3-031-76428-8_18},
  isbn      = {978-3-031-76427-1},
  issn      = {2511-1256},
  year      = {2024},
  booktitle = {Springer. Proc. Adv. Robot.},
  series    = {SPAR},
  volume    = {33},
  pages     = {92--95},
  publisher = {Springer Nature},
  abstract  = {The proposed work presents a novel interaction platform designed to address the shortage of skilled workers in the labor market, facilitating the seamless integration of robotics and advanced user interfaces such as eXtended Reality (XR) to optimize Human-Robot Collaboration (HRC) as well as Robot-Robot Collaboration (RRC) in an Industry 4.0 scenario. One of the most challenging situations is to optimize and simplify the collaborations of humans and robots to decrease or avoid system slowdowns, blocks, or dangerous situations for both users and robots. The advent of the LLMs (Large Language Model) have been breakthrough the whole IT environment because they perform well in different scenario from human text generation to autonomous systems management. Due to their malleability, LLMs have a primary role for Human-Robot collaboration processes. For this reason, the platform comprises three key technical components: a distributed zero-trust architecture, a virtual avatar, and digital twins of robots powered by the Robot Operating System 2 (ROS2) platform. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
  keywords  = {Advanced user interfaces, Digital Twins, HRC, Human Robot Interaction, Human-Robot Collaboration, Humans-robot interactions, Industrial robots, Industry 4.0, Intelligent robots, Interaction platform, Language Model, Large language model, LLM, Problem oriented languages, Robot Operating System, Robot operating system 2, Robot-robot collaboration, ROS2, RRC, Wages, XR, ZTA},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lee, S.; Park, W.; Lee, K.
Building Knowledge Base of 3D Object Assets Using Multimodal LLM AI Model Proceedings Article
In: Int. Conf. ICT Convergence, pp. 416–418, IEEE Computer Society, 2024, ISBN: 21621233 (ISSN); 979-835036463-7 (ISBN).
Abstract | Links | BibTeX | Tags: 3D object, Asset management, Content services, Exponentials, Information Management, Knowledge Base, Language Model, Large language model, LLM, Multi-modal, Multi-Modal AI, Reusability, Visual effects, XR
@inproceedings{lee_building_2024,
  title     = {Building Knowledge Base of {3D} Object Assets Using Multimodal {LLM} {AI} Model},
  author    = {Lee, S. and Park, W. and Lee, K.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85217636269&doi=10.1109%2fICTC62082.2024.10827434&partnerID=40&md5=581ee8ca50eb3dae15dc9675971cf428},
  doi       = {10.1109/ICTC62082.2024.10827434},
  isbn      = {979-8-3503-6463-7},
  issn      = {2162-1233},
  year      = {2024},
  booktitle = {Int. Conf. ICT Convergence},
  pages     = {416--418},
  publisher = {IEEE Computer Society},
  abstract  = {The proliferation of various XR (eXtended Reality) services and the increasing incorporation of visual effects into existing content services have led to an exponential rise in the demand for 3D object assets. This paper describes an LLM (Large Language Model)-based multimodal AI model pipeline that can be applied to a generative AI model for creating new 3D objects or restructuring the asset management system to enhance the reusability of existing 3D objects. By leveraging a multimodal AI model, we derived descriptive text for assets such as 3D object, 2D image at a human-perceptible level, rather than mere data, and subsequently used an LLM to generate knowledge triplets for constructing an asset knowledge base. The applicability of this pipeline was verified using actual 3D objects from a content production company. Future work will focus on improving the quality of the generated knowledge triplets themselves by training the multimodal AI model with real-world content usage assets. © 2024 IEEE.},
  keywords  = {3D object, Asset management, Content services, Exponentials, Information Management, Knowledge Base, Language Model, Large language model, LLM, Multi-modal, Multi-Modal AI, Reusability, Visual effects, XR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Janaka, N.
Towards Intelligent Wearable Assistants Proceedings Article
In: UbiComp Companion - Companion ACM Int. Jt. Conf. Pervasive Ubiquitous Comput., pp. 618–621, Association for Computing Machinery, Inc, 2024, ISBN: 979-840071058-2 (ISBN).
Abstract | Links | BibTeX | Tags: AI assistance, Augmented Reality, context-aware system, Context-aware systems, HMD, Interaction, interactions, Interruption, interruptions, MR, Notification, notifications, Smart glass, smart glasses, wearable, Wearable assistant, Wearable computers, XR
@inproceedings{janaka_towards_2024,
  title     = {Towards Intelligent Wearable Assistants},
  author    = {Janaka, N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85206157248&doi=10.1145%2f3675094.3678989&partnerID=40&md5=539933fdbb3b5289b179cbe9e8f7c083},
  doi       = {10.1145/3675094.3678989},
  isbn      = {979-8-4007-1058-2},
  year      = {2024},
  booktitle = {UbiComp Companion - Companion ACM Int. Jt. Conf. Pervasive Ubiquitous Comput.},
  pages     = {618--621},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {This summary outlines my research toward developing intelligent wearable assistants that provide personalized, context-aware computing assistance. Previous work explored information presentation using smart glasses, socially-aware interactions, and applications for learning, communication, and documentation. Current research aims to develop tools for interaction research, including data collection, multimodal evaluation metrics, and a platform for creating context-aware AI assistants. Future goals include extending assistants to physical spaces via telepresence, optimizing learning with generative AI, and investigating collaborative human-AI learning. Ultimately, this research seeks to redefine how humans receive seamless support through proactive, intelligent wearable assistants that comprehend users and environments, augmenting capabilities while reducing reliance on manual labor. © 2024 Copyright held by the owner/author(s).},
  keywords  = {AI assistance, Augmented Reality, context-aware system, Context-aware systems, HMD, Interaction, interactions, Interruption, interruptions, MR, Notification, notifications, Smart glass, smart glasses, wearable, Wearable assistant, Wearable computers, XR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2023
Lee, S.; Lee, H.; Lee, K.
Knowledge Generation Pipeline using LLM for Building 3D Object Knowledge Base Proceedings Article
In: Int. Conf. ICT Convergence, pp. 1303–1305, IEEE Computer Society, 2023, ISBN: 21621233 (ISSN); 979-835031327-7 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR
@inproceedings{lee_knowledge_2023,
  title     = {Knowledge Generation Pipeline using {LLM} for Building {3D} Object Knowledge Base},
  author    = {Lee, S. and Lee, H. and Lee, K.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85184593202&doi=10.1109%2fICTC58733.2023.10392933&partnerID=40&md5=b877638607a04e5a31a2d5723af6e11b},
  doi       = {10.1109/ICTC58733.2023.10392933},
  isbn      = {979-8-3503-1327-7},
  issn      = {2162-1233},
  year      = {2023},
  booktitle = {Int. Conf. ICT Convergence},
  pages     = {1303--1305},
  publisher = {IEEE Computer Society},
  abstract  = {With the wide spread of XR(eXtended Reality) contents such as Metaverse and VR(Virtual Reality) / AR(Augmented Reality), the utilization and importance of 3D objects are increasing. In this paper, we describe a knowledge generation pipeline of 3D object for reuse of existing 3D objects and production of new 3D object using generative AI(Artificial Intelligence). 3D object knowledge includes not only the object itself data that are generated in object editing phase but the information for human to recognize and understand objects. The target 3D model for building knowledge is the space model of office for business Metaverse service and the model of objects composing the space. LLM(Large Language Model)-based multimodal AI was used to extract knowledge from 3D model in a systematic and automated way. We plan to expand the pipeline to utilize knowledge base for managing extracted knowledge and correcting errors occurred during the LLM process for the knowledge extraction. © 2023 IEEE.},
  keywords  = {3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}