AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Kurai, R.; Hiraki, T.; Hiroi, Y.; Hirao, Y.; Perusquía-Hernández, M.; Uchiyama, H.; Kiyokawa, K.
MagicItem: Dynamic Behavior Design of Virtual Objects With Large Language Models in a Commercial Metaverse Platform Journal Article
In: IEEE Access, vol. 13, pp. 19132–19143, 2025, ISSN: 2169-3536, (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Behavior design, Code programming, Computer simulation languages, Dynamic behaviors, Language Model, Large-language model, Low-code programming, Metaverse platform, Metaverses, Virtual addresses, Virtual environments, Virtual objects, Virtual Reality, Virtual-reality environment
@article{kurai_magicitem_2025,
  title    = {{MagicItem}: Dynamic Behavior Design of Virtual Objects With Large Language Models in a Commercial Metaverse Platform},
  author   = {Kurai, R. and Hiraki, T. and Hiroi, Y. and Hirao, Y. and Perusquía-Hernández, M. and Uchiyama, H. and Kiyokawa, K.},
  url      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216011970&doi=10.1109%2FACCESS.2025.3530439&partnerID=40&md5=6de2e69c95854cb0860a95d0f4246d8d},
  doi      = {10.1109/ACCESS.2025.3530439},
  issn     = {2169-3536},
  year     = {2025},
  date     = {2025-01-01},
  journal  = {IEEE Access},
  volume   = {13},
  pages    = {19132--19143},
  abstract = {To create rich experiences in virtual reality (VR) environments, it is essential to define the behavior of virtual objects through programming. However, programming in 3D spaces requires a wide range of background knowledge and programming skills. Although Large Language Models (LLMs) have provided programming support, they are still primarily aimed at programmers. In metaverse platforms, where many users inhabit VR spaces, most users are unfamiliar with programming, making it difficult for them to modify the behavior of objects in the VR environment easily. Existing LLM-based script generation methods for VR spaces require multiple lengthy iterations to implement the desired behaviors and are difficult to integrate into the operation of metaverse platforms. To address this issue, we propose a tool that generates behaviors for objects in VR spaces from natural language within Cluster, a metaverse platform with a large user base. By integrating LLMs with the Cluster Script provided by this platform, we enable users with limited programming experience to define object behaviors within the platform freely. We have also integrated our tool into a commercial metaverse platform and are conducting online experiments with 63 general users of the platform. The experiments show that even users with no programming background can successfully generate behaviors for objects in VR spaces, resulting in a highly satisfying system. Our research contributes to democratizing VR content creation by enabling non-programmers to design dynamic behaviors for virtual objects in metaverse platforms. © 2025 Elsevier B.V., All rights reserved.},
  note     = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
  keywords = {Behavior design, Code programming, Computer simulation languages, Dynamic behaviors, Language Model, Large-language model, Low-code programming, Metaverse platform, Metaverses, Virtual addresses, Virtual environments, Virtual objects, Virtual Reality, Virtual-reality environment},
  pubstate = {published},
  tppubtype = {article}
}
Xing, Y.; Liu, Q.; Wang, J.; Gómez-Zará, D.
sMoRe: Spatial Mapping and Object Rendering Environment Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 115–119, Association for Computing Machinery, 2025, ISBN: 979-8-4007-1409-2.
Abstract | Links | BibTeX | Tags: Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects
@inproceedings{xing_smore_2025,
  title     = {{sMoRe}: Spatial Mapping and Object Rendering Environment},
  author    = {Xing, Y. and Liu, Q. and Wang, J. and Gómez-Zará, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001670668&doi=10.1145%2F3708557.3716337&partnerID=40&md5=c23b3e19f42dbd8796e43f5ab71e12b6},
  doi       = {10.1145/3708557.3716337},
  isbn      = {979-8-4007-1409-2},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Int Conf Intell User Interfaces Proc IUI},
  pages     = {115--119},
  publisher = {Association for Computing Machinery},
  abstract  = {In mixed reality (MR) environments, understanding space and creating virtual objects is crucial to providing an intuitive user experience. This paper introduces sMoRe (Spatial Mapping and Object Rendering Environment), an MR application that combines Generative AI (GenAI) to assist users in creating, placing, and managing virtual objects within physical spaces. sMoRe allows users to use voice or typed text commands to create and place virtual objects using GenAI while specifying spatial constraints. The system employs Large Language Models (LLMs) to interpret users’ commands, analyze the current scene, and identify optimal locations. Additionally, sMoRe integrates a text-to-3D generative model to dynamically create 3D objects based on users’ descriptions. Our user study demonstrates the effectiveness of sMoRe in enhancing user comprehension, interaction, and organization of the MR environment. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Zhao, Y.; Dasari, M.; Guo, T.
CleAR: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality Journal Article
In: Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, vol. 9, no. 3, 2025, ISSN: 2474-9567, (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Augmented Reality, Color computer graphics, Environment lighting, Estimation results, Generative model, High quality, Human engineering, Immersive, Lighting, Lighting conditions, Lighting estimation, Mobile augmented reality, Real-time refinement, Rendering (computer graphics), Statistical tests, Virtual objects, Virtual Reality
@article{zhao_clear_2025,
  title    = {{CleAR}: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality},
  author   = {Zhao, Y. and Dasari, M. and Guo, T.},
  url      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105015452988&doi=10.1145%2F3749535&partnerID=40&md5=ed970d47cbf7f547555eca43b32cd7e7},
  doi      = {10.1145/3749535},
  issn     = {2474-9567},
  year     = {2025},
  date     = {2025-01-01},
  journal  = {Proceedings of the {ACM} on Interactive, Mobile, Wearable and Ubiquitous Technologies},
  volume   = {9},
  number   = {3},
  abstract = {High-quality environment lighting is essential for creating immersive mobile augmented reality (AR) experiences. However, achieving visually coherent estimation for mobile AR is challenging due to several key limitations in AR device sensing capabilities, including low camera FoV and limited pixel dynamic ranges. Recent advancements in generative AI, which can generate high-quality images from different types of prompts, including texts and images, present a potential solution for high-quality lighting estimation. Still, to effectively use generative image diffusion models, we must address two key limitations of content quality and slow inference. In this work, we design and implement a generative lighting estimation system called CleAR that can produce high-quality, diverse environment maps in the format of 360◦ HDR images. Specifically, we design a two-step generation pipeline guided by AR environment context data to ensure the output aligns with the physical environment’s visual context and color appearance. To improve the estimation robustness under different lighting conditions, we design a real-time refinement component to adjust lighting estimation results on AR devices. To train and test our generative models, we curate a large-scale environment lighting estimation dataset with diverse lighting conditions. Through a combination of quantitative and qualitative evaluations, we show that CleAR outperforms state-of-the-art lighting estimation methods on both estimation accuracy, latency, and robustness, and is rated by 31 participants as producing better renderings for most virtual objects. For example, CleAR achieves 51% to 56% accuracy improvement on virtual object renderings across objects of three distinctive types of materials and reflective properties. CleAR produces lighting estimates of comparable or better quality in just 3.2 seconds—over 110X faster than state-of-the-art methods. Moreover, CleAR supports real-time refinement of lighting estimation results, ensuring robust and timely updates for AR applications. © 2025 Elsevier B.V., All rights reserved.},
  note     = {Publisher: Association for Computing Machinery},
  keywords = {Augmented Reality, Color computer graphics, Environment lighting, Estimation results, Generative model, High quality, Human engineering, Immersive, Lighting, Lighting conditions, Lighting estimation, Mobile augmented reality, Real-time refinement, Rendering (computer graphics), Statistical tests, Virtual objects, Virtual Reality},
  pubstate = {published},
  tppubtype = {article}
}
2024
Lombardo, A.; Morabito, G.; Quattropani, S.; Ricci, C.; Siino, M.; Tinnirello, I.
AI-GeneSI: Exploiting generative AI for autonomous generation of the southbound interface in the IoT Proceedings Article
In: IEEE World Forum Internet Things, WF-IoT, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7301-1.
Abstract | Links | BibTeX | Tags: 'current, Autonomous generation, Codes (symbols), Communications protocols, Complex task, Data representations, Digital world, Interface functions, Language Model, Reusability, Sensor nodes, Sensors data, Virtual objects, Virtual Reality
@inproceedings{lombardo_ai-genesi_2024,
  title     = {{AI-GeneSI}: Exploiting generative {AI} for autonomous generation of the southbound interface in the {IoT}},
  author    = {Lombardo, A. and Morabito, G. and Quattropani, S. and Ricci, C. and Siino, M. and Tinnirello, I.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216509327&doi=10.1109%2FWF-IoT62078.2024.10811300&partnerID=40&md5=c9575ec7e7baebaf92d1dfee95291edc},
  doi       = {10.1109/WF-IoT62078.2024.10811300},
  isbn      = {979-8-3503-7301-1},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {IEEE World Forum Internet Things, WF-IoT},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Virtual objects, which are representations in the digital world of physical entities, uses the data collected by one or several sensor nodes to operate. To overcome the diversity and heterogeneity of protocols implemented by different sensor nodes and the way in which sensor data is represented, it is convenient to exploit appropriate components referred to as 'southbound interfaces' in this paper. The objective of the southbound interface is to convert the communication protocols implemented by sensor nodes and virtual objects and to harmonize data representations. The implementation of the southbound interfaces is not a complex task, however it is extremely specific of the current setting, which turns in low reusability of the code, and is time-consuming. In this paper, a methodology named AI-GeneSI is proposed to exploit Large Language Models (LLM)s to generate the code to communicate with the southbound interface. Such code is utilized to create and deploy a microservice which implements the southbound interface functions. A prototype of the proposed methodology has been implemented to demonstrate the feasibility of the proposed approach. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {'current, Autonomous generation, Codes (symbols), Communications protocols, Complex task, Data representations, Digital world, Interface functions, Language Model, Reusability, Sensor nodes, Sensors data, Virtual objects, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}