AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Shawash, J.; Thibault, M.; Hamari, J.
Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 377–379, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality
@inproceedings{shawash_who_2025,
title = {Who Killed Helene Pumpulivaara?: {AI}-Assisted Content Creation and {XR} Implementation for Interactive Built Heritage Storytelling},
author = {J. Shawash and M. Thibault and J. Hamari},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008003446&doi=10.1145%2f3706370.3731703&partnerID=40&md5=bc8a8d221abcf6c560446979fbd06cbc},
doi = {10.1145/3706370.3731703},
isbn = {9798400713910},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {377--379},
publisher = {Association for Computing Machinery, Inc},
abstract = {This demo presents "Who Killed Helene Pumpulivaara?", an innovative interactive heritage experience that combines crime mystery narrative with XR technology to address key challenges in digital heritage interpretation. Our work makes six significant contributions: (1) the discovery of a "Historical Uncanny Valley"effect where varying fidelity levels between AI-generated and authentic content serve as implicit markers distinguishing fact from interpretation; (2) an accessible production pipeline combining mobile photography with AI tools that democratizes XR heritage creation for resource-limited institutions; (3) a spatial storytelling approach that effectively counters decontextualization in digital heritage; (4) a multi-platform implementation strategy across web and VR environments; (5) a practical model for AI-assisted heritage content creation balancing authenticity with engagement; and (6) a pathway toward spatial augmented reality for future heritage interpretation. Using the historic Finlayson Factory in Tampere, Finland as a case study, our implementation demonstrates how emerging technologies can enrich the authenticity of heritage experiences, fostering deeper emotional connections between visitors and the histories embedded in place. © 2025 Copyright held by the owner/author(s).},
keywords = {Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Nygren, T.; Samuelsson, M.; Hansson, P. -O.; Efimova, E.; Bachelder, S.
AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues Journal Article
In: International Journal of Artificial Intelligence in Education, 2025, ISSN: 15604306 (ISSN); 15604292 (ISSN), (Publisher: Springer).
Abstract | Links | BibTeX | Tags: AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation
@article{nygren_ai_2025,
title = {{AI} Versus Human Feedback in Mixed Reality Simulations: Comparing {LLM} and Expert Mentoring in Preservice Teacher Education on Controversial Issues},
author = {T. Nygren and M. Samuelsson and P.-O. Hansson and E. Efimova and S. Bachelder},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007244772&doi=10.1007%2Fs40593-025-00484-8&partnerID=40&md5=3404a614af6fe4d4d2cb284060600e3c},
doi = {10.1007/s40593-025-00484-8},
issn = {1560-4306; 1560-4292},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Artificial Intelligence in Education},
abstract = {This study explores the potential role of AI-generated mentoring within simulated environments designed for teacher education, specifically focused on the challenges of teaching controversial issues. Using a mixed-methods approach, we empirically investigate the potential and challenges of AI-generated feedback compared to that provided by human experts when mentoring preservice teachers in the context of mixed reality simulations. Findings reveal that human experts offered more mixed and nuanced feedback than ChatGPT-4o and Perplexity, especially when identifying missed teaching opportunities and balancing classroom discussions. The AI models evaluated were publicly available pro versions of LLMs and were tested using detailed prompts and coding schemes aligned with educational theories. AI systems were not very good at identifying aspects of general, pedagogical or content knowledge based on Shulman’s theories but were still quite effective in generating feedback in line with human experts. The study highlights the promise of AI to enhance teacher training but underscores the importance of combining AI feedback with expert insights to address the complexities of real-world teaching. This research contributes to a growing understanding of AI's potential role and limitations in education. It suggests that, while AI can be valuable to scale mixed reality simulations, it should be carefully evaluated and balanced by human expertise in teacher education. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Springer},
keywords = {AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation},
pubstate = {published},
tppubtype = {article}
}
Yokoyama, N.; Kimura, R.; Nakajima, T.
ViGen: Defamiliarizing Everyday Perception for Discovering Unexpected Insights Proceedings Article
In: H., Degen; S., Ntoa (Ed.): Lect. Notes Comput. Sci., pp. 397–417, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303193417-9 (ISBN).
Abstract | Links | BibTeX | Tags: Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent
@inproceedings{yokoyama_vigen_2025,
title = {{ViGen}: Defamiliarizing Everyday Perception for Discovering Unexpected Insights},
author = {N. Yokoyama and R. Kimura and T. Nakajima},
editor = {Degen, H. and Ntoa, S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007760030&doi=10.1007%2f978-3-031-93418-6_26&partnerID=40&md5=dee6f54688284313a45579aab5f934d6},
doi = {10.1007/978-3-031-93418-6_26},
issn = {0302-9743},
isbn = {978-303193417-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15821 LNAI},
pages = {397--417},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper proposes ViGen, an Augmented Reality (AR) and Artificial Intelligence (AI)-enhanced vision system designed to facilitate defamiliarization in daily life. Humans rely on sight to gather information, think, and act, yet the act of seeing often becomes passive in daily life. Inspired by Victor Shklovsky’s concept of defamiliarization and the artistic technique of photomontage, ViGen seeks to disrupt habitual perceptions. It achieves this by overlaying semi-transparent, AI-generated images, created based on the user’s view, through an AR display. The system is evaluated by several structured interviews, in which participants experience ViGen in three different scenarios. Results indicate that AI-generated visuals effectively supported defamiliarization by transforming ordinary scenes into unfamiliar ones. However, the user’s familiarity with a place plays a significant role. Also, while the feature that adjusts the transparency of overlaid images enhances safety, its limitations in dynamic environments suggest the need for further research across diverse cultural and geographic contexts. This study demonstrates the potential of AI-augmented vision systems to stimulate new ways of seeing, offering insights for further development in visual augmentation technologies. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent},
pubstate = {published},
tppubtype = {inproceedings}
}
Richter, S.; Richter, A.; Trier, M.
Narratives of the Future-Exploring Human-AI Collaboration Journal Article
In: Communications of the Association for Information Systems, vol. 57, 2025, ISSN: 15293181 (ISSN), (Publisher: Association for Information Systems).
Abstract | Links | BibTeX | Tags: Emerging technologies, Future, Horizon Scanning, Human engineering, Human-ai collaboration, Hybrid Work, Information Systems, Information use, Metaverse, Metaverses, Narrative, Narratives, New forms, Organisational, Technological innovation, Virtual Reality
@article{richter_narratives_2025,
title = {Narratives of the Future-Exploring {Human-AI} Collaboration},
author = {S. Richter and A. Richter and M. Trier},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011937728&partnerID=40&md5=28f6b90199fcfc1044fc27d74a20bdc4},
issn = {1529-3181},
year = {2025},
date = {2025-01-01},
journal = {Communications of the Association for Information Systems},
volume = {57},
abstract = {As organizational leaders and employees navigate the new landscape of hybrid work, two additional technological innovations are quickly becoming prevalent: Human-AI Collaboration and new forms of presence in virtual environments such as the Metaverse. While both developments are still in the early stages, the example of generative AI has shown how fast emerging technology can have important implications for organizational practices. But how do we study something that has not yet materialized? In this paper, we apply web-based horizon scanning and develop four narratives, based on tasks in McGrath's group task circumplex, to engage with the future of work. Beyond dis-cussing the possible implications of the narratives and providing exemplary research questions, we explore and discuss horizon scanning as a future-looking methodology for information systems re-search. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Information Systems},
keywords = {Emerging technologies, Future, Horizon Scanning, Human engineering, Human-ai collaboration, Hybrid Work, Information Systems, Information use, Metaverse, Metaverses, Narrative, Narratives, New forms, Organisational, Technological innovation, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
de Oliveira, E. A. Masasi; Sousa, R. T.; Bastos, A. A.; de Freitas Cintra, L. Martins; Filho, A. R. G. Galvão
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware {Retrieval-Augmented} Generation},
author = {E. A. Masasi de Oliveira and R. T. Sousa and A. A. Bastos and L. Martins de Freitas Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2F3706370.3731643&partnerID=40&md5=47a47f3408a0e6cb35c16dd6101a15b0},
doi = {10.1145/3706370.3731643},
isbn = {9798400713910},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437--440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
Coronado, A.; Carvalho, S. T.; Berretta, L. Oliveira
See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 451–457, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people
@inproceedings{coronado_see_2025,
title = {See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People},
author = {A. Coronado and S. T. Carvalho and L. Oliveira Berretta},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007991842&doi=10.1145%2F3706370.3731641&partnerID=40&md5=7eb509d2ac724af78ec04575a8c71085},
doi = {10.1145/3706370.3731641},
isbn = {9798400713910},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {451--457},
publisher = {Association for Computing Machinery, Inc},
abstract = {Extended Reality (XR) is quickly expanding "as the next major technology wave in personal computing". Nevertheless, this expansion and adoption could also exclude certain disabled users, particularly people with visual impairment (VIP). According to the World Health Organization (WHO) in their 2019 publication, there were at least 2.2 billion people with visual impairment, a number that is also estimated to have increased in recent years. Therefore, it is important to include disabled users, especially visually impaired people, in the design of Head-Mounted Displays and Extended Reality environments. Indeed, this objective can be pursued by incorporating Multimodal Large Language Model (MLLM) technology, which can assist visually impaired people. As a case study, this study employs different prompts that result in environment descriptions from an MLLM integrated into a virtual reality (VR) escape room. Therefore, six potential prompts were engineered to generate valuable outputs for visually impaired users inside a VR environment. These outputs were evaluated using the G-Eval, and VIEScore metrics. Even though, the results show that the prompt patterns provided a description that aligns with the user's point of view, it is highly recommended to evaluate these outputs through "expected outputs"from Orientation and Mobility Specialists, and Sighted Guides. Furthermore, the subsequent step in the process is to evaluate these outputs by visually impaired people themselves to identify the most effective prompt pattern. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people},
pubstate = {published},
tppubtype = {inproceedings}
}
Peter, K.; Makosa, I.; Auala, S.; Ndjao, L.; Maasz, D.; Mbinge, U.; Winschiers-Theophilus, H.
Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 418–423, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: 3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality
@inproceedings{peter_co-creating_2025,
title = {Co-creating a {VR} Narrative Experience of Constructing a Food Storage Following {OvaHimba} Traditional Practices},
author = {K. Peter and I. Makosa and S. Auala and L. Ndjao and D. Maasz and U. Mbinge and H. Winschiers-Theophilus},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007984089&doi=10.1145%2F3706370.3731652&partnerID=40&md5=10c67ae9849b2b9093515e04828d423d},
doi = {10.1145/3706370.3731652},
isbn = {9798400713910},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {418--423},
publisher = {Association for Computing Machinery, Inc},
abstract = {As part of an attempt to co-create a comprehensive virtual environment in which one can explore and learn traditional practices of the OvaHimba people, we have co-designed and implemented a VR experience to construct a traditional food storage. In collaboration with the OvaHimba community residing in Otjisa, we have explored culturally valid representations of the process. We have further investigated different techniques such as photogrammetry, generative AI and manual methods to develop 3D models. Our findings highlight the importance of context, process, and community-defined relevance in co-design, the fluidity of cultural realities and virtual representations, as well as technical challenges. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Wan, X.; Luo, Y.
A Study of Anti-war Memorial Hall of Leshan City based on Virtual Museum Technology Proceedings Article
In: pp. 493–497, Association for Computing Machinery, Inc, 2025, ISBN: 9798400712432 (ISBN).
Abstract | Links | BibTeX | Tags: 3d modeling technologies, 3D reconstruction, Anti-war, Artificial intelligence, Augmented Reality, Digital researches, Historic Preservation, Human engineering, Interactive computer graphics, Knowledge graph, Knowledge graphs, Language Model, Localization and mappings, Metaverses, Model knowledge, Museum technology, Museums, Restoration, Three dimensional computer graphics, Virtual museum, Virtual Reality
@inproceedings{wan_study_2025,
title = {A Study of Anti-war Memorial Hall of {Leshan} City based on Virtual Museum Technology},
author = {X. Wan and Y. Luo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011594066&doi=10.1145%2F3732801.3732887&partnerID=40&md5=ac25032b46edf5a9d5949b8ceb5a41e1},
doi = {10.1145/3732801.3732887},
isbn = {9798400712432},
year = {2025},
date = {2025-01-01},
pages = {493--497},
publisher = {Association for Computing Machinery, Inc},
internal-note = {booktitle missing from source record; required for @inproceedings -- TODO recover proceedings title via the DOI},
abstract = {This study adopted augmented reality (AR), virtual reality (VR), artificial intelligence (AI), metaverse (META), large language models (LLM), knowledge graphs (KG), and synchronous localization and mapping (SLAM) technologies to create a virtual museum (VM) with the theme of the history of Leshan anti-Japanese war. Its aim is to enrich the digital research of this area, and to restore and vividly reflect the significance of Leshan’s contributions during the anti-Japanese war. This study combines 3D modeling technology with historical scene restoration to create a method of field investigation of local history and anti-Japanese war sites, which constructed six unique exhibition areas to describe historical events. The virtual museum integrates lots of historical sites, stories, achievements, and cultural aspects into a unique cultural interaction center. Through diverse technological approaches, this study aims to enable the public to contemplate history, cultivate national pride and patriotism, and deliver novel strategies for the digital protection of historical heritage. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3d modeling technologies, 3D reconstruction, Anti-war, Artificial intelligence, Augmented Reality, Digital researches, Historic Preservation, Human engineering, Interactive computer graphics, Knowledge graph, Knowledge graphs, Language Model, Localization and mappings, Metaverses, Model knowledge, Museum technology, Museums, Restoration, Three dimensional computer graphics, Virtual museum, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Saddik, A. El; Ahmad, J.; Khan, M.; Abouzahir, S.; Gueaieb, W.
Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content Journal Article
In: ACM Transactions on Multimedia Computing, Communications and Applications, vol. 21, no. 7, pp. 1–43, 2025, ISSN: 15516857 (ISSN); 15516865 (ISSN), (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality
@article{el_saddik_unleashing_2025,
title = {Unleashing Creativity in the {Metaverse}: Generative {AI} and Multimodal Content},
author = {A. El Saddik and J. Ahmad and M. Khan and S. Abouzahir and W. Gueaieb},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011860002&doi=10.1145%2F3713075&partnerID=40&md5=20064843ced240c42e9353d747672cb3},
doi = {10.1145/3713075},
issn = {1551-6857; 1551-6865},
year = {2025},
date = {2025-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications and Applications},
volume = {21},
number = {7},
pages = {1--43},
abstract = {The metaverse presents an emerging creative expression and collaboration frontier where generative artificial intelligence (GenAI) can play a pivotal role with its ability to generate multimodal content from simple prompts. These prompts allow the metaverse to interact with GenAI, where context information, instructions, input data, or even output indications constituting the prompt can come from within the metaverse. However, their integration poses challenges regarding interoperability, lack of standards, scalability, and maintaining a high-quality user experience. This article explores how GenAI can productively assist in enhancing creativity within the contexts of the metaverse and unlock new opportunities. We provide a technical, in-depth overview of the different generative models for image, video, audio, and 3D content within the metaverse environments. We also explore the bottlenecks, opportunities, and innovative applications of GenAI from the perspectives of end users, developers, service providers, and AI researchers. This survey commences by highlighting the potential of GenAI for enhancing the metaverse experience through dynamic content generation to populate massive virtual worlds. Subsequently, we shed light on the ongoing research practices and trends in multimodal content generation, enhancing realism and creativity and alleviating bottlenecks related to standardization, computational cost, privacy, and safety. Last, we share insights into promising research directions toward the integration of GenAI with the metaverse for creative enhancement, improved immersion, and innovative interactive applications. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Zhao, Y.; Dasari, M.; Guo, T.
CleAR: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality Journal Article
In: Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, vol. 9, no. 3, 2025, ISSN: 24749567 (ISSN), (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Augmented Reality, Color computer graphics, Environment lighting, Estimation results, Generative model, High quality, Human engineering, Immersive, Lighting, Lighting conditions, Lighting estimation, Mobile augmented reality, Real-time refinement, Rendering (computer graphics), Statistical tests, Virtual objects, Virtual Reality
@article{zhao_clear_2025,
title = {{CleAR}: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality},
author = {Y. Zhao and M. Dasari and T. Guo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105015452988&doi=10.1145%2F3749535&partnerID=40&md5=ed970d47cbf7f547555eca43b32cd7e7},
doi = {10.1145/3749535},
issn = {2474-9567},
year = {2025},
date = {2025-01-01},
journal = {Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies},
volume = {9},
number = {3},
abstract = {High-quality environment lighting is essential for creating immersive mobile augmented reality (AR) experiences. However, achieving visually coherent estimation for mobile AR is challenging due to several key limitations in AR device sensing capabilities, including low camera FoV and limited pixel dynamic ranges. Recent advancements in generative AI, which can generate high-quality images from different types of prompts, including texts and images, present a potential solution for high-quality lighting estimation. Still, to effectively use generative image diffusion models, we must address two key limitations of content quality and slow inference. In this work, we design and implement a generative lighting estimation system called CleAR that can produce high-quality, diverse environment maps in the format of 360° HDR images. Specifically, we design a two-step generation pipeline guided by AR environment context data to ensure the output aligns with the physical environment’s visual context and color appearance. To improve the estimation robustness under different lighting conditions, we design a real-time refinement component to adjust lighting estimation results on AR devices. To train and test our generative models, we curate a large-scale environment lighting estimation dataset with diverse lighting conditions. Through a combination of quantitative and qualitative evaluations, we show that CleAR outperforms state-of-the-art lighting estimation methods on both estimation accuracy, latency, and robustness, and is rated by 31 participants as producing better renderings for most virtual objects. For example, CleAR achieves 51% to 56% accuracy improvement on virtual object renderings across objects of three distinctive types of materials and reflective properties. CleAR produces lighting estimates of comparable or better quality in just 3.2 seconds—over 110X faster than state-of-the-art methods. Moreover, CleAR supports real-time refinement of lighting estimation results, ensuring robust and timely updates for AR applications. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Augmented Reality, Color computer graphics, Environment lighting, Estimation results, Generative model, High quality, Human engineering, Immersive, Lighting, Lighting conditions, Lighting estimation, Mobile augmented reality, Real-time refinement, Rendering (computer graphics), Statistical tests, Virtual objects, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2024
Sonawani, S.; Weigend, F.; Amor, H. B.
SiSCo: Signal Synthesis for Effective Human-Robot Communication Via Large Language Models Proceedings Article
In: IEEE Int Conf Intell Rob Syst, pp. 7107–7114, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 21530858 (ISSN); 979-835037770-5 (ISBN).
Abstract | Links | BibTeX | Tags: Communications channels, Extensive resources, Human engineering, Human Robot Interaction, Human-Robot Collaboration, Human-robot communication, Humans-robot interactions, Industrial robots, Intelligent robots, Language Model, Man machine systems, Microrobots, Robust communication, Signal synthesis, Specialized knowledge, Visual communication, Visual cues, Visual languages
@inproceedings{sonawani_sisco_2024,
title = {{SiSCo}: Signal Synthesis for Effective Human-Robot Communication Via Large Language Models},
author = {S. Sonawani and F. Weigend and H. B. Amor},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216466596&doi=10.1109%2fIROS58592.2024.10802561&partnerID=40&md5=ccd14b4f0b5d527b179394dffd4e2c73},
doi = {10.1109/IROS58592.2024.10802561},
issn = {2153-0858},
isbn = {979-835037770-5},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Int Conf Intell Rob Syst},
pages = {7107--7114},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Effective human-robot collaboration hinges on robust communication channels, with visual signaling playing a pivotal role due to its intuitive appeal. Yet, the creation of visually intuitive cues often demands extensive resources and specialized knowledge. The emergence of Large Language Models (LLMs) offers promising avenues for enhancing human-robot interactions and revolutionizing the way we generate context-aware visual cues. To this end, we introduce SiSCo-a novel framework that combines the computational power of LLMs with mixed-reality technologies to streamline the creation of visual cues for human-robot collaboration. Our results show that SiSCo improves the efficiency of communication in human-robot teaming tasks, reducing task completion time by approximately 73% and increasing task success rates by 18% compared to baseline natural language signals. Additionally, SiSCo reduces cognitive load for participants by 46%, as measured by the NASA-TLX subscale, and receives above-average user ratings for on-the-fly signals generated for unseen objects. To encourage further development and broader community engagement, we provide full access to SiSCo's implementation and related materials on our GitHub repository.1 © 2024 IEEE.},
keywords = {Communications channels, Extensive resources, Human engineering, Human Robot Interaction, Human-Robot Collaboration, Human-robot communication, Humans-robot interactions, Industrial robots, Intelligent robots, Language Model, Man machine systems, Microrobots, Robust communication, Signal synthesis, Specialized knowledge, Visual communication, Visual cues, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}