AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Yokoyama, N.; Kimura, R.; Nakajima, T.
ViGen: Defamiliarizing Everyday Perception for Discovering Unexpected Insights Proceedings Article
In: H., Degen; S., Ntoa (Ed.): Lect. Notes Comput. Sci., pp. 397–417, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303193417-9 (ISBN).
Abstract | Links | BibTeX | Tags: Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent
@inproceedings{yokoyama_vigen_2025,
title = {{ViGen}: Defamiliarizing Everyday Perception for Discovering Unexpected Insights},
author = {Yokoyama, N. and Kimura, R. and Nakajima, T.},
editor = {Degen, H. and Ntoa, S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007760030&doi=10.1007%2f978-3-031-93418-6_26&partnerID=40&md5=dee6f54688284313a45579aab5f934d6},
doi = {10.1007/978-3-031-93418-6_26},
isbn = {978-3-031-93417-9},
issn = {0302-9743},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
series = {Lecture Notes in Artificial Intelligence},
volume = {15821},
pages = {397--417},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper proposes ViGen, an Augmented Reality (AR) and Artificial Intelligence (AI)-enhanced vision system designed to facilitate defamiliarization in daily life. Humans rely on sight to gather information, think, and act, yet the act of seeing often becomes passive in daily life. Inspired by Victor Shklovsky’s concept of defamiliarization and the artistic technique of photomontage, ViGen seeks to disrupt habitual perceptions. It achieves this by overlaying semi-transparent, AI-generated images, created based on the user’s view, through an AR display. The system is evaluated by several structured interviews, in which participants experience ViGen in three different scenarios. Results indicate that AI-generated visuals effectively supported defamiliarization by transforming ordinary scenes into unfamiliar ones. However, the user’s familiarity with a place plays a significant role. Also, while the feature that adjusts the transparency of overlaid images enhances safety, its limitations in dynamic environments suggest the need for further research across diverse cultural and geographic contexts. This study demonstrates the potential of AI-augmented vision systems to stimulate new ways of seeing, offering insights for further development in visual augmentation technologies. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
title = {I Hear, See, Speak \& Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural {Human-AI} Communication},
author = {Li, K. and Mostajeran, F. and Rings, S. and Kruse, L. and Schmidt, S. and Arz, M. and Wolf, E. and Steinicke, F.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2fVRW66409.2025.00469&partnerID=40&md5=77e755f6a059f81e81c18987f58d00cc},
doi = {10.1109/VRW66409.2025.00469},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1648--1649},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 IEEE.},
keywords = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Z.; Zhang, H.; Peng, C.; Peiris, R.
Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 1–11, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9 (ISBN).
Abstract | Links | BibTeX | Tags: Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment
@inproceedings{li_exploring_2025,
title = {Exploring Large Language Model-Driven Agents for Environment-Aware Spatial Interactions and Conversations in Virtual Reality Role-Play Scenarios},
author = {Li, Z. and Zhang, H. and Peng, C. and Peiris, R.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002706893&doi=10.1109%2fVR59515.2025.00025&partnerID=40&md5=60f22109e054c9035a0c2210bb797039},
doi = {10.1109/VR59515.2025.00025},
isbn = {979-833153645-9},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {1--11},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent research has begun adopting Large Language Model (LLM) agents to enhance Virtual Reality (VR) interactions, creating immersive chatbot experiences. However, while current studies focus on generating dialogue from user speech inputs, their abilities to generate richer experiences based on the perception of LLM agents' VR environments and interaction cues remain unexplored. Hence, in this work, we propose an approach that enables LLM agents to perceive virtual environments and generate environment-aware interactions and conversations for an embodied human-AI interaction experience in VR environments. Here, we define a schema for describing VR environments and their interactions through text prompts. We evaluate the performance of our method through five role-play scenarios created using our approach in a study with 14 participants. The findings discuss the opportunities and challenges of our proposed approach for developing environment-aware LLM agents that facilitate spatial interactions and conversations within VR role-play scenarios. © 2025 IEEE.},
keywords = {Chatbots, Computer simulation languages, Context- awareness, context-awareness, Digital elevation model, Generative AI, Human-AI Interaction, Language Model, Large language model, large language models, Model agents, Role-play simulation, role-play simulations, Role-plays, Spatial interaction, Virtual environments, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Suzuki, R.; Gonzalez-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
title = {Everyday {AR} through {AI}-in-the-Loop},
author = {Suzuki, R. and Gonzalez-Franco, M. and Sra, M. and Lindlbauer, D.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2f3706599.3706741&partnerID=40&md5=56b5e447819dde7aa4a29f8e3899e535},
doi = {10.1145/3706599.3706741},
isbn = {979-840071395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}