AHCI RESEARCH GROUP
Publications
Papers published in international journals,
conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Yokoyama, N.; Kimura, R.; Nakajima, T.
ViGen: Defamiliarizing Everyday Perception for Discovering Unexpected Insights Proceedings Article
In: Degen, H.; Ntoa, S. (Eds.): Lect. Notes Comput. Sci., pp. 397–417, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743, ISBN: 978-3-031-93417-9.
Abstract | Links | BibTeX | Tags: Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent
@inproceedings{yokoyama_vigen_2025,
title = {ViGen: Defamiliarizing Everyday Perception for Discovering Unexpected Insights},
author = {N. Yokoyama and R. Kimura and T. Nakajima},
editor = {Degen, H. and Ntoa, S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007760030&doi=10.1007%2f978-3-031-93418-6_26&partnerID=40&md5=dee6f54688284313a45579aab5f934d6},
doi = {10.1007/978-3-031-93418-6_26},
issn = {0302-9743},
isbn = {978-3-031-93417-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15821 LNAI},
pages = {397–417},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This paper proposes ViGen, an Augmented Reality (AR) and Artificial Intelligence (AI)-enhanced vision system designed to facilitate defamiliarization in daily life. Humans rely on sight to gather information, think, and act, yet the act of seeing often becomes passive in daily life. Inspired by Victor Shklovsky’s concept of defamiliarization and the artistic technique of photomontage, ViGen seeks to disrupt habitual perceptions. It achieves this by overlaying semi-transparent, AI-generated images, created based on the user’s view, through an AR display. The system is evaluated by several structured interviews, in which participants experience ViGen in three different scenarios. Results indicate that AI-generated visuals effectively supported defamiliarization by transforming ordinary scenes into unfamiliar ones. However, the user’s familiarity with a place plays a significant role. Also, while the feature that adjusts the transparency of overlaid images enhances safety, its limitations in dynamic environments suggest the need for further research across diverse cultural and geographic contexts. This study demonstrates the potential of AI-augmented vision systems to stimulate new ways of seeing, offering insights for further development in visual augmentation technologies. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Artful Expression, Artistic technique, Augmented Reality, Daily lives, Defamiliarization, Dynamic environments, Engineering education, Enhanced vision systems, Generative AI, generative artificial intelligence, Human augmentation, Human engineering, Human-AI Interaction, Human-artificial intelligence interaction, Semi-transparent},
pubstate = {published},
tppubtype = {inproceedings}
}
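The mechanism the ViGen abstract describes, generating an image from the user's current view and compositing it semi-transparently over the scene, reduces in its simplest form to alpha blending with an adjustable opacity. Below is a minimal illustrative sketch, not the authors' implementation: the generative step is stubbed with a posterize filter, and Pillow's `Image.blend` stands in for the AR compositor.

```python
# Illustrative sketch only -- not the ViGen implementation.
# Requires Pillow; the generative-model call is a posterize stand-in.
from PIL import Image, ImageOps


def generate_defamiliarized_view(frame: Image.Image) -> Image.Image:
    """Stand-in for the AI image generator conditioned on the user's view.
    A real system would send the frame to an image-to-image model."""
    return ImageOps.posterize(frame.convert("RGB"), 2)


def compose_overlay(frame: Image.Image, alpha: float) -> Image.Image:
    """Alpha-blend the generated image over the live camera frame.
    Lowering alpha keeps the real scene visible -- the adjustable
    transparency safety feature the abstract describes."""
    return Image.blend(frame.convert("RGB"),
                       generate_defamiliarized_view(frame), alpha)


if __name__ == "__main__":
    camera_frame = Image.new("RGB", (640, 480), (120, 160, 200))
    compose_overlay(camera_frame, alpha=0.5).save("overlay_preview.png")
```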
Peter, K.; Makosa, I.; Auala, S.; Ndjao, L.; Maasz, D.; Mbinge, U.; Winschiers-Theophilus, H.
Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 418–423, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1391-0.
Abstract | Links | BibTeX | Tags: 3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality
@inproceedings{peter_co-creating_2025,
title = {Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices},
author = {K. Peter and I. Makosa and S. Auala and L. Ndjao and D. Maasz and U. Mbinge and H. Winschiers-Theophilus},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007984089&doi=10.1145%2f3706370.3731652&partnerID=40&md5=36f95823413852d636b39bd561c97917},
doi = {10.1145/3706370.3731652},
isbn = {979-8-4007-1391-0},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {418–423},
publisher = {Association for Computing Machinery, Inc},
abstract = {As part of an attempt to co-create a comprehensive virtual environment in which one can explore and learn traditional practices of the OvaHimba people, we have co-designed and implemented a VR experience to construct a traditional food storage. In collaboration with the OvaHimba community residing in Otjisa, we have explored culturally valid representations of the process. We have further investigated different techniques such as photogrammetry, generative AI and manual methods to develop 3D models. Our findings highlight the importance of context, process, and community-defined relevance in co-design, the fluidity of cultural realities and virtual representations, as well as technical challenges. © 2025 Copyright held by the owner/author(s).},
keywords = {3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Coronado, A.; Carvalho, S. T.; Berretta, L.
See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 451–457, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1391-0.
Abstract | Links | BibTeX | Tags: Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people
@inproceedings{coronado_see_2025,
title = {See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People},
author = {A. Coronado and S. T. Carvalho and L. Berretta},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007991842&doi=10.1145%2f3706370.3731641&partnerID=40&md5=2f7cb1535d39d5e59b1f43f773de3272},
doi = {10.1145/3706370.3731641},
isbn = {979-8-4007-1391-0},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {451–457},
publisher = {Association for Computing Machinery, Inc},
abstract = {Extended Reality (XR) is quickly expanding "as the next major technology wave in personal computing". Nevertheless, this expansion and adoption could also exclude certain disabled users, particularly people with visual impairment (VIP). According to the World Health Organization (WHO) in their 2019 publication, there were at least 2.2 billion people with visual impairment, a number that is also estimated to have increased in recent years. Therefore, it is important to include disabled users, especially visually impaired people, in the design of Head-Mounted Displays and Extended Reality environments. Indeed, this objective can be pursued by incorporating Multimodal Large Language Model (MLLM) technology, which can assist visually impaired people. As a case study, this study employs different prompts that result in environment descriptions from an MLLM integrated into a virtual reality (VR) escape room. Therefore, six potential prompts were engineered to generate valuable outputs for visually impaired users inside a VR environment. These outputs were evaluated using the G-Eval and VIEScore metrics. Even though the results show that the prompt patterns provided a description that aligns with the user's point of view, it is highly recommended to evaluate these outputs through "expected outputs" from Orientation and Mobility Specialists and Sighted Guides. Furthermore, the subsequent step in the process is to evaluate these outputs by visually impaired people themselves to identify the most effective prompt pattern. © 2025 Copyright held by the owner/author(s).},
keywords = {Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people},
pubstate = {published},
tppubtype = {inproceedings}
}
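The study's core loop, engineering prompt patterns that turn a rendered scene into a description for a blind user, can be sketched as follows. This is a hypothetical reconstruction, not the paper's code: the two prompt patterns, the scene payload, and the stubbed `describe_scene` call are all invented for illustration; a real system would send the messages plus the rendered frame to a multimodal LLM endpoint.

```python
# Hypothetical sketch of prompt-pattern assembly for an MLLM scene
# describer; these are not the six patterns evaluated in the paper.
from dataclasses import dataclass

PROMPT_PATTERNS = {
    "spatial": ("Describe the room from the player's point of view. "
                "Use clock directions and distances in steps."),
    "task": ("List only the objects the player can interact with, "
             "ordered from nearest to farthest."),
}


@dataclass
class RenderedScene:
    objects: list[str]          # names of visible objects
    player_heading_deg: float   # camera yaw, for point-of-view wording


def build_messages(pattern: str, scene: RenderedScene) -> list[dict]:
    """Assemble a chat payload for one prompt pattern."""
    return [
        {"role": "system", "content": PROMPT_PATTERNS[pattern]},
        {"role": "user",
         "content": f"Visible objects: {', '.join(scene.objects)}. "
                    f"Heading: {scene.player_heading_deg:.0f} degrees."},
    ]


def describe_scene(messages: list[dict]) -> str:
    """Stand-in for the MLLM call; a real system would also attach
    the rendered frame as an image input."""
    return "Stub description: " + messages[1]["content"]


if __name__ == "__main__":
    scene = RenderedScene(objects=["door", "desk", "key"],
                          player_heading_deg=90)
    print(describe_scene(build_messages("spatial", scene)))
```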
Masasi De Oliveira, E. A.; Sousa, R. T.; Bastos, A. A.; Martins De Freitas Cintra, L.; Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1391-0.
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi De Oliveira and R. T. Sousa and A. A. Bastos and L. Martins De Freitas Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2f3706370.3731643&partnerID=40&md5=db10b41217dd8a0b0705c3fb4a615666},
doi = {10.1145/3706370.3731643},
isbn = {979-8-4007-1391-0},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Copyright held by the owner/author(s).},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
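The SA-RAG pipeline this abstract outlines, gaze capture feeding a retrieval step that grounds the LLM's answer, can be condensed to a few lines. The sketch below is illustrative only: a toy bag-of-words retriever stands in for a real embedding index, and the exhibit corpus, gaze-to-exhibit mapping, and prompt format are invented, not taken from the paper.

```python
# Toy sketch of spatially-aware retrieval-augmented generation:
# the exhibit the user is gazing at seeds the retrieval query.
import math
from collections import Counter

EXHIBIT_DOCS = {
    "amphora": "Greek amphora used to store wine and olive oil.",
    "mask": "Ceremonial mask carved from wood in the 19th century.",
}


def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[t] * b[t] for t in a)
    norm = (math.sqrt(sum(v * v for v in a.values()))
            * math.sqrt(sum(v * v for v in b.values())))
    return dot / norm if norm else 0.0


def retrieve(query: str, k: int = 1) -> list[str]:
    """Rank documents by similarity to the gaze-scoped query."""
    q = Counter(query.lower().split())
    ranked = sorted(EXHIBIT_DOCS.values(),
                    key=lambda d: cosine(q, Counter(d.lower().split())),
                    reverse=True)
    return ranked[:k]


def answer(gazed_exhibit: str, question: str) -> str:
    """Compose the grounded prompt; the gaze target scopes retrieval.
    A real system would send this prompt to an LLM endpoint."""
    context = " ".join(retrieve(f"{gazed_exhibit} {question}"))
    return f"[context: {context}] [question: {question}]"


if __name__ == "__main__":
    print(answer(gazed_exhibit="amphora", question="what was it used for?"))
```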
Nygren, T.; Samuelsson, M.; Hansson, P. -O.; Efimova, E.; Bachelder, S.
AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues Journal Article
In: International Journal of Artificial Intelligence in Education, 2025, ISSN: 1560-4292.
Abstract | Links | BibTeX | Tags: AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation
@article{nygren_ai_2025,
title = {AI Versus Human Feedback in Mixed Reality Simulations: Comparing LLM and Expert Mentoring in Preservice Teacher Education on Controversial Issues},
author = {T. Nygren and M. Samuelsson and P. -O. Hansson and E. Efimova and S. Bachelder},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007244772&doi=10.1007%2fs40593-025-00484-8&partnerID=40&md5=d3cb14a8117045505cbbeb174b32b88d},
doi = {10.1007/s40593-025-00484-8},
issn = {1560-4292},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Artificial Intelligence in Education},
abstract = {This study explores the potential role of AI-generated mentoring within simulated environments designed for teacher education, specifically focused on the challenges of teaching controversial issues. Using a mixed-methods approach, we empirically investigate the potential and challenges of AI-generated feedback compared to that provided by human experts when mentoring preservice teachers in the context of mixed reality simulations. Findings reveal that human experts offered more mixed and nuanced feedback than ChatGPT-4o and Perplexity, especially when identifying missed teaching opportunities and balancing classroom discussions. The AI models evaluated were publicly available pro versions of LLMs and were tested using detailed prompts and coding schemes aligned with educational theories. AI systems were not very good at identifying aspects of general, pedagogical or content knowledge based on Shulman’s theories but were still quite effective in generating feedback in line with human experts. The study highlights the promise of AI to enhance teacher training but underscores the importance of combining AI feedback with expert insights to address the complexities of real-world teaching. This research contributes to a growing understanding of AI's potential role and limitations in education. It suggests that, while AI can be valuable to scale mixed reality simulations, it should be carefully evaluated and balanced by human expertise in teacher education. © The Author(s) 2025.},
keywords = {AI-generated feedback, Controversial issue in social study education, Controversial issues in social studies education, Curricula, Domain knowledge, Economic and social effects, Expert systems, Generative AI, Human engineering, Knowledge engineering, Language Model, Large language model, large language models (LLMs), Mixed reality, Mixed reality simulation, Mixed reality simulation (MRS), Pedagogical content knowledge, Pedagogical content knowledge (PCK), Personnel training, Preservice teachers, Social studies education, Teacher training, Teacher training simulation, Teacher training simulations, Teaching, Training simulation},
pubstate = {published},
tppubtype = {article}
}
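The comparison methodology implied by this abstract, coding AI-generated and expert feedback with a shared scheme and checking where they align, naturally calls for an agreement statistic. The sketch below is a generic illustration, not the study's analysis: the categories and labels are invented, and Cohen's kappa is one common choice for this kind of two-coder comparison.

```python
# Generic illustration: agreement between AI-generated and expert
# feedback coded with a shared scheme. The labels are invented;
# they are not the study's data.
from collections import Counter

CATEGORIES = ["content_knowledge", "pedagogy", "missed_opportunity"]


def cohens_kappa(coder_a: list[str], coder_b: list[str]) -> float:
    """Chance-corrected agreement between two coders."""
    n = len(coder_a)
    observed = sum(a == b for a, b in zip(coder_a, coder_b)) / n
    freq_a, freq_b = Counter(coder_a), Counter(coder_b)
    expected = sum((freq_a[c] / n) * (freq_b[c] / n) for c in CATEGORIES)
    return (observed - expected) / (1 - expected)


if __name__ == "__main__":
    expert = ["pedagogy", "missed_opportunity", "pedagogy", "content_knowledge"]
    ai = ["pedagogy", "pedagogy", "pedagogy", "content_knowledge"]
    print(f"kappa = {cohens_kappa(expert, ai):.2f}")
```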
Shawash, J.; Thibault, M.; Hamari, J.
Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 377–379, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1391-0.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality
@inproceedings{shawash_who_2025,
title = {Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling},
author = {J. Shawash and M. Thibault and J. Hamari},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008003446&doi=10.1145%2f3706370.3731703&partnerID=40&md5=bc8a8d221abcf6c560446979fbd06cbc},
doi = {10.1145/3706370.3731703},
isbn = {979-8-4007-1391-0},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {377–379},
publisher = {Association for Computing Machinery, Inc},
abstract = {This demo presents "Who Killed Helene Pumpulivaara?", an innovative interactive heritage experience that combines crime mystery narrative with XR technology to address key challenges in digital heritage interpretation. Our work makes six significant contributions: (1) the discovery of a "Historical Uncanny Valley" effect where varying fidelity levels between AI-generated and authentic content serve as implicit markers distinguishing fact from interpretation; (2) an accessible production pipeline combining mobile photography with AI tools that democratizes XR heritage creation for resource-limited institutions; (3) a spatial storytelling approach that effectively counters decontextualization in digital heritage; (4) a multi-platform implementation strategy across web and VR environments; (5) a practical model for AI-assisted heritage content creation balancing authenticity with engagement; and (6) a pathway toward spatial augmented reality for future heritage interpretation. Using the historic Finlayson Factory in Tampere, Finland as a case study, our implementation demonstrates how emerging technologies can enrich the authenticity of heritage experiences, fostering deeper emotional connections between visitors and the histories embedded in place. © 2025 Copyright held by the owner/author(s).},
keywords = {Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Sonawani, S.; Weigend, F.; Amor, H. B.
SiSCo: Signal Synthesis for Effective Human-Robot Communication Via Large Language Models Proceedings Article
In: IEEE Int Conf Intell Rob Syst, pp. 7107–7114, Institute of Electrical and Electronics Engineers Inc., 2024, ISSN: 2153-0858, ISBN: 979-8-3503-7770-5.
Abstract | Links | BibTeX | Tags: Communications channels, Extensive resources, Human engineering, Human Robot Interaction, Human-Robot Collaboration, Human-robot communication, Humans-robot interactions, Industrial robots, Intelligent robots, Language Model, Man machine systems, Microrobots, Robust communication, Signal synthesis, Specialized knowledge, Visual communication, Visual cues, Visual languages
@inproceedings{sonawani_sisco_2024,
title = {SiSCo: Signal Synthesis for Effective Human-Robot Communication Via Large Language Models},
author = {S. Sonawani and F. Weigend and H. B. Amor},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216466596&doi=10.1109%2fIROS58592.2024.10802561&partnerID=40&md5=ccd14b4f0b5d527b179394dffd4e2c73},
doi = {10.1109/IROS58592.2024.10802561},
issn = {2153-0858},
isbn = {979-8-3503-7770-5},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Int Conf Intell Rob Syst},
pages = {7107–7114},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Effective human-robot collaboration hinges on robust communication channels, with visual signaling playing a pivotal role due to its intuitive appeal. Yet, the creation of visually intuitive cues often demands extensive resources and specialized knowledge. The emergence of Large Language Models (LLMs) offers promising avenues for enhancing human-robot interactions and revolutionizing the way we generate context-aware visual cues. To this end, we introduce SiSCo-a novel framework that combines the computational power of LLMs with mixed-reality technologies to streamline the creation of visual cues for human-robot collaboration. Our results show that SiSCo improves the efficiency of communication in human-robot teaming tasks, reducing task completion time by approximately 73% and increasing task success rates by 18% compared to baseline natural language signals. Additionally, SiSCo reduces cognitive load for participants by 46%, as measured by the NASA-TLX subscale, and receives above-average user ratings for on-the-fly signals generated for unseen objects. To encourage further development and broader community engagement, we provide full access to SiSCo's implementation and related materials on our GitHub repository. © 2024 IEEE.},
keywords = {Communications channels, Extensive resources, Human engineering, Human Robot Interaction, Human-Robot Collaboration, Human-robot communication, Humans-robot interactions, Industrial robots, Intelligent robots, Language Model, Man machine systems, Microrobots, Robust communication, Signal synthesis, Specialized knowledge, Visual communication, Visual cues, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
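SiSCo's central idea per this abstract, an LLM that synthesizes context-aware visual cues for human-robot teaming, can be sketched as a structured-output loop: prompt the model for a machine-readable cue spec, then hand the spec to the renderer. Everything below is a hypothetical sketch, not the implementation in the authors' GitHub repository; the LLM call is stubbed and the JSON cue schema is invented.

```python
# Hypothetical sketch of LLM-driven visual-signal synthesis for
# human-robot teaming; the cue schema and stubbed LLM reply are
# illustrative, not SiSCo's actual interface.
import json


def llm_synthesize_cue(task_context: str) -> str:
    """Stand-in for the LLM call; a real system would prompt a model
    to return a JSON cue spec for the given teaming context."""
    return json.dumps({
        "shape": "arrow",
        "color": "yellow",
        "label": f"hand over: {task_context}",
        "anchor": "workpiece",
    })


def render_cue(spec_json: str) -> None:
    """Parse the cue spec and hand it to the mixed-reality renderer
    (printed here to keep the sketch self-contained)."""
    spec = json.loads(spec_json)
    print(f"draw {spec['color']} {spec['shape']} at {spec['anchor']}: "
          f"{spec['label']}")


if __name__ == "__main__":
    render_cue(llm_synthesize_cue("unseen object #3"))
```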