AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Li, C.; Da, F.
Refined dense face alignment through image matching Journal Article
In: Visual Computer, vol. 41, no. 1, pp. 157–171, 2025, ISSN: 01782789 (ISSN).
Abstract | Links | BibTeX | Tags: 3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics
@article{li_refined_2025,
title = {Refined dense face alignment through image matching},
author = {C. Li and F. Da},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187924785&doi=10.1007%2fs00371-024-03316-3&partnerID=40&md5=839834c6ff3320398d5ef75b055947cb},
doi = {10.1007/s00371-024-03316-3},
issn = {01782789 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {1},
pages = {157–171},
abstract = {Face alignment is the foundation of building 3D avatars for virtue communication in the metaverse, human-computer interaction, AI-generated content, etc., and therefore, it is critical that face deformation is reflected precisely to better convey expression, pose and identity. However, misalignment exists in the currently best methods that fit a face model to a target image and can be easily captured by human perception, thus degrading the reconstruction quality. The main reason is that the widely used metrics for training, including the landmark re-projection loss, pixel-wise loss and perception-level loss, are insufficient to address the misalignment and suffer from ambiguity and local minimums. To address misalignment, we propose an image MAtchinG-driveN dEnse geomeTrIC supervision (MAGNETIC). Specifically, we treat face alignment as a matching problem and establish pixel-wise correspondences between the target and rendered images. Then reconstructed facial points are guided towards their corresponding points on the target image, thus improving reconstruction. Synthesized image pairs are mixed up with face outliers to simulate the target and rendered images with ground-truth pixel-wise correspondences to enable the training of a robust prediction network. Compared with existing methods that turn to 3D scans for dense geometric supervision, our method reaches comparable shape reconstruction results with much lower effort. Experimental results on the NoW testset show that we reach the state-of-the-art among all self-supervised methods and even outperform methods using photo-realistic images. We also achieve comparable results with the state-of-the-art on the benchmark of Feng et al. Codes will be available at: github.com/ChunLLee/ReconstructionFromMatching. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {article}
}
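A note for readers skimming the abstract above: MAGNETIC supervises reconstruction by matching pixels between the rendered and target images and pulling each reconstructed facial point toward its matched position on the target. The following is a minimal illustrative sketch of such a dense correspondence loss in Python with NumPy; the function name, the confidence weighting, and the toy data are assumptions for illustration, not the authors' implementation.

import numpy as np

def dense_correspondence_loss(projected_pts, matched_pts, confidence=None):
    """Mean squared 2D distance between reconstructed facial points (projected
    into the image plane) and their matched positions on the target image.
    Illustrative stand-in for a dense geometric supervision term.
    projected_pts : (N, 2) pixel coordinates of the rendered/reconstructed points
    matched_pts   : (N, 2) corresponding pixel coordinates on the target image
    confidence    : optional (N,) weights, e.g. to down-weight outlier matches
    """
    diff = projected_pts - matched_pts           # (N, 2) residuals in pixels
    per_point = np.sum(diff ** 2, axis=1)        # squared error per correspondence
    if confidence is not None:
        per_point = per_point * confidence       # soft handling of outlier matches
    return float(per_point.mean())

# Toy example: three correspondences, the third down-weighted as a likely outlier.
proj = np.array([[10.0, 20.0], [30.0, 40.0], [55.0, 60.0]])
match = np.array([[11.0, 21.0], [29.0, 41.0], [80.0, 90.0]])
weights = np.array([1.0, 1.0, 0.1])
print(dense_correspondence_loss(proj, match, weights))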
Li, K.; Mostajeran, F.; Rings, S.; Kruse, L.; Schmidt, S.; Arz, M.; Wolf, E.; Steinicke, F.
I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1648–1649, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows
@inproceedings{li_i_2025,
title = {I Hear, See, Speak & Do: Bringing Multimodal Information Processing to Intelligent Virtual Agents for Natural Human-AI Communication},
author = {K. Li and F. Mostajeran and S. Rings and L. Kruse and S. Schmidt and M. Arz and E. Wolf and F. Steinicke},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005146647&doi=10.1109%2fVRW66409.2025.00469&partnerID=40&md5=77e755f6a059f81e81c18987f58d00cc},
doi = {10.1109/VRW66409.2025.00469},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1648–1649},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In this demo paper, we present an Extended Reality (XR) framework providing a streamlined workflow for creating and interacting with intelligent virtual agents (IVAs) with multimodal information processing capabilities using commercially available artificial intelligence (AI) tools and cloud services such as large language and vision models. The system supports (i) the integration of high-quality, customizable virtual 3D human models for visual representations of IVAs and (ii) multimodal communication with generative AI-driven IVAs in immersive XR, featuring realistic human behavior simulations. Our demo showcases the enormous potential and vast design space of embodied IVAs for various XR applications. © 2025 IEEE.},
keywords = {Artificial intelligence tools, Cloud services, Embodied AI, Embodied artificial intelligence, Extended reality, Human computer interaction, Human-AI Interaction, Human-artificial intelligence interaction, Information processing capability, Intelligent virtual agents, Language Model, Multi-modal information, Virtual agent, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Mereu, J.
Using LLMs to enhance end-user development support in XR Proceedings Article
In: V., Paneva; D., Tetteroo; V., Frau; S., Feger; D., Spano; F., Paterno; S., Sauer; M., Manca (Ed.): CEUR Workshop Proc., CEUR-WS, 2025, ISSN: 16130073 (ISSN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Condition, Configuration, Development support, Development technique, End-User Development, End-Users, Event-condition-action, Event-Condition-Actions, Extended reality, Human computer interaction, Information Systems, Information use, Natural Language, Natural language processing systems, Natural languages, Rule, rules
@inproceedings{mereu_using_2025,
title = {Using LLMs to enhance end-user development support in XR},
author = {J. Mereu},
editor = {Paneva V. and Tetteroo D. and Frau V. and Feger S. and Spano D. and Paterno F. and Sauer S. and Manca M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008755984&partnerID=40&md5=bfaaa38c3bee309621426f8f35332107},
issn = {16130073 (ISSN)},
year = {2025},
date = {2025-01-01},
booktitle = {CEUR Workshop Proc.},
volume = {3978},
publisher = {CEUR-WS},
abstract = {This paper outlines the center stage of my PhD research, which aims to empower non-developer users to create and customize eXtended Reality (XR) environments through End-User Development (EUD) techniques combined with the latest AI tools. In particular, I describe my contributions to the EUD4XR project, detailing both the work completed and the ongoing developments. EUD4XR seeks to support end-users in customizing XR content with the assistance of a Large Language Model (LLM)-based conversational agent. © 2025 Copyright for this paper by its authors.},
keywords = {Artificial intelligence, Condition, Configuration, Development support, Development technique, End-User Development, End-Users, Event-condition-action, Event-Condition-Actions, Extended reality, Human computer interaction, Information Systems, Information use, Natural Language, Natural language processing systems, Natural languages, Rule, rules},
pubstate = {published},
tppubtype = {inproceedings}
}
Peter, K.; Makosa, I.; Auala, S.; Ndjao, L.; Maasz, D.; Mbinge, U.; Winschiers-Theophilus, H.
Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 418–423, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality
@inproceedings{peter_co-creating_2025,
title = {Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices},
author = {K. Peter and I. Makosa and S. Auala and L. Ndjao and D. Maasz and U. Mbinge and H. Winschiers-Theophilus},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007984089&doi=10.1145%2f3706370.3731652&partnerID=40&md5=36f95823413852d636b39bd561c97917},
doi = {10.1145/3706370.3731652},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {418–423},
publisher = {Association for Computing Machinery, Inc},
abstract = {As part of an attempt to co-create a comprehensive virtual environment in which one can explore and learn traditional practices of the OvaHimba people, we have co-designed and implemented a VR experience to construct a traditional food storage. In collaboration with the OvaHimba community residing in Otjisa, we have explored culturally valid representations of the process. We have further investigated different techniques such as photogrammetry, generative AI and manual methods to develop 3D models. Our findings highlight the importance of context, process, and community-defined relevance in co-design, the fluidity of cultural realities and virtual representations, as well as technical challenges. © 2025 Copyright held by the owner/author(s).},
keywords = {3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Oliveira, E. A. Masasi De; Sousa, R. T.; Bastos, A. A.; Cintra, L. Martins De Freitas; Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi De Oliveira and R. T. Sousa and A. A. Bastos and L. Martins De Freitas Cintra and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2f3706370.3731643&partnerID=40&md5=db10b41217dd8a0b0705c3fb4a615666},
doi = {10.1145/3706370.3731643},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Copyright held by the owner/author(s).},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
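As the abstract describes, SA-RAG keys retrieval on the visitor's visual attention: the exhibit with the most gaze dwell time determines which documents are retrieved before the language model answers. Below is a minimal illustrative sketch of that idea in Python/NumPy; the dwell-time threshold, helper names, and toy embeddings are assumptions, not the authors' system.

import numpy as np

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))

def most_attended_exhibit(gaze_samples, exhibits, min_dwell_s=1.5):
    """Pick the exhibit with the longest accumulated gaze dwell time."""
    dwell = {name: 0.0 for name in exhibits}
    for name, duration in gaze_samples:          # (exhibit_id, seconds looked at)
        dwell[name] += duration
    best = max(dwell, key=dwell.get)
    return best if dwell[best] >= min_dwell_s else None

def retrieve_context(query_vec, documents, k=2):
    """Return the k documents whose embeddings are most similar to the query."""
    ranked = sorted(documents, key=lambda d: cosine(query_vec, d["vec"]), reverse=True)
    return [d["text"] for d in ranked[:k]]

# Toy data: two exhibits and a tiny document store with 3-d "embeddings".
exhibits = {"amphora": np.array([1.0, 0.0, 0.0]), "mosaic": np.array([0.0, 1.0, 0.0])}
docs = [
    {"text": "The amphora stored wine and olive oil.", "vec": np.array([0.9, 0.1, 0.0])},
    {"text": "The mosaic depicts a harbour scene.", "vec": np.array([0.1, 0.9, 0.0])},
]
gaze = [("amphora", 1.2), ("mosaic", 0.3), ("amphora", 0.8)]

target = most_attended_exhibit(gaze, exhibits)
if target is not None:
    context = retrieve_context(exhibits[target], docs, k=1)
    prompt = f"Using only this context: {context}\nAnswer the visitor's question about the {target}."
    print(prompt)   # this grounded prompt would then be sent to the LLM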
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: C., Santoro; A., Schmidt; M., Matera; A., Bellucci (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-303195451-1 (ISBN).
Abstract | Links | BibTeX | Tags: 'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
title = {Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation},
author = {A. Carcangiu and M. Manca and J. Mereu and C. Santoro and L. Simeoli and L. D. Spano},
editor = {Santoro C. and Schmidt A. and Matera M. and Bellucci A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
doi = {10.1007/978-3-031-95452-8_4},
isbn = {03029743 (ISSN); 978-303195451-1 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15713 LNCS},
pages = {59–79},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction setting (keywords, difficulties in expressing conditions, task success), even if we registered different adaptations for each setting (verbal structure, event vs action first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {'current, Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
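For readers unfamiliar with the rule model studied above, an event-condition-action (ECA) rule fires an action when a given event occurs and its condition holds over the current scene state. The sketch below shows a minimal ECA structure and dispatch loop in Python; the field names and the example rule are illustrative assumptions, not the EUD4XR implementation.

from dataclasses import dataclass
from typing import Callable

@dataclass
class Rule:
    """Event-condition-action rule: when `event` occurs and `condition`
    holds over the current scene state, run `action`."""
    event: str
    condition: Callable[[dict], bool]
    action: Callable[[dict], None]

def dispatch(event: str, state: dict, rules: list) -> None:
    """Fire every rule registered for `event` whose condition is satisfied."""
    for rule in rules:
        if rule.event == event and rule.condition(state):
            rule.action(state)

# Example rule: "when the user grabs the lamp and it is dark, turn the lamp on".
rules = [
    Rule(
        event="grab:lamp",
        condition=lambda s: s["ambient_light"] < 0.2,
        action=lambda s: s.update(lamp_on=True),
    )
]

scene = {"ambient_light": 0.1, "lamp_on": False}
dispatch("grab:lamp", scene, rules)
print(scene)  # {'ambient_light': 0.1, 'lamp_on': True}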
Ding, S.; Chen, Y.
RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 131–136, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: Ambient intelligence, Computational Linguistics, Computer interaction, Computing methodologies, Computing methodologies-Artificial intelligence-Natural language processing-Natural language generation, Computing methodology-artificial intelligence-natural language processing-natural language generation, Data handling, Formal languages, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Language Model, Language processing, Natural language generation, Natural language processing systems, Natural languages, Virtual Reality, Word processing
@inproceedings{ding_rag-vr_2025,
title = {RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments},
author = {S. Ding and Y. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140593&doi=10.1109%2fVRW66409.2025.00034&partnerID=40&md5=36dc5fef97aeea4d6e183c83ce9fcd89},
doi = {10.1109/VRW66409.2025.00034},
isbn = {979-833151484-6 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {131–136},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in large language models (LLMs) provide new opportunities for context understanding in virtual reality (VR). However, VR contexts are often highly localized and personalized, limiting the effectiveness of general-purpose LLMs. To address this challenge, we present RAG-VR, the first 3D question-answering system for VR that incorporates retrieval-augmented generation (RAG), which augments an LLM with external knowledge retrieved from a localized knowledge database to improve the answer quality. RAG-VR includes a pipeline for extracting comprehensive knowledge about virtual environments and user conditions for accurate answer generation. To ensure efficient retrieval, RAG-VR offloads the retrieval process to a nearby edge server and uses only essential information during retrieval. Moreover, we train the retriever to effectively distinguish among relevant, irrelevant, and hard-to-differentiate information in relation to questions. RAG-VR improves answer accuracy by 17.9%-41.8% and reduces end-to-end latency by 34.5%-47.3% compared with two baseline systems. © 2025 IEEE.},
keywords = {Ambient intelligence, Computational Linguistics, Computer interaction, Computing methodologies, Computing methodologies-Artificial intelligence-Natural language processing-Natural language generation, Computing methodology-artificial intelligence-natural language processing-natural language generation, Data handling, Formal languages, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Language Model, Language processing, Natural language generation, Natural language processing systems, Natural languages, Virtual Reality, Word processing},
pubstate = {published},
tppubtype = {inproceedings}
}
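RAG-VR, per the abstract, grounds the LLM in a localized knowledge base about the virtual environment and retrieves only the most relevant facts for each question. The following is a minimal illustrative sketch of that retrieval step in Python/NumPy; the toy knowledge base, the stand-in embedding function, and the scoring are assumptions, and the real system additionally trains the retriever and offloads retrieval to an edge server.

import numpy as np

# Tiny "localized knowledge base": facts about the current VR scene, each with a
# precomputed 3-d embedding (real systems use a neural text encoder).
KNOWLEDGE = [
    {"fact": "The red key is inside the desk drawer.", "vec": np.array([0.9, 0.1, 0.0])},
    {"fact": "The desk drawer unlocks after level 2.", "vec": np.array([0.7, 0.3, 0.1])},
    {"fact": "The plant in the corner is decorative.", "vec": np.array([0.0, 0.1, 0.9])},
]

def embed(text):
    """Stand-in embedding: a real system would call a sentence encoder here."""
    keywords = {"key": np.array([1.0, 0.0, 0.0]),
                "drawer": np.array([0.5, 0.5, 0.0]),
                "plant": np.array([0.0, 0.0, 1.0])}
    vec = sum((v for w, v in keywords.items() if w in text.lower()), np.zeros(3))
    return vec if np.linalg.norm(vec) > 0 else np.ones(3)

def retrieve(question, k=2):
    """Return the k facts most similar to the question (cosine similarity)."""
    q = embed(question)
    def score(entry):
        v = entry["vec"]
        return float(np.dot(q, v) / (np.linalg.norm(q) * np.linalg.norm(v) + 1e-9))
    return [e["fact"] for e in sorted(KNOWLEDGE, key=score, reverse=True)[:k]]

question = "Where can I find the red key?"
context = retrieve(question)
prompt = ("Answer using only this scene knowledge:\n- " + "\n- ".join(context)
          + f"\nQuestion: {question}")
print(prompt)  # in a RAG pipeline this grounded prompt would be sent to the LLM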
Leininger, P.; Weber, C. J.; Rothe, S.
Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 60–78, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3-D environments, 3D reconstruction, 3D Scene Reconstruction, 3d scenes reconstruction, AI-generated 3d environment, AI-Generated 3D Environments, Computer interaction, Creative Collaboration, Creatives, Digital content creation, Digital Content Creation., Filmmaking workflow, Filmmaking Workflows, Gaussian distribution, Gaussian Splatting, Gaussians, Generative AI, Graphical user interface, Graphical User Interface (GUI), Graphical user interfaces, Human computer interaction, human-computer interaction, Human-Computer Interaction (HCI), Immersive, Immersive Storytelling, Interactive computer graphics, Interactive computer systems, Interactive media, Mesh generation, Previsualization, Real-Time Rendering, Splatting, Three dimensional computer graphics, Virtual production, Virtual Production (VP), Virtual Reality, Work-flows
@inproceedings{leininger_understanding_2025,
title = {Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals},
author = {P. Leininger and C. J. Weber and S. Rothe},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007976841&doi=10.1145%2f3706370.3727853&partnerID=40&md5=0d4cf7a2398d12d04e4f0ab182474a10},
doi = {10.1145/3706370.3727853},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {60–78},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual production (VP) is transforming filmmaking by integrating real-time digital elements with live-action footage, offering new creative possibilities and streamlined workflows. While industry experts recognize AI's potential to revolutionize VP, its practical applications and value across different production phases and user groups remain underexplored. Building on initial research into generative and data-driven approaches, this paper presents the first systematic pilot study evaluating three types of AI-generated 3D environments - Depth Mesh, 360° Panoramic Meshes, and Gaussian Splatting - through the participation of 15 filmmaking professionals from diverse roles. Unlike commonly used 2D AI-generated visuals, our approach introduces navigable 3D environments that offer greater control and flexibility, aligning more closely with established VP workflows. Through expert interviews and literature research, we developed evaluation criteria to assess their usefulness beyond concept development, extending to previsualization, scene exploration, and interdisciplinary collaboration. Our findings indicate that different environments cater to distinct production needs, from early ideation to detailed visualization. Gaussian Splatting proved effective for high-fidelity previsualization, while 360° Panoramic Meshes excelled in rapid concept ideation. Despite their promise, challenges such as limited interactivity and customization highlight areas for improvement. Our prototype, EnVisualAIzer, built in Unreal Engine 5, provides an accessible platform for diverse filmmakers to engage with AI-generated environments, fostering a more inclusive production process. By lowering technical barriers, these environments have the potential to make advanced VP tools more widely available. This study offers valuable insights into the evolving role of AI in VP and sets the stage for future research and development. © 2025 Copyright held by the owner/author(s). Publication rights licensed to ACM.},
keywords = {3-D environments, 3D reconstruction, 3D Scene Reconstruction, 3d scenes reconstruction, AI-generated 3d environment, AI-Generated 3D Environments, Computer interaction, Creative Collaboration, Creatives, Digital content creation, Digital Content Creation., Filmmaking workflow, Filmmaking Workflows, Gaussian distribution, Gaussian Splatting, Gaussians, Generative AI, Graphical user interface, Graphical User Interface (GUI), Graphical user interfaces, Human computer interaction, human-computer interaction, Human-Computer Interaction (HCI), Immersive, Immersive Storytelling, Interactive computer graphics, Interactive computer systems, Interactive media, Mesh generation, Previsualization, Real-Time Rendering, Splatting, Three dimensional computer graphics, Virtual production, Virtual Production (VP), Virtual Reality, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Shawash, J.; Thibault, M.; Hamari, J.
Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 377–379, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality
@inproceedings{shawash_who_2025,
title = {Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling},
author = {J. Shawash and M. Thibault and J. Hamari},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008003446&doi=10.1145%2f3706370.3731703&partnerID=40&md5=bc8a8d221abcf6c560446979fbd06cbc},
doi = {10.1145/3706370.3731703},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {377–379},
publisher = {Association for Computing Machinery, Inc},
abstract = {This demo presents "Who Killed Helene Pumpulivaara?", an innovative interactive heritage experience that combines crime mystery narrative with XR technology to address key challenges in digital heritage interpretation. Our work makes six significant contributions: (1) the discovery of a "Historical Uncanny Valley"effect where varying fidelity levels between AI-generated and authentic content serve as implicit markers distinguishing fact from interpretation; (2) an accessible production pipeline combining mobile photography with AI tools that democratizes XR heritage creation for resource-limited institutions; (3) a spatial storytelling approach that effectively counters decontextualization in digital heritage; (4) a multi-platform implementation strategy across web and VR environments; (5) a practical model for AI-assisted heritage content creation balancing authenticity with engagement; and (6) a pathway toward spatial augmented reality for future heritage interpretation. Using the historic Finlayson Factory in Tampere, Finland as a case study, our implementation demonstrates how emerging technologies can enrich the authenticity of heritage experiences, fostering deeper emotional connections between visitors and the histories embedded in place. © 2025 Copyright held by the owner/author(s).},
keywords = {Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Gottsacker, M.; Bruder, G.; Welch, G. F.
rlty2rlty: Transitioning Between Realities with Generative AI Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1160–1161, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037449-0 (ISBN).
Abstract | Links | BibTeX | Tags: Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds
@inproceedings{gottsacker_rlty2rlty_2024,
title = {rlty2rlty: Transitioning Between Realities with Generative AI},
author = {M. Gottsacker and G. Bruder and G. F. Welch},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195556960&doi=10.1109%2fVRW62533.2024.00374&partnerID=40&md5=c6291f48ce2135a795a0a2d34681b83d},
doi = {10.1109/VRW62533.2024.00374},
isbn = {979-835037449-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1160–1161},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present a system for visually transitioning a mixed reality (MR) user between two arbitrary realities (e.g., between two virtual worlds or between the real environment and a virtual world). The system uses artificial intelligence (AI) to generate a 360° video that transforms the user's starting environment to another environment, passing through a liminal space that could help them relax between tasks or prepare them for the ending environment. The video can then be viewed on an MR headset. © 2024 IEEE.},
keywords = {Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds},
pubstate = {published},
tppubtype = {inproceedings}
}
Lee, L. -K.; Chan, E. H.; Tong, K. K. -L.; Wong, N. K. -H.; Wu, B. S. -Y.; Fung, Y. -C.; Fong, E. K. S.; Hou, U. Leong; Wu, N. -I.
Utilizing Virtual Reality and Generative AI Chatbot for Job Interview Simulations Proceedings Article
In: K.T., Chui; Y.K., Hui; D., Yang; L.-K., Lee; L.-P., Wong; B.L., Reynolds (Ed.): Proc. - Int. Symp. Educ. Technol., ISET, pp. 209–212, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036141-4 (ISBN).
Abstract | Links | BibTeX | Tags: chatbot, Chatbots, Computer interaction, Computer simulation languages, Generative adversarial networks, Generative AI, Hong-kong, Human computer interaction, ITS applications, Job interview simulation, Job interviews, Performance, Science graduates, User friendliness, Virtual environments, Virtual Reality
@inproceedings{lee_utilizing_2024,
title = {Utilizing Virtual Reality and Generative AI Chatbot for Job Interview Simulations},
author = {L. -K. Lee and E. H. Chan and K. K. -L. Tong and N. K. -H. Wong and B. S. -Y. Wu and Y. -C. Fung and E. K. S. Fong and U. Leong Hou and N. -I. Wu},
editor = {Chui K.T. and Hui Y.K. and Yang D. and Lee L.-K. and Wong L.-P. and Reynolds B.L.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85206582338&doi=10.1109%2fISET61814.2024.00048&partnerID=40&md5=c6986c0697792254e167e143b75f14c6},
doi = {10.1109/ISET61814.2024.00048},
isbn = {979-835036141-4 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - Int. Symp. Educ. Technol., ISET},
pages = {209–212},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Stress and anxiety experienced by interviewees, particularly fresh graduates, would significantly impact their performance in job interviews. Due to the increased affordability and user-friendliness of virtual reality (VR), VR has seen a surge in its application within the educational sector. This paper presents the design and implementation of a job interview simulation system, leveraging VR and a generative AI chatbot to provide an immersive environment for computer science graduates in Hong Kong. The system aims to help graduates practice and familiarize themselves with various real-world scenarios of a job interview in English, Mandarin, and Cantonese, tailored to the unique language requirements of Hong Kong's professional environment. The system comprises three core modules: a mock question and answer reading module, an AI speech analysis module, and a virtual interview module facilitated by the generative AI chatbot, ChatGPT. We anticipate that the proposed simulator will provide valuable insights to education practitioners on utilizing VR and generative AI for job interview training, extending beyond computer science graduates. © 2024 IEEE.},
keywords = {chatbot, Chatbots, Computer interaction, Computer simulation languages, Generative adversarial networks, Generative AI, Hong-kong, Human computer interaction, ITS applications, Job interview simulation, Job interviews, Performance, Science graduates, User friendliness, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Do, M. D.; Dahlem, N.; Paulus, M.; Krick, M.; Steffny, L.; Werth, D.
“Furnish Your Reality” - Intelligent Mobile AR Application for Personalized Furniture Proceedings Article
In: J., Wei; G., Margetis (Ed.): Lect. Notes Comput. Sci., pp. 196–210, Springer Science and Business Media Deutschland GmbH, 2024, ISBN: 03029743 (ISSN); 978-303160457-7 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Augmented reality applications, Electronic commerce, Generative AI, generative artificial intelligence, Human computer interaction, Human computer interfaces, LiDAR, Mobile augmented reality, Mobile human computer interface, Mobile Human Computer Interfaces, Personalized product design, Personalized products, Phygital customer journey, Physical environments, Product design, Recommender system, Recommender systems, Sales, User centered design, User interfaces, User-centered design
@inproceedings{do_furnish_2024,
title = {“Furnish Your Reality” - Intelligent Mobile AR Application for Personalized Furniture},
author = {M. D. Do and N. Dahlem and M. Paulus and M. Krick and L. Steffny and D. Werth},
editor = {Wei J. and Margetis G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196202642&doi=10.1007%2f978-3-031-60458-4_14&partnerID=40&md5=017510be06c286789867235cfd98bb36},
doi = {10.1007/978-3-031-60458-4_14},
isbn = {03029743 (ISSN); 978-303160457-7 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14737 LNCS},
pages = {196–210},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Today’s online retailers are faced with the challenge of providing a convenient solution for their customers to browse through a wide range of products. Simultaneously, they must meet individual customer needs by creating unique, personalized, one-of-a-kind items. Technological advances in areas such as Augmented Reality (AR), Artificial Intelligence (AI) or sensors (e.g. LiDAR), have the potential to address these challenges by enhancing the customer experience in new ways. One option is to implement “phygital” commerce solutions, which combines the benefits of physical and digital environments to improve the customer journey. This work presents a concept for a mobile AR application that integrates LiDAR and an AI-powered recommender system to create a unique phygital customer journey in the context of furniture shopping. The combination of AR, LiDAR and AI enables an accurate immersive experience along with personalized product designs. This concept aims to deliver benefits in terms of usability, convenience, time savings and user experience, while bridging the gap between mass-produced and personalized products. The new possibilities for merging virtual with physical environments hold immense potential, but this work also highlights challenges for customers as well as for online platform providers and future researchers. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence, Augmented Reality, Augmented reality applications, Electronic commerce, Generative AI, generative artificial intelligence, Human computer interaction, Human computer interfaces, LiDAR, Mobile augmented reality, Mobile human computer interface, Mobile Human Computer Interfaces, Personalized product design, Personalized products, Phygital customer journey, Physical environments, Product design, Recommender system, Recommender systems, Sales, User centered design, User interfaces, User-centered design},
pubstate = {published},
tppubtype = {inproceedings}
}
Yin, Z.; Wang, Y.; Papatheodorou, T.; Hui, P.
Text2VRScene: Exploring the Framework of Automated Text-driven Generation System for VR Experience Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 701–711, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037402-5 (ISBN).
Abstract | Links | BibTeX | Tags: Automated systems, Automation, Digital contents, Generation systems, Generative model, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interaction techniques, Language Model, Natural language processing systems, Text input, User interfaces, Virtual Reality
@inproceedings{yin_text2vrscene_2024,
title = {Text2VRScene: Exploring the Framework of Automated Text-driven Generation System for VR Experience},
author = {Z. Yin and Y. Wang and T. Papatheodorou and P. Hui},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85191431035&doi=10.1109%2fVR58804.2024.00090&partnerID=40&md5=5484a5bc3939d003efe68308f56b15a6},
doi = {10.1109/VR58804.2024.00090},
isbn = {979-835037402-5 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {701–711},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {With the recent development of the Virtual Reality (VR) industry, the increasing number of VR users pushes the demand for the massive production of immersive and expressive VR scenes in related industries. However, creating expressive VR scenes involves the reasonable organization of various digital content to express a coherent and logical theme, which is time-consuming and labor-intensive. In recent years, Large Language Models (LLMs) such as ChatGPT 3.5 and generative models such as stable diffusion have emerged as powerful tools for comprehending natural language and generating digital contents such as text, code, images, and 3D objects. In this paper, we have explored how we can generate VR scenes from text by incorporating LLMs and various generative models into an automated system. To achieve this, we first identify the possible limitations of LLMs for an automated system and propose a systematic framework to mitigate them. Subsequently, we developed Text2VRScene, a VR scene generation system, based on our proposed framework with well-designed prompts. To validate the effectiveness of our proposed framework and the designed prompts, we carry out a series of test cases. The results show that the proposed framework contributes to improving the reliability of the system and the quality of the generated VR scenes. The results also illustrate the promising performance of the Text2VRScene in generating satisfying VR scenes with a clear theme regularized by our well-designed prompts. This paper ends with a discussion about the limitations of the current system and the potential of developing similar generation systems based on our framework. © 2024 IEEE.},
keywords = {Automated systems, Automation, Digital contents, Generation systems, Generative model, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interaction techniques, Language Model, Natural language processing systems, Text input, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
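Text2VRScene, as described above, chains an LLM and generative models behind well-designed prompts so that free text becomes a structured VR scene. A pipeline of this kind typically asks the model for a machine-readable scene specification and validates it before rendering; the sketch below illustrates that prompt-then-parse step in Python. The JSON schema, the call_llm stub, and the validation rules are assumptions for illustration, not the authors' prompts.

import json

SCENE_PROMPT = """You are a VR scene planner.
Theme: {theme}
Return ONLY JSON of the form:
{{"skybox": str, "objects": [{{"name": str, "position": [x, y, z]}}]}}"""

def call_llm(prompt: str) -> str:
    """Stub standing in for an LLM API call; returns a canned response here."""
    return '{"skybox": "sunset_desert", "objects": [{"name": "cactus", "position": [1.0, 0.0, -2.0]}]}'

def generate_scene(theme: str) -> dict:
    raw = call_llm(SCENE_PROMPT.format(theme=theme))
    scene = json.loads(raw)                      # parsing can fail -> retry/repair step
    # Minimal validation so malformed LLM output never reaches the renderer.
    assert isinstance(scene.get("skybox"), str)
    for obj in scene.get("objects", []):
        assert isinstance(obj["name"], str) and len(obj["position"]) == 3
    return scene

print(generate_scene("a quiet desert at dusk"))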
Chheang, V.; Sharmin, S.; Marquez-Hernandez, R.; Patel, M.; Rajasekaran, D.; Caulfield, G.; Kiafar, B.; Li, J.; Kullu, P.; Barmaki, R. L.
Towards Anatomy Education with Generative AI-based Virtual Assistants in Immersive Virtual Reality Environments Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 21–30, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037202-1 (ISBN).
Abstract | Links | BibTeX | Tags: 3-D visualization systems, Anatomy education, Anatomy educations, Cognitive complexity, E-Learning, Embodied virtual assistant, Embodied virtual assistants, Generative AI, generative artificial intelligence, Human computer interaction, human-computer interaction, Immersive virtual reality, Interactive 3d visualizations, Knowledge Management, Medical education, Three dimensional computer graphics, Verbal communications, Virtual assistants, Virtual Reality, Virtual-reality environment
@inproceedings{chheang_towards_2024,
title = {Towards Anatomy Education with Generative AI-based Virtual Assistants in Immersive Virtual Reality Environments},
author = {V. Chheang and S. Sharmin and R. Marquez-Hernandez and M. Patel and D. Rajasekaran and G. Caulfield and B. Kiafar and J. Li and P. Kullu and R. L. Barmaki},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187216893&doi=10.1109%2fAIxVR59861.2024.00011&partnerID=40&md5=33e8744309add5fe400f4f341326505f},
doi = {10.1109/AIxVR59861.2024.00011},
isbn = {979-835037202-1 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {21–30},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality (VR) and interactive 3D visualization systems have enhanced educational experiences and environments, particularly in complicated subjects such as anatomy education. VR-based systems surpass the potential limitations of traditional training approaches in facilitating interactive engagement among students. However, research on embodied virtual assistants that leverage generative artificial intelligence (AI) and verbal communication in the anatomy education context is underrepresented. In this work, we introduce a VR environment with a generative AI-embodied virtual assistant to support participants in responding to varying cognitive complexity anatomy questions and enable verbal communication. We assessed the technical efficacy and usability of the proposed environment in a pilot user study with 16 participants. We conducted a within-subject design for virtual assistant configuration (avatar- and screen-based), with two levels of cognitive complexity (knowledge- and analysis-based). The results reveal a significant difference in the scores obtained from knowledge- and analysis-based questions in relation to avatar configuration. Moreover, results provide insights into usability, cognitive task load, and the sense of presence in the proposed virtual assistant configurations. Our environment and results of the pilot study offer potential benefits and future research directions beyond medical education, using generative AI and embodied virtual agents as customized virtual conversational assistants. © 2024 IEEE.},
keywords = {3-D visualization systems, Anatomy education, Anatomy educations, Cognitive complexity, E-Learning, Embodied virtual assistant, Embodied virtual assistants, Generative AI, generative artificial intelligence, Human computer interaction, human-computer interaction, Immersive virtual reality, Interactive 3d visualizations, Knowledge Management, Medical education, Three dimensional computer graphics, Verbal communications, Virtual assistants, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Jeong, E.; Kim, H.; Park, S.; Yoon, S.; Ahn, J.; Woo, W.
Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts Proceedings Article
In: U., Eck; M., Sra; J., Stefanucci; M., Sugimoto; M., Tatzgern; I., Williams (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 205–208, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics
@inproceedings{jeong_function-adaptive_2024,
title = {Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts},
author = {E. Jeong and H. Kim and S. Park and S. Yoon and J. Ahn and W. Woo},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214379963&doi=10.1109%2fISMAR-Adjunct64951.2024.00050&partnerID=40&md5=7222e0599a7e2aa0adaea38e4b9e13cc},
doi = {10.1109/ISMAR-Adjunct64951.2024.00050},
isbn = {979-833150691-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {205–208},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We propose an algorithm that extracts the most suitable affordances, interaction targets, and corresponding coordinates adaptively from 3D models of various artifacts based on their functional context for efficient authoring of XR content with artifacts. Traditionally, authoring AR scenes to convey artifact context required one-to-one manual work. Our approach leverages a Large Language Model (LLM) to extract interaction types, positions, and subjects based on the artifact's name and usage context. This enables templated XR experience creation, replacing repetitive manual labor. Consequently, our system streamlines the XR authoring process, making it more efficient and scalable. © 2024 IEEE.},
keywords = {3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
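The system above has an LLM infer suitable affordances, interaction targets, and coordinates from an artifact's name and usage context, so XR interactions can be authored from templates instead of by hand. The sketch below illustrates that prompt-and-parse idea in Python; the JSON schema, the ask_llm stub, and the example output are assumptions, not the authors' pipeline.

import json

AFFORDANCE_PROMPT = (
    "Artifact: {name}\n"
    "Usage context: {context}\n"
    "List the most suitable affordances as JSON: "
    '[{{"interaction": str, "target_part": str, "position": [x, y, z]}}]'
)

def ask_llm(prompt: str) -> str:
    """Stub for the LLM call; a fixed plausible answer is returned here."""
    return '[{"interaction": "pour", "target_part": "spout", "position": [0.0, 0.12, 0.05]}]'

def extract_affordances(name: str, context: str) -> list:
    raw = ask_llm(AFFORDANCE_PROMPT.format(name=name, context=context))
    affordances = json.loads(raw)
    # Each well-formed entry can then be bound to a templated XR interaction
    # (grab, pour, open, ...) anchored at the returned position on the 3D model.
    return [a for a in affordances if {"interaction", "target_part", "position"} <= a.keys()]

print(extract_affordances("ceramic teapot", "serving tea in a traditional ceremony"))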
Imamura, S.; Hiraki, H.; Rekimoto, J.
Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 588–590, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037449-0 (ISBN).
Abstract | Links | BibTeX | Tags: Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real- time, Speech recognition, Support systems, Virtual Reality
@inproceedings{imamura_serendipity_2024,
title = {Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model},
author = {S. Imamura and H. Hiraki and J. Rekimoto},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195557406&doi=10.1109%2fVRW62533.2024.00113&partnerID=40&md5=22c393aa1ea99a9e64d382f1b56fb877},
doi = {10.1109/VRW62533.2024.00113},
isbn = {979-835037449-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {588–590},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Group discussions are important for exploring new ideas. One method to support discussions is presenting relevant keywords or images. However, the context of the conversation and information tended not to be taken into account. Therefore, we propose a system that develops group discussions by presenting related information in response to discussions. As a specific example, this study addressed academic discussions among HCI researchers. During brainstorming sessions, the system continuously transcribes the dialogue and generates embedding vectors of the discussions. These vectors are matched against those of existing research articles to identify relevant studies. Then, the system presented relevant studies on the large display with summarizing by an LLM. In a case study, this system had the effect of broadening the topics of discussion and facilitating the acquisition of new knowledge. A larger display area is desirable in terms of information volume and size. Therefore, in addition to large displays, virtual reality environments with headsets could be suitable for this system. © 2024 IEEE.},
keywords = {Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real- time, Speech recognition, Support systems, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
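Serendipity Wall, per the abstract, transcribes the ongoing discussion, embeds the recent dialogue, and matches it against embeddings of existing articles to surface related work on a shared display. Below is a minimal illustrative sketch of that rolling-window matching step in Python/NumPy; the window size, function names, and toy embeddings are assumptions, not the authors' system.

import numpy as np

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))

def discussion_vector(chunk_vectors, window=3):
    """Average the embeddings of the last `window` transcribed utterances."""
    return np.mean(chunk_vectors[-window:], axis=0)

def related_articles(query_vec, articles, k=2):
    """Rank candidate articles by similarity to the current discussion."""
    ranked = sorted(articles, key=lambda a: cosine(query_vec, a["vec"]), reverse=True)
    return [(a["title"], cosine(query_vec, a["vec"])) for a in ranked[:k]]

# Toy data: 3-d "embeddings" of transcribed utterances and candidate papers.
utterances = [np.array([0.2, 0.7, 0.1]), np.array([0.1, 0.8, 0.1]), np.array([0.0, 0.9, 0.1])]
articles = [
    {"title": "Gesture input for large displays", "vec": np.array([0.1, 0.9, 0.0])},
    {"title": "Haptics for surgical training", "vec": np.array([0.8, 0.1, 0.1])},
]

query = discussion_vector(utterances)
for title, score in related_articles(query, articles):
    print(f"{score:.2f}  {title}")   # top matches would be summarized by an LLM and shown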
Weid, M.; Khezrian, N.; Mana, A. P.; Farzinnejad, F.; Grubert, J.
GenDeck: Towards a HoloDeck with Text-to-3D Model Generation Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1188–1189, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037449-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extende Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity
@inproceedings{weid_gendeck_2024,
title = {GenDeck: Towards a HoloDeck with Text-to-3D Model Generation},
author = {M. Weid and N. Khezrian and A. P. Mana and F. Farzinnejad and J. Grubert},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195600251&doi=10.1109%2fVRW62533.2024.00388&partnerID=40&md5=6dab0cc05259fa2dbe0a2b3806e569af},
doi = {10.1109/VRW62533.2024.00388},
isbn = {979-835037449-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1188–1189},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative Artificial Intelligence has the potential to substantially transform the way 3D content for Extended Reality applications is produced. Specifically, the development of text-to-3D and image-to-3D generators with increasing visual fidelity and decreasing computational costs is thriving quickly. Within this work, we present GenDeck, a proof-of-concept application to experience text-to-3D model generation inside an immersive Virtual Reality environment. © 2024 IEEE.},
keywords = {3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extende Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Fuchs, A.; Appel, S.; Grimm, P.
Immersive Spaces for Creativity: Smart Working Environments Proceedings Article
In: A.A., Yunanto; A.D., Ramadhani; Y.R., Prayogi; P.A.M., Putra; M., Ruswiansari; M., Ridwan; F., Gamar; W.M., Rahmawati; M.R., Rusli; F.M., Humaira; A.F., Adila (Ed.): IES - Int. Electron. Symp.: Unlocking Potential Immersive Technol. Live Better Life, Proceeding, pp. 610–617, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 979-835031473-1 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Generative AI, Human computer interaction, Immersive, Innovative approaches, Intelligent systems, Interactive Environments, Language Model, Language processing, Large language model, large language models, Learning algorithms, machine learning, Natural language processing systems, Natural languages, User behaviors, User interfaces, Virtual Reality, Working environment
@inproceedings{fuchs_immersive_2023,
title = {Immersive Spaces for Creativity: Smart Working Environments},
author = {A. Fuchs and S. Appel and P. Grimm},
editor = {Yunanto A.A. and Ramadhani A.D. and Prayogi Y.R. and Putra P.A.M. and Ruswiansari M. and Ridwan M. and Gamar F. and Rahmawati W.M. and Rusli M.R. and Humaira F.M. and Adila A.F.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85173627291&doi=10.1109%2fIES59143.2023.10242458&partnerID=40&md5=6ab1796f68c29d7747574272314a2e9d},
doi = {10.1109/IES59143.2023.10242458},
isbn = {979-835031473-1 (ISBN)},
year = {2023},
date = {2023-01-01},
booktitle = {IES - Int. Electron. Symp.: Unlocking Potential Immersive Technol. Live Better Life, Proceeding},
pages = {610–617},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents an innovative approach to designing an immersive space that dynamically supports users (inter-)action based on users' behavior, voice, and mood, providing a personalized experience. The objective of this research is to explore how a space can communicate with users in a seamless, engaging, and interactive environment. Therefore, it integrates natural language processing (NLP), generative artificial intelligence applications and human computer interaction that utilizes a combination of sensors, microphones, and cameras to collect real-time data on users' behavior, voice, and mood. This data is then processed and analyzed by an intelligent system that employs machine learning algorithms to identify patterns and adapt the environment accordingly. The adaptive features include changes in lighting, sound, and visual elements to facilitate creativity, focus, relaxation, or socialization, depending on the user's topics and emotional state. The paper discusses the technical aspects of implementing such a system. Additionally, it highlights the potential applications of this technology in various domains such as education, entertainment, and workplace settings. In conclusion, the immersive creative space represents a paradigm shift in human-environment interaction, offering a dynamic and personalized space that caters to the diverse needs of users. The research findings suggest that this innovative approach holds great promise for enhancing user experiences, fostering creativity, and promoting overall well-being. © 2023 IEEE.},
keywords = {Artificial intelligence, Generative AI, Human computer interaction, Immersive, Innovative approaches, Intelligent systems, Interactive Environments, Language Model, Language processing, Large language model, large language models, Learning algorithms, machine learning, Natural language processing systems, Natural languages, User behaviors, User interfaces, Virtual Reality, Working environment},
pubstate = {published},
tppubtype = {inproceedings}
}
Vlasov, A. V.
GALA Inspired by Klimt's Art: Text-to-image Processing with Implementation in Interaction and Perception Studies: Library and Case Examples Journal Article
In: Annual Review of CyberTherapy and Telemedicine, vol. 21, pp. 200–205, 2023, ISSN: 15548716 (ISSN).
Abstract | Links | BibTeX | Tags: AIGC, applied research, art library, Article, Artificial intelligence, benchmarking, dataset, GALA, human, Human computer interaction, Image processing, Klimt, library, life satisfaction, neuropoem, Text-to-image, Virtual Reality, Wellbeing
@article{vlasov_gala_2023,
title = {GALA Inspired by Klimt's Art: Text-to-image Processing with Implementation in Interaction and Perception Studies: Library and Case Examples},
author = {A. V. Vlasov},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85182461798&partnerID=40&md5=0c3f5f4214a46db51f46f0092495eb2b},
issn = {15548716 (ISSN)},
year = {2023},
date = {2023-01-01},
journal = {Annual Review of CyberTherapy and Telemedicine},
volume = {21},
pages = {200–205},
abstract = {Objectives: (a) to develop a library with AI generated content (AIGC) based on а combinatorial scheme of prompting for interaction and perception research; (b) to show examples of AIGC implementation. The result is a public library for applied research in the cyber-psychological community (CYPSY). The Generative Art Library Abstractions (GALA) include images (Figures 1-2) based on the text-image model and inspired by the artwork of Gustav Klimt. They can be used for comparative analysis (benchmarking), end-to-end evaluation, and advanced design. This allows experimentation with complex human-computer interaction (HCI) architectures and visual communication systems, and provides creative design support for experimenting. Examples include: interactive perception of positively colored generative images; HCI dialogues using visual language; generated moods in a VR environment; brain-computer interface for HCI. Respectfully, these visualization resources are a valuable example of AIGC for next-generation R&D. Any suggestions from the CYPSY community are welcome. © 2023, Interactive Media Institute. All rights reserved.},
keywords = {AIGC, applied research, art library, Article, Artificial intelligence, benchmarking, dataset, GALA, human, Human computer interaction, Image processing, Klimt, library, life satisfaction, neuropoem, Text-to-image, Virtual Reality, Wellbeing},
pubstate = {published},
tppubtype = {article}
}
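
The "combinatorial scheme of prompting" mentioned in the abstract can be read as enumerating every combination of a few attribute lists into text-to-image prompts. The sketch below illustrates that reading with invented attribute lists and an invented prompt template; it is not the author's actual GALA scheme.

# Illustrative combinatorial prompt builder; the subjects, styles, moods and
# the prompt template are assumptions made for this example.
from itertools import product

subjects = ["a portrait", "an embracing couple", "a garden scene"]
styles = ["in the style of Gustav Klimt", "with gold-leaf ornamentation"]
moods = ["serene", "joyful", "contemplative"]

def build_prompts():
    """Enumerate every subject/style/mood combination as a text prompt."""
    return [f"{subject}, {mood}, {style}"
            for subject, style, mood in product(subjects, styles, moods)]

if __name__ == "__main__":
    prompts = build_prompts()
    print(len(prompts), "prompts; first:", prompts[0])
    # Each prompt would then be passed to a text-to-image model to populate the library.
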
2022
Gaglio, Giuseppe Fulvio; Augello, Agnese; Caggianese, Giuseppe; Gallo, Luigi
Modellazione 3D di avatar per il Serious Game SMILER Technical Report
ICAR-CNR no. RT-ICAR-NA-2022-01, 2022.
Abstract | Links | BibTeX | Tags: Healthcare, Human computer interaction, Touchless interaction, Virtual Reality
@techreport{gaglio_modellazione_2022,
title = {Modellazione 3D di avatar per il Serious Game SMILER},
author = {Giuseppe Fulvio Gaglio and Agnese Augello and Giuseppe Caggianese and Luigi Gallo},
url = {https://intranet.icar.cnr.it/wp-content/uploads/2022/07/RT-ICAR-NA-2022-01.pdf},
year = {2022},
date = {2022-01-01},
number = {RT-ICAR-NA-2022-01},
institution = {ICAR-CNR},
abstract = {This document describes the design and creation of an avatar for the serious game planned within the «SMILER» project - Serious gaMes as emerging e-health Interventions for young people with neurologicaL or rEspiratory disoRders. After a brief introduction to the project, the techniques and tools used for the 3D modelling of the avatar are described.},
keywords = {Healthcare, Human computer interaction, Touchless interaction, Virtual Reality},
pubstate = {published},
tppubtype = {techreport}
}
Augello, Agnese; Infantino, Ignazio; Pilato, Giovanni; Vitale, Gianpaolo
Extending affective capabilities for medical assistive robots Journal Article
In: Cognitive Systems Research, vol. 73, pp. 21–25, 2022, ISSN: 13890417.
Abstract | Links | BibTeX | Tags: Anthropomorphic Robots, Assistive Robots, Emotion Detection, Facial Expressions, Human computer interaction, Human Robot Interaction, Humanoid Robots, Natural Language Processing, Robotics, Wellbeing
@article{augello_extending_2022,
title = {Extending affective capabilities for medical assistive robots},
author = {Agnese Augello and Ignazio Infantino and Giovanni Pilato and Gianpaolo Vitale},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85123046436&doi=10.1016%2fj.cogsys.2021.12.004&partnerID=40&md5=6e76332f7f95333a9ae2e8f11c054622},
doi = {10.1016/j.cogsys.2021.12.004},
issn = {13890417},
year = {2022},
date = {2022-01-01},
journal = {Cognitive Systems Research},
volume = {73},
pages = {21–25},
abstract = {In this work, we discuss methodologies and implementation choices to enable a humanoid robot to estimate patients’ mood and emotions during postoperative home rehabilitation. The approach is modular and it has been implemented into a SoftBank Pepper robotic architecture; however, the approach is general and it can be easily adapted to other robotic platforms. A sample of an interactive session for the detection of the patient's affective state is also reported. © 2022 Elsevier B.V.},
keywords = {Anthropomorphic Robots, Assistive Robots, Emotion Detection, Facial Expressions, Human computer interaction, Human Robot Interaction, Humanoid Robots, Natural Language Processing, Robotics, Wellbeing},
pubstate = {published},
tppubtype = {article}
}
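
As a rough illustration of the modular affect estimation the abstract refers to, the sketch below fuses a facial-expression cue and a spoken utterance into a single valence score. The scoring tables, fusion weights and function names are assumptions for this example only; they do not reproduce the pipeline implemented on the Pepper platform.

# Toy late-fusion mood estimate; every value here is an assumption.
from typing import Dict

def face_valence(expression: str) -> float:
    """Map a detected facial expression label to a valence in [-1, 1]."""
    return {"smile": 0.8, "neutral": 0.0, "frown": -0.7}.get(expression, 0.0)

def speech_valence(utterance: str) -> float:
    """Crude keyword-based sentiment score for the patient's utterance."""
    positive = {"good", "better", "fine"}
    negative = {"pain", "tired", "worse"}
    words = utterance.lower().split()
    return (sum(w in positive for w in words)
            - sum(w in negative for w in words)) / max(len(words), 1)

def estimate_mood(expression: str, utterance: str) -> Dict[str, float]:
    """Fuse the two modalities with fixed (assumed) weights."""
    valence = 0.6 * face_valence(expression) + 0.4 * speech_valence(utterance)
    return {"valence": valence}

if __name__ == "__main__":
    print(estimate_mood("frown", "I am tired and in pain today"))
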
2020
Trifirò, Irene; Augello, Agnese; Maniscalco, Umberto; Pilato, Giovanni; Vella, Filippo; Meo, Rosa
How are you? How a robot can learn to express its own roboceptions Proceedings Article
In: Cristiani, Matteo; Toro, Carlos; Zanni-Merk, Cecilia; Howlett, Robert J.; Jain, Lakhmi C. (Ed.): Procedia Computer Science, pp. 480–489, Elsevier B.V., 2020.
Abstract | Links | BibTeX | Tags: Human computer interaction, Knowledge Representation, Latent Semantic Analysis, Natural Language Processing, Robotics, Semantic Computing, Social Robots
@inproceedings{trifiro_how_2020,
title = {How are you? How a robot can learn to express its own roboceptions},
author = {Irene Trifirò and Agnese Augello and Umberto Maniscalco and Giovanni Pilato and Filippo Vella and Rosa Meo},
editor = {Matteo Cristiani and Carlos Toro and Cecilia Zanni-Merk and Robert J. Howlett and Lakhmi C. Jain},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85093358258&doi=10.1016%2fj.procs.2020.08.050&partnerID=40&md5=d262d3c7852f492f6a871ed2c4b7e941},
doi = {10.1016/j.procs.2020.08.050},
year = {2020},
date = {2020-01-01},
booktitle = {Procedia Computer Science},
volume = {176},
pages = {480–489},
publisher = {Elsevier B.V.},
abstract = {This work is framed on investigating how a robot can learn associations between linguistic elements, such as words or sentences, and its bodily perceptions, that we named “roboceptions”. We discuss the possibility of defining such a process of an association through the interaction with human beings. By interacting with a user, the robot can learn to ascribe a meaning to its roboceptions to express them in natural language. Such a process could then be used by the robot in a verbal interaction to detect some words recalling the previously experimented roboceptions. In this paper, we discuss a Dual-NMT approach to realize such an association. However, it requires adequate training corpus. For this reason, we consider two different phases towards the realization of the system, and we show the results of the first phase, comparing two approaches: one based on the Latent Semantic Analysis paradigm and one based on the Random Indexing methodology.},
keywords = {Human computer interaction, Knowledge Representation, Latent Semantic Analysis, Natural Language Processing, Robotics, Semantic Computing, Social Robots},
pubstate = {published},
tppubtype = {inproceedings}
}
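
The first phase reported in the paper compares Latent Semantic Analysis with Random Indexing for linking words to "roboceptions". The sketch below contrasts the two word representations on a toy corpus pairing sensor-like readings with feeling words; the corpus, vector dimensions and similarity measure are invented for illustration and are unrelated to the authors' corpus or their Dual-NMT system.

# Toy LSA vs. Random Indexing comparison on an invented corpus.
import numpy as np

corpus = [
    "battery low feel tired",
    "motors hot feel warm",
    "battery charged feel energetic",
]
vocab = sorted({w for doc in corpus for w in doc.split()})
w2i = {w: i for i, w in enumerate(vocab)}

# Term-document count matrix (rows = words, columns = documents).
td = np.zeros((len(vocab), len(corpus)))
for j, doc in enumerate(corpus):
    for w in doc.split():
        td[w2i[w], j] += 1

# Latent Semantic Analysis: truncated SVD of the term-document matrix.
U, S, _ = np.linalg.svd(td, full_matrices=False)
k = 2
lsa_vectors = U[:, :k] * S[:k]  # word vectors in the k-dimensional latent space

# Random Indexing: each document gets a random ternary index vector; a word's
# vector is the sum of the index vectors of the documents it occurs in.
rng = np.random.default_rng(0)
dim = 16
index_vectors = rng.choice([-1, 0, 1], size=(len(corpus), dim), p=[0.25, 0.5, 0.25])
ri_vectors = td @ index_vectors

def nearest_neighbour(word: str, vectors: np.ndarray) -> str:
    """Most cosine-similar word other than the query itself."""
    v = vectors[w2i[word]]
    sims = vectors @ v / (np.linalg.norm(vectors, axis=1) * np.linalg.norm(v) + 1e-9)
    return max((w for w in vocab if w != word), key=lambda w: sims[w2i[w]])

print("LSA neighbour of 'tired':", nearest_neighbour("tired", lsa_vectors))
print("RI  neighbour of 'tired':", nearest_neighbour("tired", ri_vectors))
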