AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2025
Dong, Y.
Enhancing Painting Exhibition Experiences with the Application of Augmented Reality-Based AI Video Generation Technology Proceedings Article
In: Zaphiris, P.; Ioannou, A.; Ioannou, A.; Sottilare, R.A.; Schwarz, J.; Rauterberg, M. (Eds.): Lect. Notes Comput. Sci., pp. 256–262, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-76814-9.
Abstract | Links | BibTeX | Tags: 3D modeling, AI-generated art, Art and Technology, Arts computing, Augmented Reality, Augmented reality technology, Digital Exhibition Design, Dynamic content, E-Learning, Education computing, Generation technologies, Interactive computer graphics, Knowledge Management, Multi dimensional, Planning designs, Three dimensional computer graphics, Video contents, Video generation
@inproceedings{dong_enhancing_2025,
title = {Enhancing Painting Exhibition Experiences with the Application of Augmented Reality-Based AI Video Generation Technology},
author = {Y. Dong},
editor = {Zaphiris P. and Ioannou A. and Ioannou A. and Sottilare R.A. and Schwarz J. and Rauterberg M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85213302959&doi=10.1007%2f978-3-031-76815-6_18&partnerID=40&md5=35484f5ed199a831f1a30f265a0d32d5},
doi = {10.1007/978-3-031-76815-6_18},
issn = {0302-9743},
isbn = {978-3-031-76814-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15378 LNCS},
pages = {256–262},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Traditional painting exhibitions often rely on flat presentation methods, such as walls and stands, limiting their impact. Augmented Reality (AR) technology presents an opportunity to transform these experiences by turning static, flat artwork into dynamic, multi-dimensional presentations. However, creating and integrating video or dynamic content can be time-consuming and challenging, requiring meticulous planning, design, and production. In the context of urban renewal and community revitalization, particularly in China’s first-tier cities where real estate development has saturated the market, there is a growing trend to repurpose traditional commercial and office spaces with cultural and artistic exhibitions. These exhibitions not only enhance the spatial quality but also elevate the user experience, making the spaces more competitive. However, these non-traditional exhibition venues often lack the amenities of professional galleries, relying on walls, windows, and corners for displays, and requiring quick setup times. For visitors, who are often office workers or shoppers with limited time, the use of personal mobile devices for interaction is common. WeChat, China’s most widely used mobile application, provides a platform for convenient digital interactive experiences through mini-programs, which can support lightweight AR applications. AI video generation technologies, such as Conditional Generative Adversarial Networks (ControlNet) and Latent Consistency Models (LCM), have seen significant advancements. These technologies now allow for the creation of 3D models and video content from text and images. Tools like Meshy and Pika provide the ability to generate various video styles and offer precise control over video content. New AI video applications like Stable Video further expand the possibilities by rapidly converting static images into dynamic videos, facilitating easy adjustments and edits. This paper explores the application of AR-based AI video generation technology in enhancing the experience of painting exhibitions. By integrating these technologies, traditional paintings can be transformed into interactive, engaging displays that enrich the viewer’s experience. The study demonstrates the potential of these innovations to make art exhibitions more appealing and competitive in various public spaces, thereby improving both artistic expression and audience engagement. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {3D modeling, AI-generated art, Art and Technology, Arts computing, Augmented Reality, Augmented reality technology, Digital Exhibition Design, Dynamic content, E-Learning, Education computing, Generation technologies, Interactive computer graphics, Knowledge Management, Multi dimensional, Planning designs, Three dimensional computer graphics, Video contents, Video generation},
pubstate = {published},
tppubtype = {inproceedings}
}
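The entry above mentions converting static paintings into short video clips with tools such as Stable Video. As a rough, hedged illustration of that image-to-video step (not the authors' WeChat/Meshy/Pika pipeline), the following Python sketch uses the open-source diffusers library; the model id, file names and parameters are assumptions for demonstration only.

import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

# Load an image-to-video diffusion model (model id is an assumption, not from the paper).
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16, variant="fp16",
).to("cuda")

# A scanned painting (hypothetical file) resized to the resolution the model expects.
painting = load_image("painting.jpg").resize((1024, 576))

# Generate a short clip that animates the static artwork, then save it for AR playback.
frames = pipe(painting, decode_chunk_size=8).frames[0]
export_to_video(frames, "painting_animated.mp4", fps=7)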
Shawash, J.; Thibault, M.; Hamari, J.
Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 377–379, Association for Computing Machinery, Inc, 2025, ISBN: 979-840071391-0 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality
@inproceedings{shawash_who_2025,
title = {Who Killed Helene Pumpulivaara?: AI-Assisted Content Creation and XR Implementation for Interactive Built Heritage Storytelling},
author = {J. Shawash and M. Thibault and J. Hamari},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008003446&doi=10.1145%2f3706370.3731703&partnerID=40&md5=bc8a8d221abcf6c560446979fbd06cbc},
doi = {10.1145/3706370.3731703},
isbn = {979-840071391-0 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {377–379},
publisher = {Association for Computing Machinery, Inc},
abstract = {This demo presents "Who Killed Helene Pumpulivaara?", an innovative interactive heritage experience that combines crime mystery narrative with XR technology to address key challenges in digital heritage interpretation. Our work makes six significant contributions: (1) the discovery of a "Historical Uncanny Valley"effect where varying fidelity levels between AI-generated and authentic content serve as implicit markers distinguishing fact from interpretation; (2) an accessible production pipeline combining mobile photography with AI tools that democratizes XR heritage creation for resource-limited institutions; (3) a spatial storytelling approach that effectively counters decontextualization in digital heritage; (4) a multi-platform implementation strategy across web and VR environments; (5) a practical model for AI-assisted heritage content creation balancing authenticity with engagement; and (6) a pathway toward spatial augmented reality for future heritage interpretation. Using the historic Finlayson Factory in Tampere, Finland as a case study, our implementation demonstrates how emerging technologies can enrich the authenticity of heritage experiences, fostering deeper emotional connections between visitors and the histories embedded in place. © 2025 Copyright held by the owner/author(s).},
keywords = {Artificial intelligence, Augmented Reality, Built heritage, Content creation, Digital heritage, Digital Interpretation, Extended reality, Human computer interaction, Human engineering, Industrial Heritage, Interactive computer graphics, Interactive computer systems, Mobile photographies, Narrative Design, Narrative designs, Production pipelines, Uncanny valley, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Shen, Y.; Li, B.; Huang, J.; Wang, Z.
GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1292–1293, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
Abstract | Links | BibTeX | Tags: 3D authoring, 3D modeling, Digital replicas, Gaussian distribution, Gaussian Splatting editing, Gaussians, Graphical user interfaces, High quality, Immersive, Immersive environment, Interactive computer graphics, Rendering (computer graphics), Rendering pipelines, Splatting, Three dimensional computer graphics, User profile, Virtual Reality, Virtual reality user interface, Virtualization, VR user interface
@inproceedings{shen_gaussianshopvr_2025,
title = {GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR},
author = {Y. Shen and B. Li and J. Huang and Z. Wang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005138672&doi=10.1109%2FVRW66409.2025.00292&partnerID=40&md5=2290016d250649f8d7f262212b1f59cb},
doi = {10.1109/VRW66409.2025.00292},
isbn = {9798331514846 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1292–1293},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality (VR) applications require massive high-quality 3D assets to create immersive environments. Generating mesh-based 3D assets typically involves a significant amount of manpower and effort, which makes VR applications less accessible. 3D Gaussian Splatting (3DGS) has attracted much attention for its ability to quickly create digital replicas of real-life scenes and its compatibility with traditional rendering pipelines. However, it remains a challenge to edit 3DGS in a flexible and controllable manner. We propose GaussianShopVR, a system that leverages VR user interfaces to specify target areas to achieve flexible and controllable editing of reconstructed 3DGS. In addition, selected areas can provide 3D information to generative AI models to facilitate the editing. GaussianShopVR integrates object hierarchy management while keeping the backpropagated gradient flow to allow local editing with context information. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D authoring, 3D modeling, Digital replicas, Gaussian distribution, Gaussian Splatting editing, Gaussians, Graphical user interfaces, High quality, Immersive, Immersive environment, Interactive computer graphics, Rendering (computer graphics), Rendering pipelines, Splatting, Three dimensional computer graphics, User profile, Virtual Reality, Virtual reality user interface, Virtualization, VR user interface},
pubstate = {published},
tppubtype = {inproceedings}
}
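GaussianShopVR's key idea, as described above, is to restrict edits of a reconstructed 3D Gaussian Splatting scene to a user-selected region while keeping the backpropagated gradient flow. A minimal PyTorch sketch of that general idea follows; it is not the authors' implementation, and the toy scene, selection box and colour-matching loss are assumptions.

import torch

# Toy stand-in for a reconstructed 3DGS scene: per-Gaussian centres (fixed) and colours (editable).
positions = torch.randn(10_000, 3)
colors = torch.rand(10_000, 3, requires_grad=True)

# Region the user selected in VR (hypothetical axis-aligned box in scene units).
box_min = torch.tensor([-0.5, -0.5, -0.5])
box_max = torch.tensor([0.5, 0.5, 0.5])
inside = ((positions > box_min) & (positions < box_max)).all(dim=1)

# Zero the gradients of Gaussians outside the selection, so backpropagation only edits
# the target area while the loss is still computed with the rest of the scene as context.
colors.register_hook(lambda grad: grad * inside.unsqueeze(1).float())

optimizer = torch.optim.Adam([colors], lr=1e-2)
edit_target = torch.zeros_like(colors)        # stand-in editing objective
loss = ((colors - edit_target) ** 2).mean()   # a real system would use a rendering loss
loss.backward()
optimizer.step()                              # only colours inside the selected box change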
Logothetis, I.; Diakogiannis, K.; Vidakis, N.
Interactive Learning Through Conversational Avatars and Immersive VR: Enhancing Diabetes Education and Self-Management Proceedings Article
In: Fang, X. (Ed.): Lect. Notes Comput. Sci., pp. 415–429, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-3-031-92577-1.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Chronic disease, Computer aided instruction, Diabetes Education, Diagnosis, E-Learning, Education management, Engineering education, Gamification, Immersive virtual reality, Interactive computer graphics, Interactive learning, Large population, Learning systems, NUI, Self management, Serious game, Serious games, simulation, Virtual Reality
@inproceedings{logothetis_interactive_2025,
title = {Interactive Learning Through Conversational Avatars and Immersive VR: Enhancing Diabetes Education and Self-Management},
author = {I. Logothetis and K. Diakogiannis and N. Vidakis},
editor = {Fang X.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008266480&doi=10.1007%2f978-3-031-92578-8_27&partnerID=40&md5=451274dfa3ef0b3f1b39c7d5a665ee3b},
doi = {10.1007/978-3-031-92578-8_27},
issn = {0302-9743},
isbn = {978-3-031-92577-1},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15816 LNCS},
pages = {415–429},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Diabetes is a chronic disease affecting a large population of the world. Education and self-management of diabetes are crucial. Technologies such as Virtual Reality (VR) have presented promising results in healthcare education, while studies suggest that Artificial Intelligence (AI) can help in learning by further engaging the learner. This study aims to educate users on the entire routine of managing diabetes. The serious game utilizes VR for realistic interaction with diabetes tools and generative AI through a conversational avatar that acts as an assistant instructor. In this way, it allows users to practice diagnostic and therapeutic interventions in a controlled virtual environment, helping to build their understanding and confidence in diabetes management. To measure the effects of the proposed serious game, presence, and perceived agency were measured. Preliminary results indicate that this setup aids in the engagement and immersion of learners, while the avatar can provide helpful information during gameplay. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Artificial intelligence, Chronic disease, Computer aided instruction, Diabetes Education, Diagnosis, E-Learning, Education management, Engineering education, Gamification, Immersive virtual reality, Interactive computer graphics, Interactive learning, Large population, Learning systems, NUI, Self management, Serious game, Serious games, simulation, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
López-Ozieblo, R.; Jiandong, D. S.; Techanamurthy, U.; Geng, H.; Nurgissayeva, A.
Enhancing AI Literacy through Immersive VR: Evaluating Pedagogical Design and GenAI Integration Proceedings Article
In: pp. 718–723, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331511661 (ISBN).
Abstract | Links | BibTeX | Tags: AI Literacy, Artificial intelligence, Behavioral Research, Classlet platform, E-Learning, Educational settings, Emerging technologies, Engineering education, Experiential learning, GenAI avatar, GenAI Avatars, Immersive virtual reality, Interactive computer graphics, Pedagogical designs, Pedagogical Innovation, Regression analysis, Teaching, Virtual Reality, Virtual-reality environment
@inproceedings{lopez-ozieblo_enhancing_2025,
title = {Enhancing AI Literacy through Immersive VR: Evaluating Pedagogical Design and GenAI Integration},
author = {R. López-Ozieblo and D. S. Jiandong and U. Techanamurthy and H. Geng and A. Nurgissayeva},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013538409&doi=10.1109%2FCSTE64638.2025.11092268&partnerID=40&md5=a963d754ceaa73f360d9678d346a7686},
doi = {10.1109/CSTE64638.2025.11092268},
isbn = {9798331511661 (ISBN)},
year = {2025},
date = {2025-01-01},
pages = {718–723},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As AI continues to reshape industries, enhancing AI literacy is crucial for empowering learners to interact confidently and critically with emerging technologies. Virtual Reality (VR) offers a way to bridge theoretical knowledge with practical application but integrating VR into educational settings struggles with technical and pedagogical challenges. This study investigates how immersive VR environments can be optimized to enhance AI literacy and identifies key factors driving students' intent to adopt these technologies. Using Classlet - a VR platform that integrates interactive multimodal tasks, narrative-driven activities, and GenAI avatar interactions - we created a virtual office where learners engaged in research tasks and simulation scenarios with instructor-customized prompts. Our mixed-methods approach, involving participants from Hong Kong and Malaysia, focused on AI literacy within contexts such as Fast Fashion and European society. Regression analyses revealed that overall intent is strongly predicted by composite enjoyment, perceived performance, and behavioral control (R2 = 0.803). Post-AI literacy self-assessments were predicted by AI self-efficacy and enjoyment ( R2 = 0.421). However, female participants reported lower scores on AI efficacy (p = 0.042), suggesting baseline differences that warrant further investigation. Qualitative insights show the immersive and engaging nature of the experience while highlighting the need for further GenAI prompt designs for elaborative and bidirectional interactions. © 2025 Elsevier B.V., All rights reserved.},
keywords = {AI Literacy, Artificial intelligence, Behavioral Research, Classlet platform, E-Learning, Educational settings, Emerging technologies, Engineering education, Experiential learning, GenAI avatar, GenAI Avatars, Immersive virtual reality, Interactive computer graphics, Pedagogical designs, Pedagogical Innovation, Regression analysis, Teaching, Virtual Reality, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
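The regression analysis reported above (overall intent predicted by enjoyment, perceived performance and behavioral control, R2 = 0.803) can be reproduced in form with ordinary least squares. A minimal Python sketch follows; the CSV file and column names are assumptions, not the study's instrument.

import pandas as pd
import statsmodels.api as sm

# Hypothetical survey export; the column names are placeholders.
df = pd.read_csv("classlet_survey.csv")

X = sm.add_constant(df[["enjoyment", "perceived_performance", "behavioral_control"]])
y = df["intent_to_adopt"]

model = sm.OLS(y, X).fit()
print(model.summary())  # reports coefficients and R-squared, analogous to the figures above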
Mendoza, A. P.; Barrios Quiroga, K. J.; Solano Celis, S. D.; Quintero M., C. G.
NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study Journal Article
In: IEEE Access, vol. 13, pp. 141461–141483, 2025, ISSN: 21693536 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality
@article{mendoza_naia_2025,
title = {NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study},
author = {A. P. Mendoza and K. J. Barrios Quiroga and S. D. Solano Celis and C. G. Quintero M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013598763&doi=10.1109%2FACCESS.2025.3597565&partnerID=40&md5=7ad6b037cfedb943fc026642c4854284},
doi = {10.1109/ACCESS.2025.3597565},
issn = {21693536 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {141461–141483},
abstract = {Virtual assistants have become essential tools for improving productivity and efficiency in various domains. This paper presents NAIA (Nimble Artificial Intelligence Assistant), an advanced multi-role and multi-task virtual assistant enhanced with artificial intelligence, designed to serve a university community case study. The system integrates AI technologies including Large Language Models (LLM), Computer Vision, and voice processing to create an immersive and efficient interaction through animated digital avatars. NAIA features five specialized roles: researcher, receptionist, personal skills trainer, personal assistant, and university guide, each equipped with specific capabilities to support different aspects of academic life. The system’s Computer Vision capabilities enable it to comment on users’ physical appearance and environment, enriching the interaction. Through natural language processing and voice interaction, NAIA aims to improve productivity and efficiency within the university environment while providing personalized assistance through a ubiquitous platform accessible across multiple devices. NAIA is evaluated through a user experience survey involving 30 participants with different demographic characteristics, this is the most accepted way by the community to evaluate this type of solution. Participants give their feedback after using one role of NAIA after using it for 30 minutes. The experiment showed that 90% of the participants considered NAIA-assisted tasks of higher quality and, on average, NAIA has a score of 4.27 out of 5 on user satisfaction. Participants particularly appreciated the assistant’s visual recognition, natural conversation flow, and user interaction capabilities. Results demonstrate NAIA’s capabilities and effectiveness across the five roles. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
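NAIA, as described above, exposes five specialized roles on top of an LLM. A minimal sketch of role-based prompting with a chat-completion API follows; the role wording, model name and client setup are assumptions, and the real system additionally combines computer vision, voice processing and animated avatars.

from openai import OpenAI

# Role-specific system prompts; the five roles mirror the paper, the wording is invented.
ROLES = {
    "researcher": "You help find and summarise academic literature.",
    "receptionist": "You answer visitors' questions about the university.",
    "skills_trainer": "You coach the user on presentation and soft skills.",
    "personal_assistant": "You manage reminders and simple planning tasks.",
    "university_guide": "You explain programmes, buildings and services.",
}

client = OpenAI()  # assumes OPENAI_API_KEY is set; the model name below is an assumption

def ask(role: str, user_message: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": ROLES[role]},
            {"role": "user", "content": user_message},
        ],
    )
    return response.choices[0].message.content

print(ask("university_guide", "Where is the engineering library?"))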
Tovias, E.; Wu, L.
Leveraging Virtual Reality and AI for Enhanced Vocabulary Learning Proceedings Article
In: pp. 308, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331521646 (ISBN).
Abstract | Links | BibTeX | Tags: Avatar, Avatars, E-Learning, Immersive, Interactive computer graphics, Interactive learning, Language Model, Large language model, large language models, Learning experiences, Real time interactions, Text-based methods, user experience, Users' experiences, Virtual environments, Virtual Reality, Vocabulary learning
@inproceedings{tovias_leveraging_2025,
title = {Leveraging Virtual Reality and AI for Enhanced Vocabulary Learning},
author = {E. Tovias and L. Wu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017563813&doi=10.1109%2FICHMS65439.2025.11154184&partnerID=40&md5=7b79f93d6f8ec222b25a4bfeac408d3a},
doi = {10.1109/ICHMS65439.2025.11154184},
isbn = {9798331521646 (ISBN)},
year = {2025},
date = {2025-01-01},
pages = {308},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This study examines the integration of virtual reality (VR) and Artificial Intelligence (AI) to create more immersive, interactive learning experiences. By combining VR's engaging user experience with AI-powered avatars, this research explores how these tools can enhance vocabulary learning compared to traditional text-based methods. Utilizing a Meta Quest 3 headset, Unity for development, and OpenAI's API & ElevenLabs for dynamic dialogues, this system offers personalized, real-time interactions (Fig. 1). The integration of these technologies fosters a bright future, driving significant advancements in the development of highly immersive and effective learning environments. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Avatar, Avatars, E-Learning, Immersive, Interactive computer graphics, Interactive learning, Language Model, Large language model, large language models, Learning experiences, Real time interactions, Text-based methods, user experience, Users' experiences, Virtual environments, Virtual Reality, Vocabulary learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Vachha, C.; Kang, Y.; Dive, Z.; Chidambaram, A.; Gupta, A.; Jun, E.; Hartmann, B.
Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958 (ISBN); 9798400713941 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface
@inproceedings{vachha_dreamcrafter_2025,
title = {Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs},
author = {C. Vachha and Y. Kang and Z. Dive and A. Chidambaram and A. Gupta and E. Jun and B. Hartmann},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005725679&doi=10.1145%2F3706598.3714312&partnerID=40&md5=57926f0265e5174a774a67d9013bb2cb},
doi = {10.1145/3706598.3714312},
isbn = {9798400713958 (ISBN); 9798400713941 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Authoring 3D scenes is a central task for spatial computing applications. Competing visions for lowering existing barriers are (1) focus on immersive, direct manipulation of 3D content or (2) leverage AI techniques that capture real scenes (3D Radiance Fields such as, NeRFs, 3D Gaussian Splatting) and modify them at a higher level of abstraction, at the cost of high latency. We unify the complementary strengths of these approaches and investigate how to integrate generative AI advances into real-time, immersive 3D Radiance Field editing. We introduce Dreamcrafter, a VR-based 3D scene editing system that: (1) provides a modular architecture to integrate generative AI algorithms; (2) combines different levels of control for creating objects, including natural language and direct manipulation; and (3) introduces proxy representations that support interaction during high-latency operations. We contribute empirical findings on control preferences and discuss how generative AI interfaces beyond text input enhance creativity in scene editing and world building. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface},
pubstate = {published},
tppubtype = {inproceedings}
}
Cao, X.; Ju, K. P.; Li, C.; Jain, D.
SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene? Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 9798400713958 (ISBN); 9798400713941 (ISBN).
Abstract | Links | BibTeX | Tags: 3D application, 3D modeling, 3D scenes, Accessibility, BLV, DHH, Discrete event simulation, Generative AI, Generative tools, Interactive computer graphics, One dimensional, Runtimes, Three dimensional computer graphics, Video-games, Virtual 3d scene, virtual 3D scenes, Virtual environments, Virtual Reality
@inproceedings{cao_scenegena11y_2025,
title = {SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene?},
author = {X. Cao and K. P. Ju and C. Li and D. Jain},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005772656&doi=10.1145%2F3706599.3720265&partnerID=40&md5=163b27affb24972a076fcd3beac3defc},
doi = {10.1145/3706599.3720265},
isbn = {9798400713958 (ISBN); 9798400713941 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {With the popularity of virtual 3D applications, from video games to educational content and virtual reality scenarios, the accessibility of 3D scene information is vital to ensure inclusive and equitable experiences for all. Previous work include information substitutions like audio description and captions, as well as personalized modifications, but they could only provide predefined accommodations. In this work, we propose SceneGenA11y, a system that responds to the user’s natural language prompts to improve accessibility of a 3D virtual scene in runtime. The system primes LLM agents with accessibility-related knowledge, allowing users to explore the scene and perform verifiable modifications to improve accessibility. We conducted a preliminary evaluation of our system with three blind and low-vision people and three deaf and hard-of-hearing people. The results show that our system is intuitive to use and can successfully improve accessibility. We discussed usage patterns of the system, potential improvements, and integration into apps. We ended with highlighting plans for future work. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D application, 3D modeling, 3D scenes, Accessibility, BLV, DHH, Discrete event simulation, Generative AI, Generative tools, Interactive computer graphics, One dimensional, Runtimes, Three dimensional computer graphics, Video-games, Virtual 3d scene, virtual 3D scenes, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
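SceneGenA11y, as summarised above, lets users request accessibility changes in natural language and applies only verifiable modifications at runtime. A Python sketch of that verify-then-apply loop follows; call_llm, scene.apply and the allowed action names are hypothetical placeholders, not the paper's API.

import json

# Modifications the runtime is willing to apply; anything else returned by the model is rejected.
ALLOWED = {"set_caption_size", "add_audio_description", "boost_object_contrast"}

def call_llm(prompt: str) -> str:
    """Hypothetical helper: forward the prompt to any chat LLM and return its raw text reply."""
    raise NotImplementedError

def handle_request(user_prompt: str, scene) -> None:
    reply = call_llm(
        "Return JSON {\"action\": ..., \"params\": {...}} choosing one accessibility "
        f"modification from {sorted(ALLOWED)} for this request: {user_prompt}"
    )
    request = json.loads(reply)
    if request.get("action") not in ALLOWED:   # verification step before touching the scene
        raise ValueError(f"Unsupported modification: {request}")
    scene.apply(request["action"], **request.get("params", {}))  # hypothetical scene API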
Leininger, P.; Weber, C. J.; Rothe, S.
Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 60–78, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: 3-D environments, 3D reconstruction, 3D Scene Reconstruction, 3d scenes reconstruction, AI-generated 3d environment, AI-Generated 3D Environments, Computer interaction, Creative Collaboration, Creatives, Digital content creation, Digital Content Creation., Filmmaking workflow, Filmmaking Workflows, Gaussian distribution, Gaussian Splatting, Gaussians, Generative AI, Graphical user interface, Graphical User Interface (GUI), Graphical user interfaces, Human computer interaction, human-computer interaction, Human-Computer Interaction (HCI), Immersive, Immersive Storytelling, Interactive computer graphics, Interactive computer systems, Interactive media, Mesh generation, Previsualization, Real-Time Rendering, Splatting, Three dimensional computer graphics, Virtual production, Virtual Production (VP), Virtual Reality, Work-flows
@inproceedings{leininger_understanding_2025,
title = {Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals},
author = {P. Leininger and C. J. Weber and S. Rothe},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007976841&doi=10.1145%2F3706370.3727853&partnerID=40&md5=e74b2fa9e7644ddee1b51d3fc34b4af2},
doi = {10.1145/3706370.3727853},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {60–78},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual production (VP) is transforming filmmaking by integrating real-time digital elements with live-action footage, offering new creative possibilities and streamlined workflows. While industry experts recognize AI's potential to revolutionize VP, its practical applications and value across different production phases and user groups remain underexplored. Building on initial research into generative and data-driven approaches, this paper presents the first systematic pilot study evaluating three types of AI-generated 3D environments - Depth Mesh, 360° Panoramic Meshes, and Gaussian Splatting - through the participation of 15 filmmaking professionals from diverse roles. Unlike commonly used 2D AI-generated visuals, our approach introduces navigable 3D environments that offer greater control and flexibility, aligning more closely with established VP workflows. Through expert interviews and literature research, we developed evaluation criteria to assess their usefulness beyond concept development, extending to previsualization, scene exploration, and interdisciplinary collaboration. Our findings indicate that different environments cater to distinct production needs, from early ideation to detailed visualization. Gaussian Splatting proved effective for high-fidelity previsualization, while 360° Panoramic Meshes excelled in rapid concept ideation. Despite their promise, challenges such as limited interactivity and customization highlight areas for improvement. Our prototype, EnVisualAIzer, built in Unreal Engine 5, provides an accessible platform for diverse filmmakers to engage with AI-generated environments, fostering a more inclusive production process. By lowering technical barriers, these environments have the potential to make advanced VP tools more widely available. This study offers valuable insights into the evolving role of AI in VP and sets the stage for future research and development. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3-D environments, 3D reconstruction, 3D Scene Reconstruction, 3d scenes reconstruction, AI-generated 3d environment, AI-Generated 3D Environments, Computer interaction, Creative Collaboration, Creatives, Digital content creation, Digital Content Creation., Filmmaking workflow, Filmmaking Workflows, Gaussian distribution, Gaussian Splatting, Gaussians, Generative AI, Graphical user interface, Graphical User Interface (GUI), Graphical user interfaces, Human computer interaction, human-computer interaction, Human-Computer Interaction (HCI), Immersive, Immersive Storytelling, Interactive computer graphics, Interactive computer systems, Interactive media, Mesh generation, Previsualization, Real-Time Rendering, Splatting, Three dimensional computer graphics, Virtual production, Virtual Production (VP), Virtual Reality, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Masasi de Oliveira, E. A.; Sousa, R. T.; Bastos, A. A.; Martins de Freitas Cintra, L.; Galvão Filho, A. R. G.
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi de Oliveira and R. T. Sousa and A. A. Bastos and L. Martins de Freitas Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2F3706370.3731643&partnerID=40&md5=47a47f3408a0e6cb35c16dd6101a15b0},
doi = {10.1145/3706370.3731643},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
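The core of SA-RAG, as summarised above, is retrieval conditioned on what the visitor is looking at. A minimal Python sketch of gaze-conditioned retrieval over a toy exhibit corpus follows; the encoder model, corpus and labels are assumptions, and the real system tracks visual attention inside an Unreal Engine VR museum.

import numpy as np
from sentence_transformers import SentenceTransformer

# Toy exhibit corpus; in the paper, retrieval is driven by real-time gaze targets.
DOCS = {
    "sunflowers": "Oil on canvas, 1888, part of the painter's Arles series of still lifes.",
    "amphora": "Attic black-figure amphora, ca. 530 BC, depicting a mythological scene.",
}

encoder = SentenceTransformer("all-MiniLM-L6-v2")  # model choice is an assumption
doc_ids = list(DOCS)
doc_vecs = encoder.encode([DOCS[d] for d in doc_ids], normalize_embeddings=True)

def retrieve(gazed_exhibit_label: str, k: int = 1) -> list[str]:
    """Return the k most relevant descriptions for whatever the visitor is looking at."""
    q = encoder.encode([gazed_exhibit_label], normalize_embeddings=True)
    scores = (doc_vecs @ q.T).ravel()
    return [DOCS[doc_ids[i]] for i in np.argsort(scores)[::-1][:k]]

context = retrieve("a large painting of sunflowers")
prompt = "Answer the visitor using only this context:\n" + "\n".join(context)
# `prompt` would then be sent to an LLM together with the visitor's spoken question.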
Coronado, A.; Carvalho, S. T.; Berretta, L. Oliveira
See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 451–457, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people
@inproceedings{coronado_see_2025,
title = {See Through My Eyes: Using Multimodal Large Language Model for Describing Rendered Environments to Blind People},
author = {A. Coronado and S. T. Carvalho and L. Oliveira Berretta},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007991842&doi=10.1145%2F3706370.3731641&partnerID=40&md5=7eb509d2ac724af78ec04575a8c71085},
doi = {10.1145/3706370.3731641},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {451–457},
publisher = {Association for Computing Machinery, Inc},
abstract = {Extended Reality (XR) is quickly expanding "as the next major technology wave in personal computing". Nevertheless, this expansion and adoption could also exclude certain disabled users, particularly people with visual impairment (VIP). According to the World Health Organization (WHO) in their 2019 publication, there were at least 2.2 billion people with visual impairment, a number that is also estimated to have increased in recent years. Therefore, it is important to include disabled users, especially visually impaired people, in the design of Head-Mounted Displays and Extended Reality environments. Indeed, this objective can be pursued by incorporating Multimodal Large Language Model (MLLM) technology, which can assist visually impaired people. As a case study, this study employs different prompts that result in environment descriptions from an MLLM integrated into a virtual reality (VR) escape room. Therefore, six potential prompts were engineered to generate valuable outputs for visually impaired users inside a VR environment. These outputs were evaluated using the G-Eval, and VIEScore metrics. Even though, the results show that the prompt patterns provided a description that aligns with the user's point of view, it is highly recommended to evaluate these outputs through "expected outputs"from Orientation and Mobility Specialists, and Sighted Guides. Furthermore, the subsequent step in the process is to evaluate these outputs by visually impaired people themselves to identify the most effective prompt pattern. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Accessibility, Behavioral Research, Blind, Blind people, Helmet mounted displays, Human engineering, Human rehabilitation equipment, Interactive computer graphics, Interactive computer systems, Language Model, LLM, Multi-modal, Rendered environment, rendered environments, Spatial cognition, Virtual Reality, Vision aids, Visual impairment, Visual languages, Visually impaired people},
pubstate = {published},
tppubtype = {inproceedings}
}
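The study above sends rendered VR frames plus engineered prompts to a multimodal LLM to obtain environment descriptions for blind users. A hedged Python sketch of that single step follows; the model name, file name and prompt wording are assumptions, not one of the paper's six prompt patterns.

import base64
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set; the model name below is an assumption

def describe_frame(image_path: str, prompt: str) -> str:
    """Send one rendered frame plus a prompt pattern to a multimodal LLM and return its description."""
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
            ],
        }],
    )
    return response.choices[0].message.content

print(describe_frame("escape_room_frame.png",
                     "Describe this room for a blind player: layout, obstacles and exits."))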
Peter, K.; Makosa, I.; Auala, S.; Ndjao, L.; Maasz, D.; Mbinge, U.; Winschiers-Theophilus, H.
Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 418–423, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: 3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality
@inproceedings{peter_co-creating_2025,
title = {Co-creating a VR Narrative Experience of Constructing a Food Storage Following OvaHimba Traditional Practices},
author = {K. Peter and I. Makosa and S. Auala and L. Ndjao and D. Maasz and U. Mbinge and H. Winschiers-Theophilus},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007984089&doi=10.1145%2F3706370.3731652&partnerID=40&md5=10c67ae9849b2b9093515e04828d423d},
doi = {10.1145/3706370.3731652},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {418–423},
publisher = {Association for Computing Machinery, Inc},
abstract = {As part of an attempt to co-create a comprehensive virtual environment in which one can explore and learn traditional practices of the OvaHimba people, we have co-designed and implemented a VR experience to construct a traditional food storage. In collaboration with the OvaHimba community residing in Otjisa, we have explored culturally valid representations of the process. We have further investigated different techniques such as photogrammetry, generative AI and manual methods to develop 3D models. Our findings highlight the importance of context, process, and community-defined relevance in co-design, the fluidity of cultural realities and virtual representations, as well as technical challenges. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D Modelling, 3D models, 3d-modeling, Co-designs, Community-based, Community-Based Co-Design, Computer aided design, Cultural heritage, Cultural heritages, Food storage, Human computer interaction, Human engineering, Indigenous Knowledge, Information Systems, Interactive computer graphics, Interactive computer systems, IVR, Namibia, OvaHimba, Ovahimbum, Photogrammetry, Sustainable development, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Wan, X.; Luo, Y.
A Study of Anti-war Memorial Hall of Leshan City based on Virtual Museum Technology Proceedings Article
In: pp. 493–497, Association for Computing Machinery, Inc, 2025, ISBN: 9798400712432 (ISBN).
Abstract | Links | BibTeX | Tags: 3d modeling technologies, 3D reconstruction, Anti-war, Artificial intelligence, Augmented Reality, Digital researches, Historic Preservation, Human engineering, Interactive computer graphics, Knowledge graph, Knowledge graphs, Language Model, Localization and mappings, Metaverses, Model knowledge, Museum technology, Museums, Restoration, Three dimensional computer graphics, Virtual museum, Virtual Reality
@inproceedings{wan_study_2025,
title = {A Study of Anti-war Memorial Hall of Leshan City based on Virtual Museum Technology},
author = {X. Wan and Y. Luo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011594066&doi=10.1145%2F3732801.3732887&partnerID=40&md5=ac25032b46edf5a9d5949b8ceb5a41e1},
doi = {10.1145/3732801.3732887},
isbn = {9798400712432 (ISBN)},
year = {2025},
date = {2025-01-01},
pages = {493–497},
publisher = {Association for Computing Machinery, Inc},
abstract = {This study adopted augmented reality (AR), virtual reality (VR), artificial intelligence (AI), metaverse (META), large language models (LLM), knowledge graphs (KG), and synchronous localization and mapping (SLAM) technologies to create a virtual museum (VM) with the theme of the history of Leshan anti-Japanese war. Its aim is to enrich the digital research of this area, and to restore and vividly reflect the significance of Leshan’s contributions during the anti-Japanese war. This study combines 3D modeling technology with historical scene restoration to create a method of field investigation of local history and anti-Japanese war sites, which constructed six unique exhibition areas to describe historical events. The virtual museum integrates lots of historical sites, stories, achievements, and cultural aspects into a unique cultural interaction center. Through diverse technological approaches, this study aims to enable the public to contemplate history, cultivate national pride and patriotism, and deliver novel strategies for the digital protection of historical heritage. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3d modeling technologies, 3D reconstruction, Anti-war, Artificial intelligence, Augmented Reality, Digital researches, Historic Preservation, Human engineering, Interactive computer graphics, Knowledge graph, Knowledge graphs, Language Model, Localization and mappings, Metaverses, Model knowledge, Museum technology, Museums, Restoration, Three dimensional computer graphics, Virtual museum, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
El Saddik, A.; Ahmad, J.; Khan, M.; Abouzahir, S.; Gueaieb, W.
Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content Journal Article
In: ACM Transactions on Multimedia Computing, Communications and Applications, vol. 21, no. 7, pp. 1–43, 2025, ISSN: 15516857 (ISSN); 15516865 (ISSN), (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality
@article{el_saddik_unleashing_2025,
title = {Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content},
author = {A. El Saddik and J. Ahmad and M. Khan and S. Abouzahir and W. Gueaieb},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011860002&doi=10.1145%2F3713075&partnerID=40&md5=20064843ced240c42e9353d747672cb3},
doi = {10.1145/3713075},
issn = {15516857 (ISSN); 15516865 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications and Applications},
volume = {21},
number = {7},
pages = {1–43},
abstract = {The metaverse presents an emerging creative expression and collaboration frontier where generative artificial intelligence (GenAI) can play a pivotal role with its ability to generate multimodal content from simple prompts. These prompts allow the metaverse to interact with GenAI, where context information, instructions, input data, or even output indications constituting the prompt can come from within the metaverse. However, their integration poses challenges regarding interoperability, lack of standards, scalability, and maintaining a high-quality user experience. This article explores how GenAI can productively assist in enhancing creativity within the contexts of the metaverse and unlock new opportunities. We provide a technical, in-depth overview of the different generative models for image, video, audio, and 3D content within the metaverse environments. We also explore the bottlenecks, opportunities, and innovative applications of GenAI from the perspectives of end users, developers, service providers, and AI researchers. This survey commences by highlighting the potential of GenAI for enhancing the metaverse experience through dynamic content generation to populate massive virtual worlds. Subsequently, we shed light on the ongoing research practices and trends in multimodal content generation, enhancing realism and creativity and alleviating bottlenecks related to standardization, computational cost, privacy, and safety. Last, we share insights into promising research directions toward the integration of GenAI with the metaverse for creative enhancement, improved immersion, and innovative interactive applications. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Huang, J.; Wang, C.; Li, L.; Huang, C.; Dai, Q.; Xu, W.
BuildingBlock: A Hybrid Approach for Structured Building Generation Proceedings Article
In: Spencer, S. N. (Ed.): Association for Computing Machinery, Inc, 2025, ISBN: 9798400715402 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3d-modeling, Architecture, benchmarking, Building blockes, Construction, Data-driven model, Generative 3D Modeling, Hierarchical systems, Hybrid approach, Interactive computer graphics, Language Model, Layout generations, Procedural & Data-driven Modeling, Procedural content generations, Three dimensional computer graphics
@inproceedings{huang_buildingblock_2025,
title = {BuildingBlock: A Hybrid Approach for Structured Building Generation},
author = {J. Huang and C. Wang and L. Li and C. Huang and Q. Dai and W. Xu},
editor = {S. N. Spencer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013956460&doi=10.1145%2F3721238.3730705&partnerID=40&md5=a0815a6742f5e1d072f0f559410ce28b},
doi = {10.1145/3721238.3730705},
isbn = {9798400715402 (ISBN)},
year = {2025},
date = {2025-01-01},
publisher = {Association for Computing Machinery, Inc},
abstract = {Three-dimensional building generation is vital for applications in gaming, virtual reality, and digital twins, yet current methods face challenges in producing diverse, structured, and hierarchically coherent buildings. We propose BuildingBlock, a hybrid approach that integrates generative models, procedural content generation (PCG), and large language models (LLMs) to address these limitations. Specifically, our method introduces a two-phase pipeline: the Layout Generation Phase (LGP) and the Building Construction Phase (BCP). LGP reframes box-based layout generation as a point-cloud generation task, utilizing a newly constructed architectural dataset and a Transformer-based diffusion model to create globally consistent layouts. With LLMs, these layouts are extended into rule-based hierarchical designs, seamlessly incorporating component styles and spatial structures. The BCP leverages these layouts to guide PCG, enabling local-customizable, high-quality structured building generation. Experimental results demonstrate BuildingBlock ’s effectiveness in generating diverse and hierarchically structured buildings, achieving state-of-the-art results on multiple benchmarks, and paving the way for scalable and intuitive architectural workflows. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D modeling, 3D models, 3d-modeling, Architecture, benchmarking, Building blockes, Construction, Data-driven model, Generative 3D Modeling, Hierarchical systems, Hybrid approach, Interactive computer graphics, Language Model, Layout generations, Procedural & Data-driven Modeling, Procedural content generations, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
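BuildingBlock's two-phase pipeline (a Layout Generation Phase followed by a Building Construction Phase, with an LLM refining layouts into rule-based hierarchies) can be pictured as the skeleton below. This is a structural Python sketch only; all data, labels and stub bodies are invented stand-ins for the paper's diffusion, LLM and PCG components.

from dataclasses import dataclass

@dataclass
class Box:
    center: tuple[float, float, float]
    size: tuple[float, float, float]
    label: str                      # e.g. "tower", "wing"

def layout_generation_phase(prompt: str) -> list[Box]:
    """LGP stand-in: the paper uses a Transformer-based diffusion model over point clouds."""
    return [Box((0, 0, 0), (10, 10, 20), "tower"),
            Box((12, 0, 0), (8, 8, 6), "wing")]

def refine_with_llm(boxes: list[Box]) -> dict:
    """Stand-in for the LLM step that extends box layouts into rule-based hierarchical designs."""
    return {"building": {b.label: {"style": "brick", "floors": int(b.size[2] // 3)} for b in boxes}}

def building_construction_phase(hierarchy: dict) -> None:
    """BCP stand-in: procedural content generation would realise geometry from each rule."""
    for part, rules in hierarchy["building"].items():
        print(f"constructing {part} with {rules}")

building_construction_phase(refine_with_llm(layout_generation_phase("a small campus hall")))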
Sun, Y.; Cheng, C.; Xu, C.; Lee, C. H.; Asadipour, A.
Hyborg Agency: Fostering AI Agents through Community Conversations in a Digital Forest Journal Article
In: Proceedings of the ACM on Computer Graphics and Interactive Techniques, vol. 8, no. 3, 2025, ISSN: 25776193 (ISSN), (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: 3-D environments, Agents, Artificial intelligence, Communication platforms, Computational ecosystems, Ecosystems, Forestry, Human relationships, Human society, Immersive, Interactive computer graphics, Language evolution, Language Model, Mechanical, Social aspects, Thematic analysis, Virtual Reality
@article{sun_hyborg_2025,
title = {Hyborg Agency: Fostering AI Agents through Community Conversations in a Digital Forest},
author = {Y. Sun and C. Cheng and C. Xu and C. H. Lee and A. Asadipour},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017974726&doi=10.1145%2F3736778&partnerID=40&md5=4685f589ec5dadb35d73a3ce853d7111},
doi = {10.1145/3736778},
issn = {25776193 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Proceedings of the ACM on Computer Graphics and Interactive Techniques},
volume = {8},
number = {3},
abstract = {This paper presents Hyborg Agency, a computational ecosystem that explores how AI agents can meaningfully coexist with human society through the metaphor of a digital forest. By reimagining AI agents as mechanical deer, mutated from discarded electronics, the project defamiliarizes common perceptions of anthropomorphized AI agents, transforming them into non-human creatures with conversational abilities. The system employs Large Language Models (LLMs) to process community conversations, treating them as nutrients for AI growth, and features a dual-platform structure that connects an immersive 3D environment with Discord, a widely used communication platform. Based on community chats, Hyborgs generate daily summaries of their observations and share them with one another, fostering consistent memories and perceptions of the world. Through thematic analysis of public exhibitions and structured interviews with fourteen experts (seven pairs), we identified three key themes: social expansion in human relationships, language evolution through code-infused communication, and creative engagement through defamiliarized interaction. These findings highlight how AI agents can enrich human social relationships while maintaining transparency about their artificial nature. This work contributes to ongoing discussions on AI participation in society through a speculative scenario: human discourse serves as the foundation for constructing a digital forest that nurtures AI agents. In this virtual forest, humans and AI grow together in a symbiotic relationship, shaping one another and their shared environment, ultimately achieving harmonious coexistence. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {3-D environments, Agents, Artificial intelligence, Communication platforms, Computational ecosystems, Ecosystems, Forestry, Human relationships, Human society, Immersive, Interactive computer graphics, Language evolution, Language Model, Mechanical, Social aspects, Thematic analysis, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
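Hyborg Agency, as summarised above, feeds community conversations to LLM-driven agents that keep daily summaries as shared memory. A small hedged sketch of that digest step follows; call_llm is a hypothetical placeholder and the persona wording is invented.

from collections import defaultdict
from datetime import date

def call_llm(prompt: str) -> str:
    """Hypothetical helper: forward the prompt to any chat LLM and return its reply."""
    raise NotImplementedError

memories: dict[str, list[str]] = defaultdict(list)  # per-Hyborg long-term memory

def digest_day(hyborg_name: str, community_messages: list[str]) -> str:
    """Turn one day of community chat (the agent's 'nutrients') into a short stored observation."""
    summary = call_llm(
        f"You are {hyborg_name}, a mechanical deer living in a digital forest. "
        "Summarise today's community chat in two sentences:\n" + "\n".join(community_messages)
    )
    memories[hyborg_name].append(f"{date.today()}: {summary}")
    return summary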
Pardo B., C. E.; Iglesias R., O. I.; León A., M. D.; Quintero M., C. G.
EverydAI: Virtual Assistant for Decision-Making in Daily Contexts, Powered by Artificial Intelligence Journal Article
In: Systems, vol. 13, no. 9, 2025, ISSN: 20798954 (ISSN), (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Behavioral Research, Decision making, Decisions makings, Digital avatar, Digital avatars, Information overloads, Informed decision, Interactive computer graphics, Language Model, Large language model, large language models, Natural language processing systems, Natural languages, Object Detection, Object recognition, Objects detection, recommendation systems, Recommender systems, Three dimensional computer graphics, Virtual assistants, Virtual Reality, web scraping, Web scrapings
@article{pardo_b_everydai_2025,
title = {EverydAI: Virtual Assistant for Decision-Making in Daily Contexts, Powered by Artificial Intelligence},
author = {C. E. Pardo B and O. I. Iglesias R and M. D. León A and C. G. Quintero M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017115803&doi=10.3390%2Fsystems13090753&partnerID=40&md5=475327fffcdc43ee3466b4a65111866a},
doi = {10.3390/systems13090753},
issn = {20798954 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Systems},
volume = {13},
number = {9},
abstract = {In an era of information overload, artificial intelligence plays a pivotal role in supporting everyday decision-making. This paper introduces EverydAI, a virtual AI-powered assistant designed to help users make informed decisions across various daily domains such as cooking, fashion, and fitness. By integrating advanced natural language processing, object detection, augmented reality, contextual understanding, digital 3D avatar models, web scraping, and image generation, EverydAI delivers personalized recommendations and insights tailored to individual needs. The proposed framework addresses challenges related to decision fatigue and information overload by combining real-time object detection and web scraping to enhance the relevance and reliability of its suggestions. EverydAI is evaluated through a two-phase survey, each one involving 30 participants with diverse demographic backgrounds. Results indicate that on average, 92.7% of users agreed or strongly agreed with statements reflecting the system’s usefulness, ease of use, and overall performance, indicating a high level of acceptance and perceived effectiveness. Additionally, EverydAI received an average user satisfaction score of 4.53 out of 5, underscoring its effectiveness in supporting users’ daily routines. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {Artificial intelligence, Augmented Reality, Behavioral Research, Decision making, Decisions makings, Digital avatar, Digital avatars, Information overloads, Informed decision, Interactive computer graphics, Language Model, Large language model, large language models, Natural language processing systems, Natural languages, Object Detection, Object recognition, Objects detection, recommendation systems, Recommender systems, Three dimensional computer graphics, Virtual assistants, Virtual Reality, web scraping, Web scrapings},
pubstate = {published},
tppubtype = {article}
}
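As a rough illustration of the pipeline sketched in the abstract above (object detection feeding web scraping and an LLM-based recommender), the snippet below shows one plausible composition. All helper functions (detect_objects, scrape_candidates, call_llm) are hypothetical placeholders, not the published implementation.

# Illustrative sketch of an EverydAI-style recommendation step: detect what the
# user has on hand, scrape candidate options in real time, then ask an LLM to
# pick and justify one recommendation.
from typing import List

def detect_objects(image_path: str) -> List[str]:
    """Hypothetical detector (e.g. a YOLO-style model) returning object labels."""
    raise NotImplementedError

def scrape_candidates(query: str) -> List[str]:
    """Hypothetical web scraper returning candidate items (recipes, outfits, ...)."""
    raise NotImplementedError

def call_llm(prompt: str) -> str:
    raise NotImplementedError

def recommend(image_path: str, domain: str = "cooking") -> str:
    objects = detect_objects(image_path)               # e.g. ["tomato", "pasta", "basil"]
    candidates = scrape_candidates(" ".join(objects))  # real-time web results
    prompt = (
        f"Domain: {domain}. The user has: {', '.join(objects)}.\n"
        f"Candidate options from the web: {'; '.join(candidates[:5])}.\n"
        "Recommend one option and justify it briefly."
    )
    return call_llm(prompt)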
Kammari, K. S.; Annambhotla, Y. L.; Khanna, M.
ProWGAN a hybrid generative adversarial network for automated landscape generation in media and video games Journal Article
In: Discover Artificial Intelligence, vol. 5, no. 1, 2025, ISSN: 27310809 (ISSN), (Publisher: Springer Nature).
Abstract | Links | BibTeX | Tags: 3D modeling, Adversarial networks, Generative AI, Hybrid model, Image production, Interactive computer graphics, Landscape, Landscapes, Motion pictures, Progressive GAN, Video-games, Videogame environment, Videogames environments, Virtual Reality, Wasserstein GAN
@article{kammari_prowgan_2025,
title = {ProWGAN a hybrid generative adversarial network for automated landscape generation in media and video games},
author = {K. S. Kammari and Y. L. Annambhotla and M. Khanna},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017739103&doi=10.1007%2Fs44163-025-00512-5&partnerID=40&md5=5e48fe6941113d9196abb4308cf5db0f},
doi = {10.1007/s44163-025-00512-5},
issn = {27310809 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Discover Artificial Intelligence},
volume = {5},
number = {1},
abstract = {Current approaches to creating realistic, high-quality landscape imagery mostly depend on labor-intensive manual design procedures. In an effort to simplify image production for video games, virtual reality, and motion pictures, a new hybrid model called ProWGAN, combining ProGAN and WGAN approaches, is employed for automated landscape synthesis. Five models (FCGAN, DCGAN, ProGAN, WGAN, and ProWGAN) were trained on a dataset of landscape images and compared using multiple evaluation metrics. Compared to traditional models, ProWGAN produces 128×128 images with the best FID score (29.67), IS (5.11), and lowest critic loss (0.2), fully capturing landscape features in just 5 h of training and 50 epochs. The layered image-production method and progressive learning of ProGAN, combined with the stability of WGAN’s Wasserstein distance, showed superior ability to generate realistic landscape images. The results demonstrate how ProWGAN can revolutionize landscape image production by reducing manual work, lowering production time and effort, and how a 2D image can be converted into a 3D model via Meshroom. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Springer Nature},
keywords = {3D modeling, Adversarial networks, Generative AI, Hybrid model, Image production, Interactive computer graphics, Landscape, Landscapes, Motion pictures, Progressive GAN, Video-games, Videogame environment, Videogames environments, Virtual Reality, Wasserstein GAN},
pubstate = {published},
tppubtype = {article}
}
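For readers unfamiliar with the WGAN component referenced above, the PyTorch fragment below shows the standard Wasserstein critic and generator objectives that a ProWGAN-style hybrid builds on, with the ProGAN side reduced to a resolution schedule. It is a generic sketch, not the paper's training code.

# Minimal PyTorch sketch of the Wasserstein objectives borrowed from WGAN; the
# progressive-growing (ProGAN) idea appears only as a resolution schedule.
import torch

def critic_loss(critic, real: torch.Tensor, fake: torch.Tensor) -> torch.Tensor:
    # Approximate the Wasserstein distance: drive the critic's mean score on
    # real images above its mean score on generated images.
    return critic(fake).mean() - critic(real).mean()

def generator_loss(critic, fake: torch.Tensor) -> torch.Tensor:
    # The generator tries to raise the critic's score on its samples.
    return -critic(fake).mean()

# Progressive schedule (ProGAN idea): train at 4x4, then fade in 8x8, ... 128x128.
resolutions = [4, 8, 16, 32, 64, 128]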
2024
Gottsacker, M.; Bruder, G.; Welch, G. F.
rlty2rlty: Transitioning Between Realities with Generative AI Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1160–1161, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374490 (ISBN).
Abstract | Links | BibTeX | Tags: Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds
@inproceedings{gottsacker_rlty2rlty_2024,
title = {rlty2rlty: Transitioning Between Realities with Generative AI},
author = {M. Gottsacker and G. Bruder and G. F. Welch},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195556960&doi=10.1109%2FVRW62533.2024.00374&partnerID=40&md5=cef1bfa9489c71c9e134cd9dc2326b42},
doi = {10.1109/VRW62533.2024.00374},
isbn = {9798350374490 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1160–1161},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present a system for visually transitioning a mixed reality (MR) user between two arbitrary realities (e.g., between two virtual worlds or between the real environment and a virtual world). The system uses artificial intelligence (AI) to generate a 360° video that transforms the user's starting environment to another environment, passing through a liminal space that could help them relax between tasks or prepare them for the ending environment. The video can then be viewed on an MR headset. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds},
pubstate = {published},
tppubtype = {inproceedings}
}
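A minimal sketch of the transition idea described above, assuming a hypothetical text-to-360°-image generator: prompts are interpolated from the starting environment through a liminal space to the target environment, and one equirectangular frame is generated per step.

# Rough sketch (hypothetical helpers, not the authors' system) of generating a
# 360-degree transition video frame by frame.
from typing import List

def generate_360_frame(prompt: str) -> object:
    """Hypothetical text-to-360-image call returning one equirectangular frame."""
    raise NotImplementedError

def transition_prompts(start: str, liminal: str, end: str, steps: int = 30) -> List[str]:
    prompts = []
    for i in range(steps):
        t = i / (steps - 1)
        if t < 0.5:
            prompts.append(f"equirectangular 360 view, {start} dissolving into {liminal}")
        else:
            prompts.append(f"equirectangular 360 view, {liminal} resolving into {end}")
    return prompts

# Example usage (environments are made up):
# frames = [generate_360_frame(p) for p in transition_prompts(
#     "a sunlit office", "a calm foggy liminal corridor", "a medieval fantasy village")]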
Nebeling, M.; Oki, M.; Gelsomini, M.; Hayes, G. R.; Billinghurst, M.; Suzuki, K.; Graf, R.
Designing Inclusive Future Augmented Realities Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 9798400703317 (ISBN).
Abstract | Links | BibTeX | Tags: Accessible and inclusive design, Augmented Reality, Augmented reality technology, Display technologies, Generative AI, Inclusive design, Interactive computer graphics, Mixed reality, Mixed reality technologies, Rapid prototyping, Rapid-prototyping, Sensing technology, Spatial computing
@inproceedings{nebeling_designing_2024,
title = {Designing Inclusive Future Augmented Realities},
author = {M. Nebeling and M. Oki and M. Gelsomini and G. R. Hayes and M. Billinghurst and K. Suzuki and R. Graf},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194176929&doi=10.1145%2F3613905.3636313&partnerID=40&md5=298fb08ec3634b0ac98be592366ef03f},
doi = {10.1145/3613905.3636313},
isbn = {9798400703317 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Augmented and mixed reality technology is rapidly advancing, driven by innovations in display, sensing, and AI technologies. This evolution, particularly in the era of generative AI with large language and text-to-image models such as GPT and Stable Diffusion, has the potential, not only to make it easier to create, but also to adapt and personalize, new content. Our workshop explores the pivotal role of augmented and mixed reality to shape a user's interactions with their physical surroundings. We aim to explore how inclusive future augmented realities can be designed, with increasing support for automation, such that environments can welcome users with different needs, emphasizing accessibility and inclusion through layers of augmentations. Our aim is not only to remove barriers by providing accommodations, but also to create a sense of belonging by directly engaging users. Our workshop consists of three main activities: (1) Through brainstorming and discussion of examples provided by the workshop organizers and participants, we critically review the landscape of accessible and inclusive design and their vital role in augmented and mixed reality experiences. (2) Through rapid prototyping activities including bodystorming and low-fidelity, mixed-media prototypes, participants explore how augmented and mixed reality can transform physical space into a more personal place, enhancing accessibility and inclusion based on novel interface and interaction techniques that are desirable, but not necessarily technically feasible just yet. In the workshop, we plan to focus on physical space to facilitate rapid prototyping without technical constraints, but techniques developed in the workshop are likely applicable to immersive virtual environments as well. (3) Finally, we collaborate to outline a research agenda for designing future augmented realities that promote equal opportunities, benefiting diverse user populations. Our workshop inspires innovation in augmented and mixed reality, reshaping physical environments to be more accessible and inclusive through immersive design. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Accessible and inclusive design, Augmented Reality, Augmented reality technology, Display technologies, Generative AI, Inclusive design, Interactive computer graphics, Mixed reality, Mixed reality technologies, Rapid prototyping, Rapid-prototyping, Sensing technology, Spatial computing},
pubstate = {published},
tppubtype = {inproceedings}
}
De La Torre, F.; Fang, C. M.; Huang, H.; Banburski-Fahey, A.; Fernandez, J. A.; Lanier, J.
LLMR: Real-time Prompting of Interactive Worlds using Large Language Models Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Computational Linguistics, Design goal, Interactive computer graphics, Interactive worlds, Internal dynamics, Language Model, Large language model, Mixed reality, Novel strategies, Real- time, Spatial Reasoning, Training data
@inproceedings{de_la_torre_llmr_2024,
title = {LLMR: Real-time Prompting of Interactive Worlds using Large Language Models},
author = {F. De La Torre and C. M. Fang and H. Huang and A. Banburski-Fahey and J. A. Fernandez and J. Lanier},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194848276&doi=10.1145%2f3613904.3642579&partnerID=40&md5=14969e96507a1f0110262021e5b1172d},
doi = {10.1145/3613904.3642579},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {We present Large Language Model for Mixed Reality (LLMR), a framework for the real-time creation and modification of interactive Mixed Reality experiences using LLMs. LLMR leverages novel strategies to tackle difficult cases where ideal training data is scarce, or where the design goal requires the synthesis of internal dynamics, intuitive analysis, or advanced interactivity. Our framework relies on text interaction and the Unity game engine. By incorporating techniques for scene understanding, task planning, self-debugging, and memory management, LLMR outperforms the standard GPT-4 by 4x in average error rate. We demonstrate LLMR's cross-platform interoperability with several example worlds, and evaluate it on a variety of creation and modification tasks to show that it can produce and edit diverse objects, tools, and scenes. Finally, we conducted a usability study (N=11) with a diverse set of participants that revealed they had positive experiences with the system and would use it again. © 2024 Copyright held by the owner/author(s)},
keywords = {Artificial intelligence, Computational Linguistics, Design goal, Interactive computer graphics, Interactive worlds, Internal dynamics, Language Model, Large language model, Mixed reality, Novel strategies, Real- time, Spatial Reasoning, Training data},
pubstate = {published},
tppubtype = {inproceedings}
}
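The abstract above mentions scene understanding, task planning, self-debugging, and memory management components. The sketch below illustrates only the generate-and-self-debug portion of such a pipeline under assumed interfaces (call_llm, try_compile); it is not the published LLMR code.

# Schematic sketch of an LLM drafting Unity C# for a requested scene change,
# with compiler errors fed back for another attempt (self-debugging).
def call_llm(prompt: str) -> str:
    raise NotImplementedError

def try_compile(code: str) -> str:
    """Return an empty string on success, otherwise the compiler errors."""
    raise NotImplementedError

def build_scene_edit(user_request: str, scene_summary: str, max_attempts: int = 3) -> str:
    prompt = (
        f"Scene summary: {scene_summary}\n"
        f"User request: {user_request}\n"
        "Write Unity C# that performs this change."
    )
    code = call_llm(prompt)
    for _ in range(max_attempts):
        errors = try_compile(code)
        if not errors:
            return code  # ready to hand off to the engine
        # Self-debugging: show the model its own errors and ask for a fix.
        code = call_llm(
            f"The following code failed to compile:\n{code}\nErrors:\n{errors}\nFix it."
        )
    raise RuntimeError("Could not produce compiling code within the attempt budget")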
He, K.; Yao, K.; Zhang, Q.; Yu, J.; Liu, L.; Xu, L.
DressCode: Autoregressively Sewing and Generating Garments from Text Guidance Journal Article
In: ACM Transactions on Graphics, vol. 43, no. 4, 2024, ISSN: 07300301 (ISSN).
Abstract | Links | BibTeX | Tags: 3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On
@article{he_dresscode_2024,
title = {DressCode: Autoregressively Sewing and Generating Garments from Text Guidance},
author = {K. He and K. Yao and Q. Zhang and J. Yu and L. Liu and L. Xu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199257820&doi=10.1145%2f3658147&partnerID=40&md5=8996e62e4d9dabb5a7034f8bf4df5a43},
doi = {10.1145/3658147},
issn = {07300301 (ISSN)},
year = {2024},
date = {2024-01-01},
journal = {ACM Transactions on Graphics},
volume = {43},
number = {4},
abstract = {Apparel's significant role in human appearance underscores the importance of garment digitalization for digital human creation. Recent advances in 3D content creation are pivotal for digital human creation. Nonetheless, garment generation from text guidance is still nascent. We introduce a text-driven 3D garment generation framework, DressCode, which aims to democratize design for novices and offer immense potential in fashion design, virtual try-on, and digital human creation. We first introduce SewingGPT, a GPT-based architecture integrating cross-attention with text-conditioned embedding to generate sewing patterns with text guidance. We then tailor a pre-trained Stable Diffusion to generate tile-based Physically-based Rendering (PBR) textures for the garments. By leveraging a large language model, our framework generates CG-friendly garments through natural language interaction. It also facilitates pattern completion and texture editing, streamlining the design process through user-friendly interaction. This framework fosters innovation by allowing creators to freely experiment with designs and incorporate unique elements into their work. With comprehensive evaluations and comparisons with other state-of-the-art methods, our method showcases superior quality and alignment with input prompts. User studies further validate our high-quality rendering results, highlighting its practical utility and potential in production settings. Copyright © 2024 held by the owner/author(s).},
keywords = {3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On},
pubstate = {published},
tppubtype = {article}
}
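To make the "GPT-based architecture integrating cross-attention with text-conditioned embedding" concrete, here is a toy PyTorch decoder that autoregressively emits sewing-pattern tokens while cross-attending to a text embedding. The vocabulary size, dimensions, and text encoder are assumptions for illustration, not the released SewingGPT.

# Toy autoregressive decoder: pattern tokens attend to a text embedding supplied
# as the cross-attention "memory"; greedy decoding builds the token sequence.
import torch
import torch.nn as nn

VOCAB, D, BOS = 1024, 256, 0  # hypothetical pattern-token vocabulary and dims

class PatternDecoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(VOCAB, D)
        layer = nn.TransformerDecoderLayer(d_model=D, nhead=8, batch_first=True)
        self.decoder = nn.TransformerDecoder(layer, num_layers=4)
        self.head = nn.Linear(D, VOCAB)

    def forward(self, tokens: torch.Tensor, text_emb: torch.Tensor) -> torch.Tensor:
        x = self.embed(tokens)
        # Cross-attention to the text condition happens inside the decoder
        # layers, with text_emb of shape (1, seq_len, D) as the memory.
        x = self.decoder(tgt=x, memory=text_emb)
        return self.head(x)

@torch.no_grad()
def generate(model: PatternDecoder, text_emb: torch.Tensor, steps: int = 64) -> torch.Tensor:
    tokens = torch.full((1, 1), BOS, dtype=torch.long)
    for _ in range(steps):
        logits = model(tokens, text_emb)
        nxt = logits[:, -1].argmax(dim=-1, keepdim=True)
        tokens = torch.cat([tokens, nxt], dim=1)
    return tokens  # decoded downstream into panels and stitches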
He, K.; Lapham, A.; Li, Z.
Enhancing Narratives with SayMotion's text-to-3D animation and LLMs Proceedings Article
In: Spencer, S. N. (Ed.): Proc. - SIGGRAPH Real-Time Live!, Association for Computing Machinery, Inc, 2024, ISBN: 9798400705267 (ISBN).
Abstract | Links | BibTeX | Tags: 3D animation, AI-based animation, Animation, Animation editing, Deep learning, Film production, Human motions, Interactive computer graphics, Interactive media, Language Model, Motion models, Physics simulation, Production medium, Simulation platform, Three dimensional computer graphics
@inproceedings{he_enhancing_2024,
title = {Enhancing Narratives with SayMotion's text-to-3D animation and LLMs},
author = {K. He and A. Lapham and Z. Li},
editor = {S. N. Spencer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85200655076&doi=10.1145%2F3641520.3665309&partnerID=40&md5=16af33ce451919f43d1ba2ccab63f1af},
doi = {10.1145/3641520.3665309},
isbn = {9798400705267 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - SIGGRAPH Real-Time Live!},
publisher = {Association for Computing Machinery, Inc},
abstract = {SayMotion, a generative AI text-to-3D animation platform, utilizes deep generative learning and advanced physics simulation to transform text descriptions into realistic 3D human motions for applications in gaming, extended reality (XR), film production, education and interactive media. SayMotion addresses challenges due to the complexities of animation creation by employing a Large Language Model (LLM) fine-tuned to human motion with further AI-based animation editing components including spatial-temporal Inpainting via a proprietary Large Motion Model (LMM). SayMotion is a pioneer in the animation market by offering a comprehensive set of AI generation and AI editing functions for creating 3D animations efficiently and intuitively. With an LMM at its core, SayMotion aims to democratize 3D animations for everyone through language and generative motion. © 2024 Elsevier B.V., All rights reserved.},
keywords = {3D animation, AI-based animation, Animation, Animation editing, Deep learning, Film production, Human motions, Interactive computer graphics, Interactive media, Language Model, Motion models, Physics simulation, Production medium, Simulation platform, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
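A hedged sketch of the authoring flow implied above: a text prompt produces a motion clip, and spatial-temporal inpainting regenerates only an edited time window. Both functions are hypothetical placeholders rather than SayMotion's actual API.

# Hypothetical text-to-motion and motion-inpainting calls; only the editing
# window is regenerated, the rest of the clip is kept.
from typing import Any, Tuple

def text_to_motion(prompt: str) -> Any:
    """Hypothetical call returning a motion clip (e.g. per-frame joint rotations)."""
    raise NotImplementedError

def inpaint_motion(clip: Any, window: Tuple[float, float], edit_prompt: str) -> Any:
    """Hypothetical spatial-temporal inpainting over `window` seconds of the clip."""
    raise NotImplementedError

def author_shot() -> Any:
    clip = text_to_motion("a character sprints, leaps over a crate, and lands in a roll")
    # Keep the sprint and landing; regenerate only the mid-air portion.
    return inpaint_motion(clip, window=(1.2, 2.0), edit_prompt="tuck the knees tighter mid-air")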
Leong, C. W.; Jawahar, N.; Basheerabad, V.; Wortwein, T.; Emerson, A.; Sivan, G.
Combining Generative and Discriminative AI for High-Stakes Interview Practice Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 94–96, Association for Computing Machinery, 2024, ISBN: 9798400704635 (ISBN).
Abstract | Links | BibTeX | Tags: AI systems, College admissions, Continuous improvements, End to end, Interactive computer graphics, Interactive dialog system, interactive dialogue systems, Language Model, Modeling languages, Multi-modal, Multimodal computing, Video interview, video interviews, Virtual avatar, Virtual environments, Virtual Reality
@inproceedings{leong_combining_2024,
title = {Combining Generative and Discriminative AI for High-Stakes Interview Practice},
author = {C. W. Leong and N. Jawahar and V. Basheerabad and T. Wortwein and A. Emerson and G. Sivan},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85211135262&doi=10.1145%2F3686215.3688377&partnerID=40&md5=6d4d229efe1cf8fee7ef701ff758bc87},
doi = {10.1145/3686215.3688377},
isbn = {9798400704635 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {94–96},
publisher = {Association for Computing Machinery},
abstract = {We present a demo comprising an end-to-end AI pipeline for practicing video interviews for a high-stakes scenario (i.e., college admissions) with personalized, actionable feedback for continuous improvement of the user. Utilizing large language models (LLMs), we generate questions and responses for a virtual avatar interviewer. Our focus on key qualities - such as concise responses with low latency, empathy, and smooth topic navigation - led to a comparative evaluation of several prominent LLMs, each undergoing evolutionary development. We also discuss the integration of avatar technology to create an immersive, virtual environment for naturalistic dyadic conversations. © 2024 Elsevier B.V., All rights reserved.},
keywords = {AI systems, College admissions, Continuous improvements, End to end, Interactive computer graphics, Interactive dialog system, interactive dialogue systems, Language Model, Modeling languages, Multi-modal, Multimodal computing, Video interview, video interviews, Virtual avatar, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
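One way to picture the generative-plus-discriminative combination described above is the turn loop below: an LLM plays the avatar interviewer, a separate discriminative model scores the candidate's answer, and the score conditions the feedback. The helpers are hypothetical and do not reflect the authors' implementation.

# Sketch of a single interview turn: score the answer, generate feedback, and
# produce the next question for the avatar interviewer.
def call_llm(prompt: str) -> str:
    raise NotImplementedError

def score_response(question: str, answer: str) -> float:
    """Hypothetical discriminative model returning a 0-1 quality score."""
    raise NotImplementedError

def interview_turn(history: list, answer: str) -> dict:
    question = history[-1]
    quality = score_response(question, answer)
    feedback = call_llm(
        f"Question: {question}\nAnswer: {answer}\nScore: {quality:.2f}\n"
        "Give one concise, actionable suggestion for improvement."
    )
    next_q = call_llm(
        f"Conversation so far: {history + [answer]}\n"
        "Ask the next admissions interview question, keeping it brief and empathetic."
    )
    return {"feedback": feedback, "next_question": next_q, "score": quality}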