AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops, and books.
OUR RESEARCH
Scientific Publications
2025
Cao, X.; Ju, K. P.; Li, C.; Jain, D.
SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene? Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8.
@inproceedings{cao_scenegena11y_2025,
title = {SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene?},
author = {X. Cao and K. P. Ju and C. Li and D. Jain},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005772656&doi=10.1145%2f3706599.3720265&partnerID=40&md5=9b0bf29c3e89b70efa2d6a3e740829fb},
doi = {10.1145/3706599.3720265},
isbn = {979-840071395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {With the popularity of virtual 3D applications, from video games to educational content and virtual reality scenarios, the accessibility of 3D scene information is vital to ensure inclusive and equitable experiences for all. Previous work includes information substitutions like audio description and captions, as well as personalized modifications, but they could only provide predefined accommodations. In this work, we propose SceneGenA11y, a system that responds to the user’s natural language prompts to improve accessibility of a 3D virtual scene at runtime. The system primes LLM agents with accessibility-related knowledge, allowing users to explore the scene and perform verifiable modifications to improve accessibility. We conducted a preliminary evaluation of our system with three blind and low-vision people and three deaf and hard-of-hearing people. The results show that our system is intuitive to use and can successfully improve accessibility. We discussed usage patterns of the system, potential improvements, and integration into apps. We ended by highlighting plans for future work. © 2025 Copyright held by the owner/author(s).},
keywords = {3D application, 3D modeling, 3D scenes, Accessibility, BLV, DHH, Discrete event simulation, Generative AI, Generative tools, Interactive computer graphics, One dimensional, Runtimes, Three dimensional computer graphics, Video-games, Virtual 3d scene, virtual 3D scenes, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
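The abstract describes an LLM agent that turns natural-language prompts into verifiable scene modifications at runtime. The following is a minimal Python sketch of that pattern, not the authors' code: the scene dictionary, modify(), and ask_llm() are hypothetical stand-ins.

import json

SCENE = {"lamp_1": {"brightness": 0.3}, "door_chime": {"captioned": False}}

def modify(object_id: str, prop: str, value) -> bool:
    """Apply one modification and verify it actually took effect."""
    if object_id in SCENE and prop in SCENE[object_id]:
        SCENE[object_id][prop] = value
        return SCENE[object_id][prop] == value  # verifiable change
    return False

def ask_llm(prompt: str, scene: dict) -> list:
    """Stand-in for the accessibility-primed LLM agent; a real system would
    send the prompt plus a scene summary and parse structured tool calls."""
    return [{"object_id": "lamp_1", "prop": "brightness", "value": 0.9}]

for call in ask_llm("The room is too dark for me to navigate.", SCENE):
    ok = modify(call["object_id"], call["prop"], call["value"])
    print(json.dumps(call), "->", "applied" if ok else "rejected")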
Vachha, C.; Kang, Y.; Dive, Z.; Chidambaram, A.; Gupta, A.; Jun, E.; Hartmann, B.
Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
@inproceedings{vachha_dreamcrafter_2025,
title = {Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs},
author = {C. Vachha and Y. Kang and Z. Dive and A. Chidambaram and A. Gupta and E. Jun and B. Hartmann},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005725679&doi=10.1145%2f3706598.3714312&partnerID=40&md5=68cf2a08d3057fd9756e25d53959872b},
doi = {10.1145/3706598.3714312},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Authoring 3D scenes is a central task for spatial computing applications. Competing visions for lowering existing barriers are (1) focus on immersive, direct manipulation of 3D content or (2) leverage AI techniques that capture real scenes (3D Radiance Fields such as NeRFs and 3D Gaussian Splatting) and modify them at a higher level of abstraction, at the cost of high latency. We unify the complementary strengths of these approaches and investigate how to integrate generative AI advances into real-time, immersive 3D Radiance Field editing. We introduce Dreamcrafter, a VR-based 3D scene editing system that: (1) provides a modular architecture to integrate generative AI algorithms; (2) combines different levels of control for creating objects, including natural language and direct manipulation; and (3) introduces proxy representations that support interaction during high-latency operations. We contribute empirical findings on control preferences and discuss how generative AI interfaces beyond text input enhance creativity in scene editing and world building. © 2025 Copyright held by the owner/author(s).},
keywords = {3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface},
pubstate = {published},
tppubtype = {inproceedings}
}
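Dreamcrafter's proxy representations address interaction during high-latency generative operations. The asyncio sketch below illustrates the general idea under stated assumptions, with all names invented for illustration: a cheap placeholder appears immediately and is swapped for the generated asset once the slow model call completes, so direct manipulation stays responsive.

import asyncio

async def generate_asset(prompt: str) -> str:
    await asyncio.sleep(2.0)  # stands in for a high-latency generative model call
    return f"generated_mesh({prompt!r})"

async def place_object(scene: dict, obj_id: str, prompt: str) -> None:
    scene[obj_id] = {"repr": "proxy_box", "prompt": prompt}  # instant proxy
    scene[obj_id]["repr"] = await generate_asset(prompt)     # swap when ready

async def main() -> None:
    scene = {}
    task = asyncio.create_task(place_object(scene, "obj1", "a stone fountain"))
    await asyncio.sleep(0)             # let the proxy get placed immediately
    print("while generating:", scene)  # proxy visible; user can keep editing
    await task
    print("after generation:", scene)  # proxy replaced by the generated mesh

asyncio.run(main())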
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9.
@inproceedings{chen_analyzing_2025,
title = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
author = {J. Chen and J. Grubert and P. O. Kristensson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2fVR59515.2025.00045&partnerID=40&md5=306aa7fbb3dad0aa9d43545f3c7eb9ea},
doi = {10.1109/VR59515.2025.00045},
isbn = {979-833153645-9},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {206–216},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems that involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments. © 2025 IEEE.},
keywords = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Jiang, H.; Song, L.; Weng, D.; Sun, Z.; Li, H.; Dongye, X.; Zhang, Z.
In Situ 3D Scene Synthesis for Ubiquitous Embodied Interfaces Proceedings Article
In: MM - Proc. ACM Int. Conf. Multimed., pp. 3666–3675, Association for Computing Machinery, Inc, 2024, ISBN: 979-840070686-8.
@inproceedings{jiang_situ_2024,
title = {In Situ 3D Scene Synthesis for Ubiquitous Embodied Interfaces},
author = {H. Jiang and L. Song and D. Weng and Z. Sun and H. Li and X. Dongye and Z. Zhang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209812307&doi=10.1145%2f3664647.3681616&partnerID=40&md5=e58acd404c8785868c69a4647cecacb2},
doi = {10.1145/3664647.3681616},
isbn = {979-840070686-8},
year = {2024},
date = {2024-01-01},
booktitle = {MM - Proc. ACM Int. Conf. Multimed.},
pages = {3666–3675},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual reality enables us to access and interact with immersive virtual environments anytime and anywhere in various fields such as entertainment, training, and education. However, users immersed in virtual scenes remain physically connected to their real-world surroundings, which can pose safety and immersion challenges. Although virtual scene synthesis has attracted widespread attention, many popular methods are limited to generating purely virtual scenes independent of physical environments or simply mapping physical objects as obstacles. To this end, we propose a scene agent that synthesizes situated 3D virtual scenes as a kind of ubiquitous embodied interface in VR for users. The scene agent synthesizes scenes by perceiving the user's physical environment as well as inferring the user's demands. The synthesized scenes maintain the affordances of the physical environment, enabling immersed users to interact with the physical environment and improving the user's sense of security. Meanwhile, the synthesized scenes maintain the style described by the user, improving the user's immersion. The comparison results show that the proposed scene agent can synthesize virtual scenes with better affordance maintenance, scene diversity, style maintenance, and 3D intersection over union compared to baselines. To the best of our knowledge, this is the first work that achieves in situ scene synthesis with virtual-real affordance consistency and user demand. © 2024 ACM.},
keywords = {3D modeling, 3D scenes, affordance, Affordances, Chatbots, Computer simulation languages, Digital elevation model, Embodied interfaces, Language Model, Large language model, Physical environments, Scene synthesis, Synthesised, Three dimensional computer graphics, user demand, User demands, Virtual environments, Virtual Reality, Virtual scenes},
pubstate = {published},
tppubtype = {inproceedings}
}
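One way to read the affordance-maintenance step described above: each detected physical object is replaced by a style-appropriate virtual object with the same affordance and pose, so the user can still sit on the real sofa or lean on the real table while immersed. The sketch below is a speculative illustration only; the detections, catalog, and function are invented, not the paper's scene agent.

PHYSICAL = [  # invented detections of the user's real surroundings
    {"kind": "sofa",  "affordance": "sittable", "pose": (1.0, 0.0, 2.0)},
    {"kind": "table", "affordance": "support",  "pose": (0.0, 0.0, 1.0)},
]
CATALOG = {  # invented style catalog: style -> affordance -> virtual asset
    "medieval": {"sittable": "wooden_bench", "support": "oak_table"},
}

def synthesize(style: str, physical: list) -> list:
    """Replace each physical object with a same-affordance virtual object,
    keeping its pose, so real-world interactions stay valid in VR."""
    assets = CATALOG[style]
    return [{"asset": assets[obj["affordance"]], "pose": obj["pose"]}
            for obj in physical]

print(synthesize("medieval", PHYSICAL))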
Numan, N.; Rajaram, S.; Kumaravel, B. T.; Marquardt, N.; Wilson, A. D.
SpaceBlender: Creating Context-Rich Collaborative Spaces Through Generative 3D Scene Blending Proceedings Article
In: UIST - Proc. Annual ACM Symp. User Interface Softw. Technol., Association for Computing Machinery, Inc, 2024, ISBN: 979-840070628-8.
@inproceedings{numan_spaceblender_2024,
title = {SpaceBlender: Creating Context-Rich Collaborative Spaces Through Generative 3D Scene Blending},
author = {N. Numan and S. Rajaram and B. T. Kumaravel and N. Marquardt and A. D. Wilson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209252034&doi=10.1145%2f3654777.3676361&partnerID=40&md5=8744057832f9098eabfd16c8b2b5fe62},
doi = {10.1145/3654777.3676361},
isbn = {979-840070628-8},
year = {2024},
date = {2024-01-01},
booktitle = {UIST - Proc. Annual ACM Symp. User Interface Softw. Technol.},
publisher = {Association for Computing Machinery, Inc},
abstract = {There is increased interest in using generative AI to create 3D spaces for Virtual Reality (VR) applications. However, today's models produce artificial environments, falling short of supporting collaborative tasks that benefit from incorporating the user's physical context. To generate environments that support VR telepresence, we introduce SpaceBlender, a novel pipeline that utilizes generative AI techniques to blend users' physical surroundings into unified virtual spaces. This pipeline transforms user-provided 2D images into context-rich 3D environments through an iterative process consisting of depth estimation, mesh alignment, and diffusion-based space completion guided by geometric priors and adaptive text prompts. In a preliminary within-subjects study, where 20 participants performed a collaborative VR affinity diagramming task in pairs, we compared SpaceBlender with a generic virtual environment and a state-of-the-art scene generation framework, evaluating its ability to create virtual spaces suitable for collaboration. Participants appreciated the enhanced familiarity and context provided by SpaceBlender but also noted complexities in the generative environments that could detract from task focus. Drawing on participant feedback, we propose directions for improving the pipeline and discuss the value and design of blended spaces for different scenarios. © 2024 ACM.},
keywords = {3D modeling, 3D scenes, 3D spaces, AI techniques, Artificial environments, Collaborative spaces, Collaborative tasks, Generative adversarial networks, Generative AI, Telepresence, Virtual environments, Virtual Reality, Virtual reality telepresence, Virtual spaces, VR telepresence},
pubstate = {published},
tppubtype = {inproceedings}
}
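The abstract names SpaceBlender's pipeline stages: depth estimation, lifting to meshes, alignment, and diffusion-based space completion guided by prompts. The skeleton below only wires stub stages in that order to make the data flow concrete; the function names and signatures are assumptions, not SpaceBlender's actual API.

def estimate_depth(image: str) -> dict:
    return {"image": image, "depth": "depth_map"}    # monocular depth stub

def lift_to_mesh(depth: dict) -> dict:
    return {"mesh": f"mesh_from({depth['image']})"}  # back-project depth stub

def align_meshes(meshes: list) -> dict:
    return {"scene": [m["mesh"] for m in meshes]}    # register in one frame

def complete_space(scene: dict, prompt: str) -> dict:
    scene["completion"] = f"diffusion_inpaint({prompt!r})"  # fill the gaps
    return scene

rooms = [lift_to_mesh(estimate_depth(img)) for img in ["user_a.jpg", "user_b.jpg"]]
print(complete_space(align_meshes(rooms), "cozy shared studio"))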
Zhang, L.; Pan, J.; Gettig, J.; Oney, S.; Guo, A.
VRCopilot: Authoring 3D Layouts with Generative AI Models in VR Proceedings Article
In: UIST - Proc. Annual ACM Symp. User Interface Softw. Technol., Association for Computing Machinery, Inc, 2024, ISBN: 979-840070628-8.
@inproceedings{zhang_vrcopilot_2024,
title = {VRCopilot: Authoring 3D Layouts with Generative AI Models in VR},
author = {L. Zhang and J. Pan and J. Gettig and S. Oney and A. Guo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215072893&doi=10.1145%2f3654777.3676451&partnerID=40&md5=3f0845d0dd85ef93b97750f4a7d8b44e},
doi = {10.1145/3654777.3676451},
isbn = {979-840070628-8},
year = {2024},
date = {2024-01-01},
booktitle = {UIST - Proc. Annual ACM Symp. User Interface Softw. Technol.},
publisher = {Association for Computing Machinery, Inc},
abstract = {Immersive authoring provides an intuitive medium for users to create 3D scenes via direct manipulation in Virtual Reality (VR). Recent advances in generative AI have enabled the automatic creation of realistic 3D layouts. However, it is unclear how capabilities of generative AI can be used in immersive authoring to support fluid interactions, user agency, and creativity. We introduce VRCopilot, a mixed-initiative system that integrates pre-trained generative AI models into immersive authoring to facilitate human-AI co-creation in VR. VRCopilot presents multimodal interactions to support rapid prototyping and iterations with AI, and intermediate representations such as wireframes to augment user controllability over the created content. Through a series of user studies, we evaluated the potential and challenges in manual, scaffolded, and automatic creation in immersive authoring. We found that scaffolded creation using wireframes enhanced the user agency compared to automatic creation. We also found that manual creation via multimodal specification offers the highest sense of creativity and agency. © 2024 ACM.},
keywords = {3D layouts, 3D modeling, 3D scenes, Automatic creations, Co-creation, Direct manipulation, Fluid interactions, Generative adversarial networks, Generative AI, Human-AI Co-creation, Immersive authoring, Scaffolds, Three dimensional computer graphics, User agencies, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
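VRCopilot's intermediate representations (wireframes) let the user edit a coarse, labeled layout before the generative model commits concrete content to it. A hypothetical sketch of that handoff; the dataclass and realize() stub are invented for illustration:

from dataclasses import dataclass

@dataclass
class Wireframe:
    label: str     # what should go here, e.g. "bed"
    center: tuple  # x, y, z in room coordinates
    size: tuple    # width, height, depth

def realize(w: Wireframe) -> dict:
    """Stand-in for the generative step that fills a committed wireframe."""
    return {"model": f"{w.label}_01.glb", "center": w.center, "size": w.size}

layout = [Wireframe("bed", (2.0, 0.0, 1.5), (2.0, 0.5, 1.6)),
          Wireframe("desk", (0.5, 0.0, 3.0), (1.2, 0.8, 0.6))]
layout[1].center = (0.8, 0.0, 3.0)  # user adjusts a wireframe before committing
print([realize(w) for w in layout])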
de Oliveira, E. A. Masasi; Silva, D. F. C.; Filho, A. R. G.
Improving VR Accessibility Through Automatic 360 Scene Description Using Multimodal Large Language Models Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 289–293, Association for Computing Machinery, 2024, ISBN: 979-840070979-1.
@inproceedings{masasi_de_oliveira_improving_2024,
title = {Improving VR Accessibility Through Automatic 360 Scene Description Using Multimodal Large Language Models},
author = {E. A. Masasi de Oliveira and D. F. C. Silva and A. R. G. Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85206580797&doi=10.1145%2f3691573.3691619&partnerID=40&md5=6e80800fce0e6b56679fbcbe982bcfa7},
doi = {10.1145/3691573.3691619},
isbn = {979-840070979-1},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {289–293},
publisher = {Association for Computing Machinery},
abstract = {Advancements in Virtual Reality (VR) technology hold immense promise for enriching immersive experiences. Despite these advancements, there remains a significant gap in addressing accessibility concerns, particularly in automatically providing descriptive information for VR scenes. This paper explores the potential of leveraging Multimodal Large Language Models (MLLMs) to automatically generate text descriptions for 360 VR scenes in response to Speech-to-Text (STT) prompts. As a case study, we conduct experiments in educational settings in VR museums, improving dynamic experiences across various contexts. Despite minor challenges in adapting MLLMs to VR scenes, the experiments demonstrate that they can generate high-quality descriptions. Our findings provide insights for enhancing VR experiences and ensuring accessibility for individuals with disabilities or diverse needs. © 2024 Copyright held by the owner/author(s).},
keywords = {3D Scene, 3D scenes, Accessibility, Computer simulation languages, Descriptive information, Digital elevation model, Immersive, Language Model, Multi-modal, Multimodal large language model, Multimodal Large Language Models (MLLMs), Scene description, Virtual environments, Virtual Reality, Virtual Reality (VR), Virtual reality technology},
pubstate = {published},
tppubtype = {inproceedings}
}
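The described pipeline is short: speech is transcribed, then a multimodal model describes the current 360° view. A minimal sketch with placeholder stubs; a real system would call actual STT and MLLM services, which this listing does not name:

def speech_to_text(audio: bytes) -> str:
    return "What is in front of me?"  # placeholder transcription

def describe_view(frame: bytes, question: str) -> str:
    # a real MLLM call would receive the equirectangular frame plus the question
    return "A museum hall with three marble statues along the left wall."

def on_user_speech(audio: bytes, frame: bytes) -> str:
    return describe_view(frame, speech_to_text(audio))

print(on_user_speech(b"<audio>", b"<360-frame>"))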
2023
Kouzelis, L. R.; Spantidi, O.
Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API Proceedings Article
In: Bebis, G.; Ghiasi, G.; Fang, Y.; Sharf, A.; Dong, Y.; Weaver, C.; Leo, Z.; LaViola Jr., J. J.; Kohli, L. (Ed.): Lect. Notes Comput. Sci., vol. 14362, pp. 15–26, Springer Science and Business Media Deutschland GmbH, 2023, ISSN: 0302-9743; ISBN: 978-303147965-6.
@inproceedings{kouzelis_synthesizing_2023,
title = {Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API},
author = {L. R. Kouzelis and O. Spantidi},
editor = {G. Bebis and G. Ghiasi and Y. Fang and A. Sharf and Y. Dong and C. Weaver and Z. Leo and J. J. LaViola Jr. and L. Kohli},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180626887&doi=10.1007%2f978-3-031-47966-3_2&partnerID=40&md5=d15c3e2f3260e2a68bdca91c29df7bbb},
doi = {10.1007/978-3-031-47966-3_2},
issn = {0302-9743},
isbn = {978-303147965-6},
year = {2023},
date = {2023-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14362},
pages = {15–26},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In visual computing, 3D scene generation stands as a crucial component, offering applications in various fields such as gaming, virtual reality (VR), and architectural visualization. Creating realistic and versatile virtual environments, however, poses significant challenges. This work presents a novel methodology that leverages the capabilities of a widely adopted large language model (LLM) to address these challenges. Our approach utilizes the GPT API to interpret natural language prompts and generate detailed, VR-ready scenes within Unity3D. Our work is also inherently scalable, since the model accepts any database of 3D objects with minimal prior configuration. The effectiveness of the proposed system is demonstrated through a series of case studies, revealing its potential to generate diverse and functional virtual spaces. © 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
keywords = {3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing},
pubstate = {published},
tppubtype = {inproceedings}
}
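The core loop this abstract describes (constrain the model to a database of 3D objects, have the GPT API return a structured layout, validate it, then instantiate in Unity3D) can be sketched as follows. The prompt format, call_gpt() stub, and validation step are assumptions for illustration; the engine-side instantiation would happen in Unity, which this Python sketch only marks with a print.

import json

OBJECT_DB = ["campfire", "tent", "pine_tree", "rock"]  # any object database works

PROMPT = (
    "Return only JSON: a list of {name, position: [x, y, z]} using only these "
    f"objects: {OBJECT_DB}. Scene request: a small forest campsite."
)

def call_gpt(prompt: str) -> str:
    """Placeholder for the GPT API call; returns a canned reply here."""
    return json.dumps([
        {"name": "campfire", "position": [0, 0, 0]},
        {"name": "tent", "position": [2, 0, 1]},
    ])

for obj in json.loads(call_gpt(PROMPT)):
    assert obj["name"] in OBJECT_DB  # keep the model inside the object database
    print(f"spawn {obj['name']} at {obj['position']}")  # Unity would Instantiate here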