AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops, and books.
2025
Zeng, S. -Y.; Liang, T. -Y.
PartConverter: A Part-Oriented Transformation Framework for Point Clouds Journal Article
In: IET Image Processing, vol. 19, no. 1, 2025, ISSN: 1751-9659.
@article{zeng_partconverter_2025,
title = {PartConverter: A Part-Oriented Transformation Framework for Point Clouds},
author = {S. -Y. Zeng and T. -Y. Liang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005775417&doi=10.1049%2fipr2.70104&partnerID=40&md5=1ee3178fd6b4a03bc7e299e1292e9694},
doi = {10.1049/ipr2.70104},
issn = {1751-9659},
year = {2025},
date = {2025-01-01},
journal = {IET Image Processing},
volume = {19},
number = {1},
abstract = {With generative AI technologies advancing rapidly, the capabilities for 3D model generation and transformation are expanding across industries like manufacturing, healthcare, and virtual reality. However, existing methods based on generative adversarial networks (GANs), autoencoders, or transformers still have notable limitations. They primarily generate entire objects without providing flexibility for independent part transformation or precise control over model components. These constraints pose challenges for applications requiring complex object manipulation and fine-grained adjustments. To overcome these limitations, we propose PartConverter, a novel part-oriented point cloud transformation framework emphasizing flexibility and precision in 3D model transformations. PartConverter leverages attention mechanisms and autoencoders to capture crucial details within each part while modeling the relationships between components, thereby enabling highly customizable, part-wise transformations that maintain overall consistency. Additionally, our part assembler ensures that transformed parts align coherently, resulting in a consistent and realistic final 3D shape. This framework significantly enhances control over detailed part modeling, increasing the flexibility and efficiency of 3D model transformation workflows. © 2025 The Author(s). IET Image Processing published by John Wiley & Sons Ltd on behalf of The Institution of Engineering and Technology.},
keywords = {3D modeling, 3D models, 3d-modeling, Adversarial networks, attention mechanism, Attention mechanisms, Auto encoders, Cloud transformations, Generative Adversarial Network, Part assembler, Part-oriented, Point cloud transformation, Point-clouds},
pubstate = {published},
tppubtype = {article}
}
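No reference implementation accompanies this record. As a rough illustration of the pipeline the abstract describes (a shared per-part encoder, attention across part latents so transformed parts stay mutually consistent, and per-part decoding), here is a minimal PyTorch sketch; every class name, layer size, and the single attention block are illustrative assumptions, not the authors' architecture.

import torch
import torch.nn as nn

class PartEncoder(nn.Module):
    # Shared PointNet-style encoder: one latent vector per part.
    def __init__(self, dim=128):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, dim))

    def forward(self, pts):  # pts: (batch, n_points, 3)
        return self.mlp(pts).max(dim=1).values  # max-pool over points -> (batch, dim)

class PartwiseTransformSketch(nn.Module):
    # Encode each part, let the part latents attend to one another, then
    # decode each latent back to a point set for that part.
    def __init__(self, dim=128, points_per_part=512):
        super().__init__()
        self.encoder = PartEncoder(dim)
        self.attention = nn.MultiheadAttention(dim, num_heads=4, batch_first=True)
        self.decoder = nn.Sequential(
            nn.Linear(dim, 256), nn.ReLU(), nn.Linear(256, points_per_part * 3))
        self.points_per_part = points_per_part

    def forward(self, parts):  # parts: list of (batch, n_i, 3) tensors
        latents = torch.stack([self.encoder(p) for p in parts], dim=1)  # (batch, P, dim)
        latents, _ = self.attention(latents, latents, latents)  # parts exchange context
        flat = self.decoder(latents)  # (batch, P, points_per_part * 3)
        return flat.view(flat.shape[0], len(parts), self.points_per_part, 3)

parts = [torch.randn(2, 256, 3) for _ in range(4)]  # 4 parts, batch of 2
print(PartwiseTransformSketch()(parts).shape)       # torch.Size([2, 4, 512, 3])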
Kurai, R.; Hiraki, T.; Hiroi, Y.; Hirao, Y.; Perusquia-Hernandez, M.; Uchiyama, H.; Kiyokawa, K.
An implementation of MagicCraft: Generating Interactive 3D Objects and Their Behaviors from Text for Commercial Metaverse Platforms Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1284–1285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
@inproceedings{kurai_implementation_2025,
title = {An implementation of MagicCraft: Generating Interactive 3D Objects and Their Behaviors from Text for Commercial Metaverse Platforms},
author = {R. Kurai and T. Hiraki and Y. Hiroi and Y. Hirao and M. Perusquia-Hernandez and H. Uchiyama and K. Kiyokawa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153642&doi=10.1109%2fVRW66409.2025.00288&partnerID=40&md5=53fa1ac92c3210f0ffa090ffa1af7e6e},
doi = {10.1109/VRW66409.2025.00288},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1284–1285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Metaverse platforms are rapidly evolving to provide immersive spaces. However, the generation of dynamic and interactive 3D objects remains a challenge due to the need for advanced 3D modeling and programming skills. We present MagicCraft, a system that generates functional 3D objects from natural language prompts. MagicCraft uses generative AI models to manage the entire content creation pipeline: converting user text descriptions into images, transforming images into 3D models, predicting object behavior, and assigning necessary attributes and scripts. It also provides an interactive interface for users to refine generated objects by adjusting features like orientation, scale, seating positions, and grip points. © 2025 IEEE.},
keywords = {3D modeling, 3D models, 3D object, 3D Object Generation, 3d-modeling, AI-Assisted Design, Generative AI, Immersive, Metaverse, Metaverses, Model skill, Object oriented programming, Programming skills},
pubstate = {published},
tppubtype = {inproceedings}
}
Dong, Y.
Enhancing Painting Exhibition Experiences with the Application of Augmented Reality-Based AI Video Generation Technology Proceedings Article
In: Zaphiris, P.; Ioannou, A.; Sottilare, R.A.; Schwarz, J.; Rauterberg, M. (Ed.): Lect. Notes Comput. Sci., pp. 256–262, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-303176814-9.
@inproceedings{dong_enhancing_2025,
title = {Enhancing Painting Exhibition Experiences with the Application of Augmented Reality-Based AI Video Generation Technology},
author = {Y. Dong},
editor = {Zaphiris P. and Ioannou A. and Sottilare R.A. and Schwarz J. and Rauterberg M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85213302959&doi=10.1007%2f978-3-031-76815-6_18&partnerID=40&md5=35484f5ed199a831f1a30f265a0d32d5},
doi = {10.1007/978-3-031-76815-6_18},
issn = {0302-9743},
isbn = {978-303176814-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15378 LNCS},
pages = {256–262},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Traditional painting exhibitions often rely on flat presentation methods, such as walls and stands, limiting their impact. Augmented Reality (AR) technology presents an opportunity to transform these experiences by turning static, flat artwork into dynamic, multi-dimensional presentations. However, creating and integrating video or dynamic content can be time-consuming and challenging, requiring meticulous planning, design, and production. In the context of urban renewal and community revitalization, particularly in China’s first-tier cities where real estate development has saturated the market, there is a growing trend to repurpose traditional commercial and office spaces with cultural and artistic exhibitions. These exhibitions not only enhance the spatial quality but also elevate the user experience, making the spaces more competitive. However, these non-traditional exhibition venues often lack the amenities of professional galleries, relying on walls, windows, and corners for displays, and requiring quick setup times. For visitors, who are often office workers or shoppers with limited time, the use of personal mobile devices for interaction is common. WeChat, China’s most widely used mobile application, provides a platform for convenient digital interactive experiences through mini-programs, which can support lightweight AR applications. AI video generation technologies, such as Conditional Generative Adversarial Networks (ControlNet) and Latent Consistency Models (LCM), have seen significant advancements. These technologies now allow for the creation of 3D models and video content from text and images. Tools like Meshy and Pika provide the ability to generate various video styles and offer precise control over video content. New AI video applications like Stable Video further expand the possibilities by rapidly converting static images into dynamic videos, facilitating easy adjustments and edits. This paper explores the application of AR-based AI video generation technology in enhancing the experience of painting exhibitions. By integrating these technologies, traditional paintings can be transformed into interactive, engaging displays that enrich the viewer’s experience. The study demonstrates the potential of these innovations to make art exhibitions more appealing and competitive in various public spaces, thereby improving both artistic expression and audience engagement. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {3D modeling, AI-generated art, Art and Technology, Arts computing, Augmented Reality, Augmented reality technology, Digital Exhibition Design, Dynamic content, E-Learning, Education computing, Generation technologies, Interactive computer graphics, Knowledge Management, Multi dimensional, Planning designs, Three dimensional computer graphics, Video contents, Video generation},
pubstate = {published},
tppubtype = {inproceedings}
}
Shen, Y.; Li, B.; Huang, J.; Wang, Z.
GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1292–1293, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
@inproceedings{shen_gaussianshopvr_2025,
title = {GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR},
author = {Y. Shen and B. Li and J. Huang and Z. Wang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005138672&doi=10.1109%2fVRW66409.2025.00292&partnerID=40&md5=9b644bd19394a289d3027ab9a2dfed6a},
doi = {10.1109/VRW66409.2025.00292},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1292–1293},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality (VR) applications require massive high-quality 3D assets to create immersive environments. Generating mesh-based 3D assets typically involves a significant amount of manpower and effort, which makes VR applications less accessible. 3D Gaussian Splatting (3DGS) has attracted much attention for its ability to quickly create digital replicas of real-life scenes and its compatibility with traditional rendering pipelines. However, it remains a challenge to edit 3DGS in a flexible and controllable manner. We propose GaussianShopVR, a system that leverages VR user interfaces to specify target areas to achieve flexible and controllable editing of reconstructed 3DGS. In addition, selected areas can provide 3D information to generative AI models to facilitate the editing. GaussianShopVR integrates object hierarchy management while keeping the backpropagated gradient flow to allow local editing with context information. © 2025 IEEE.},
keywords = {3D authoring, 3D modeling, Digital replicas, Gaussian distribution, Gaussian Splatting editing, Gaussians, Graphical user interfaces, High quality, Immersive, Immersive environment, Interactive computer graphics, Rendering (computer graphics), Rendering pipelines, Splatting, Three dimensional computer graphics, User profile, Virtual Reality, Virtual reality user interface, Virtualization, VR user interface},
pubstate = {published},
tppubtype = {inproceedings}
}
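The record gives no implementation detail beyond the abstract, but the phrase "keeping the backpropagated gradient flow to allow local editing" suggests gradient masking over a user-selected subset of Gaussians. A minimal PyTorch sketch of that single idea follows; the mask, the toy loss, and all names are stand-ins (a real system would render the scene and optimize an image-space loss).

import torch

# Gaussian centers of a reconstructed scene (toy data here).
positions = torch.randn(10_000, 3, requires_grad=True)
target = positions.detach() + torch.tensor([0.0, 0.2, 0.0])  # toy edit: lift a region

# Stand-in for the region a user would sweep out with a VR controller.
mask = (positions[:, 0] > 0.0).float().unsqueeze(1).detach()

# Gradients still flow through the whole scene, but are zeroed outside the
# selection, so only Gaussians inside the edit region actually move.
positions.register_hook(lambda grad: grad * mask)

optimizer = torch.optim.Adam([positions], lr=1e-2)
for _ in range(100):
    optimizer.zero_grad()
    loss = ((positions - target) ** 2).mean()  # placeholder for a rendering loss
    loss.backward()
    optimizer.step()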
Rasch, J.; Töws, J.; Hirzle, T.; Müller, F.; Schmitz, M.
CreepyCoCreator? Investigating AI Representation Modes for 3D Object Co-Creation in Virtual Reality Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
@inproceedings{rasch_creepycocreator_2025,
title = {CreepyCoCreator? Investigating AI Representation Modes for 3D Object Co-Creation in Virtual Reality},
author = {J. Rasch and J. Töws and T. Hirzle and F. Müller and M. Schmitz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005742763&doi=10.1145%2f3706598.3713720&partnerID=40&md5=e6cdcb6cc7249a8836ecc39ae103cd53},
doi = {10.1145/3706598.3713720},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Generative AI in Virtual Reality offers the potential for collaborative object-building, yet challenges remain in aligning AI contributions with user expectations. In particular, users often struggle to understand and collaborate with AI when its actions are not transparently represented. This paper thus explores the co-creative object-building process through a Wizard-of-Oz study, focusing on how AI can effectively convey its intent to users during object customization in Virtual Reality. Inspired by human-to-human collaboration, we focus on three representation modes: the presence of an embodied avatar, whether the AI's contributions are visualized immediately or incrementally, and whether the areas modified are highlighted in advance. The findings provide insights into how these factors affect user perception and interaction with object-generating AI tools in Virtual Reality as well as satisfaction and ownership of the created objects. The results offer design implications for co-creative world-building systems, aiming to foster more effective and satisfying collaborations between humans and AI in Virtual Reality. © 2025 Copyright held by the owner/author(s).},
keywords = {3D Creation, 3D modeling, 3D object, Building process, Co-creation, Co-creative system, Co-creative systems, Creative systems, Creatives, Generative AI, Three dimensional computer graphics, User expectations, User Studies, User study, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Cao, X.; Ju, K. P.; Li, C.; Jain, D.
SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene? Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8.
@inproceedings{cao_scenegena11y_2025,
title = {SceneGenA11y: How can Runtime Generative tools improve the Accessibility of a Virtual 3D Scene?},
author = {X. Cao and K. P. Ju and C. Li and D. Jain},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005772656&doi=10.1145%2f3706599.3720265&partnerID=40&md5=9b0bf29c3e89b70efa2d6a3e740829fb},
doi = {10.1145/3706599.3720265},
isbn = {979-840071395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {With the popularity of virtual 3D applications, from video games to educational content and virtual reality scenarios, the accessibility of 3D scene information is vital to ensure inclusive and equitable experiences for all. Previous work includes information substitutions like audio description and captions, as well as personalized modifications, but these could only provide predefined accommodations. In this work, we propose SceneGenA11y, a system that responds to the user’s natural language prompts to improve accessibility of a 3D virtual scene at runtime. The system primes LLM agents with accessibility-related knowledge, allowing users to explore the scene and perform verifiable modifications to improve accessibility. We conducted a preliminary evaluation of our system with three blind and low-vision people and three deaf and hard-of-hearing people. The results show that our system is intuitive to use and can successfully improve accessibility. We discussed usage patterns of the system, potential improvements, and integration into apps. We ended by highlighting plans for future work. © 2025 Copyright held by the owner/author(s).},
keywords = {3D application, 3D modeling, 3D scenes, Accessibility, BLV, DHH, Discrete event simulation, Generative AI, Generative tools, Interactive computer graphics, One dimensional, Runtimes, Three dimensional computer graphics, Video-games, Virtual 3d scene, virtual 3D scenes, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
doi = {10.1109/VRW66409.2025.00038},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
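The framework's reported optimization step, reducing mesh complexity so generated assets load quickly on resource-constrained AR devices, is in essence a decimation pass. The abstract does not say which library the authors used; a sketch with Open3D's quadric decimation (the file names and the 5,000-triangle budget are arbitrary assumptions) would look like this:

import open3d as o3d

# Load a text-to-3D output (hypothetical path) and decimate it for AR delivery.
mesh = o3d.io.read_triangle_mesh("generated_model.obj")
print("before:", len(mesh.triangles), "triangles")

simplified = mesh.simplify_quadric_decimation(target_number_of_triangles=5000)
simplified.remove_unreferenced_vertices()
print("after:", len(simplified.triangles), "triangles")

o3d.io.write_triangle_mesh("generated_model_lod.obj", simplified)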
Behravan, M.; Haghani, M.; Gračanin, D.
Transcending Dimensions Using Generative AI: Real-Time 3D Model Generation in Augmented Reality Proceedings Article
In: Chen, J.Y.C.; Fragomeni, G. (Ed.): Lect. Notes Comput. Sci., pp. 13–32, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743; ISBN: 978-303193699-9.
@inproceedings{behravan_transcending_2025,
title = {Transcending Dimensions Using Generative AI: Real-Time 3D Model Generation in Augmented Reality},
author = {M. Behravan and M. Haghani and D. Gračanin},
editor = {Chen J.Y.C. and Fragomeni G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007690904&doi=10.1007%2f978-3-031-93700-2_2&partnerID=40&md5=1c4d643aad88d08cbbc9dd2c02413f10},
doi = {10.1007/978-3-031-93700-2_2},
issn = {0302-9743},
isbn = {978-303193699-9},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15788 LNCS},
pages = {13–32},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Traditional 3D modeling requires technical expertise, specialized software, and time-intensive processes, making it inaccessible for many users. Our research aims to lower these barriers by combining generative AI and augmented reality (AR) into a cohesive system that allows users to easily generate, manipulate, and interact with 3D models in real time, directly within AR environments. Utilizing cutting-edge AI models like Shap-E, we address the complex challenges of transforming 2D images into 3D representations in AR environments. Key challenges such as object isolation, handling intricate backgrounds, and achieving seamless user interaction are tackled through advanced object detection methods, such as Mask R-CNN. Evaluation results from 35 participants reveal an overall System Usability Scale (SUS) score of 69.64, with participants who engaged with AR/VR technologies more frequently rating the system significantly higher, at 80.71. This research is particularly relevant for applications in gaming, education, and AR-based e-commerce, offering intuitive model creation for users without specialized skills. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {3D Model Generation, 3D modeling, 3D models, 3d-modeling, Augmented Reality, Generative AI, Image-to-3D conversion, Model generation, Object Detection, Object recognition, Objects detection, Real- time, Specialized software, Technical expertise, Three dimensional computer graphics, Usability engineering},
pubstate = {published},
tppubtype = {inproceedings}
}
Vachha, C.; Kang, Y.; Dive, Z.; Chidambaram, A.; Gupta, A.; Jun, E.; Hartmann, B.
Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
@inproceedings{vachha_dreamcrafter_2025,
title = {Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs},
author = {C. Vachha and Y. Kang and Z. Dive and A. Chidambaram and A. Gupta and E. Jun and B. Hartmann},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005725679&doi=10.1145%2f3706598.3714312&partnerID=40&md5=68cf2a08d3057fd9756e25d53959872b},
doi = {10.1145/3706598.3714312},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Authoring 3D scenes is a central task for spatial computing applications. Competing visions for lowering existing barriers are (1) focus on immersive, direct manipulation of 3D content or (2) leverage AI techniques that capture real scenes (3D Radiance Fields such as NeRFs and 3D Gaussian Splatting) and modify them at a higher level of abstraction, at the cost of high latency. We unify the complementary strengths of these approaches and investigate how to integrate generative AI advances into real-time, immersive 3D Radiance Field editing. We introduce Dreamcrafter, a VR-based 3D scene editing system that: (1) provides a modular architecture to integrate generative AI algorithms; (2) combines different levels of control for creating objects, including natural language and direct manipulation; and (3) introduces proxy representations that support interaction during high-latency operations. We contribute empirical findings on control preferences and discuss how generative AI interfaces beyond text input enhance creativity in scene editing and world building. © 2025 Copyright held by the owner/author(s).},
keywords = {3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface},
pubstate = {published},
tppubtype = {inproceedings}
}
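Of the ideas in this abstract, the proxy representation for high-latency operations is the most transferable: show a cheap placeholder immediately and swap in the generated asset when it arrives. A small asyncio sketch of that pattern follows; all names are illustrative, and the paper's actual system is VR- and renderer-specific.

import asyncio

async def generate_asset(prompt: str) -> str:
    # Stand-in for a slow generative backend (seconds of latency).
    await asyncio.sleep(2.0)
    return f"mesh://{prompt.replace(' ', '_')}"

async def place_object(prompt: str, scene: list) -> None:
    proxy = {"prompt": prompt, "mesh": "builtin://proxy_cube"}  # appears instantly
    scene.append(proxy)
    proxy["mesh"] = await generate_asset(prompt)  # swapped in when ready

async def main() -> None:
    scene: list = []
    # The user keeps interacting while both generations run concurrently.
    await asyncio.gather(place_object("stone fountain", scene),
                         place_object("old oak tree", scene))
    print(scene)

asyncio.run(main())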
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 1077-2626.
@article{chen_llmer_2025,
title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
author = {J. Chen and X. Wu and T. Lan and B. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2fTVCG.2025.3549549&partnerID=40&md5=da4681d0714548e3a7e0c8c3295d2348},
doi = {10.1109/TVCG.2025.3549549},
issn = {1077-2626},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2715–2724},
abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. It leads to not only increased costs with pay-per-use models, but also elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 1995-2012 IEEE.},
keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
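LLMER's central claim is that constraining the model to emit JSON rather than executable scripts makes failures checkable before they can crash the application. The paper's schema is not reproduced in this record, so the action vocabulary and field names below are invented for illustration; the validate-before-dispatch pattern itself is generic Python.

import json

ALLOWED_ACTIONS = {"spawn", "move", "delete", "animate"}  # assumed vocabulary

def parse_llm_commands(raw: str) -> list[dict]:
    # Reject malformed output up front instead of executing a flawed script.
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as err:
        raise ValueError(f"LLM returned invalid JSON: {err}") from err
    commands = data if isinstance(data, list) else [data]
    for cmd in commands:
        if not isinstance(cmd, dict):
            raise ValueError("each command must be a JSON object")
        if cmd.get("action") not in ALLOWED_ACTIONS:
            raise ValueError(f"unknown action: {cmd.get('action')!r}")
        if not isinstance(cmd.get("target"), str):
            raise ValueError("every command needs a string 'target'")
    return commands

print(parse_llm_commands('[{"action": "spawn", "target": "chair", "position": [0, 0, 1]}]'))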
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9.
@inproceedings{chen_analyzing_2025,
title = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
author = {J. Chen and J. Grubert and P. O. Kristensson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2fVR59515.2025.00045&partnerID=40&md5=306aa7fbb3dad0aa9d43545f3c7eb9ea},
doi = {10.1109/VR59515.2025.00045},
isbn = {979-833153645-9},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {206–216},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems which involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments. © 2025 IEEE.},
keywords = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, J.; Weber, R.; Wen, E.; Lottridge, D.
Real-Time Full-body Interaction with AI Dance Models: Responsiveness to Contemporary Dance Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 1177–1187, Association for Computing Machinery, 2025, ISBN: 979-840071306-4.
@inproceedings{zhou_real-time_2025,
title = {Real-Time Full-body Interaction with AI Dance Models: Responsiveness to Contemporary Dance},
author = {J. Zhou and R. Weber and E. Wen and D. Lottridge},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001922427&doi=10.1145%2f3708359.3712077&partnerID=40&md5=cea9213198220480b80b7a4840d26ccc},
doi = {10.1145/3708359.3712077},
isbn = {979-840071306-4},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {1177–1187},
publisher = {Association for Computing Machinery},
abstract = {Interactive AI chatbots put the power of Large-Language Models (LLMs) into people's hands; it is this interactivity that fueled explosive worldwide influence. In the generative dance space, however, there are few deep-learning-based generative dance models built with interactivity in mind. The release of the AIST++ dance dataset in 2021 led to an uptick of capabilities in generative dance models. Whether these models could be adapted to support interactivity and how well this approach will work is not known. In this study, we explore the capabilities of existing generative dance models for motion-to-motion synthesis on real-time, full-body motion-captured contemporary dance data. We identify an existing model that we adapted to support interactivity: the Bailando++ model, which is trained on the AIST++ dataset and was modified to take music and a motion sequence as input parameters in an interactive loop. We worked with two professional contemporary choreographers and dancers to record and curate a diverse set of 203 motion-captured dance sequences as a set of "user inputs" captured through the OptiTrack high-precision motion capture 3D tracking system. We extracted 17 quantitative movement features from the motion data using the well-established Laban Movement Analysis theory, which allowed for quantitative comparisons of inter-movement correlations, which we used for clustering input data and comparing input and output sequences. A total of 10 pieces of music were used to generate a variety of outputs using the adapted Bailando++ model. We found that, on average, the generated output motion achieved only moderate correlations to the user input, with some exceptions of movement and music pairs achieving high correlation. The high-correlation generated output sequences were deemed responsive and relevant co-creations in relation to the input sequences. We discuss implications for interactive generative dance agents, where 3D joint coordinate data should be used over SMPL parameters for ease of real-time generation, and how Laban Movement Analysis could be used to extract useful features and fine-tune deep-learning models. © 2025 Copyright held by the owner/author(s).},
keywords = {3D modeling, Chatbots, Computer interaction, Deep learning, Deep-Learning Dance Model, Design of Human-Computer Interaction, Digital elevation model, Generative AI, Input output programs, Input sequence, Interactivity, Motion capture, Motion tracking, Movement analysis, Output sequences, Problem oriented languages, Real- time, Text mining, Three dimensional computer graphics, User input, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
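The study's responsiveness measure, per-feature correlation between the dancer's input sequence and the model's output over 17 Laban-derived features, is straightforward to reproduce in outline. A NumPy sketch with random stand-in data follows; the paper's exact feature definitions and windowing are not given in this record.

import numpy as np

rng = np.random.default_rng(0)
user_feats = rng.random((300, 17))   # frames x Laban-style movement features (input)
model_feats = rng.random((300, 17))  # same shape, generated output

def feature_correlations(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # Column-wise Pearson correlation: standardize, then average the products.
    a = (a - a.mean(axis=0)) / a.std(axis=0)
    b = (b - b.mean(axis=0)) / b.std(axis=0)
    return (a * b).mean(axis=0)

corr = feature_correlations(user_feats, model_feats)
print("per-feature correlation:", np.round(corr, 2))
print("mean responsiveness:", corr.mean())  # 'moderate' would land mid-range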
Xing, Y.; Ban, J.; Hubbard, T. D.; Villano, M.; Gómez-Zará, D.
Immersed in my Ideas: Using Virtual Reality and LLMs to Visualize Users’ Ideas and Thoughts Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 60–65, Association for Computing Machinery, 2025, ISBN: 979-840071409-2.
@inproceedings{xing_immersed_2025,
title = {Immersed in my Ideas: Using Virtual Reality and LLMs to Visualize Users’ Ideas and Thoughts},
author = {Y. Xing and J. Ban and T. D. Hubbard and M. Villano and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001675169&doi=10.1145%2f3708557.3716330&partnerID=40&md5=20fb0623d2a1fff92282116b01fac4f3},
doi = {10.1145/3708557.3716330},
isbn = {979-840071409-2},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {60–65},
publisher = {Association for Computing Machinery},
abstract = {We introduce the Voice Interactive Virtual Reality Annotation (VIVRA), an application that employs Large Language Models to facilitate brainstorming and idea exploration in an immersive 3D environment. As users think aloud to brainstorm and ideate, the application automatically detects, summarizes, suggests, and connects their ideas in real time. The experience brings participants into a room where their ideas emerge as interactive objects that embody the topics detected from their ideas. We evaluated the effectiveness of VIVRA in an exploratory study with 29 participants, followed by a user study with 10 participants comparing the application with other visualizations. Our results show that VIVRA helped participants reflect and think more about their ideas, serving as a valuable tool for personal exploration. We discuss the potential benefits and applications, highlighting the benefits of combining immersive 3D spaces and LLMs to explore, learn, and reflect on ideas. © 2025 Copyright held by the owner/author(s).},
keywords = {3-D environments, 3D modeling, Computer simulation languages, Creativity, Idea Generation, Immersive, Interactive virtual reality, Language Model, Large language model, Multimodal Interaction, Reflection, Text Visualization, Think aloud, Virtual environments, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhang, H.; Chen, P.; Xie, X.; Jiang, Z.; Wu, Y.; Li, Z.; Chen, X.; Sun, L.
FusionProtor: A Mixed-Prototype Tool for Component-level Physical-to-Virtual 3D Transition and Simulation Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
@inproceedings{zhang_fusionprotor_2025,
title = {FusionProtor: A Mixed-Prototype Tool for Component-level Physical-to-Virtual 3D Transition and Simulation},
author = {H. Zhang and P. Chen and X. Xie and Z. Jiang and Y. Wu and Z. Li and X. Chen and L. Sun},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005745450&doi=10.1145%2f3706598.3713686&partnerID=40&md5=e51eac0cc99293538422d98a4070cd09},
doi = {10.1145/3706598.3713686},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Developing and simulating 3D prototypes is crucial in product conceptual design for ideation and presentation. Traditional methods often keep physical and virtual prototypes separate, leading to a disjointed prototype workflow. In addition, acquiring high-fidelity prototypes is time-consuming and resource-intensive, distracting designers from creative exploration. Recent advancements in generative artificial intelligence (GAI) and extended reality (XR) provided new solutions for rapid prototype transition and mixed simulation. We conducted a formative study to understand current challenges in the traditional prototype process and explore how to effectively utilize GAI and XR capabilities in prototyping. Then we introduced FusionProtor, a mixed-prototype tool for component-level 3D prototype transition and simulation. We proposed a step-by-step generation pipeline in FusionProtor, effectively transitioning 3D prototypes from physical to virtual and low- to high-fidelity for rapid ideation and iteration. We also innovated a component-level 3D creation method and applied it in an XR environment for mixed-prototype presentation and interaction. We conducted technical and user experiments to verify FusionProtor's usability in supporting diverse designs. Our results verified that it achieved a seamless workflow between physical and virtual domains, enhancing efficiency and promoting ideation. We also explored the effect of mixed interaction on design and critically discussed its best practices for the HCI community. © 2025 Copyright held by the owner/author(s). Publication rights licensed to ACM.},
keywords = {3D modeling, 3D prototype, 3D simulations, 3d transition, Component levels, Conceptual design, Creatives, Generative AI, High-fidelity, Integrated circuit layout, Mixed reality, Product conceptual designs, Prototype tools, Prototype workflow, Three dimensional computer graphics, Usability engineering, Virtual Prototyping},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Gaudi, T.; Kapralos, B.; Quevedo, A.
Structural and Functional Fidelity of Virtual Humans in Immersive Virtual Learning Environments Proceedings Article
In: IEEE Gaming, Entertain., Media Conf., GEM, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037453-7.
@inproceedings{gaudi_structural_2024,
title = {Structural and Functional Fidelity of Virtual Humans in Immersive Virtual Learning Environments},
author = {T. Gaudi and B. Kapralos and A. Quevedo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199517136&doi=10.1109%2fGEM61861.2024.10585535&partnerID=40&md5=bf271019e077b5e464bcd62b1b28312b},
doi = {10.1109/GEM61861.2024.10585535},
isbn = {979-835037453-7},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Gaming, Entertain., Media Conf., GEM},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Central to many immersive virtual learning environments (iVLEs) are virtual humans, or characters that are digital representations, which can serve as virtual instructors to facilitate learning. Current technology is allowing the production of photo-realistic (high fidelity/highly realistic) avatars, whether using traditional approaches relying on 3D modeling, or modern tools leveraging generative AI and virtual character creation tools. However, fidelity (i.e., level of realism) is complex as it can be analyzed from various points of view referring to its structure, function, interactivity, and behavior among others. Given its relevance, fidelity can influence various aspects of iVLEs including engagement and ultimately learning outcomes. In this work-in-progress paper, we propose a study that will examine the effect of structural and functional fidelity of a virtual human assistant on engagement within a virtual simulation designed to teach the cognitive aspects (e.g., the steps of a procedure) of the heart auscultation procedure. © 2024 IEEE.},
keywords = {3D modeling, Computer aided instruction, Digital representations, E-Learning, Engagement, fidelity, Immersive, Immersive virtual learning environment, Serious game, Serious games, Three dimensional computer graphics, Virtual character, virtual human, Virtual humans, Virtual instructors, Virtual learning environments, Virtual Reality, virtual simulation, Virtual simulations},
pubstate = {published},
tppubtype = {inproceedings}
}
Weng, S. C. -C.
Studying How Prompt-Generated 3D Models Affect the Creation Process of Mixed Reality Applications Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 654–655, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9.
@inproceedings{weng_studying_2024,
title = {Studying How Prompt-Generated 3D Models Affect the Creation Process of Mixed Reality Applications},
author = {S. C. -C. Weng},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214403987&doi=10.1109%2fISMAR-Adjunct64951.2024.00196&partnerID=40&md5=46d553927e96356d73ffc5996fbbdc71},
doi = {10.1109/ISMAR-Adjunct64951.2024.00196},
isbn = {979-833150691-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {654–655},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In this doctoral consortium, we build upon our previous research prototype, Dream Mesh, a Mixed Reality application that generates models in MR based on user speech prompts. To evaluate the application and answer the questions derived from our pilot research, I propose a future user study plan. This plan aims to investigate how prompt-generated 3D models affect the creation process of Mixed Reality applications. © 2024 IEEE.},
keywords = {3D modeling, 3D models, 3d-modeling, Creation process, Generative AI, Mixed reality, Prompt-generated 3d model, Prompt-generated 3D models, Research prototype, Study plans, User study},
pubstate = {published},
tppubtype = {inproceedings}
}
Hart, A.; Shakir, M. Z.
Realtime AI Driven Environment Development for Virtual Metaverse Proceedings Article
In: IEEE Int. Conf. Metrol. Ext. Real., Artif. Intell. Neural Eng., MetroXRAINE - Proc., pp. 313–318, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037800-9.
@inproceedings{hart_realtime_2024,
title = {Realtime AI Driven Environment Development for Virtual Metaverse},
author = {A. Hart and M. Z. Shakir},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216090810&doi=10.1109%2fMetroXRAINE62247.2024.10796022&partnerID=40&md5=e339d3117291e480231b7bc32f117506},
doi = {10.1109/MetroXRAINE62247.2024.10796022},
isbn = {979-835037800-9},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Int. Conf. Metrol. Ext. Real., Artif. Intell. Neural Eng., MetroXRAINE - Proc.},
pages = {313–318},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The integration of Artificial Intelligence (AI) into the development of Metaverse environments denotes a noteworthy shift towards crafting virtual spaces with improved interactivity, immersion, and realism. This study delves into the various roles AI plays in using 3D models and enriching experiences in virtual and augmented reality to create scalable, dynamic virtual environments. It carefully examines the challenges related to computational demands, such as processing power and data storage, scalability issues, and ethical considerations concerning privacy and the misuse of AI-generated content. By exploring AI's application in game engine platforms such as Unity through ongoing research, this paper highlights the technical achievements and ever-growing possibilities unlocked by AI, such as creating lifelike virtual environments. © 2024 IEEE.},
keywords = {3D modeling, 3D models, 3d-modeling, AI in Metaverse Development, Artificial intelligence in metaverse development, Digital elevation model, Digital Innovation, Digital innovations, Metaverses, Real- time, Real-Time Adaptation, Scalable virtual world, Scalable Virtual Worlds, Unity Integration, Virtual environments, Virtual worlds},
pubstate = {published},
tppubtype = {inproceedings}
}
Guo, Y.; Hou, K.; Yan, Z.; Chen, H.; Xing, G.; Jiang, X.
Sensor2Scene: Foundation Model-Driven Interactive Realities Proceedings Article
In: Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys, pp. 13–19, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036345-6.
@inproceedings{guo_sensor2scene_2024,
title = {Sensor2Scene: Foundation Model-Driven Interactive Realities},
author = {Y. Guo and K. Hou and Z. Yan and H. Chen and G. Xing and X. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199893762&doi=10.1109%2fFMSys62467.2024.00007&partnerID=40&md5=c3bf1739e8c1dc6227d61609ddc66910},
doi = {10.1109/FMSys62467.2024.00007},
isbn = {979-835036345-6},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys},
pages = {13–19},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) is acclaimed for its potential to bridge the physical and virtual worlds. Yet, current integration between these realms often lacks a deep understanding of the physical environment and the subsequent scene generation that reflects this understanding. This research introduces Sensor2Scene, a novel system framework designed to enhance user interactions with sensor data through AR. At its core, an AI agent leverages large language models (LLMs) to decode subtle information from sensor data, constructing detailed scene descriptions for visualization. To enable these scenes to be rendered in AR, we decompose the scene creation process into tasks of text-to-3D model generation and spatial composition, allowing new AR scenes to be sketched from the descriptions. We evaluated our framework using an LLM evaluator based on five metrics on various datasets to examine the correlation between sensor readings and corresponding visualizations, and demonstrated the system's effectiveness with scenes generated end-to-end. The results highlight the potential of LLMs to understand IoT sensor data. Furthermore, generative models can aid in transforming these interpretations into visual formats, thereby enhancing user interaction. This work not only displays the capabilities of Sensor2Scene but also lays a foundation for advancing AR with the goal of creating more immersive and contextually rich experiences. © 2024 IEEE.},
keywords = {3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Krauss, C.; Bassbouss, L.; Upravitelev, M.; An, T. -S.; Altun, D.; Reray, L.; Balitzki, E.; Tamimi, T. El; Karagülle, M.
Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse Proceedings Article
In: Sottilare, R.A.; Schwarz, J. (Ed.): Lect. Notes Comput. Sci., pp. 219–238, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 0302-9743; ISBN: 978-303160608-3.
@inproceedings{krauss_opportunities_2024,
title = {Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse},
author = {C. Krauss and L. Bassbouss and M. Upravitelev and T. -S. An and D. Altun and L. Reray and E. Balitzki and T. El Tamimi and M. Karagülle},
editor = {Sottilare R.A. and Schwarz J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196214138&doi=10.1007%2f978-3-031-60609-0_16&partnerID=40&md5=9a66876cb30e9e5d287a86e6cfa66e05},
doi = {10.1007/978-3-031-60609-0_16},
issn = {0302-9743},
isbn = {978-303160608-3},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14727 LNCS},
pages = {219–238},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {The paper explores the opportunities and challenges for metaverse learning environments with AI-Assistants based on Large Language Models. A proof of concept based on popular but proprietary technologies is presented that enables a natural language exchange between the user and an AI-based medical expert in a highly immersive environment based on the Unreal Engine. The answers generated by ChatGPT are not only played back lip-synchronously, but also visualized in the VR environment using a 3D model of a skeleton. Usability and user experience play a particularly important role in the development of the highly immersive AI-Assistant. The proof of concept serves to illustrate the opportunities and challenges that lie in the merging of large language models, metaverse applications and educational ecosystems, which are self-contained research areas. Development strategies, tools and interoperability standards will be presented to facilitate future developments at the intersection of these three fields. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {3D modeling, AI-assistant, AI-Assistants, Computational Linguistics, Computer aided instruction, Concept-based, E-Learning, Education, Interoperability, Language Model, Large language model, large language models, Learning Environments, Learning systems, Learning Technologies, Learning technology, LLM, Metaverse, Metaverses, Natural language processing systems, Proof of concept, User interfaces, Virtual assistants, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Jeong, E.; Kim, H.; Park, S.; Yoon, S.; Ahn, J.; Woo, W.
Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts Proceedings Article
In: U., Eck; M., Sra; J., Stefanucci; M., Sugimoto; M., Tatzgern; I., Williams (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 205–208, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics
@inproceedings{jeong_function-adaptive_2024,
title = {Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts},
author = {E. Jeong and H. Kim and S. Park and S. Yoon and J. Ahn and W. Woo},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214379963&doi=10.1109%2fISMAR-Adjunct64951.2024.00050&partnerID=40&md5=7222e0599a7e2aa0adaea38e4b9e13cc},
doi = {10.1109/ISMAR-Adjunct64951.2024.00050},
isbn = {979-833150691-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {205–208},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We propose an algorithm that extracts the most suitable affordances, interaction targets, and corresponding coordinates adaptively from 3D models of various artifacts based on their functional context for efficient authoring of XR content with artifacts. Traditionally, authoring AR scenes to convey artifact context required one-to-one manual work. Our approach leverages a Large Language Model (LLM) to extract interaction types, positions, and subjects based on the artifact's name and usage context. This enables templated XR experience creation, replacing repetitive manual labor. Consequently, our system streamlines the XR authoring process, making it more efficient and scalable. © 2024 IEEE.},
keywords = {3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Kim, S. J.; Cao, D. D.; Spinola, F.; Lee, S. J.; Cho, K. S.
RoomRecon: High-Quality Textured Room Layout Reconstruction on Mobile Devices Proceedings Article
In: U., Eck; M., Sra; J., Stefanucci; M., Sugimoto; M., Tatzgern; I., Williams (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real., ISMAR, pp. 544–553, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833151647-5 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D reconstruction, 3d-modeling, AR-assisted image capturing, Architectural design, Augmented Reality, Augmented reality-assisted image capturing, Image capturing, Indoor 3D reconstruction, Indoor space, Mobile application, Mobile Applications, Mortar, Room layout, Texturing, Texturing quality
@inproceedings{kim_roomrecon_2024,
title = {RoomRecon: High-Quality Textured Room Layout Reconstruction on Mobile Devices},
author = {S. J. Kim and D. D. Cao and F. Spinola and S. J. Lee and K. S. Cho},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85213494599&doi=10.1109%2fISMAR62088.2024.00069&partnerID=40&md5=0f6b9d4c44d9c55cafba7ad76651ea07},
doi = {10.1109/ISMAR62088.2024.00069},
isbn = {979-833151647-5 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real., ISMAR},
pages = {544–553},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Widespread RGB-Depth (RGB-D) sensors and advanced 3D reconstruction technologies facilitate the capture of indoor spaces, advancing the fields of augmented reality (AR), virtual reality (VR), and extended reality (XR). Nevertheless, current technologies still face limitations, such as the inability to reflect minor scene changes without a complete recapture, the lack of semantic scene understanding, and various texturing challenges that affect the 3D model's visual quality. These issues affect the realism required for VR experiences and other applications such as interior design and real estate. To address these challenges, we introduce RoomRecon, an interactive, real-time scanning and texturing pipeline for 3D room models. We propose a two-phase texturing pipeline that integrates AR-guided image capturing for texturing and generative AI models to improve texturing quality and provide better replicas of indoor spaces. Moreover, we suggest focusing only on permanent room elements, such as walls, floors, and ceilings, to allow for easily customizable 3D models. We conduct experiments in a variety of indoor spaces to assess the texturing quality and speed of our method. The quantitative results and user study demonstrate that RoomRecon surpasses state-of-the-art methods in terms of texturing quality and on-device computation time. © 2024 IEEE.},
keywords = {3D modeling, 3D models, 3D reconstruction, 3d-modeling, AR-assisted image capturing, Architectural design, Augmented Reality, Augmented reality-assisted image capturing, Image capturing, Indoor 3D reconstruction, Indoor space, Mobile application, Mobile Applications, Mortar, Room layout, Texturing, Texturing quality},
pubstate = {published},
tppubtype = {inproceedings}
}
Shabanijou, M.; Sharma, V.; Ray, S.; Lu, R.; Xiong, P.
Large Language Model Empowered Spatio-Visual Queries for Extended Reality Environments Proceedings Article
In: W., Ding; C.-T., Lu; F., Wang; L., Di; K., Wu; J., Huan; R., Nambiar; J., Li; F., Ilievski; R., Baeza-Yates; X., Hu (Ed.): Proc. - IEEE Int. Conf. Big Data, BigData, pp. 5843–5846, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036248-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Digital elevation model, Emerging applications, Immersive environment, Language Model, Metaverses, Modeling languages, Natural language interfaces, Query languages, spatial data, Spatial queries, Structured Query Language, Technological advances, Users perspective, Virtual environments, Visual languages, Visual query
@inproceedings{shabanijou_large_2024,
title = {Large Language Model Empowered Spatio-Visual Queries for Extended Reality Environments},
author = {M. Shabanijou and V. Sharma and S. Ray and R. Lu and P. Xiong},
editor = {Ding W. and Lu C.-T. and Wang F. and Di L. and Wu K. and Huan J. and Nambiar R. and Li J. and Ilievski F. and Baeza-Yates R. and Hu X.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85218011140&doi=10.1109%2fBigData62323.2024.10825084&partnerID=40&md5=fdd78814b8e19830d1b8ecd4b33b0102},
doi = {10.1109/BigData62323.2024.10825084},
isbn = {979-835036248-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Big Data, BigData},
pages = {5843–5846},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {With the technological advances in the creation and capture of 3D spatial data, new emerging applications are being developed. Digital Twins, metaverse and extended reality (XR) based immersive environments can be enriched by leveraging geocoded 3D spatial data. Unlike 2D spatial queries, queries involving 3D immersive environments need to take the query user's viewpoint into account. Spatio-visual queries return objects that are visible from the user's perspective. In this paper, we propose enhancing 3D spatio-visual queries with large language models (LLM). These kinds of queries allow a user to interact with the visible objects using a natural language interface. We have implemented a proof-of-concept prototype and conducted a preliminary evaluation. Our results demonstrate the potential of truly interactive immersive environments. © 2024 IEEE.},
keywords = {3D modeling, Digital elevation model, Emerging applications, Immersive environment, Language Model, Metaverses, Modeling languages, Natural language interfaces, Query languages, spatial data, Spatial queries, Structured Query Language, Technological advances, Users perspective, Virtual environments, Visual languages, Visual query},
pubstate = {published},
tppubtype = {inproceedings}
}
Weid, M.; Khezrian, N.; Mana, A. P.; Farzinnejad, F.; Grubert, J.
GenDeck: Towards a HoloDeck with Text-to-3D Model Generation Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1188–1189, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037449-0 (ISBN).
Abstract | Links | BibTeX | Tags: 3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extended Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity
@inproceedings{weid_gendeck_2024,
title = {GenDeck: Towards a HoloDeck with Text-to-3D Model Generation},
author = {M. Weid and N. Khezrian and A. P. Mana and F. Farzinnejad and J. Grubert},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195600251&doi=10.1109%2fVRW62533.2024.00388&partnerID=40&md5=6dab0cc05259fa2dbe0a2b3806e569af},
doi = {10.1109/VRW62533.2024.00388},
isbn = {979-835037449-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1188–1189},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative Artificial Intelligence has the potential to substantially transform the way 3D content for Extended Reality applications is produced. Specifically, the development of text-to-3D and image-to-3D generators with increasing visual fidelity and decreasing computational costs is advancing rapidly. Within this work, we present GenDeck, a proof-of-concept application to experience text-to-3D model generation inside an immersive Virtual Reality environment. © 2024 IEEE.},
keywords = {3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extended Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity},
pubstate = {published},
tppubtype = {inproceedings}
}
Upadhyay, A.; Dubey, A.; Bhardwaj, N.; Kuriakose, S. M.; Mohan, R.
CIGMA: Automated 3D House Layout Generation through Generative Models Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 542–546, Association for Computing Machinery, 2024, ISBN: 979-840071634-8 (ISBN).
Abstract | Links | BibTeX | Tags: 3d house, 3D House Layout, 3D modeling, Floor Plan, Floorplans, Floors, Generative AI, Generative model, Houses, Large datasets, Layout designs, Layout generations, Metaverses, Textures, User constraints, Wall design
@inproceedings{upadhyay_cigma_2024,
title = {CIGMA: Automated 3D House Layout Generation through Generative Models},
author = {A. Upadhyay and A. Dubey and N. Bhardwaj and S. M. Kuriakose and R. Mohan},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85183577885&doi=10.1145%2f3632410.3632490&partnerID=40&md5=cf0c249faf0ce03590010426e0f6c1e0},
doi = {10.1145/3632410.3632490},
isbn = {979-840071634-8 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {542–546},
publisher = {Association for Computing Machinery},
abstract = {In this work, we introduce CIGMA, a metaverse platform that empowers designers to generate multiple house layout designs using generative models. We propose a generative adversarial network that synthesizes 2D layouts guided by user constraints. Our platform generates 3D views of house layouts and provides users with the ability to customize the 3D house model by generating furniture items and applying various textures for personalized floor and wall designs. We evaluate our approach on a large-scale dataset, RPLAN, consisting of 80,000 real floor plans from residential buildings. The qualitative and quantitative evaluations demonstrate the effectiveness of our approach over the existing baselines. The demo is accessible at https://youtu.be/lgb_V-yZ5lw. © 2024 Owner/Author.},
keywords = {3d house, 3D House Layout, 3D modeling, Floor Plan, Floorplans, Floors, Generative AI, Generative model, Houses, Large datasets, Layout designs, Layout generations, Metaverses, Textures, User constraints, Wall design},
pubstate = {published},
tppubtype = {inproceedings}
}
Rausa, M.; Gaglio, S.; Augello, A.; Caggianese, G.; Franchini, S.; Gallo, L.; Sabatucci, L.
Enriching Metaverse with Memories Through Generative AI: A Case Study Proceedings Article
In: IEEE Int. Conf. Metrol. Ext. Real., Artif. Intell. Neural Eng., MetroXRAINE - Proc., pp. 371–376, Institute of Electrical and Electronics Engineers Inc., St Albans, United Kingdom, 2024, ISBN: 979-835037800-9 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D reconstruction, 3d-modeling, Case-studies, Generative adversarial networks, Generative AI, Input modes, Metamemory, Metaverses, Synthetic Data Generation, Synthetic data generations, Textual description, Virtual environments, Virtual Reality
@inproceedings{rausa_enriching_2024,
title = {Enriching Metaverse with Memories Through Generative AI: A Case Study},
author = {M. Rausa and S. Gaglio and A. Augello and G. Caggianese and S. Franchini and L. Gallo and L. Sabatucci},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85216124702&doi=10.1109%2fMetroXRAINE62247.2024.10796338&partnerID=40&md5=580d0727ab8740a6ada62eeef5ac283f},
doi = {10.1109/MetroXRAINE62247.2024.10796338},
isbn = {979-835037800-9 (ISBN)},
year = {2024},
date = {2024-01-01},
urldate = {2025-01-07},
booktitle = {IEEE Int. Conf. Metrol. Ext. Real., Artif. Intell. Neural Eng., MetroXRAINE - Proc.},
pages = {371–376},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
address = {St Albans, United Kingdom},
abstract = {The paper introduces MetaMemory, an approach to generate 3D models from either textual descriptions or photographs of objects, offering dual input modes for enhanced representation. MetaMemory's architecture is discussed, presenting the tools employed to extract the object from the image, generate the 3D mesh from texts or images, and visualize the object reconstruction in an immersive scenario. Afterwards, a case study in which we reconstructed memories of ancient crafts is examined together with the achieved results, highlighting current limitations and potential applications. © 2024 IEEE.},
keywords = {3D modeling, 3D models, 3D reconstruction, 3d-modeling, Case-studies, Generative adversarial networks, Generative AI, Input modes, Metamemory, Metaverses, Synthetic Data Generation, Synthetic data generations, Textual description, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}