AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Kurai, R.; Hiraki, T.; Hiroi, Y.; Hirao, Y.; Perusquia-Hernandez, M.; Uchiyama, H.; Kiyokawa, K.
An implementation of MagicCraft: Generating Interactive 3D Objects and Their Behaviors from Text for Commercial Metaverse Platforms Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1284–1285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D object, 3D Object Generation, 3d-modeling, AI-Assisted Design, Generative AI, Immersive, Metaverse, Metaverses, Model skill, Object oriented programming, Programming skills
@inproceedings{kurai_implementation_2025,
title = {An implementation of MagicCraft: Generating Interactive 3D Objects and Their Behaviors from Text for Commercial Metaverse Platforms},
author = {R. Kurai and T. Hiraki and Y. Hiroi and Y. Hirao and M. Perusquia-Hernandez and H. Uchiyama and K. Kiyokawa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153642&doi=10.1109%2fVRW66409.2025.00288&partnerID=40&md5=53fa1ac92c3210f0ffa090ffa1af7e6e},
doi = {10.1109/VRW66409.2025.00288},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1284–1285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Metaverse platforms are rapidly evolving to provide immersive spaces. However, the generation of dynamic and interactive 3D objects remains a challenge due to the need for advanced 3D modeling and programming skills. We present MagicCraft, a system that generates functional 3D objects from natural language prompts. MagicCraft uses generative AI models to manage the entire content creation pipeline: converting user text descriptions into images, transforming images into 3D models, predicting object behavior, and assigning necessary attributes and scripts. It also provides an interactive interface for users to refine generated objects by adjusting features like orientation, scale, seating positions, and grip points. © 2025 IEEE.},
keywords = {3D modeling, 3D models, 3D object, 3D Object Generation, 3d-modeling, AI-Assisted Design, Generative AI, Immersive, Metaverse, Metaverses, Model skill, Object oriented programming, Programming skills},
pubstate = {published},
tppubtype = {inproceedings}
}
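The abstract outlines a four-stage pipeline: text description to image, image to 3D model, behavior prediction, and attribute/script assignment. Below is a minimal Python sketch of how such stages compose; every helper is a hypothetical stub standing in for one of the generative models, not MagicCraft's actual API.

from dataclasses import dataclass, field

@dataclass
class GeneratedObject:
    mesh_path: str
    behaviors: list[str] = field(default_factory=list)  # e.g. "sit", "grab"
    attributes: dict = field(default_factory=dict)      # scale, seats, grips

def text_to_image(prompt: str) -> bytes:
    ...  # stub: a text-to-image generative model would go here

def image_to_3d(image: bytes) -> str:
    ...  # stub: image-to-3D reconstruction, returning a mesh file path

def predict_behaviors(prompt: str) -> list[str]:
    ...  # stub: an LLM infers how the object should be interacted with

def generate_object(prompt: str) -> GeneratedObject:
    image = text_to_image(prompt)                # stage 1: description -> image
    mesh_path = image_to_3d(image)               # stage 2: image -> 3D model
    behaviors = predict_behaviors(prompt) or []  # stage 3: behavior prediction
    obj = GeneratedObject(mesh_path, behaviors)
    # stage 4: attach platform attributes the user can later refine
    # (orientation, scale, seating positions, grip points)
    obj.attributes = {"scale": 1.0, "orientation": (0.0, 0.0, 0.0)}
    return obj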
Rasch, J.; Töws, J.; Hirzle, T.; Müller, F.; Schmitz, M.
CreepyCoCreator? Investigating AI Representation Modes for 3D Object Co-Creation in Virtual Reality Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
Abstract | Links | BibTeX | Tags: 3D Creation, 3D modeling, 3D object, Building process, Co-creation, Co-creative system, Co-creative systems, Creative systems, Creatives, Generative AI, Three dimensional computer graphics, User expectations, User Studies, User study, Virtual Reality, Virtualization
@inproceedings{rasch_creepycocreator_2025,
title = {CreepyCoCreator? Investigating AI Representation Modes for 3D Object Co-Creation in Virtual Reality},
author = {J. Rasch and J. Töws and T. Hirzle and F. Müller and M. Schmitz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005742763&doi=10.1145%2f3706598.3713720&partnerID=40&md5=e6cdcb6cc7249a8836ecc39ae103cd53},
doi = {10.1145/3706598.3713720},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Generative AI in Virtual Reality offers the potential for collaborative object-building, yet challenges remain in aligning AI contributions with user expectations. In particular, users often struggle to understand and collaborate with AI when its actions are not transparently represented. This paper thus explores the co-creative object-building process through a Wizard-of-Oz study, focusing on how AI can effectively convey its intent to users during object customization in Virtual Reality. Inspired by human-to-human collaboration, we focus on three representation modes: the presence of an embodied avatar, whether the AI's contributions are visualized immediately or incrementally, and whether the areas modified are highlighted in advance. The findings provide insights into how these factors affect user perception and interaction with object-generating AI tools in Virtual Reality as well as satisfaction and ownership of the created objects. The results offer design implications for co-creative world-building systems, aiming to foster more effective and satisfying collaborations between humans and AI in Virtual Reality. © 2025 Copyright held by the owner/author(s).},
keywords = {3D Creation, 3D modeling, 3D object, Building process, Co-creation, Co-creative system, Co-creative systems, Creative systems, Creatives, Generative AI, Three dimensional computer graphics, User expectations, User Studies, User study, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
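The study varies three binary representation factors: an embodied avatar or none, immediate versus incremental visualization of contributions, and whether modified areas are highlighted in advance. A minimal sketch of enumerating that condition matrix for a Wizard-of-Oz protocol; the factor and level names are paraphrased from the abstract, and whether the paper crosses them fully factorially is an assumption here.

from itertools import product

FACTORS = {
    "avatar": ["embodied", "none"],
    "visualization": ["immediate", "incremental"],
    "highlighting": ["pre-highlighted", "unannounced"],
}

# Full crossing yields 8 candidate conditions for the wizard to enact.
conditions = [dict(zip(FACTORS, combo)) for combo in product(*FACTORS.values())]
for i, cond in enumerate(conditions, 1):
    print(f"condition {i}: {cond}")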
Tong, Y.; Qiu, Y.; Li, R.; Qiu, S.; Heng, P. -A.
MS2Mesh-XR: Multi-Modal Sketch-to-Mesh Generation in XR Environments Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 272–276, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8.
Abstract | Links | BibTeX | Tags: 3D meshes, 3D object, ControlNet, Hand-drawn sketches, Hands movement, High quality, Image-based, immersive visualization, Mesh generation, Multi-modal, Pipeline codes, Realistic images, Three dimensional computer graphics, Virtual environments, Virtual Reality
@inproceedings{tong_ms2mesh-xr_2025,
title = {MS2Mesh-XR: Multi-Modal Sketch-to-Mesh Generation in XR Environments},
author = {Y. Tong and Y. Qiu and R. Li and S. Qiu and P. -A. Heng},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000423684&doi=10.1109%2fAIxVR63409.2025.00052&partnerID=40&md5=caeace6850dcbdf8c1fa0441b98fa8d9},
doi = {10.1109/AIxVR63409.2025.00052},
isbn = {979-833152157-8},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {272–276},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present MS2Mesh-XR, a novel multimodal sketch-to-mesh generation pipeline that enables users to create realistic 3D objects in extended reality (XR) environments using hand-drawn sketches assisted by voice inputs. Specifically, users can intuitively sketch objects using natural hand movements in mid-air within a virtual environment. By integrating voice inputs, we employ ControlNet to infer realistic images based on the drawn sketches and interpreted text prompts. Users can then review and select their preferred image, which is subsequently reconstructed into a detailed 3D mesh using the Convolutional Reconstruction Model. In particular, our proposed pipeline can generate a high-quality 3D mesh in less than 20 seconds, allowing for immersive visualization and manipulation in runtime XR scenes. We demonstrate the practicability of our pipeline through two use cases in XR settings. By leveraging natural user inputs and cutting-edge generative AI capabilities, our approach can significantly facilitate XR-based creative production and enhance user experiences. Our code and demo will be available at: https://yueqiu0911.github.io/MS2Mesh-XR/. © 2025 IEEE.},
keywords = {3D meshes, 3D object, ControlNet, Hand-drawn sketches, Hands movement, High quality, Image-based, immersive visualization, Mesh generation, Multi-modal, Pipeline codes, Realistic images, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
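The pipeline's middle stage conditions an image model on the hand-drawn sketch plus the transcribed voice prompt. A minimal sketch of that stage using the open-source diffusers library with a public ControlNet scribble checkpoint; this illustrates the technique, not the authors' implementation, and the later CRM mesh-reconstruction stage is omitted. The file names and prompt are placeholders.

import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image

controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-scribble", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet,
    torch_dtype=torch.float16).to("cuda")

sketch = Image.open("midair_sketch.png")      # rasterized XR hand drawing
prompt = "a ceramic teapot, studio lighting"  # interpreted from voice input
image = pipe(prompt, image=sketch, num_inference_steps=20).images[0]
image.save("candidate.png")  # the user reviews this before 3D reconstruction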
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
title = {From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
doi = {10.1109/VRW66409.2025.00038},
isbn = {979-833151484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {150–155},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
keywords = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
pubstate = {published},
tppubtype = {inproceedings}
}
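One concrete optimization the abstract highlights is reducing mesh complexity so generated objects load quickly on resource-constrained AR devices. A minimal sketch of that step with Open3D's quadric decimation; the file names and triangle budget are illustrative assumptions, not values from the paper.

import open3d as o3d

mesh = o3d.io.read_triangle_mesh("generated_object.obj")
print(f"before: {len(mesh.triangles)} triangles")

# Decimate toward a fixed triangle budget suitable for a mobile AR headset.
simplified = mesh.simplify_quadric_decimation(target_number_of_triangles=5000)
simplified.compute_vertex_normals()  # restore shading after decimation

o3d.io.write_triangle_mesh("generated_object_lod.obj", simplified)
print(f"after: {len(simplified.triangles)} triangles")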
Behravan, M.; Matković, K.; Gračanin, D.
Generative AI for Context-Aware 3D Object Creation Using Vision-Language Models in Augmented Reality Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 73–81, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8.
Abstract | Links | BibTeX | Tags: 3D object, 3D Object Generation, Artificial intelligence systems, Augmented Reality, Capture images, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Generative model, Language Model, Object creation, Vision language model, vision language models, Visual languages
@inproceedings{behravan_generative_2025,
title = {Generative AI for Context-Aware 3D Object Creation Using Vision-Language Models in Augmented Reality},
author = {M. Behravan and K. Matković and D. Gračanin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000292700&doi=10.1109%2fAIxVR63409.2025.00018&partnerID=40&md5=b40fa769a6b427918c3fcd86f7c52a75},
doi = {10.1109/AIxVR63409.2025.00018},
isbn = {979-833152157-8},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {73–81},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present a novel Artificial Intelligence (AI) system that functions as a designer assistant in augmented reality (AR) environments. Leveraging Vision Language Models (VLMs) like LLaVA and advanced text-to-3D generative models, users can capture images of their surroundings with an Augmented Reality (AR) headset. The system analyzes these images to recommend contextually relevant objects that enhance both functionality and visual appeal. The recommended objects are generated as 3D models and seamlessly integrated into the AR environment for interactive use. Our system utilizes open-source AI models running on local systems to enhance data security and reduce operational costs. Key features include context-aware object suggestions, optimal placement guidance, aesthetic matching, and an intuitive user interface for real-time interaction. Evaluations using the COCO 2017 dataset and real-world AR testing demonstrated high accuracy in object detection and a contextual fit rating of 4.1 out of 5. By addressing the challenge of providing context-aware object recommendations in AR, our system expands the capabilities of AI applications in this domain. It enables users to create personalized digital spaces efficiently, leveraging AI for contextually relevant suggestions. © 2025 IEEE.},
keywords = {3D object, 3D Object Generation, Artificial intelligence systems, Augmented Reality, Capture images, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Generative model, Language Model, Object creation, Vision language model, vision language models, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
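The recommendation step sends a headset capture to a locally hosted VLM (the paper names LLaVA) and asks for contextually fitting objects. A minimal sketch using the ollama Python client against a local LLaVA model; the prompt wording and the JSON output contract are assumptions, not the paper's interface.

import json
import ollama

prompt = (
    "You are a designer assistant. List three objects that would improve "
    "this room, as a JSON array of short noun phrases."
)
response = ollama.chat(
    model="llava",
    messages=[{"role": "user", "content": prompt, "images": ["capture.jpg"]}],
)
# Assumes the model honors the JSON-array instruction in the prompt.
suggestions = json.loads(response["message"]["content"])
for name in suggestions:
    print("candidate for text-to-3D generation:", name)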
2024
Lee, S.; Park, W.; Lee, K.
Building Knowledge Base of 3D Object Assets Using Multimodal LLM AI Model Proceedings Article
In: Int. Conf. ICT Convergence, pp. 416–418, IEEE Computer Society, 2024, ISSN: 2162-1233; ISBN: 979-835036463-7.
Abstract | Links | BibTeX | Tags: 3D object, Asset management, Content services, Exponentials, Information Management, Knowledge Base, Language Model, Large language model, LLM, Multi-modal, Multi-Modal AI, Reusability, Visual effects, XR
@inproceedings{lee_building_2024,
title = {Building Knowledge Base of 3D Object Assets Using Multimodal LLM AI Model},
author = {S. Lee and W. Park and K. Lee},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85217636269&doi=10.1109%2fICTC62082.2024.10827434&partnerID=40&md5=581ee8ca50eb3dae15dc9675971cf428},
doi = {10.1109/ICTC62082.2024.10827434},
issn = {2162-1233},
isbn = {979-835036463-7},
year = {2024},
date = {2024-01-01},
booktitle = {Int. Conf. ICT Convergence},
pages = {416–418},
publisher = {IEEE Computer Society},
abstract = {The proliferation of various XR (eXtended Reality) services and the increasing incorporation of visual effects into existing content services have led to an exponential rise in the demand for 3D object assets. This paper describes an LLM (Large Language Model)-based multimodal AI model pipeline that can be applied to a generative AI model for creating new 3D objects or restructuring the asset management system to enhance the reusability of existing 3D objects. By leveraging a multimodal AI model, we derived descriptive text for assets such as 3D objects and 2D images at a human-perceptible level, rather than mere data, and subsequently used an LLM to generate knowledge triplets for constructing an asset knowledge base. The applicability of this pipeline was verified using actual 3D objects from a content production company. Future work will focus on improving the quality of the generated knowledge triplets themselves by training the multimodal AI model with real-world content usage assets. © 2024 IEEE.},
keywords = {3D object, Asset management, Content services, Exponentials, Information Management, Knowledge Base, Language Model, Large language model, LLM, Multi-modal, Multi-Modal AI, Reusability, Visual effects, XR},
pubstate = {published},
tppubtype = {inproceedings}
}
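The pipeline first has a multimodal model describe each asset in plain language, then has an LLM turn that description into knowledge triplets. A minimal sketch of the second step; complete is a hypothetical stand-in for any LLM completion call, and the line-based triplet format is an assumption, not the paper's schema.

def complete(prompt: str) -> str:
    ...  # placeholder: call an LLM completion endpoint here

def extract_triplets(description: str) -> list[tuple[str, str, str]]:
    prompt = (
        "Extract knowledge triplets as lines of 'subject | relation | object' "
        f"from this asset description:\n{description}"
    )
    lines = (complete(prompt) or "").splitlines()
    # Keep only well-formed lines with exactly two separators.
    return [tuple(part.strip() for part in line.split("|"))
            for line in lines if line.count("|") == 2]

# e.g. "A wooden office desk with two drawers" might yield
# ("desk", "made_of", "wood") and ("desk", "has_part", "drawer")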
Behravan, M.; Gračanin, D.
Generative Multi-Modal Artificial Intelligence for Dynamic Real-Time Context-Aware Content Creation in Augmented Reality Proceedings Article
In: Spencer, S.N. (Ed.): Proc. ACM Symp. Virtual Reality Softw. Technol. VRST, Association for Computing Machinery, 2024, ISBN: 979-840070535-9.
Abstract | Links | BibTeX | Tags: 3D object, 3D Object Generation, Augmented Reality, Content creation, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Language Model, Multi-modal, Real- time, Time contexts, Vision language model, vision language models, Visual languages
@inproceedings{behravan_generative_2024,
title = {Generative Multi-Modal Artificial Intelligence for Dynamic Real-Time Context-Aware Content Creation in Augmented Reality},
author = {M. Behravan and D. Gračanin},
editor = {Spencer S.N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212524068&doi=10.1145%2f3641825.3689685&partnerID=40&md5=daf8aa8960d9dd4dbdbf67ccb1e7fb83},
doi = {10.1145/3641825.3689685},
isbn = {979-840070535-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. ACM Symp. Virtual Reality Softw. Technol. VRST},
publisher = {Association for Computing Machinery},
abstract = {We introduce a framework that uses generative Artificial Intelligence (AI) for dynamic and context-aware content creation in Augmented Reality (AR). By integrating Vision Language Models (VLMs), our system detects and understands the physical space around the user, recommending contextually relevant objects. These objects are transformed into 3D models using text-to-3D generative AI techniques, allowing for real-time content inclusion within the AR space. This approach enhances user experience by enabling intuitive customization through spoken commands, while reducing costs and improving accessibility to advanced AR interactions. The framework's vision and language capabilities support the generation of comprehensive and context-specific 3D objects. © 2024 Owner/Author.},
keywords = {3D object, 3D Object Generation, Augmented Reality, Content creation, Context-Aware, Generative adversarial networks, Generative AI, generative artificial intelligence, Language Model, Multi-modal, Real- time, Time contexts, Vision language model, vision language models, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
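Spoken commands drive this content-creation loop. A minimal sketch of the speech front end using the open-source openai-whisper package; the paper does not name its speech-to-text component, so this choice, like the file name, is an assumption. The transcribed text would then feed the VLM and text-to-3D stages.

import whisper

model = whisper.load_model("base")        # small multilingual model
result = model.transcribe("command.wav")  # e.g. "add a reading lamp here"
request_text = result["text"].strip()
print("object request for the generative pipeline:", request_text)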
Chamola, V.; Bansal, G.; Das, T. K.; Hassija, V.; Sai, S.; Wang, J.; Zeadally, S.; Hussain, A.; Yu, F. R.; Guizani, M.; Niyato, D.
Beyond Reality: The Pivotal Role of Generative AI in the Metaverse Journal Article
In: IEEE Internet of Things Magazine, vol. 7, no. 4, pp. 126–135, 2024, ISSN: 2576-3180.
Abstract | Links | BibTeX | Tags: 3D object, Diffusion, Generative adversarial networks, Generative model, Image objects, Immersive, Interconnected network, Metaverses, Physical reality, Video objects, Virtual landscapes, Virtual Reality
@article{chamola_beyond_2024,
title = {Beyond Reality: The Pivotal Role of Generative AI in the Metaverse},
author = {V. Chamola and G. Bansal and T. K. Das and V. Hassija and S. Sai and J. Wang and S. Zeadally and A. Hussain and F. R. Yu and M. Guizani and D. Niyato},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85198004913&doi=10.1109%2fIOTM.001.2300174&partnerID=40&md5=03c679195e42e677de596d7a38df0333},
doi = {10.1109/IOTM.001.2300174},
issn = {2576-3180},
year = {2024},
date = {2024-01-01},
journal = {IEEE Internet of Things Magazine},
volume = {7},
number = {4},
pages = {126–135},
abstract = {The Metaverse, an interconnected network of immersive digital realms, is poised to reshape the future by seamlessly merging physical reality with virtual environments. Its potential to revolutionize diverse aspects of human existence, from entertainment to commerce, underscores its significance. At the heart of this transformation lies Generative AI, a branch of artificial intelligence focused on creating novel content. Generative AI serves as a catalyst, propelling the Metaverse's evolution by enhancing it with immersive experiences. The Metaverse comprises three pivotal domains, namely, text, visual, and audio. The Metaverse's fabric intertwines with Generative AI models, ushering in innovative interactions. Within Visual, the triad of image, video, and 3D Object generation sets the stage for engaging virtual landscapes. Key to this evolution are five generative models: Transformers, Diffusion, Autoencoders, Autoregressive, and Generative Adversarial Networks (GANs). These models empower the Metaverse, enhancing it with dynamic and diverse content. Notably, technologies like BARD, Point-E, Stable Diffusion, DALL-E, GPT, and AIVA, among others, wield these models to enrich the Metaverse across domains. By discussing the technical issues and real-world applications, this study reveals the intricate tapestry of AI's role in the Metaverse. Anchoring these insights is a case study illuminating Stable Diffusion's role in metamorphosing the virtual realm. Collectively, this exploration illuminates the symbiotic relationship between Generative AI and the Metaverse, foreshadowing a future where immersive, interactive, and personalized experiences redefine human engagement with digital landscapes. © 2018 IEEE.},
keywords = {3D object, Diffusion, Generative adversarial networks, Generative model, Image objects, Immersive, Interconnected network, Metaverses, Physical reality, Video objects, Virtual landscapes, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2023
Kouzelis, L. R.; Spantidi, O.
Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API Proceedings Article
In: Bebis, G.; Ghiasi, G.; Fang, Y.; Sharf, A.; Dong, Y.; Weaver, C.; Leo, Z.; LaViola Jr., J.J.; Kohli, L. (Eds.): Lect. Notes Comput. Sci., vol. 14362, pp. 15–26, Springer Science and Business Media Deutschland GmbH, 2023, ISSN: 0302-9743; ISBN: 978-303147965-6.
Abstract | Links | BibTeX | Tags: 3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing
@inproceedings{kouzelis_synthesizing_2023,
title = {Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API},
author = {L. R. Kouzelis and O. Spantidi},
editor = {Bebis G. and Ghiasi G. and Fang Y. and Sharf A. and Dong Y. and Weaver C. and Leo Z. and LaViola Jr. J.J. and Kohli L.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180626887&doi=10.1007%2f978-3-031-47966-3_2&partnerID=40&md5=d15c3e2f3260e2a68bdca91c29df7bbb},
doi = {10.1007/978-3-031-47966-3_2},
issn = {0302-9743},
isbn = {978-303147965-6},
year = {2023},
date = {2023-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14362},
pages = {15–26},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In visual computing, 3D scene generation stands as a crucial component, offering applications in various fields such as gaming, virtual reality (VR), and architectural visualization. Creating realistic and versatile virtual environments, however, poses significant challenges. This work presents a novel methodology that leverages the capabilities of a widely adopted large language model (LLM) to address these challenges. Our approach utilizes the GPT API to interpret natural language prompts and generate detailed, VR-ready scenes within Unity3D. Our work is also inherently scalable, since the model accepts any database of 3D objects with minimal prior configuration. The effectiveness of the proposed system is demonstrated through a series of case studies, revealing its potential to generate diverse and functional virtual spaces. © 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
keywords = {3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing},
pubstate = {published},
tppubtype = {inproceedings}
}
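The method prompts the GPT API for a structured scene description that Unity3D then instantiates from a database of 3D objects. A minimal sketch of the prompting side using the openai Python client; the JSON schema, model name, and system prompt are illustrative assumptions, and the Unity-side spawning code is omitted.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

system = (
    "Return a VR scene as JSON: a list of objects, each with 'prefab' (a name "
    "from the asset database), 'position' [x, y, z], and 'rotation_y' degrees."
)
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": system},
        {"role": "user", "content": "a cozy cabin interior with a fireplace"},
    ],
)
scene_json = completion.choices[0].message.content  # parsed by the Unity side
print(scene_json)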
Lee, S.; Lee, H.; Lee, K.
Knowledge Generation Pipeline using LLM for Building 3D Object Knowledge Base Proceedings Article
In: Int. Conf. ICT Convergence, pp. 1303–1305, IEEE Computer Society, 2023, ISSN: 2162-1233; ISBN: 979-835031327-7.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR
@inproceedings{lee_knowledge_2023,
title = {Knowledge Generation Pipeline using LLM for Building 3D Object Knowledge Base},
author = {S. Lee and H. Lee and K. Lee},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85184593202&doi=10.1109%2fICTC58733.2023.10392933&partnerID=40&md5=b877638607a04e5a31a2d5723af6e11b},
doi = {10.1109/ICTC58733.2023.10392933},
issn = {2162-1233},
isbn = {979-835031327-7},
year = {2023},
date = {2023-01-01},
booktitle = {Int. Conf. ICT Convergence},
pages = {1303–1305},
publisher = {IEEE Computer Society},
abstract = {With the widespread adoption of XR (eXtended Reality) content such as the Metaverse and VR (Virtual Reality) / AR (Augmented Reality), the utilization and importance of 3D objects are increasing. In this paper, we describe a knowledge generation pipeline for 3D objects that supports both the reuse of existing 3D objects and the production of new 3D objects using generative AI (Artificial Intelligence). 3D object knowledge includes not only the object data generated during the editing phase but also the information humans need to recognize and understand objects. The target 3D models for building knowledge are the office space model for a business Metaverse service and the models of the objects composing that space. LLM (Large Language Model)-based multimodal AI was used to extract knowledge from the 3D models in a systematic and automated way. We plan to expand the pipeline with a knowledge base for managing the extracted knowledge and correcting errors that occur during LLM-based knowledge extraction. © 2023 IEEE.},
keywords = {3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR},
pubstate = {published},
tppubtype = {inproceedings}
}
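Once knowledge is extracted, it needs a queryable store. A minimal sketch of holding 3D-object knowledge as a labeled directed graph with networkx; the example triplets are illustrative, not taken from the paper's office space model.

import networkx as nx

kb = nx.MultiDiGraph()  # multigraph: several relations may link the same pair

def add_triplet(subject: str, relation: str, obj: str) -> None:
    kb.add_edge(subject, obj, relation=relation)

add_triplet("office_chair_03", "located_in", "meeting_room")
add_triplet("office_chair_03", "made_of", "mesh_fabric")

# Query: everything the knowledge base asserts about one asset.
for _, obj, data in kb.out_edges("office_chair_03", data=True):
    print(f"office_chair_03 --{data['relation']}--> {obj}")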
2022
Wong, S. M.; Chen, C. -W.; Pan, T. -Y.; Chu, H. -K.; Hu, M. -C.
GetWild: A VR Editing System with AI-Generated 3D Object and Terrain Proceedings Article
In: MM - Proc. ACM Int. Conf. Multimed., pp. 6988–6990, Association for Computing Machinery, Inc, 2022, ISBN: 978-145039203-7.
Abstract | Links | BibTeX | Tags: 3-D environments, 3-d terrains, 3D modeling, 3D models, 3D object, 3d-modeling, Editing systems, Landforms, Modeling softwares, Object generation, terrain generation, Terrain generations, Virtual Reality, Vr editing
@inproceedings{wong_getwild_2022,
title = {GetWild: A VR Editing System with AI-Generated 3D Object and Terrain},
author = {S. M. Wong and C. -W. Chen and T. -Y. Pan and H. -K. Chu and M. -C. Hu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85151159899&doi=10.1145%2f3503161.3547733&partnerID=40&md5=668c107b586a77f7ef9bfde37d4dfb9f},
doi = {10.1145/3503161.3547733},
isbn = {978-145039203-7},
year = {2022},
date = {2022-01-01},
booktitle = {MM - Proc. ACM Int. Conf. Multimed.},
pages = {6988–6990},
publisher = {Association for Computing Machinery, Inc},
abstract = {3D environment artists typically use 2D screens and 3D modeling software to produce their creations. However, creating 3D content using 2D tools is counterintuitive. Moreover, the process can be inefficient for junior artists working without a reference. We develop a system called GetWild, which employs artificial intelligence (AI) models to generate prototypes of 3D objects/terrain and allows users to further edit the generated content in virtual space. With the aid of AI, the user can capture an image to obtain a rough 3D object model, or start by drawing simple sketches representing rivers, mountain peaks, and mountain ridges to create a 3D terrain prototype. Further, virtual reality (VR) techniques provide an immersive design environment and intuitive interaction (such as painting, sculpting, coloring, and transformation) for users to edit the generated prototypes. Compared with existing 3D modeling software and systems, the proposed VR editing system with AI-generated 3D objects/terrain provides a more efficient way for the user to create virtual artwork. © 2022 Owner/Author.},
keywords = {3-D environments, 3-d terrains, 3D modeling, 3D models, 3D object, 3d-modeling, Editing systems, Landforms, Modeling softwares, Object generation, terrain generation, Terrain generations, Virtual Reality, Vr editing},
pubstate = {published},
tppubtype = {inproceedings}
}
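GetWild turns simple sketches of rivers, peaks, and ridges into a terrain prototype using AI models. As a much simpler illustration of the data flow only, the sketch below reads a grayscale drawing as a heightmap and smooths it into a terrain grid; this stand-in is an assumption, not the paper's generation method, and the file name is a placeholder.

import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter

sketch = np.asarray(Image.open("terrain_sketch.png").convert("L"), float)
heights = gaussian_filter(sketch / 255.0, sigma=8) * 120.0  # meters, smoothed

# Sample a coarse vertex grid that a VR editor could mesh and let users sculpt.
grid = heights[::16, ::16]
print(f"terrain grid: {grid.shape[0]} x {grid.shape[1]} vertices, "
      f"peak {grid.max():.1f} m")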