AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Zhang, G.; Wang, Y.; Luo, C.; Xu, S.; Ming, Y.; Peng, J.; Zhang, M.
Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images Proceedings Article
In: Z., Lin; H., Zha; M.-M., Cheng; R., He; C.-L., Liu; K., Ubul; W., Silamu; J., Zhou (Ed.): Lect. Notes Comput. Sci., pp. 3–17, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981978507-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages
@inproceedings{zhang_visual_2025,
  title     = {Visual Harmony: {LLM}'s Power in Crafting Coherent Indoor Scenes from Images},
  author    = {Zhang, G. and Wang, Y. and Luo, C. and Xu, S. and Ming, Y. and Peng, J. and Zhang, M.},
  editor    = {Lin, Z. and Zha, H. and Cheng, M.-M. and He, R. and Liu, C.-L. and Ubul, K. and Silamu, W. and Zhou, J.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209374797&doi=10.1007%2f978-981-97-8508-7_1&partnerID=40&md5=5231ab0bce95fb3f09db80392acd58ff},
  doi       = {10.1007/978-981-97-8508-7_1},
  isbn      = {978-981-97-8507-0},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {15036},
  pages     = {3--17},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {Indoor scene generation has recently attracted significant attention as it is crucial for metaverse, 3D animation, visual effects in movies, and virtual/augmented reality. Existing image-based indoor scene generation methods often produce scenes that are not realistic enough, with issues such as floating objects, incorrect object orientations, and incomplete scenes that only include the part of the scenes captured by the input image. To address these challenges, we propose Visual Harmony, a method that leverages the powerful spatial imagination capabilities of Large Language Model (LLM) to generate corresponding indoor scenes based on the input image. Specifically, we first extract information from the input image through depth estimation and panorama segmentation, reconstructing a semantic point cloud. Using this reconstructed semantic point cloud, we extract a scene graph that describes only the objects in the image. Then we leverage the strong spatial imagination capabilities of LLM to complete the scene graph, forming a representation of a complete room scene. Based on this fine scene graph, we can generate entire indoor scene that includes both the captured and not captured parts of the input image. Extensive experiments demonstrate that our method can generate realistic, plausible, and highly relevant complete indoor scenes related to the input image. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
  keywords  = {Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2024
Liebers, C.; Pfützenreuter, N.; Auda, J.; Gruenefeld, U.; Schneegass, S.
"computer, Generate!" - Investigating User-Controlled Generation of Immersive Virtual Environments Proceedings Article
In: F., Lorig; J., Tucker; A.D., Lindstrom; F., Dignum; P., Murukannaiah; A., Theodorou; P., Yolum (Ed.): Front. Artif. Intell. Appl., pp. 213–227, IOS Press BV, 2024, ISBN: 09226389 (ISSN); 978-164368522-9 (ISBN).
Abstract | Links | BibTeX | Tags: All-at-once, Controllers, Generative AI, Human-controled scene generation, Human-Controlled Scene Generation, Immersive, Immersive Virtual Environments, In-control, Process control, Scene Generation, Three-level, User study, User-centred, Virtual Reality
@inproceedings{liebers_computer_2024,
  title     = {``computer, Generate!'' - Investigating User-Controlled Generation of Immersive Virtual Environments},
  author    = {Liebers, C. and Pfützenreuter, N. and Auda, J. and Gruenefeld, U. and Schneegass, S.},
  editor    = {Lorig, F. and Tucker, J. and Lindstrom, A. D. and Dignum, F. and Murukannaiah, P. and Theodorou, A. and Yolum, P.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85198740032&doi=10.3233%2fFAIA240196&partnerID=40&md5=215c47e3c831cbb44e5dc10604cda8af},
  doi       = {10.3233/FAIA240196},
  isbn      = {978-1-64368-522-9},
  issn      = {0922-6389},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Front. Artif. Intell. Appl.},
  series    = {Frontiers in Artificial Intelligence and Applications},
  volume    = {386},
  pages     = {213--227},
  publisher = {IOS Press BV},
  abstract  = {For immersive experiences such as virtual reality, explorable worlds are often fundamental. Generative artificial intelligence looks promising to accelerate the creation of such environments. However, it remains unclear how existing interaction modalities can support user-centered world generation and how users remain in control of the process. Thus, in this paper, we present a virtual reality application to generate virtual environments and compare three common interaction modalities (voice, controller, and hands) in a pre-study (N = 18), revealing a combination of initial voice input and continued controller manipulation as best suitable. We then investigate three levels of process control (all-at-once, creation-before-manipulation, and step-by-step) in a user study (N = 27). Our results show that although all-at-once reduced the number of object manipulations, participants felt more in control when using the step-by-step approach. © 2024 The Authors.},
  keywords  = {All-at-once, Controllers, Generative AI, Human-controled scene generation, Human-Controlled Scene Generation, Immersive, Immersive Virtual Environments, In-control, Process control, Scene Generation, Three-level, User study, User-centred, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2023
Kouzelis, L. R.; Spantidi, O.
Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API Proceedings Article
In: G., Bebis; G., Ghiasi; Y., Fang; A., Sharf; Y., Dong; C., Weaver; Z., Leo; J.J., LaViola Jr.; L., Kohli (Ed.): Lect. Notes Comput. Sci., pp. 15–26, Springer Science and Business Media Deutschland GmbH, 2023, ISBN: 03029743 (ISSN); 978-303147965-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing
@inproceedings{kouzelis_synthesizing_2023,
  title     = {Synthesizing Play-Ready {VR} Scenes with Natural Language Prompts Through {GPT} {API}},
  author    = {Kouzelis, L. R. and Spantidi, O.},
  editor    = {Bebis, G. and Ghiasi, G. and Fang, Y. and Sharf, A. and Dong, Y. and Weaver, C. and Leo, Z. and LaViola Jr., J. J. and Kohli, L.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180626887&doi=10.1007%2f978-3-031-47966-3_2&partnerID=40&md5=d15c3e2f3260e2a68bdca91c29df7bbb},
  doi       = {10.1007/978-3-031-47966-3_2},
  isbn      = {978-3-031-47965-6},
  issn      = {0302-9743},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {14362},
  pages     = {15--26},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {In visual computing, 3D scene generation stands as a crucial component, offering applications in various fields such as gaming, virtual reality (VR), and architectural visualization. Creating realistic and versatile virtual environments, however, poses significant challenges. This work presents a novel methodology that leverages the capabilities of a widely adopted large language model (LLM) to address these challenges. Our approach utilizes the GPT API to interpret natural language prompts and generate detailed, VR-ready scenes within Unity3D. Our work is also inherently scalable, since the model accepts any database of 3D objects with minimal prior configuration. The effectiveness of the proposed system is demonstrated through a series of case studies, revealing its potential to generate diverse and functional virtual spaces. © 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords  = {3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing},
  pubstate  = {published},
  tppubtype = {inproceedings}
}