AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.
2025
Zhang, G.; Wang, Y.; Luo, C.; Xu, S.; Ming, Y.; Peng, J.; Zhang, M.
Visual Harmony: LLM’s Power in Crafting Coherent Indoor Scenes from Images (Proceedings Article)
In: Z., Lin; H., Zha; M.-M., Cheng; R., He; C.-L., Liu; K., Ubul; W., Silamu; J., Zhou (Ed.): Lect. Notes Comput. Sci., pp. 3–17, Springer Science and Business Media Deutschland GmbH, 2025, ISBN: 03029743 (ISSN); 978-981978507-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages
@comment{Normalized from Scopus auto-export: ISSN moved out of the isbn field, ISBN
hyphenated, page range uses --, names in unambiguous "Last, First" form, series
split out of volume, and the LLM acronym brace-protected against style recasing.}
@inproceedings{zhang_visual_2025,
  title     = {Visual Harmony: {LLM}'s Power in Crafting Coherent Indoor Scenes from Images},
  author    = {Zhang, G. and Wang, Y. and Luo, C. and Xu, S. and Ming, Y. and Peng, J. and Zhang, M.},
  editor    = {Lin, Z. and Zha, H. and Cheng, M.-M. and He, R. and Liu, C.-L. and Ubul, K. and Silamu, W. and Zhou, J.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209374797&doi=10.1007%2f978-981-97-8508-7_1&partnerID=40&md5=5231ab0bce95fb3f09db80392acd58ff},
  doi       = {10.1007/978-981-97-8508-7_1},
  isbn      = {978-981-97-8507-0},
  issn      = {0302-9743},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {15036},
  pages     = {3--17},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {Indoor scene generation has recently attracted significant attention as it is crucial for metaverse, 3D animation, visual effects in movies, and virtual/augmented reality. Existing image-based indoor scene generation methods often produce scenes that are not realistic enough, with issues such as floating objects, incorrect object orientations, and incomplete scenes that only include the part of the scenes captured by the input image. To address these challenges, we propose Visual Harmony, a method that leverages the powerful spatial imagination capabilities of Large Language Model (LLM) to generate corresponding indoor scenes based on the input image. Specifically, we first extract information from the input image through depth estimation and panorama segmentation, reconstructing a semantic point cloud. Using this reconstructed semantic point cloud, we extract a scene graph that describes only the objects in the image. Then we leverage the strong spatial imagination capabilities of LLM to complete the scene graph, forming a representation of a complete room scene. Based on this fine scene graph, we can generate entire indoor scene that includes both the captured and not captured parts of the input image. Extensive experiments demonstrate that our method can generate realistic, plausible, and highly relevant complete indoor scenes related to the input image. © The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.},
  keywords  = {Augmented Reality, Depth perception, Indoor scene generation, Input image, Language Model, Large language model, Metaverses, Point-clouds, Power, Scene completion, Scene Generation, Scene-graphs, Semantic Segmentation, Semantics, Virtual Reality, Visual languages},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Xi, Z.; Yao, Z.; Huang, J.; Lu, Z. -Q.; Yan, H.; Mu, T. -J.; Wang, Z.; Xu, Q. -C.
TerraCraft: City-scale generative procedural modeling with natural languages (Journal Article)
In: Graphical Models, vol. 141, 2025, ISSN: 15240703 (ISSN), (Publisher: Elsevier Inc.).
Abstract | Links | BibTeX | Tags: 3D scene generation, 3D scenes, algorithm, Automation, City layout, City scale, data set, Diffusion Model, Game design, Geometry, High quality, Language, Language Model, Large datasets, Large language model, LLMs, Modeling languages, Natural language processing systems, Procedural modeling, Procedural models, Scene Generation, Three dimensional computer graphics, three-dimensional modeling, urban area, Virtual Reality
@comment{Normalized from Scopus auto-export: ISSN hyphenated with the stray "(ISSN)"
marker removed, author names in unambiguous "Last, First" form without the spurious
space inside hyphenated initials, and the TerraCraft product name brace-protected.}
@article{xi_terracraft_2025,
  title     = {{TerraCraft}: City-scale generative procedural modeling with natural languages},
  author    = {Xi, Z. and Yao, Z. and Huang, J. and Lu, Z.-Q. and Yan, H. and Mu, T.-J. and Wang, Z. and Xu, Q.-C.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105012397682&doi=10.1016%2Fj.gmod.2025.101285&partnerID=40&md5=15a84050280e5015b1f7b1ef40c62100},
  doi       = {10.1016/j.gmod.2025.101285},
  issn      = {1524-0703},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Graphical Models},
  volume    = {141},
  abstract  = {Automated generation of large-scale 3D scenes presents a significant challenge due to the resource-intensive training and datasets required. This is in sharp contrast to the 2D counterparts that have become readily available due to their superior speed and quality. However, prior work in 3D procedural modeling has demonstrated promise in generating high-quality assets using the combination of algorithms and user-defined rules. To leverage the best of both 2D generative models and procedural modeling tools, we present TerraCraft, a novel framework for generating geometrically high-quality 3D city-scale scenes. By utilizing Large Language Models (LLMs), TerraCraft can generate city-scale 3D scenes from natural text descriptions. With its intuitive operation and powerful capabilities, TerraCraft enables users to easily create geometrically high-quality scenes readily for various applications, such as virtual reality and game design. We validate TerraCraft's effectiveness through extensive experiments and user studies, showing its superior performance compared to existing baselines. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Elsevier Inc.},
  keywords  = {3D scene generation, 3D scenes, algorithm, Automation, City layout, City scale, data set, Diffusion Model, Game design, Geometry, High quality, Language, Language Model, Large datasets, Large language model, LLMs, Modeling languages, Natural language processing systems, Procedural modeling, Procedural models, Scene Generation, Three dimensional computer graphics, three-dimensional modeling, urban area, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article},
}
2024
Liebers, C.; Pfützenreuter, N.; Auda, J.; Gruenefeld, U.; Schneegass, S.
"computer, Generate!" - Investigating User-Controlled Generation of Immersive Virtual Environments Proceedings Article
In: F., Lorig; J., Tucker; A.D., Lindstrom; F., Dignum; P., Murukannaiah; A., Theodorou; P., Yolum (Ed.): Front. Artif. Intell. Appl., pp. 213–227, IOS Press BV, 2024, ISBN: 09226389 (ISSN); 978-164368522-9 (ISBN).
Abstract | Links | BibTeX | Tags: All-at-once, Controllers, Generative AI, Human-controled scene generation, Human-Controlled Scene Generation, Immersive, Immersive Virtual Environments, In-control, Process control, Scene Generation, Three-level, User study, User-centred, Virtual Reality
@comment{Normalized from Scopus auto-export: ISSN moved out of the isbn field, ISBN
hyphenated, page range uses --, editor names in unambiguous "Last, First" form, and
the straight quotes in the title converted to LaTeX quotation marks.}
@inproceedings{liebers_computer_2024,
  title     = {``computer, Generate!'' - Investigating User-Controlled Generation of Immersive Virtual Environments},
  author    = {Liebers, C. and Pfützenreuter, N. and Auda, J. and Gruenefeld, U. and Schneegass, S.},
  editor    = {Lorig, F. and Tucker, J. and Lindstrom, A. D. and Dignum, F. and Murukannaiah, P. and Theodorou, A. and Yolum, P.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85198740032&doi=10.3233%2fFAIA240196&partnerID=40&md5=215c47e3c831cbb44e5dc10604cda8af},
  doi       = {10.3233/FAIA240196},
  isbn      = {978-1-64368-522-9},
  issn      = {0922-6389},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Front. Artif. Intell. Appl.},
  volume    = {386},
  pages     = {213--227},
  publisher = {IOS Press BV},
  abstract  = {For immersive experiences such as virtual reality, explorable worlds are often fundamental. Generative artificial intelligence looks promising to accelerate the creation of such environments. However, it remains unclear how existing interaction modalities can support user-centered world generation and how users remain in control of the process. Thus, in this paper, we present a virtual reality application to generate virtual environments and compare three common interaction modalities (voice, controller, and hands) in a pre-study (N = 18), revealing a combination of initial voice input and continued controller manipulation as best suitable. We then investigate three levels of process control (all-at-once, creation-before-manipulation, and step-by-step) in a user study (N = 27). Our results show that although all-at-once reduced the number of object manipulations, participants felt more in control when using the step-by-step approach. © 2024 The Authors.},
  keywords  = {All-at-once, Controllers, Generative AI, Human-controled scene generation, Human-Controlled Scene Generation, Immersive, Immersive Virtual Environments, In-control, Process control, Scene Generation, Three-level, User study, User-centred, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
2023
Kouzelis, L. R.; Spantidi, O.
Synthesizing Play-Ready VR Scenes with Natural Language Prompts Through GPT API (Proceedings Article)
In: G., Bebis; G., Ghiasi; Y., Fang; A., Sharf; Y., Dong; C., Weaver; Z., Leo; J.J., LaViola Jr.; L., Kohli (Ed.): Lect. Notes Comput. Sci., pp. 15–26, Springer Science and Business Media Deutschland GmbH, 2023, ISBN: 03029743 (ISSN); 978-303147965-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing
@comment{Normalized from Scopus auto-export: ISSN moved out of the isbn field, ISBN
hyphenated, page range uses --, editor names in unambiguous "Last, First" form
(including the "LaViola, Jr." suffix), series split out of volume, and the
VR/GPT/API acronyms brace-protected against style recasing.}
@inproceedings{kouzelis_synthesizing_2023,
  title     = {Synthesizing Play-Ready {VR} Scenes with Natural Language Prompts Through {GPT} {API}},
  author    = {Kouzelis, L. R. and Spantidi, O.},
  editor    = {Bebis, G. and Ghiasi, G. and Fang, Y. and Sharf, A. and Dong, Y. and Weaver, C. and Leo, Z. and LaViola, Jr., J. J. and Kohli, L.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180626887&doi=10.1007%2f978-3-031-47966-3_2&partnerID=40&md5=d15c3e2f3260e2a68bdca91c29df7bbb},
  doi       = {10.1007/978-3-031-47966-3_2},
  isbn      = {978-3-031-47965-6},
  issn      = {0302-9743},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {Lect. Notes Comput. Sci.},
  series    = {Lecture Notes in Computer Science},
  volume    = {14362},
  pages     = {15--26},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {In visual computing, 3D scene generation stands as a crucial component, offering applications in various fields such as gaming, virtual reality (VR), and architectural visualization. Creating realistic and versatile virtual environments, however, poses significant challenges. This work presents a novel methodology that leverages the capabilities of a widely adopted large language model (LLM) to address these challenges. Our approach utilizes the GPT API to interpret natural language prompts and generate detailed, VR-ready scenes within Unity3D. Our work is also inherently scalable, since the model accepts any database of 3D objects with minimal prior configuration. The effectiveness of the proposed system is demonstrated through a series of case studies, revealing its potential to generate diverse and functional virtual spaces. © 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords  = {3-d designs, 3D object, 3D scenes, AI-driven 3D Design, Language Model, Natural languages, Novel methodology, Scene Generation, Three dimensional computer graphics, Unity3d, Virtual Reality, Visual computing},
  pubstate  = {published},
  tppubtype = {inproceedings},
}