AHCI RESEARCH GROUP

Publications

Papers published in international journals,
proceedings of conferences, workshops and books.

OUR RESEARCH

Scientific Publications

How to

Here you can find the complete list of our publications.
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.

Show all

2025

Chen, J.; Wu, X.; Lan, T.; Li, B.

LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article

In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 10772626 (ISSN), (Publisher: IEEE Computer Society).

Abstract | Links | BibTeX | Tags: % reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality

@article{chen_llmer_2025,

title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},

author = {J. Chen and X. Wu and T. Lan and B. Li},

url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2FTVCG.2025.3549549&partnerID=40&md5=50597473616678390f143a33082a13d3},

doi = {10.1109/TVCG.2025.3549549},

issn = {10772626 (ISSN)},

year  = {2025},

date = {2025-01-01},

journal = {IEEE Transactions on Visualization and Computer Graphics},

volume = {31},

number = {5},

pages = {2715–2724},

abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. It leads to not only increased costs with pay-per-use models, but also elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 2025 Elsevier B.V., All rights reserved.},

note = {Publisher: IEEE Computer Society},

keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},

pubstate = {published},

tppubtype = {article}

}

Xi, Z.; Yao, Z.; Huang, J.; Lu, Z. -Q.; Yan, H.; Mu, T. -J.; Wang, Z.; Xu, Q. -C.

TerraCraft: City-scale generative procedural modeling with natural languages Journal Article

In: Graphical Models, vol. 141, 2025, ISSN: 15240703 (ISSN), (Publisher: Elsevier Inc.).

Abstract | Links | BibTeX | Tags: 3D scene generation, 3D scenes, algorithm, Automation, City layout, City scale, data set, Diffusion Model, Game design, Geometry, High quality, Language, Language Model, Large datasets, Large language model, LLMs, Modeling languages, Natural language processing systems, Procedural modeling, Procedural models, Scene Generation, Three dimensional computer graphics, three-dimensional modeling, urban area, Virtual Reality

@article{xi_terracraft_2025,

title = {TerraCraft: City-scale generative procedural modeling with natural languages},

author = {Z. Xi and Z. Yao and J. Huang and Z. -Q. Lu and H. Yan and T. -J. Mu and Z. Wang and Q. -C. Xu},

url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105012397682&doi=10.1016%2Fj.gmod.2025.101285&partnerID=40&md5=15a84050280e5015b1f7b1ef40c62100},

doi = {10.1016/j.gmod.2025.101285},

issn = {15240703 (ISSN)},

year  = {2025},

date = {2025-01-01},

journal = {Graphical Models},

volume = {141},

abstract = {Automated generation of large-scale 3D scenes presents a significant challenge due to the resource-intensive training and datasets required. This is in sharp contrast to the 2D counterparts that have become readily available due to their superior speed and quality. However, prior work in 3D procedural modeling has demonstrated promise in generating high-quality assets using the combination of algorithms and user-defined rules. To leverage the best of both 2D generative models and procedural modeling tools, we present TerraCraft, a novel framework for generating geometrically high-quality 3D city-scale scenes. By utilizing Large Language Models (LLMs), TerraCraft can generate city-scale 3D scenes from natural text descriptions. With its intuitive operation and powerful capabilities, TerraCraft enables users to easily create geometrically high-quality scenes readily for various applications, such as virtual reality and game design. We validate TerraCraft's effectiveness through extensive experiments and user studies, showing its superior performance compared to existing baselines. © 2025 Elsevier B.V., All rights reserved.},

note = {Publisher: Elsevier Inc.},

keywords = {3D scene generation, 3D scenes, algorithm, Automation, City layout, City scale, data set, Diffusion Model, Game design, Geometry, High quality, Language, Language Model, Large datasets, Large language model, LLMs, Modeling languages, Natural language processing systems, Procedural modeling, Procedural models, Scene Generation, Three dimensional computer graphics, three-dimensional modeling, urban area, Virtual Reality},

pubstate = {published},

tppubtype = {article}

}