AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
2025
Li, C.; Da, F.
Refined dense face alignment through image matching Journal Article
In: Visual Computer, vol. 41, no. 1, pp. 157–171, 2025, ISSN: 0178-2789.
@article{li_refined_2025,
title = {Refined dense face alignment through image matching},
author = {C. Li and F. Da},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187924785&doi=10.1007%2fs00371-024-03316-3&partnerID=40&md5=839834c6ff3320398d5ef75b055947cb},
doi = {10.1007/s00371-024-03316-3},
issn = {0178-2789},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {1},
pages = {157–171},
abstract = {Face alignment is the foundation of building 3D avatars for virtual communication in the metaverse, human-computer interaction, AI-generated content, etc., and therefore, it is critical that face deformation is reflected precisely to better convey expression, pose and identity. However, misalignment exists in the current best methods that fit a face model to a target image and can be easily captured by human perception, thus degrading the reconstruction quality. The main reason is that the widely used metrics for training, including the landmark re-projection loss, pixel-wise loss and perception-level loss, are insufficient to address the misalignment and suffer from ambiguity and local minima. To address misalignment, we propose an image MAtchinG-driveN dEnse geomeTrIC supervision (MAGNETIC). Specifically, we treat face alignment as a matching problem and establish pixel-wise correspondences between the target and rendered images. Then reconstructed facial points are guided towards their corresponding points on the target image, thus improving reconstruction. Synthesized image pairs are mixed up with face outliers to simulate the target and rendered images with ground-truth pixel-wise correspondences to enable the training of a robust prediction network. Compared with existing methods that turn to 3D scans for dense geometric supervision, our method reaches comparable shape reconstruction results with much lower effort. Experimental results on the NoW testset show that we reach the state-of-the-art among all self-supervised methods and even outperform methods using photo-realistic images. We also achieve comparable results with the state-of-the-art on the benchmark of Feng et al. Codes will be available at: github.com/ChunLLee/ReconstructionFromMatching. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {article}
}
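A minimal sketch of the matching-driven supervision idea in this abstract: once pixel-wise correspondences between the rendered and target images are established, each projected facial point is pulled toward its matched target pixel. The pinhole projection, the precomputed matches, and all names below are illustrative assumptions, not the authors' released code (see the repository linked in the abstract).

```python
import numpy as np

def magnetic_style_loss(points_3d, matches_2d, K):
    """Toy matching-driven geometric loss (illustrative assumption).

    points_3d : (N, 3) reconstructed facial points in camera space.
    matches_2d: (N, 2) target-image pixels matched to the rendered
                counterparts of points_3d.
    K         : (3, 3) pinhole camera intrinsics.
    """
    # Project the reconstructed points with a simple pinhole model.
    proj = (K @ points_3d.T).T
    proj = proj[:, :2] / proj[:, 2:3]
    # Pull each projected point toward its matched target pixel.
    return np.mean(np.sum((proj - matches_2d) ** 2, axis=1))

# Usage with random stand-in data:
rng = np.random.default_rng(0)
K = np.array([[500.0, 0.0, 128.0], [0.0, 500.0, 128.0], [0.0, 0.0, 1.0]])
pts = rng.normal([0.0, 0.0, 2.0], 0.1, size=(68, 3))
target = rng.uniform(0.0, 256.0, size=(68, 2))
print(magnetic_style_loss(pts, target, K))
```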
Shen, Y.; Li, B.; Huang, J.; Wang, Z.
GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1292–1293, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-8-3315-1484-6.
@inproceedings{shen_gaussianshopvr_2025,
title = {GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR},
author = {Y. Shen and B. Li and J. Huang and Z. Wang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005138672&doi=10.1109%2fVRW66409.2025.00292&partnerID=40&md5=9b644bd19394a289d3027ab9a2dfed6a},
doi = {10.1109/VRW66409.2025.00292},
isbn = {979-8-3315-1484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1292–1293},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality (VR) applications require massive high-quality 3D assets to create immersive environments. Generating mesh-based 3D assets typically involves a significant amount of manpower and effort, which makes VR applications less accessible. 3D Gaussian Splatting (3DGS) has attracted much attention for its ability to quickly create digital replicas of real-life scenes and its compatibility with traditional rendering pipelines. However, it remains a challenge to edit 3DGS in a flexible and controllable manner. We propose GaussianShopVR, a system that leverages VR user interfaces to specify target areas to achieve flexible and controllable editing of reconstructed 3DGS. In addition, selected areas can provide 3D information to generative AI models to facilitate the editing. GaussianShopVR integrates object hierarchy management while keeping the backpropagated gradient flow to allow local editing with context information. © 2025 IEEE.},
keywords = {3D authoring, 3D modeling, Digital replicas, Gaussian distribution, Gaussian Splatting editing, Gaussians, Graphical user interfaces, High quality, Immersive, Immersive environment, Interactive computer graphics, Rendering (computer graphics), Rendering pipelines, Splatting, Three dimensional computer graphics, User profile, Virtual Reality, Virtual reality user interface, Virtualization, VR user interface},
pubstate = {published},
tppubtype = {inproceedings}
}
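The mechanism the abstract hints at, confining edits to a user-specified target area while "keeping the backpropagated gradient flow", can be approximated by masking the gradients of unselected Gaussians before the optimizer step. A rough sketch under that assumption (PyTorch, toy loss; not GaussianShopVR's implementation):

```python
import torch

# Gaussian centers of a reconstructed 3DGS scene (toy stand-in).
means = torch.randn(1000, 3, requires_grad=True)

# The VR controller specifies a spherical target area.
center, radius = torch.tensor([0.0, 0.0, 0.0]), 0.5
mask = ((means.detach() - center).norm(dim=1) < radius).float().unsqueeze(1)

# Placeholder editing objective over the whole scene.
loss = means.pow(2).sum()
loss.backward()

# Zero out gradients of unselected Gaussians, so context Gaussians stay
# fixed while still contributing to the loss through the full render.
with torch.no_grad():
    means.grad *= mask
```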
Mao, H.; Xu, Z.; Wei, S.; Quan, Y.; Deng, N.; Yang, X.
LLM-powered Gaussian Splatting in VR interactions Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1654–1655, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-8-3315-1484-6.
@inproceedings{mao_llm-powered_2025,
title = {LLM-powered Gaussian Splatting in VR interactions},
author = {H. Mao and Z. Xu and S. Wei and Y. Quan and N. Deng and X. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005148017&doi=10.1109%2fVRW66409.2025.00472&partnerID=40&md5=ee725f655a37251ff335ad2098d15f22},
doi = {10.1109/VRW66409.2025.00472},
isbn = {979-8-3315-1484-6},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1654–1655},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in radiance field rendering, particularly 3D Gaussian Splatting (3DGS), have demonstrated significant potential for VR content creation, offering both high-quality rendering and an efficient production pipeline. However, current physics-based interaction systems for 3DGS are limited to either simplistic, unrealistic simulations or require substantial user input for complex scenes, largely due to the lack of scene comprehension. In this demonstration, we present a highly realistic interactive VR system powered by large language models (LLMs). After object-aware GS reconstruction, we prompt GPT-4o to analyze the physical properties of objects in the scene, which then guide physical simulations that adhere to real-world phenomena. Additionally, we design a GPT-assisted GS inpainting module to complete the areas occluded by manipulated objects. To facilitate rich interaction, we introduce a computationally efficient physical simulation framework through a PBD-based unified interpolation method, which supports various forms of physical interactions. In our research demonstrations, we reconstruct a variety of scenes enhanced by the LLM's understanding, showcasing how our VR system can support complex, realistic interactions without additional manual design or annotation. © 2025 IEEE.},
keywords = {3D Gaussian Splatting, 3D reconstruction, Content creation, Digital elevation model, Gaussians, High quality, Language Model, material analysis, Materials analysis, Physical simulation, Quality rendering, Rendering (computer graphics), Splatting, Virtual Reality, Volume Rendering, VR systems},
pubstate = {published},
tppubtype = {inproceedings}
}
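One way to picture the scene-comprehension step this abstract describes: prompt the LLM for simulation-ready physical parameters of each recognized object and sanitize the JSON reply before handing it to the solver. The prompt wording, the JSON schema, and the stubbed LLM call are assumptions for illustration, not the paper's code.

```python
import json

def material_prompt(object_name):
    # Ask the model for simulation-ready parameters as strict JSON.
    return (
        "Estimate physical properties of a real-world '" + object_name + "'. "
        'Reply only with JSON: {"mass_kg": float, "stiffness": float, '
        '"friction": float}.'
    )

def parse_material(reply):
    props = json.loads(reply)
    # Clamp values into ranges a PBD-style solver can digest.
    props["stiffness"] = min(max(props["stiffness"], 0.0), 1.0)
    props["friction"] = min(max(props["friction"], 0.0), 1.0)
    return props

# Stand-in reply for a hypothetical query_llm(material_prompt("ceramic mug")):
reply = '{"mass_kg": 0.3, "stiffness": 0.8, "friction": 0.4}'
print(parse_material(reply))
```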
Xing, Y.; Liu, Q.; Wang, J.; Gómez-Zará, D.
sMoRe: Spatial Mapping and Object Rendering Environment Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 115–119, Association for Computing Machinery, 2025, ISBN: 979-8-4007-1409-2.
@inproceedings{xing_smore_2025,
title = {sMoRe: Spatial Mapping and Object Rendering Environment},
author = {Y. Xing and Q. Liu and J. Wang and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001670668&doi=10.1145%2f3708557.3716337&partnerID=40&md5=8ef4c5c4ef2b3ee30d00e4b8d19d19b8},
doi = {10.1145/3708557.3716337},
isbn = {979-8-4007-1409-2},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {115–119},
publisher = {Association for Computing Machinery},
abstract = {In mixed reality (MR) environments, understanding space and creating virtual objects are crucial to providing an intuitive user experience. This paper introduces sMoRe (Spatial Mapping and Object Rendering Environment), an MR application that combines Generative AI (GenAI) to assist users in creating, placing, and managing virtual objects within physical spaces. sMoRe allows users to create and place virtual objects with GenAI through voice or typed text commands while specifying spatial constraints. The system employs Large Language Models (LLMs) to interpret users' commands, analyze the current scene, and identify optimal locations. Additionally, sMoRe integrates a text-to-3D generative model to dynamically create 3D objects based on users' descriptions. Our user study demonstrates the effectiveness of sMoRe in enhancing user comprehension, interaction, and organization of the MR environment. © 2025 Copyright held by the owner/author(s).},
keywords = {Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects},
pubstate = {published},
tppubtype = {inproceedings}
}
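A rough sketch of the placement step: the LLM reduces a command like "put a vase on the table" to a target surface label, and the system checks the spatial map for a feasible spot. The surface records and the feasibility rule below are illustrative assumptions, not sMoRe's implementation.

```python
import numpy as np

# Mapped surfaces produced by spatial mapping (stand-in data).
surfaces = {
    "table": {"center": np.array([1.0, 0.7, 0.0]), "free_area_m2": 0.30},
    "shelf": {"center": np.array([0.2, 1.5, 2.0]), "free_area_m2": 0.05},
}

def place(target_label, footprint_m2):
    # target_label is assumed to come from the LLM's parse of the command.
    s = surfaces[target_label]
    if s["free_area_m2"] < footprint_m2:
        raise ValueError("not enough free space on the " + target_label)
    return s["center"]  # simplistic: anchor the object at the surface center

print(place("table", 0.10))
```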
Ademola, A.; Sinclair, D.; Koniaris, B.; Hannah, S.; Mitchell, K.
NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction Journal Article
In: Computers and Graphics, vol. 129, 2025, ISSN: 0097-8493.
@article{ademola_neft-net_2025,
title = {NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction},
author = {A. Ademola and D. Sinclair and B. Koniaris and S. Hannah and K. Mitchell},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006724723&doi=10.1016%2fj.cag.2025.104244&partnerID=40&md5=08fd0792837332404ec9acdd16f608bf},
doi = {10.1016/j.cag.2025.104244},
issn = {0097-8493},
year = {2025},
date = {2025-01-01},
journal = {Computers and Graphics},
volume = {129},
abstract = {Advancements in prediction of human motion sequences are critical for enabling online virtual reality (VR) users to dance and move in ways that accurately mirror real-world actions, delivering a more immersive and connected experience. However, latency in networked motion tracking remains a significant challenge, disrupting engagement and necessitating predictive solutions to achieve real-time synchronization of remote motions. To address this issue, we propose a novel approach leveraging a synthetically generated dataset based on supervised foot anchor placement timings for rhythmic motions, ensuring periodicity and reducing prediction errors. Our model integrates a discrete cosine transform (DCT) to encode motion, refine high-frequency components, and smooth motion sequences, mitigating jittery artifacts. Additionally, we introduce a feed-forward attention mechanism designed to learn from N-window pairs of 3D key-point pose histories for precise future motion prediction. Quantitative and qualitative evaluations on the Human3.6M dataset highlight significant improvements in mean per joint position error (MPJPE) metrics, demonstrating the superiority of our technique over state-of-the-art approaches. We further introduce novel result pose visualizations through the use of generative AI methods. © 2025 The Authors},
keywords = {Cosine transforms, Discrete cosine transforms, Human motions, Immersive, machine learning, Machine-learning, Motion analysis, Motion prediction, Motion processing, Motion sequences, Motion tracking, Real-world, Rendering, Rendering (computer graphics), Rhythmic motion, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
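The DCT stage this abstract describes is easy to illustrate: encode the motion sequence along time, attenuate high-frequency coefficients to suppress jitter, then decode. The sequence length and cutoff below are arbitrary choices for demonstration, not the paper's settings.

```python
import numpy as np
from scipy.fft import dct, idct

def dct_smooth(motion, keep):
    """motion: (T, J) array of T frames by J joint channels."""
    coeffs = dct(motion, type=2, norm="ortho", axis=0)
    coeffs[keep:] = 0.0  # zero out high-frequency components
    return idct(coeffs, type=2, norm="ortho", axis=0)

# Toy rhythmic signal with jitter, smoothed by keeping 12 coefficients.
t = np.linspace(0.0, 2.0 * np.pi, 120)
noisy = np.sin(t)[:, None] + 0.1 * np.random.default_rng(0).normal(size=(120, 1))
smooth = dct_smooth(noisy, keep=12)
```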
2024
Martini, M.; Valentini, V.; Ciprian, A.; Bottino, A.; Iacoviello, R.; Montagnuolo, M.; Messina, A.; Strada, F.; Zappia, D.
Semi-Automated Digital Human Production for Enhanced Media Broadcasting Proceedings Article
In: IEEE Gaming, Entertain., Media Conf., GEM, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7453-7.
@inproceedings{martini_semi_2024,
title = {Semi-Automated Digital Human Production for Enhanced Media Broadcasting},
author = {M. Martini and V. Valentini and A. Ciprian and A. Bottino and R. Iacoviello and M. Montagnuolo and A. Messina and F. Strada and D. Zappia},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199536742&doi=10.1109%2fGEM61861.2024.10585601&partnerID=40&md5=3703fba931b02f9615316db8ebbca70c},
doi = {10.1109/GEM61861.2024.10585601},
isbn = {979-8-3503-7453-7},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Gaming, Entertain., Media Conf., GEM},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In recent years, the application of synthetic humans in various fields has attracted considerable attention, leading to extensive exploration of their integration into the Metaverse and virtual production environments. This work presents a semi-automated approach that aims to find a fair trade-off between high-quality outputs and efficient production times. The project focuses on the Rai photo and video archives to find images of target characters for texturing and 3D reconstruction, with the goal of reviving Rai's 2D footage and enhancing the media experience. A key aspect of this study is to minimize human intervention, ensuring an efficient, flexible, and scalable creation process. In this work, the improvements have been distributed among different stages of the digital human creation process, starting with the generation of 3D head meshes from 2D images of the reference character and then moving on to the generation, using a Diffusion model, of suitable images for texture development. These assets are then integrated into the Unreal Engine, where a custom widget facilitates posing, rendering, and texturing of Synthetic Human models. Finally, an in-depth quantitative comparison and subjective tests were carried out between the original character images and the rendered synthetic humans, confirming the validity of the approach. © 2024 IEEE.},
keywords = {AI automation, Automation, Creation process, Digital humans, Economic and social effects, Extensive explorations, Face reconstruction, Generative AI, Image enhancement, media archive, Media archives, Metaverses, Rendering (computer graphics), Synthetic human, Synthetic Humans, Textures, Three dimensional computer graphics, Virtual production, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
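As a reading aid, the stages named in this abstract can be laid out as a pipeline skeleton. Every function below is a hypothetical placeholder; the actual mesh fitting, diffusion-based texturing, and Unreal Engine widget are not reproduced here.

```python
# Stage-level skeleton of the semi-automated pipeline (all placeholders).

def build_digital_human(archive_images):
    head_mesh = fit_head_mesh(archive_images)    # 3D head mesh from 2D photos
    texture = generate_texture(archive_images)   # diffusion-model texture images
    return export_to_unreal(head_mesh, texture)  # posing/rendering/texturing widget

def fit_head_mesh(images):
    return {"vertices": [], "faces": []}         # placeholder result

def generate_texture(images):
    return {"albedo": None}                      # placeholder result

def export_to_unreal(mesh, texture):
    return {"asset": (mesh, texture)}            # placeholder result

print(build_digital_human(["frame_001.png"]))
```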
2023
Vincent, B.; Ayyar, K.
Roblox Generative AI in action Proceedings Article
In: Spencer, S.N. (Ed.): Proc. - SIGGRAPH Real-Time Live!, Association for Computing Machinery, Inc, 2023, ISBN: 979-8-4007-0158-0.
@inproceedings{vincent_roblox_2023,
title = {Roblox Generative AI in action},
author = {B. Vincent and K. Ayyar},
editor = {Spencer, S.N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85167946022&doi=10.1145%2f3588430.3597250&partnerID=40&md5=61fda81c33eb3623240f7d14f51607b0},
doi = {10.1145/3588430.3597250},
isbn = {979-8-4007-0158-0},
year = {2023},
date = {2023-01-01},
booktitle = {Proc. - SIGGRAPH Real-Time Live!},
publisher = {Association for Computing Machinery, Inc},
abstract = {Roblox is investing in generative AI techniques to revolutionize the creation process on its platform. By leveraging natural language and other intuitive expressions of intent, creators can build interactive objects and scenes without complex modeling or coding. The use of AI image generation services and large language models aims to make creation faster and easier for every user on the platform. © 2023 Owner/Author.},
keywords = {AI techniques, Complex model, Creation process, Education, Game, Games, Interactive computer graphics, Interactive objects, Lighting, Metaverse, Metaverses, Modeling, Modeling languages, Natural languages, Object and scenes, Pipeline, Real-Time Rendering, Rendering (computer graphics)},
pubstate = {published},
tppubtype = {inproceedings}
}