AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2025
Shen, Y.; Li, B.; Huang, J.; Wang, Z.
GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1292–1293, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
@inproceedings{shen_gaussianshopvr_2025,
title = {GaussianShopVR: Facilitating Immersive 3D Authoring Using Gaussian Splatting in VR},
author = {Y. Shen and B. Li and J. Huang and Z. Wang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005138672&doi=10.1109%2FVRW66409.2025.00292&partnerID=40&md5=2290016d250649f8d7f262212b1f59cb},
doi = {10.1109/VRW66409.2025.00292},
isbn = {9798331514846 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1292–1293},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality (VR) applications require massive high-quality 3D assets to create immersive environments. Generating mesh-based 3D assets typically involves a significant amount of manpower and effort, which makes VR applications less accessible. 3D Gaussian Splatting (3DGS) has attracted much attention for its ability to quickly create digital replicas of real-life scenes and its compatibility with traditional rendering pipelines. However, it remains a challenge to edit 3DGS in a flexible and controllable manner. We propose GaussianShopVR, a system that leverages VR user interfaces to specify target areas to achieve flexible and controllable editing of reconstructed 3DGS. In addition, selected areas can provide 3D information to generative AI models to facilitate the editing. GaussianShopVR integrates object hierarchy management while keeping the backpropagated gradient flow to allow local editing with context information.},
keywords = {3D authoring, 3D modeling, Digital replicas, Gaussian distribution, Gaussian Splatting editing, Gaussians, Graphical user interfaces, High quality, Immersive, Immersive environment, Interactive computer graphics, Rendering (computer graphics), Rendering pipelines, Splatting, Three dimensional computer graphics, User profile, Virtual Reality, Virtual reality user interface, Virtualization, VR user interface},
pubstate = {published},
tppubtype = {inproceedings}
}
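The record above describes selecting target regions of a reconstructed 3D Gaussian Splatting scene for editing. The following is a minimal sketch, not the authors' implementation, of how a VR tool might represent 3DGS primitives and mask the splats that fall inside a user-specified spherical edit region; all names and parameters are illustrative.

# Illustrative sketch only (not the GaussianShopVR implementation): a minimal
# representation of 3D Gaussian Splatting primitives and a spherical region
# query of the kind a VR interface could use to mark splats for local editing.
from dataclasses import dataclass
import numpy as np

@dataclass
class GaussianCloud:
    means: np.ndarray       # (N, 3) splat centers
    scales: np.ndarray      # (N, 3) per-axis extents
    rotations: np.ndarray   # (N, 4) unit quaternions
    opacities: np.ndarray   # (N,)
    sh_coeffs: np.ndarray   # (N, K, 3) spherical-harmonic color coefficients

def select_in_sphere(cloud: GaussianCloud, center: np.ndarray, radius: float) -> np.ndarray:
    """Return a boolean mask of splats whose centers lie inside the target sphere."""
    dist = np.linalg.norm(cloud.means - center, axis=1)
    return dist <= radius

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 1000
    cloud = GaussianCloud(
        means=rng.uniform(-1, 1, (n, 3)),
        scales=rng.uniform(0.01, 0.05, (n, 3)),
        rotations=np.tile([1.0, 0.0, 0.0, 0.0], (n, 1)),
        opacities=rng.uniform(0.5, 1.0, n),
        sh_coeffs=rng.normal(size=(n, 16, 3)),
    )
    mask = select_in_sphere(cloud, center=np.array([0.0, 0.0, 0.0]), radius=0.3)
    print(f"{mask.sum()} of {n} splats fall inside the edit region")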
Mao, H.; Xu, Z.; Wei, S.; Quan, Y.; Deng, N.; Yang, X.
LLM-powered Gaussian Splatting in VR interactions Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1654–1655, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
@inproceedings{mao_llm-powered_2025,
title = {LLM-powered Gaussian Splatting in VR interactions},
author = {H. Mao and Z. Xu and S. Wei and Y. Quan and N. Deng and X. Yang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005148017&doi=10.1109%2FVRW66409.2025.00472&partnerID=40&md5=e673f023f1537bd579dcf0685f611adb},
doi = {10.1109/VRW66409.2025.00472},
isbn = {9798331514846 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1654–1655},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in radiance field rendering, particularly 3D Gaussian Splatting (3DGS), have demonstrated significant potential for VR content creation, offering both high-quality rendering and an efficient production pipeline. However, current physics-based interaction systems for 3DGS are limited to either simplistic, unrealistic simulations or require substantial user input for complex scenes, largely due to the lack of scene comprehension. In this demonstration, we present a highly realistic interactive VR system powered by large language models (LLMs). After object-aware GS reconstruction, we prompt GPT-4o to analyze the physical properties of objects in the scene, which then guide physical simulations that adhere to real-world phenomena. Additionally, we design a GPT-assisted GS inpainting module to complete the areas occluded by manipulated objects. To facilitate rich interaction, we introduce a computationally efficient physical simulation framework through a PBD-based unified interpolation method, which supports various forms of physical interactions. In our research demonstrations, we reconstruct a variety of scenes enhanced by the LLM's understanding, showcasing how our VR system can support complex, realistic interactions without additional manual design or annotation.},
keywords = {3D Gaussian Splatting, 3D reconstruction, Content creation, Digital elevation model, Gaussians, High quality, Language Model, material analysis, Materials analysis, Physical simulation, Quality rendering, Rendering (computer graphics), Splatting, Virtual Reality, Volume Rendering, VR systems},
pubstate = {published},
tppubtype = {inproceedings}
}
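The abstract above mentions a PBD-based simulation framework. The sketch below is a generic position-based dynamics (PBD) step with distance constraints, shown only to illustrate the family of methods being referred to; it is not the authors' unified interpolation framework, and all constants are illustrative.

# Illustrative sketch only (not the authors' framework): one step of classic
# position-based dynamics (PBD) with distance constraints, the general family
# of methods the abstract refers to for efficient physical interaction.
import numpy as np

def pbd_step(pos, prev_pos, edges, rest_len, inv_mass, dt=1.0 / 90.0, iters=10):
    """Advance particle positions by one PBD step under gravity."""
    gravity = np.array([0.0, -9.81, 0.0])
    vel = (pos - prev_pos) / dt
    pred = pos + dt * vel + (dt ** 2) * gravity * (inv_mass > 0)[:, None]
    for _ in range(iters):                      # Gauss-Seidel constraint projection
        for (i, j), l0 in zip(edges, rest_len):
            d = pred[j] - pred[i]
            dist = np.linalg.norm(d)
            w = inv_mass[i] + inv_mass[j]
            if dist < 1e-9 or w == 0:
                continue
            corr = (dist - l0) / (dist * w) * d
            pred[i] += inv_mass[i] * corr       # pull/push particles to restore rest length
            pred[j] -= inv_mass[j] * corr
    return pred, pos                            # new positions, new "previous" positions

if __name__ == "__main__":
    pos = np.array([[0.0, 0.0, 0.0], [0.3, 0.0, 0.0], [0.6, 0.0, 0.0]])
    prev = pos.copy()
    inv_mass = np.array([0.0, 1.0, 1.0])        # first particle pinned in place
    edges = [(0, 1), (1, 2)]
    rest = [0.3, 0.3]
    for _ in range(90):                         # simulate one second of a hanging chain
        pos, prev = pbd_step(pos, prev, edges, rest, inv_mass)
    print(pos.round(3))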
Li, C.; Da, F.
Refined dense face alignment through image matching Journal Article
In: Visual Computer, vol. 41, no. 1, pp. 157–171, 2025, ISSN: 01782789 (ISSN); 14322315 (ISSN), (Publisher: Springer Science and Business Media Deutschland GmbH).
@article{li_refined_2025,
title = {Refined dense face alignment through image matching},
author = {C. Li and F. Da},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187924785&doi=10.1007%2Fs00371-024-03316-3&partnerID=40&md5=2de9f0dbdf9ea162871458c08e711c94},
doi = {10.1007/s00371-024-03316-3},
issn = {01782789 (ISSN); 14322315 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {1},
pages = {157–171},
abstract = {Face alignment is the foundation of building 3D avatars for virtual communication in the metaverse, human-computer interaction, AI-generated content, etc., and therefore, it is critical that face deformation is reflected precisely to better convey expression, pose and identity. However, misalignment exists in the currently best methods that fit a face model to a target image and can be easily captured by human perception, thus degrading the reconstruction quality. The main reason is that the widely used metrics for training, including the landmark re-projection loss, pixel-wise loss and perception-level loss, are insufficient to address the misalignment and suffer from ambiguity and local minimums. To address misalignment, we propose an image MAtchinG-driveN dEnse geomeTrIC supervision (MAGNETIC). Specifically, we treat face alignment as a matching problem and establish pixel-wise correspondences between the target and rendered images. Then reconstructed facial points are guided towards their corresponding points on the target image, thus improving reconstruction. Synthesized image pairs are mixed up with face outliers to simulate the target and rendered images with ground-truth pixel-wise correspondences to enable the training of a robust prediction network. Compared with existing methods that turn to 3D scans for dense geometric supervision, our method reaches comparable shape reconstruction results with much lower effort. Experimental results on the NoW testset show that we reach the state-of-the-art among all self-supervised methods and even outperform methods using photo-realistic images. We also achieve comparable results with the state-of-the-art on the benchmark of Feng et al. Codes will be available at: github.com/ChunLLee/ReconstructionFromMatching.},
note = {Publisher: Springer Science and Business Media Deutschland GmbH},
keywords = {3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {article}
}
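The abstract above frames alignment as pulling reconstructed facial points toward their pixel-wise correspondences on the target image. The following is a minimal sketch of such a matching-driven error term, assuming the correspondences have already been computed; it is not the paper's MAGNETIC implementation, and the landmark data is synthetic.

# Illustrative sketch only (not the paper's implementation): a matching-driven
# alignment error. Given pixel-wise correspondences between a rendered face and
# the target image, each projected 3D facial point is compared against its
# matched 2D location on the target.
import numpy as np

def matching_error(projected_pts: np.ndarray, matched_pts: np.ndarray) -> float:
    """Mean Euclidean distance (in pixels) between projected facial points
    and their matched positions on the target image."""
    return float(np.mean(np.linalg.norm(projected_pts - matched_pts, axis=1)))

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    projected = rng.uniform(0, 256, (68, 2))            # hypothetical projected points
    matched = projected + rng.normal(0, 2.0, (68, 2))   # their correspondences on the target
    print(f"matching-driven error: {matching_error(projected, matched):.2f} px")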
Xing, Y.; Liu, Q.; Wang, J.; Gómez-Zará, D.
sMoRe: Spatial Mapping and Object Rendering Environment Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 115–119, Association for Computing Machinery, 2025, ISBN: 9798400714092 (ISBN).
@inproceedings{xing_smore_2025,
title = {sMoRe: Spatial Mapping and Object Rendering Environment},
author = {Y. Xing and Q. Liu and J. Wang and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001670668&doi=10.1145%2F3708557.3716337&partnerID=40&md5=c23b3e19f42dbd8796e43f5ab71e12b6},
doi = {10.1145/3708557.3716337},
isbn = {9798400714092 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {115–119},
publisher = {Association for Computing Machinery},
abstract = {In mixed reality (MR) environments, understanding space and creating virtual objects is crucial to providing an intuitive user experience. This paper introduces sMoRe (Spatial Mapping and Object Rendering Environment), an MR application that combines Generative AI (GenAI) to assist users in creating, placing, and managing virtual objects within physical spaces. sMoRe allows users to use voice or typed text commands to create and place virtual objects using GenAI while specifying spatial constraints. The system employs Large Language Models (LLMs) to interpret users’ commands, analyze the current scene, and identify optimal locations. Additionally, sMoRe integrates a text-to-3D generative model to dynamically create 3D objects based on users’ descriptions. Our user study demonstrates the effectiveness of sMoRe in enhancing user comprehension, interaction, and organization of the MR environment.},
keywords = {Generative adversarial networks, Generative AI, Language Model, Large language model, large language models, Mapping, Mixed reality, Mixed-reality environment, Object rendering, Rendering (computer graphics), Space Manipulation, Spatial mapping, Spatial objects, Users' experiences, Virtual environments, Virtual objects},
pubstate = {published},
tppubtype = {inproceedings}
}
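sMoRe has LLMs interpret creation and placement commands. The snippet below shows one hypothetical way such a response could be structured and parsed on the client side; the schema and field names are assumptions made for illustration, not sMoRe's actual interface.

# Hypothetical example (not sMoRe's actual schema): the kind of structured
# response an LLM could be asked to return when interpreting a command such as
# "put a small potted plant on the desk", so that an MR client can create and
# anchor the generated object. Field names are illustrative only.
import json
from dataclasses import dataclass

@dataclass
class PlacementCommand:
    object_description: str   # prompt forwarded to a text-to-3D generator
    anchor_surface: str       # e.g. "desk", "floor", "wall"
    offset_m: tuple           # placement offset from the anchor, in meters
    scale: float              # uniform scale factor for the generated mesh

def parse_llm_response(raw: str) -> PlacementCommand:
    data = json.loads(raw)
    return PlacementCommand(
        object_description=data["object_description"],
        anchor_surface=data["anchor_surface"],
        offset_m=tuple(data["offset_m"]),
        scale=float(data["scale"]),
    )

if __name__ == "__main__":
    raw = '{"object_description": "small potted plant", "anchor_surface": "desk", "offset_m": [0.1, 0.0, 0.0], "scale": 0.3}'
    print(parse_llm_response(raw))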
Ademola, A.; Sinclair, D.; Koniaris, B.; Hannah, S.; Mitchell, K.
NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction Journal Article
In: Computers and Graphics, vol. 129, 2025, ISSN: 00978493 (ISSN), (Publisher: Elsevier Ltd).
@article{ademola_neft-net_2025,
title = {NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction},
author = {A. Ademola and D. Sinclair and B. Koniaris and S. Hannah and K. Mitchell},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006724723&doi=10.1016%2Fj.cag.2025.104244&partnerID=40&md5=017d869a34df5180e00e249e97227efe},
doi = {10.1016/j.cag.2025.104244},
issn = {00978493 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Computers and Graphics},
volume = {129},
abstract = {Advancements in prediction of human motion sequences are critical for enabling online virtual reality (VR) users to dance and move in ways that accurately mirror real-world actions, delivering a more immersive and connected experience. However, latency in networked motion tracking remains a significant challenge, disrupting engagement and necessitating predictive solutions to achieve real-time synchronization of remote motions. To address this issue, we propose a novel approach leveraging a synthetically generated dataset based on supervised foot anchor placement timings for rhythmic motions, ensuring periodicity and reducing prediction errors. Our model integrates a discrete cosine transform (DCT) to encode motion, refine high-frequency components, and smooth motion sequences, mitigating jittery artifacts. Additionally, we introduce a feed-forward attention mechanism designed to learn from N-window pairs of 3D key-point pose histories for precise future motion prediction. Quantitative and qualitative evaluations on the Human3.6M dataset highlight significant improvements in mean per joint position error (MPJPE) metrics, demonstrating the superiority of our technique over state-of-the-art approaches. We further introduce novel result pose visualizations through the use of generative AI methods. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Elsevier Ltd},
keywords = {Cosine transforms, Discrete cosine transforms, Human motions, Immersive, machine learning, Machine-learning, Motion analysis, Motion prediction, Motion processing, Motion sequences, Motion tracking, Real-world, Rendering, Rendering (computer graphics), Rhythmic motion, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
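The abstract above refers to DCT-based motion encoding, refinement of high-frequency components, and the MPJPE metric. The sketch below illustrates those two generic ingredients on a toy trajectory, assuming SciPy is available; it is not the NeFT-Net model.

# Illustrative sketch only (not the NeFT-Net model): encoding a joint trajectory
# with the discrete cosine transform (DCT), discarding high-frequency
# coefficients to suppress jitter, and scoring predictions with MPJPE
# (mean per joint position error), the metric cited in the abstract.
import numpy as np
from scipy.fft import dct, idct

def dct_smooth(motion: np.ndarray, keep: int) -> np.ndarray:
    """motion: (T, J, 3) sequence of 3D joint positions.
    Keep only the first `keep` DCT coefficients along the time axis."""
    coeffs = dct(motion, type=2, norm="ortho", axis=0)
    coeffs[keep:] = 0.0                       # zero out high-frequency components
    return idct(coeffs, type=2, norm="ortho", axis=0)

def mpjpe(pred: np.ndarray, target: np.ndarray) -> float:
    """Mean per joint position error over all frames and joints."""
    return float(np.mean(np.linalg.norm(pred - target, axis=-1)))

if __name__ == "__main__":
    t = np.linspace(0, 2 * np.pi, 120)
    clean = np.stack([np.sin(t), np.cos(t), 0.1 * t], axis=-1)[:, None, :]  # (120, 1, 3)
    noisy = clean + np.random.default_rng(0).normal(0, 0.05, clean.shape)
    smoothed = dct_smooth(noisy, keep=16)
    print(f"MPJPE noisy:    {mpjpe(noisy, clean):.4f}")
    print(f"MPJPE smoothed: {mpjpe(smoothed, clean):.4f}")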
Oh, S.; Jung, M.; Kim, T.
EnvMat: A Network for Simultaneous Generation of PBR Maps and Environment Maps from a Single Image Journal Article
In: Electronics (Switzerland), vol. 14, no. 13, 2025, ISSN: 20799292 (ISSN), (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
@article{oh_envmat_2025,
title = {EnvMat: A Network for Simultaneous Generation of PBR Maps and Environment Maps from a Single Image},
author = {S. Oh and M. Jung and T. Kim},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010306182&doi=10.3390%2Felectronics14132554&partnerID=40&md5=a6e24d71cb6f1e632ee2415b99f68c0e},
doi = {10.3390/electronics14132554},
issn = {20799292 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Electronics (Switzerland)},
volume = {14},
number = {13},
abstract = {Generative neural networks have expanded from text and image generation to creating realistic 3D graphics, which are critical for immersive virtual environments. Physically Based Rendering (PBR)—crucial for realistic 3D graphics—depends on PBR maps, environment (env) maps for lighting, and camera viewpoints. Current research mainly generates PBR maps separately, often using fixed env maps and camera poses. This limitation reduces visual consistency and immersion in 3D spaces. Addressing this, we propose EnvMat, a diffusion-based model that simultaneously generates PBR and env maps. EnvMat uses two Variational Autoencoders (VAEs) for map reconstruction and a Latent Diffusion UNet. Experimental results show that EnvMat surpasses the existing methods in preserving visual accuracy, as validated through metrics like LPIPS, MS-SSIM, and CIEDE2000.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {3D graphics, Auto encoders, Cameras, Diffusion, Diffusion Model, Environment maps, generative artificial intelligence, Image understanding, Latent diffusion model, latent diffusion models, Metaverse, Metaverses, Neural Networks, Physically based rendering, physically based rendering (PBR), Rendering (computer graphics), Tellurium compounds, Three dimensional computer graphics, Variational Autoencoder, Variational Autoencoders (VAEs), Variational techniques, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {article}
}
Tian, Y.; Li, X.; Cheng, Z.; Huang, Y.; Yu, T.
Design of Realistic and Artistically Expressive 3D Facial Models for Film AIGC: A Cross-Modal Framework Integrating Audience Perception Evaluation Journal Article
In: Sensors, vol. 25, no. 15, 2025, ISSN: 14248220 (ISSN), (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
@article{tian_design_2025,
title = {Design of Realistic and Artistically Expressive 3D Facial Models for Film AIGC: A Cross-Modal Framework Integrating Audience Perception Evaluation},
author = {Y. Tian and X. Li and Z. Cheng and Y. Huang and T. Yu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013137724&doi=10.3390%2Fs25154646&partnerID=40&md5=8508a27b693f0857ce7cb58e97a2705c},
doi = {10.3390/s25154646},
issn = {14248220 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Sensors},
volume = {25},
number = {15},
abstract = {The rise of virtual production has created an urgent need for both efficient and high-fidelity 3D face generation schemes for cinema and immersive media, but existing methods are often limited by lighting–geometry coupling, multi-view dependency, and insufficient artistic quality. To address this, this study proposes a cross-modal 3D face generation framework based on single-view semantic masks. It utilizes Swin Transformer for multi-level feature extraction and combines it with NeRF for illumination-decoupled rendering. We utilize physical rendering equations to explicitly separate surface reflectance from ambient lighting to achieve robust adaptation to complex lighting variations. In addition, to address geometric errors across illumination scenes, we construct geometric prior constraint networks by mapping 2D facial features to 3D parameter space as regularization terms with the help of semantic masks. On the CelebAMask-HQ dataset, this method achieves a leading score of SSIM = 0.892 (37.6% improvement from baseline) with FID = 40.6. The generated faces excel in symmetry and detail fidelity with realism and aesthetic scores of 8/10 and 7/10, respectively, in a perceptual evaluation with 1000 viewers. By combining physical-level illumination decoupling with semantic geometric priors, this paper establishes a quantifiable feedback mechanism between objective metrics and human aesthetic evaluation, providing a new paradigm for aesthetic quality assessment of AI-generated content.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {3D faces, 3d facial model, 3D facial models, 3D modeling, adaptation, adult, Article, Audience perception evaluation, benchmarking, controlled study, Cross-modal, Face generation, Facial modeling, facies, Feature extraction, feedback, feedback system, female, Geometry, High-fidelity, human, illumination, Immersive media, Lighting, male, movie, Neural radiance field, Neural Radiance Fields, perception, Quality control, Rendering (computer graphics), Semantics, sensor, Three dimensional computer graphics, Virtual production, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Zhao, Y.; Dasari, M.; Guo, T.
CleAR: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality Journal Article
In: Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, vol. 9, no. 3, 2025, ISSN: 24749567 (ISSN), (Publisher: Association for Computing Machinery).
@article{zhao_clear_2025,
title = {CleAR: Robust Context-Guided Generative Lighting Estimation for Mobile Augmented Reality},
author = {Y. Zhao and M. Dasari and T. Guo},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105015452988&doi=10.1145%2F3749535&partnerID=40&md5=ed970d47cbf7f547555eca43b32cd7e7},
doi = {10.1145/3749535},
issn = {24749567 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies},
volume = {9},
number = {3},
abstract = {High-quality environment lighting is essential for creating immersive mobile augmented reality (AR) experiences. However, achieving visually coherent estimation for mobile AR is challenging due to several key limitations in AR device sensing capabilities, including low camera FoV and limited pixel dynamic ranges. Recent advancements in generative AI, which can generate high-quality images from different types of prompts, including texts and images, present a potential solution for high-quality lighting estimation. Still, to effectively use generative image diffusion models, we must address two key limitations of content quality and slow inference. In this work, we design and implement a generative lighting estimation system called CleAR that can produce high-quality, diverse environment maps in the format of 360° HDR images. Specifically, we design a two-step generation pipeline guided by AR environment context data to ensure the output aligns with the physical environment’s visual context and color appearance. To improve the estimation robustness under different lighting conditions, we design a real-time refinement component to adjust lighting estimation results on AR devices. To train and test our generative models, we curate a large-scale environment lighting estimation dataset with diverse lighting conditions. Through a combination of quantitative and qualitative evaluations, we show that CleAR outperforms state-of-the-art lighting estimation methods on estimation accuracy, latency, and robustness, and is rated by 31 participants as producing better renderings for most virtual objects. For example, CleAR achieves 51% to 56% accuracy improvement on virtual object renderings across objects of three distinctive types of materials and reflective properties. CleAR produces lighting estimates of comparable or better quality in just 3.2 seconds—over 110X faster than state-of-the-art methods. Moreover, CleAR supports real-time refinement of lighting estimation results, ensuring robust and timely updates for AR applications.},
note = {Publisher: Association for Computing Machinery},
keywords = {Augmented Reality, Color computer graphics, Environment lighting, Estimation results, Generative model, High quality, Human engineering, Immersive, Lighting, Lighting conditions, Lighting estimation, Mobile augmented reality, Real-time refinement, Rendering (computer graphics), Statistical tests, Virtual objects, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
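CleAR outputs 360° HDR environment maps for AR rendering. As an illustration of one common downstream use of such maps (not part of CleAR's pipeline), the sketch below computes a solid-angle-weighted ambient color from an equirectangular map; the map itself is a random placeholder.

# Illustrative sketch only (not CleAR's pipeline): computing a solid-angle
# weighted average color from an equirectangular 360° HDR environment map, a
# simple ambient term an AR renderer could apply to virtual objects.
import numpy as np

def ambient_from_envmap(env: np.ndarray) -> np.ndarray:
    """env: (H, W, 3) equirectangular radiance map (linear HDR values).
    Returns the solid-angle-weighted mean radiance as an RGB ambient term."""
    h, w, _ = env.shape
    theta = (np.arange(h) + 0.5) / h * np.pi       # polar angle of each pixel row
    weights = np.sin(theta)                        # per-row solid-angle weight
    weighted = env * weights[:, None, None]
    return weighted.sum(axis=(0, 1)) / (weights.sum() * w)

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    env = rng.uniform(0.0, 2.0, (64, 128, 3)).astype(np.float32)  # placeholder HDR map
    print("ambient RGB:", ambient_from_envmap(env).round(3))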
2024
Martini, M.; Valentini, V.; Ciprian, A.; Bottino, A.; Iacoviello, R.; Montagnuolo, M.; Messina, A.; Strada, F.; Zappia, D.
Semi-Automated Digital Human Production for Enhanced Media Broadcasting Proceedings Article
In: IEEE Gaming, Entertain., Media Conf., GEM, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374537 (ISBN).
@inproceedings{martini_semi_2024,
title = {Semi-Automated Digital Human Production for Enhanced Media Broadcasting},
author = {M. Martini and V. Valentini and A. Ciprian and A. Bottino and R. Iacoviello and M. Montagnuolo and A. Messina and F. Strada and D. Zappia},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199536742&doi=10.1109%2FGEM61861.2024.10585601&partnerID=40&md5=a7d19507124982fecf34297e01bee45e},
doi = {10.1109/GEM61861.2024.10585601},
isbn = {9798350374537 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Gaming, Entertain., Media Conf., GEM},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {In recent years, the application of synthetic humans in various fields has attracted considerable attention, leading to extensive exploration of their integration into the Metaverse and virtual production environments. This work presents a semi-automated approach that aims to find a fair trade-off between high-quality outputs and efficient production times. The project focuses on the Rai photo and video archives to find images of target characters for texturing and 3D reconstruction, with the goal of reviving Rai's 2D footage and enhancing the media experience. A key aspect of this study is to minimize human intervention, ensuring an efficient, flexible, and scalable creation process. In this work, the improvements have been distributed among different stages of the digital human creation process, starting with the generation of 3D head meshes from 2D images of the reference character and then moving on to the generation, using a Diffusion model, of suitable images for texture development. These assets are then integrated into the Unreal Engine, where a custom widget facilitates posing, rendering, and texturing of Synthetic Humans models. Finally, an in-depth quantitative comparison and subjective tests were carried out between the original character images and the rendered synthetic humans, confirming the validity of the approach.},
keywords = {AI automation, Automation, Creation process, Digital humans, Economic and social effects, Extensive explorations, Face reconstruction, Generative AI, Image enhancement, media archive, Media archives, Metaverses, Rendering (computer graphics), Synthetic human, Synthetic Humans, Textures, Three dimensional computer graphics, Virtual production, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Vincent, B.; Ayyar, K.
Roblox Generative AI in action Proceedings Article
In: Spencer, S. N. (Ed.): Proc. - SIGGRAPH Real-Time Live!, Association for Computing Machinery, Inc, 2023, ISBN: 9798400701580 (ISBN).
@inproceedings{vincent_roblox_2023,
title = {Roblox Generative AI in action},
author = {B. Vincent and K. Ayyar},
editor = {S. N. Spencer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85167946022&doi=10.1145%2F3588430.3597250&partnerID=40&md5=40f0284036e544eeb5c6c825849d5466},
doi = {10.1145/3588430.3597250},
isbn = {9798400701580 (ISBN)},
year = {2023},
date = {2023-01-01},
booktitle = {Proc. - SIGGRAPH Real-Time Live!},
publisher = {Association for Computing Machinery, Inc},
abstract = {Roblox is investing in generative AI techniques to revolutionize the creation process on its platform. By leveraging natural language and other intuitive expressions of intent, creators can build interactive objects and scenes without complex modeling or coding. The use of AI image generation services and large language models aims to make creation faster and easier for every user on the platform.},
keywords = {AI techniques, Complex model, Creation process, Education, Game, Games, Interactive computer graphics, Interactive objects, Lighting, Metaverse, Metaverses, Modeling, Modeling languages, Natural languages, Object and scenes, Pipeline, Real-Time Rendering, Rendering (computer graphics)},
pubstate = {published},
tppubtype = {inproceedings}
}