AHCI RESEARCH GROUP
Publications
Papers published in international journals,
conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Li, C.; Da, F.
Refined dense face alignment through image matching Journal Article
In: Visual Computer, vol. 41, no. 1, pp. 157–171, 2025, ISSN: 0178-2789.
Abstract | Links | BibTeX | Tags: 3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics
@article{li_refined_2025,
title = {Refined dense face alignment through image matching},
author = {C. Li and F. Da},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187924785&doi=10.1007%2fs00371-024-03316-3&partnerID=40&md5=839834c6ff3320398d5ef75b055947cb},
doi = {10.1007/s00371-024-03316-3},
issn = {0178-2789},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {1},
pages = {157–171},
abstract = {Face alignment is the foundation of building 3D avatars for virtual communication in the metaverse, human-computer interaction, AI-generated content, etc., and therefore, it is critical that face deformation is reflected precisely to better convey expression, pose and identity. However, misalignment exists in the currently best methods that fit a face model to a target image and can be easily captured by human perception, thus degrading the reconstruction quality. The main reason is that the widely used metrics for training, including the landmark re-projection loss, pixel-wise loss and perception-level loss, are insufficient to address the misalignment and suffer from ambiguity and local minima. To address misalignment, we propose an image MAtchinG-driveN dEnse geomeTrIC supervision (MAGNETIC). Specifically, we treat face alignment as a matching problem and establish pixel-wise correspondences between the target and rendered images. Then reconstructed facial points are guided towards their corresponding points on the target image, thus improving reconstruction. Synthesized image pairs are mixed up with face outliers to simulate the target and rendered images with ground-truth pixel-wise correspondences to enable the training of a robust prediction network. Compared with existing methods that turn to 3D scans for dense geometric supervision, our method reaches comparable shape reconstruction results with much lower effort. Experimental results on the NoW testset show that we reach the state-of-the-art among all self-supervised methods and even outperform methods using photo-realistic images. We also achieve comparable results with the state-of-the-art on the benchmark of Feng et al. Codes will be available at: github.com/ChunLLee/ReconstructionFromMatching. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {article}
}
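The supervision at the heart of MAGNETIC reduces to a simple rule: once dense pixel-wise correspondences between the rendered and the target image are predicted, each visible projected face vertex is pulled toward its matched location in the target. Below is a minimal PyTorch sketch of such a loss; proj_verts, flow and visible are hypothetical names for quantities the abstract implies, not identifiers from the authors' code.

import torch

def matching_loss(proj_verts, flow, visible):
    # proj_verts: (N, 2) projected 2D vertex positions in the rendered image
    # flow:       (H, W, 2) dense correspondences, rendered pixel -> target pixel
    # visible:    (N,) boolean mask of vertices visible in the render
    H, W, _ = flow.shape
    idx = proj_verts.round().long()
    x = idx[:, 0].clamp(0, W - 1)
    y = idx[:, 1].clamp(0, H - 1)
    # Matched target positions are treated as fixed supervision targets.
    matched = flow[y, x].detach()
    # Pull each visible vertex's projection toward its match in the target image.
    return (proj_verts - matched)[visible].norm(dim=-1).mean()

Gradients flow only through proj_verts, and from there into the face model parameters, which is what "reconstructed facial points are guided towards their corresponding points" amounts to in practice.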
Sajiukumar, A.; Ranjan, A.; Parvathi, P. K.; Satheesh, A.; Udayan, J. Divya; Subramaniam, U.
Generative AI-Enabled Virtual Twin for Meeting Assistants Proceedings Article
In: Saba, T.; Rehman, A. (Eds.): Proc. - Int. Women Data Sci. Conf. at Prince Sultan Univ., WiDS-PSU, pp. 60–65, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-8-3315-2092-2.
Abstract | Links | BibTeX | Tags: 3D avatar generation, 3D Avatars, 3D reconstruction, AI-augmented interaction, Augmented Reality, Communication and collaborations, Conversational AI, Neural radiance fields (NeRF), Real-time performance, Three dimensional computer graphics, Virtual spaces, Voice cloning
@inproceedings{sajiukumar_generative_2025,
title = {Generative AI-Enabled Virtual Twin for Meeting Assistants},
author = {A. Sajiukumar and A. Ranjan and P. K. Parvathi and A. Satheesh and J. Divya Udayan and U. Subramaniam},
editor = {Saba, T. and Rehman, A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007691247&doi=10.1109%2fWiDS-PSU64963.2025.00025&partnerID=40&md5=f0bfb74a8f854c427054c73582909185},
doi = {10.1109/WiDS-PSU64963.2025.00025},
isbn = {979-8-3315-2092-2},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - Int. Women Data Sci. Conf. at Prince Sultan Univ., WiDS-PSU},
pages = {60–65},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The growing dependence on virtual spaces for communication and collaboration has transformed interactions in numerous industries, ranging from professional meetings to education, entertainment, and healthcare. Despite the advancement of AI technologies such as three-dimensional modeling, voice cloning, and conversational AI, the convergence of these technologies in a single platform is still challenging. This paper introduces a unified framework that brings together state-of-the-art 3D avatar generation, real-time voice cloning, and conversational AI to enhance virtual interactions. The system utilizes Triplane neural representations and neural radiance fields (NeRF) for high-fidelity 3D avatar generation, speaker encoders coupled with Tacotron 2 and WaveRNN for natural voice cloning, and a context-aware chat algorithm for adaptive conversations. By overcoming the challenges of customization, integration, and real-time performance, the proposed framework addresses the increasing need for realistic virtual representations, setting new benchmarks for AI-augmented interaction in virtual conferences, online representation, education, and healthcare. © 2025 IEEE.},
keywords = {3D avatar generation, 3D Avatars, 3D reconstruction, AI-augmented interaction, Augmented Reality, Communication and collaborations, Conversational AI, Neural radiance fields (NeRF), Real-time performance, Three dimensional computer graphics, Virtual spaces, Voice cloning},
pubstate = {published},
tppubtype = {inproceedings}
}
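The framework in this paper is essentially a three-stage loop: a context-aware chat model drafts a reply, a voice-cloning stack (speaker encoder, Tacotron 2, WaveRNN) speaks it, and a Triplane/NeRF avatar renders matching frames. A minimal orchestration sketch in Python follows; ChatModel, VoiceCloner and AvatarRenderer are hypothetical stand-ins for those three components, not classes from the paper.

from dataclasses import dataclass, field

@dataclass
class MeetingTwin:
    chat: "ChatModel"         # context-aware reply generation
    voice: "VoiceCloner"      # speaker encoder + Tacotron 2 + WaveRNN
    avatar: "AvatarRenderer"  # Triplane/NeRF avatar rendering
    history: list = field(default_factory=list)

    def respond(self, utterance: str):
        # 1) Draft a reply conditioned on the running meeting context.
        self.history.append({"role": "participant", "content": utterance})
        reply = self.chat.generate(self.history)
        self.history.append({"role": "twin", "content": reply})
        # 2) Synthesize the reply in the enrolled speaker's cloned voice.
        audio = self.voice.synthesize(reply)
        # 3) Render avatar frames driven by the synthesized audio.
        frames = self.avatar.render(audio)
        return reply, audio, frames

Keeping the three stages behind narrow interfaces like these is one way to get the single-platform integration the abstract argues for, since each stage can then be swapped independently.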
2024
Liu, Z.; Zhu, Z.; Zhu, L.; Jiang, E.; Hu, X.; Peppler, K.; Ramani, K.
ClassMeta: Designing Interactive Virtual Classmate to Promote VR Classroom Participation Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0330-0.
Abstract | Links | BibTeX | Tags: 3D Avatars, Behavioral Research, Classroom learning, Collaborative learning, Computational Linguistics, Condition, E-Learning, Human behaviors, Language Model, Large language model, Learning experiences, Learning systems, pedagogical agent, Pedagogical agents, Students, Three dimensional computer graphics, Virtual Reality, VR classroom
@inproceedings{liu_classmeta_2024,
title = {ClassMeta: Designing Interactive Virtual Classmate to Promote VR Classroom Participation},
author = {Z. Liu and Z. Zhu and L. Zhu and E. Jiang and X. Hu and K. Peppler and K. Ramani},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194868458&doi=10.1145%2f3613904.3642947&partnerID=40&md5=0592b2f977a2ad2e6366c6fa05808a6a},
doi = {10.1145/3613904.3642947},
isbn = {979-8-4007-0330-0},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Peer influence plays a crucial role in promoting classroom participation, where behaviors from active students can contribute to a collective classroom learning experience. However, the presence of these active students depends on several conditions and is not consistently available across all circumstances. Recently, Large Language Models (LLMs) such as GPT have demonstrated the ability to simulate diverse human behaviors convincingly due to their capacity to generate contextually coherent responses based on their role settings. Inspired by this advancement in technology, we designed ClassMeta, a GPT-4 powered agent to help promote classroom participation by playing the role of an active student. These agents, which are embodied as 3D avatars in virtual reality, interact with actual instructors and students with both spoken language and body gestures. We conducted a comparative study to investigate the potential of ClassMeta for improving the overall learning experience of the class. © 2024 Copyright held by the owner/author(s)},
keywords = {3D Avatars, Behavioral Research, Classroom learning, Collaborative learning, Computational Linguistics, Condition, E-Learning, Human behaviors, Language Model, Large language model, Learning experiences, Learning systems, pedagogical agent, Pedagogical agents, Students, Three dimensional computer graphics, Virtual Reality, VR classroom},
pubstate = {published},
tppubtype = {inproceedings}
}
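ClassMeta's core pattern, as the abstract presents it, is an LLM given the persona of an active student and fed the ongoing classroom exchange. The sketch below shows that role-prompting pattern with the OpenAI chat API; the system prompt is invented for illustration and is not the authors' actual prompt or agent logic.

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Hypothetical role setting approximating an "active student" persona.
ACTIVE_STUDENT = (
    "You are an enthusiastic student in a VR classroom. Answer the "
    "instructor's questions, ask follow-ups, and encourage quieter "
    "classmates to join the discussion."
)

def classmate_turn(transcript: list[dict]) -> str:
    """Generate the virtual classmate's next utterance from the class transcript."""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "system", "content": ACTIVE_STUDENT}, *transcript],
    )
    return response.choices[0].message.content

In the full system the returned text would also drive the embodied avatar's speech audio and body gestures inside VR.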
Amato, N.; De Carolis, B.; Gioia, F.; Venezia, M. N.; Palestra, G.; Loglisci, C.
Can an AI-driven VTuber engage People? The KawAIi Case Study Proceedings Article
In: Soto, A.; Zangerle, E. (Eds.): CEUR Workshop Proc., vol. 3660, CEUR-WS, 2024, ISSN: 1613-0073.
Abstract | Links | BibTeX | Tags: 3D Avatars, Case-studies, Conversational Agents, Facial Expressions, Language Model, Live streaming, LLMs, Real-time, Three dimensional computer graphics, Virtual agent, Virtual Reality, YouTube
@inproceedings{amato_can_2024,
title = {Can an AI-driven VTuber engage People? The KawAIi Case Study},
author = {N. Amato and B. De Carolis and F. Gioia and M. N. Venezia and G. Palestra and C. Loglisci},
editor = {Soto, A. and Zangerle, E.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190754935&partnerID=40&md5=bd76d56b13e328027aa1b458849cf73f},
issn = {1613-0073},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proc.},
volume = {3660},
publisher = {CEUR-WS},
abstract = {Live streaming has become increasingly popular, with most streamers presenting their real-life appearance. However, Virtual YouTubers (VTubers), virtual 2D or 3D avatars that are voiced by humans, are emerging as live streamers and attracting a growing viewership. This paper presents the development of a conversational agent, named KawAIi, embodied in a 2D character that accurately and promptly responds to user requests while providing an entertaining experience and adequate real-time support on streaming platforms such as YouTube. The agent relies on the Vicuna 7B GPTQ 4-bit Large Language Model (LLM). In addition, KawAIi uses a BERT-based model for analyzing the sentence generated by the model in terms of conveyed emotion and shows self-emotion awareness through facial expressions. Tested with users, the system demonstrated a good ability to handle interaction while maintaining a pleasant user experience. In particular, KawAIi was evaluated positively in terms of engagement and competence on various topics. The results show the potential of this technology to enrich interactivity on streaming platforms and offer a promising model for future online assistance contexts. © 2024 Copyright for this paper by its authors.},
keywords = {3D Avatars, Case-studies, Conversational Agents, Facial Expressions, Language Model, Live streaming, LLMs, Real-time, Three dimensional computer graphics, Virtual agent, Virtual Reality, YouTube},
pubstate = {published},
tppubtype = {inproceedings}
}
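KawAIi's self-emotion awareness, as described above, is a straightforward pipeline: classify the emotion conveyed by the LLM's generated sentence, then map it to a facial expression on the 2D character. Here is a sketch with Hugging Face transformers; the emotion checkpoint and the expression table are assumptions, since the paper only mentions "a BERT-based model" without naming a specific one.

from transformers import pipeline

# Assumed emotion classifier; any BERT-family checkpoint would slot in here.
emotion = pipeline("text-classification",
                   model="j-hartmann/emotion-english-distilroberta-base")

# Hypothetical mapping from predicted emotion to avatar expression presets.
EXPRESSIONS = {"joy": "smile", "anger": "frown", "sadness": "droop",
               "surprise": "wide_eyes", "fear": "tense", "disgust": "grimace",
               "neutral": "idle"}

def react(reply: str) -> str:
    """Choose a facial expression for the 2D character from the LLM's reply."""
    label = emotion(reply)[0]["label"]
    return EXPRESSIONS.get(label, "idle")

# Example: react("Thanks for the superchat, that made my day!") -> "smile"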