AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Vachha, C.; Kang, Y.; Dive, Z.; Chidambaram, A.; Gupta, A.; Jun, E.; Hartmann, B.
Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface
@inproceedings{vachha_dreamcrafter_2025,
  title     = {Dreamcrafter: Immersive Editing of {3D} Radiance Fields Through Flexible, Generative Inputs and Outputs},
  author    = {Vachha, C. and Kang, Y. and Dive, Z. and Chidambaram, A. and Gupta, A. and Jun, E. and Hartmann, B.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005725679&doi=10.1145%2f3706598.3714312&partnerID=40&md5=68cf2a08d3057fd9756e25d53959872b},
  doi       = {10.1145/3706598.3714312},
  isbn      = {979-840071394-1},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Conf Hum Fact Comput Syst Proc},
  publisher = {Association for Computing Machinery},
  abstract  = {Authoring 3D scenes is a central task for spatial computing applications. Competing visions for lowering existing barriers are (1) focus on immersive, direct manipulation of 3D content or (2) leverage AI techniques that capture real scenes (3D Radiance Fields such as, NeRFs, 3D Gaussian Splatting) and modify them at a higher level of abstraction, at the cost of high latency. We unify the complementary strengths of these approaches and investigate how to integrate generative AI advances into real-time, immersive 3D Radiance Field editing. We introduce Dreamcrafter, a VR-based 3D scene editing system that: (1) provides a modular architecture to integrate generative AI algorithms; (2) combines different levels of control for creating objects, including natural language and direct manipulation; and (3) introduces proxy representations that support interaction during high-latency operations. We contribute empirical findings on control preferences and discuss how generative AI interfaces beyond text input enhance creativity in scene editing and world building. © 2025 Copyright held by the owner/author(s).},
  keywords  = {3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2024
Clocchiatti, A.; Fumero, N.; Soccini, A. M.
Character Animation Pipeline based on Latent Diffusion and Large Language Models Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 398–405, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037202-1 (ISBN).
Abstract | Links | BibTeX | Tags: Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality
@inproceedings{clocchiatti_character_2024,
  title     = {Character Animation Pipeline based on {Latent Diffusion} and {Large Language Models}},
  author    = {Clocchiatti, A. and Fumero, N. and Soccini, A. M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187217072&doi=10.1109%2fAIxVR59861.2024.00067&partnerID=40&md5=d88b9ba7c80d49b60fd0d7acd5e7c4f0},
  doi       = {10.1109/AIxVR59861.2024.00067},
  isbn      = {979-835037202-1},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {398--405},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Artificial intelligence and deep learning techniques are revolutionizing the film production pipeline. The majority of the current screenplay-to-animation pipelines focus on understanding the screenplay through natural language processing techniques, and on the generation of the animation through custom engines, missing the possibility to customize the characters. To address these issues, we propose a high-level pipeline for generating 2D characters and animations starting from screenplays, through a combination of Latent Diffusion Models and Large Language Models. Our approach uses ChatGPT to generate character descriptions starting from the screenplay. Then, using that data, it generates images of custom characters with Stable Diffusion and animates them according to their actions in different scenes. The proposed approach avoids well-known problems in generative AI tools such as temporal inconsistency and lack of control on the outcome. The results suggest that the pipeline is consistent and reliable, benefiting industries ranging from film production to virtual, augmented and extended reality content creation. © 2024 IEEE.},
  keywords  = {Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
He, K.; Lapham, A.; Li, Z.
Enhancing Narratives with SayMotion's text-to-3D animation and LLMs Proceedings Article
In: S.N., Spencer (Ed.): Proc. - SIGGRAPH Real-Time Live!, Association for Computing Machinery, Inc, 2024, ISBN: 979-840070526-7 (ISBN).
Abstract | Links | BibTeX | Tags: 3D animation, AI-based animation, Animation, Animation editing, Deep learning, Film production, Human motions, Interactive computer graphics, Interactive media, Language Model, Motion models, Physics simulation, Production medium, Simulation platform, Three dimensional computer graphics
@inproceedings{he_enhancing_2024,
  title     = {Enhancing Narratives with {SayMotion}'s text-to-{3D} animation and {LLMs}},
  author    = {He, K. and Lapham, A. and Li, Z.},
  editor    = {Spencer, S. N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85200655076&doi=10.1145%2f3641520.3665309&partnerID=40&md5=458f935043e3372e633ed5fc13bf6cd7},
  doi       = {10.1145/3641520.3665309},
  isbn      = {979-840070526-7},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - SIGGRAPH Real-Time Live!},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {SayMotion, a generative AI text-to-3D animation platform, utilizes deep generative learning and advanced physics simulation to transform text descriptions into realistic 3D human motions for applications in gaming, extended reality (XR), film production, education and interactive media. SayMotion addresses challenges due to the complexities of animation creation by employing a Large Language Model (LLM) fine-tuned to human motion with further AI-based animation editing components including spatial-temporal Inpainting via a proprietary Large Motion Model (LMM). SayMotion is a pioneer in the animation market by offering a comprehensive set of AI generation and AI editing functions for creating 3D animations efficiently and intuitively. With an LMM at its core, SayMotion aims to democratize 3D animations for everyone through language and generative motion. © 2024 Owner/Author.},
  keywords  = {3D animation, AI-based animation, Animation, Animation editing, Deep learning, Film production, Human motions, Interactive computer graphics, Interactive media, Language Model, Motion models, Physics simulation, Production medium, Simulation platform, Three dimensional computer graphics},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2009
Gambino, Orazio; Augello, Agnese; Caronia, Alessandro; Pilato, Giovanni; Pirrone, Roberto; Gaglio, Salvatore
A Web-Oriented Java 3D Talking Head Journal Article
In: Advances in Intelligent and Soft Computing, vol. 60, pp. 295–311, 2009, ISSN: 18675662.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Human computer interaction, Natural Language Processing
@article{gambinoWeborientedJava3D2009,
  title     = {A Web-Oriented {Java 3D} Talking Head},
  author    = {Gambino, Orazio and Augello, Agnese and Caronia, Alessandro and Pilato, Giovanni and Pirrone, Roberto and Gaglio, Salvatore},
  editor    = {Kulikowski, J. L. and Hippe, Z. S.},
  doi       = {10.1007/978-3-642-03202-8_24},
  issn      = {18675662},
  year      = {2009},
  date      = {2009-01-01},
  journal   = {Advances in Intelligent and Soft Computing},
  volume    = {60},
  pages     = {295--311},
  abstract  = {Facial animation denotes all those systems performing speech synchronization with an animated face model. These kinds of systems are named Talking Heads or Talking Faces. At the same time simple dialogue systems called chatbots have been developed. Chatbots are software agents able to interact with users through pattern-matching based rules. In this paper a Talking Head oriented to the creation of a Chatbot is presented. An answer is generated in form of text triggered by an input query. The answer is converted into a facial animation using a 3D face model whose lips movements are synchronized with the sound produced by a speech synthesis module. Our Talking Head exploits the naturalness of the facial animation and provides a real-time interactive interface to the user. Besides, it is specifically suited for being used on the web. This leads to a set of requirements to be satisfied, like: simple installation, visual quality, fast download, and interactivity in real time. The web infrastructure has been realized using the Client-Server model. The Chatbot, the Natural Language Processing and the Digital Signal Processing services are delegated to the server. The client is involved in animation and synchronization. This way, the server can handle multiple requests from clients. The conversation module has been implemented using the A.L.I.C.E. (Artificial Linguistic Internet Computer Entity) technology. The output of the chatbot is given input to the Natural Language Processing (Comedia Speech), incorporating a text analyzer, a letter-to-sound module and a module for the generation of prosody. The client, through the synchronization module, computes the time of real duration of the animation and the duration of each phoneme and consequently of each viseme. The morphing module performs the animation of the facial model and the voice reproduction. As a result, the user will see the answer to question both in textual form and in the form of visual animation. {\textcopyright} Springer-Verlag Berlin Heidelberg 2009.},
  keywords  = {3D Modelling, Animation, Human computer interaction, Natural Language Processing},
  pubstate  = {published},
  tppubtype = {article}
}
Gambino, Orazio; Augello, Agnese; Caronia, Alessandro; Pilato, Giovanni; Pirrone, Roberto; Gaglio, Salvatore
A web-oriented java 3D talking head Journal Article
In: Advances in Intelligent and Soft Computing, vol. 60, pp. 295–311, 2009, ISSN: 18675662.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Human computer interaction, Natural Language Processing
@article{gambino_web-oriented_2009,
  title     = {A Web-Oriented {Java 3D} Talking Head},
  author    = {Gambino, Orazio and Augello, Agnese and Caronia, Alessandro and Pilato, Giovanni and Pirrone, Roberto and Gaglio, Salvatore},
  editor    = {Kulikowski, J. L. and Hippe, Z. S.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84994752280&doi=10.1007%2f978-3-642-03202-8_24&partnerID=40&md5=1ff9a3013619a32fc37ca7dbbe5cf66a},
  doi       = {10.1007/978-3-642-03202-8_24},
  issn      = {18675662},
  year      = {2009},
  date      = {2009-01-01},
  journal   = {Advances in Intelligent and Soft Computing},
  volume    = {60},
  pages     = {295--311},
  abstract  = {Facial animation denotes all those systems performing speech synchronization with an animated face model. These kinds of systems are named Talking Heads or Talking Faces. At the same time simple dialogue systems called chatbots have been developed. Chatbots are software agents able to interact with users through pattern-matching based rules. In this paper a Talking Head oriented to the creation of a Chatbot is presented. An answer is generated in form of text triggered by an input query. The answer is converted into a facial animation using a 3D face model whose lips movements are synchronized with the sound produced by a speech synthesis module. Our Talking Head exploits the naturalness of the facial animation and provides a real-time interactive interface to the user. Besides, it is specifically suited for being used on the web. This leads to a set of requirements to be satisfied, like: simple installation, visual quality, fast download, and interactivity in real time. The web infrastructure has been realized using the Client-Server model. The Chatbot, the Natural Language Processing and the Digital Signal Processing services are delegated to the server. The client is involved in animation and synchronization. This way, the server can handle multiple requests from clients. The conversation module has been implemented using the A.L.I.C.E. (Artificial Linguistic Internet Computer Entity) technology. The output of the chatbot is given input to the Natural Language Processing (Comedia Speech), incorporating a text analyzer, a letter-to-sound module and a module for the generation of prosody. The client, through the synchronization module, computes the time of real duration of the animation and the duration of each phoneme and consequently of each viseme. The morphing module performs the animation of the facial model and the voice reproduction. As a result, the user will see the answer to question both in textual form and in the form of visual animation. © Springer-Verlag Berlin Heidelberg 2009.},
  keywords  = {3D Modelling, Animation, Human computer interaction, Natural Language Processing},
  pubstate  = {published},
  tppubtype = {article}
}
2008
Gaglio, Salvatore; Pilato, Giovanni; Pirrone, Roberto; Gambino, Orazio; Augello, Agnese; Caronia, Alessandro
A Java3D Talking Head for a Chatbot Proceedings Article
In: Proceedings - CISIS 2008: 2nd International Conference on Complex, Intelligent and Software Intensive Systems, pp. 709–714, 2008, ISBN: 0-7695-3109-1 978-0-7695-3109-0.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Artificial intelligence, Human computer interaction, Talking Heads
@inproceedings{gaglioJava3DTalkingHead2008,
  title     = {A {Java3D} Talking Head for a Chatbot},
  author    = {Gaglio, Salvatore and Pilato, Giovanni and Pirrone, Roberto and Gambino, Orazio and Augello, Agnese and Caronia, Alessandro},
  doi       = {10.1109/CISIS.2008.57},
  isbn      = {0-7695-3109-1 978-0-7695-3109-0},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Proceedings - CISIS 2008: 2nd International Conference on Complex, Intelligent and Software Intensive Systems},
  pages     = {709--714},
  abstract  = {Facial animation is referred to all those systems performing the speech synchronization with an animated face model. This kind of systems are called "Talking Head" or "Talking Face". In this paper a Talking Head oriented to the creation of a Chatbot is presented. It requires an input query and an answer is generated in form of text. The answer is transduced into a facial animation using a 3D face model whose lips movements are synchronized with the sound produced by a speech synthesis module. Our "Talking Head" explores the naturalness of the facial animation and provides a real-time interactive interface to the user. The WEB infrastructure has been realized using the Client-Server model delegating the Chatbot, the Natural Language Processing and the Digital Signal Processing services to the server, while the client is involved in animation, synchronization; in this way, the server can handle multiple requests from clients. {\textcopyright} 2008 IEEE.},
  keywords  = {3D Modelling, Animation, Artificial intelligence, Human computer interaction, Talking Heads},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gambino, Orazio; Augello, Agnese; Caronia, Alessandro; Pilato, Giovanni; Pirrone, Roberto; Gaglio, Salvatore
Virtual Conversation with a Real Talking Head Proceedings Article
In: 2008 Conference on Human System Interaction, HSI 2008, pp. 263–268, 2008, ISBN: 1-4244-1543-8 978-1-4244-1543-4.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Artificial intelligence, Computational Linguistics, Conversational Agents, Human computer interaction, Natural Language Processing, Talking Heads
@inproceedings{gambinoVirtualConversationReal2008,
  title     = {Virtual Conversation with a Real Talking Head},
  author    = {Gambino, Orazio and Augello, Agnese and Caronia, Alessandro and Pilato, Giovanni and Pirrone, Roberto and Gaglio, Salvatore},
  doi       = {10.1109/HSI.2008.4581446},
  isbn      = {1-4244-1543-8 978-1-4244-1543-4},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {2008 Conference on Human System Interaction, HSI 2008},
  pages     = {263--268},
  abstract  = {A talking head is system performing an animated face model synchronized with a speech synthesis module. It is used as a presentation layer of a conversational Agent which provide an answer . It provides an answer when a query is written as an input by the user. The textual answer is converted into facial movements of a 3D face model whose lips and tongue movements are synchronized with the sound of the synthetic voice. The Client-Server paradigm has been used for the WEB infrastructure delegating the animation and synchronization to the client, so that the server can satisfy multiple requests from clients; while the Chatbot, the Digital Signal Processing and the Natural language Processing are provided by the server. {\textcopyright} 2008 IEEE.},
  keywords  = {3D Modelling, Animation, Artificial intelligence, Computational Linguistics, Conversational Agents, Human computer interaction, Natural Language Processing, Talking Heads},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gambino, Orazio; Augello, Agnese; Caronia, Alessandro; Pilato, Giovanni; Pirrone, Roberto; Gaglio, Salvatore
Virtual conversation with a real talking head Proceedings Article
In: 2008 Conference on Human System Interaction, HSI 2008, pp. 263–268, 2008, ISBN: 1-4244-1543-8 978-1-4244-1543-4.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Artificial intelligence, Computational Linguistics, Conversational Agents, Human computer interaction, Natural Language Processing, Talking Heads
@inproceedings{gambino_virtual_2008,
  title     = {Virtual conversation with a real talking head},
  author    = {Gambino, Orazio and Augello, Agnese and Caronia, Alessandro and Pilato, Giovanni and Pirrone, Roberto and Gaglio, Salvatore},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-52149107110&doi=10.1109%2fHSI.2008.4581446&partnerID=40&md5=2ecaaf7a50db7564a6fa4e5b1e1da73e},
  doi       = {10.1109/HSI.2008.4581446},
  isbn      = {1-4244-1543-8 978-1-4244-1543-4},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {2008 Conference on Human System Interaction, HSI 2008},
  pages     = {263--268},
  abstract  = {A talking head is system performing an animated face model synchronized with a speech synthesis module. It is used as a presentation layer of a conversational Agent which provide an answer . It provides an answer when a query is written as an input by the user. The textual answer is converted into facial movements of a 3D face model whose lips and tongue movements are synchronized with the sound of the synthetic voice. The Client-Server paradigm has been used for the WEB infrastructure delegating the animation and synchronization to the client, so that the server can satisfy multiple requests from clients; while the Chatbot, the Digital Signal Processing and the Natural language Processing are provided by the server. ©2008 IEEE.},
  keywords  = {3D Modelling, Animation, Artificial intelligence, Computational Linguistics, Conversational Agents, Human computer interaction, Natural Language Processing, Talking Heads},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gaglio, Salvatore; Pilato, Giovanni; Pirrone, Roberto; Gambino, Orazio; Augello, Agnese; Caronia, Alessandro
A Java3D Talking Head for a Chatbot Proceedings Article
In: Proceedings - CISIS 2008: 2nd International Conference on Complex, Intelligent and Software Intensive Systems, pp. 709–714, 2008, ISBN: 0-7695-3109-1 978-0-7695-3109-0.
Abstract | Links | BibTeX | Tags: 3D Modelling, Animation, Artificial intelligence, Human computer interaction, Talking Heads
@inproceedings{gaglio_java3d_2008,
  title     = {A {Java3D} Talking Head for a Chatbot},
  author    = {Gaglio, Salvatore and Pilato, Giovanni and Pirrone, Roberto and Gambino, Orazio and Augello, Agnese and Caronia, Alessandro},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-54749105850&doi=10.1109%2fCISIS.2008.57&partnerID=40&md5=6fa3ebbb7631bb6a0acdff3820f29546},
  doi       = {10.1109/CISIS.2008.57},
  isbn      = {0-7695-3109-1 978-0-7695-3109-0},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Proceedings - CISIS 2008: 2nd International Conference on Complex, Intelligent and Software Intensive Systems},
  pages     = {709--714},
  abstract  = {Facial animation is referred to all those systems performing the speech synchronization with an animated face model. This kind of systems are called "Talking Head" or "Talking Face". In this paper a Talking Head oriented to the creation of a Chatbot is presented. It requires an input query and an answer is generated in form of text. The answer is transduced into a facial animation using a 3D face model whose lips movements are synchronized with the sound produced by a speech synthesis module. Our "Talking Head" explores the naturalness of the facial animation and provides a real-time interactive interface to the user. The WEB infrastructure has been realized using the Client-Server model delegating the Chatbot, the Natural Language Processing and the Digital Signal Processing services to the server, while the client is involved in animation, synchronization; in this way, the server can handle multiple requests from clients. © 2008 IEEE.},
  keywords  = {3D Modelling, Animation, Artificial intelligence, Human computer interaction, Talking Heads},
  pubstate  = {published},
  tppubtype = {inproceedings}
}