AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2024
Gemeinhardt, J.; Zöllner, M.; Jahn, C.
Generative AI Tool Pipeline for Creating Artificial Historical Characters for Cultural Heritage XR Proceedings Article
In: C., Stephanidis; M., Antona; S., Ntoa; G., Salvendy (Ed.): Commun. Comput. Info. Sci., pp. 41–46, Springer Science and Business Media Deutschland GmbH, 2024, ISBN: 18650929 (ISSN); 978-303161949-6 (ISBN).
Abstract | Links | BibTeX | Tags: Bavaria, Cultural heritage, Cultural heritages, Extended reality (XR), Generative AI, Historical characters, Immersive, Media production, Open source software, Open systems, Pipelines, Reproducibilities, Smart phones, Virtual representations, Web browsers
@inproceedings{gemeinhardt_generative_2024,
  title     = {Generative {AI} Tool Pipeline for Creating Artificial Historical Characters for Cultural Heritage {XR}},
  author    = {Gemeinhardt, J. and Zöllner, M. and Jahn, C.},
  editor    = {Stephanidis, C. and Antona, M. and Ntoa, S. and Salvendy, G.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197123898&doi=10.1007%2f978-3-031-61950-2_5&partnerID=40&md5=8f8a3cf4f4bf024b42f6490f64345df2},
  doi       = {10.1007/978-3-031-61950-2_5},
  isbn      = {978-303161949-6},
  issn      = {18650929},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Commun. Comput. Info. Sci.},
  volume    = {2116 CCIS},
  pages     = {41--46},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  abstract  = {In our project, we aimed to create historically authentic and vivid virtual representations of historic personalities that are connected to the regional Fichtelgebirge (Bavaria, Germany) to support the storytelling of our immersive XR applications. We are describing the tools in detail, the process of the tool chain and the resulting media. Next, we are discussing the challenges in media production like historical correctness and the consultation of historians. In order to create visual reproducibility we are explaining the detailed text prompts, their limitations and how to cope with resulting errors of the human physiognomy. Finally, we are briefly describing the application of the animated and talking generated historic characters in an immersive interactive WebXR environment. The XR experience is presented in web browsers on smartphones, tablets and XR headsets and the underlying software is based on the open-source framework Aframe. Our paper will describe the process, the results and the limitations in detail. Furthermore, we will provide a flow chart of the tool pipeline with visual examples of these aspects. The animations and voices of the historic characters will be demonstrated in videos of the XR application. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
  keywords  = {Bavaria, Cultural heritage, Cultural heritages, Extended reality (XR), Generative AI, Historical characters, Immersive, Media production, Open source software, Open systems, Pipelines, Reproducibilities, Smart phones, Virtual representations, Web browsers},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Clocchiatti, A.; Fumero, N.; Soccini, A. M.
Character Animation Pipeline based on Latent Diffusion and Large Language Models Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 398–405, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037202-1 (ISBN).
Abstract | Links | BibTeX | Tags: Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality
@inproceedings{clocchiatti_character_2024,
  title     = {Character Animation Pipeline based on Latent Diffusion and Large Language Models},
  author    = {Clocchiatti, A. and Fumero, N. and Soccini, A. M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187217072&doi=10.1109%2fAIxVR59861.2024.00067&partnerID=40&md5=d88b9ba7c80d49b60fd0d7acd5e7c4f0},
  doi       = {10.1109/AIxVR59861.2024.00067},
  isbn      = {979-835037202-1},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
  pages     = {398--405},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Artificial intelligence and deep learning techniques are revolutionizing the film production pipeline. The majority of the current screenplay-to-animation pipelines focus on understanding the screenplay through natural language processing techniques, and on the generation of the animation through custom engines, missing the possibility to customize the characters. To address these issues, we propose a high-level pipeline for generating 2D characters and animations starting from screenplays, through a combination of Latent Diffusion Models and Large Language Models. Our approach uses ChatGPT to generate character descriptions starting from the screenplay. Then, using that data, it generates images of custom characters with Stable Diffusion and animates them according to their actions in different scenes. The proposed approach avoids well-known problems in generative AI tools such as temporal inconsistency and lack of control on the outcome. The results suggest that the pipeline is consistent and reliable, benefiting industries ranging from film production to virtual, augmented and extended reality content creation. © 2024 IEEE.},
  keywords  = {Animation, Animation pipeline, Artificial intelligence, Augmented Reality, Character animation, Computational Linguistics, Computer animation, Deep learning, Diffusion, E-Learning, Extended reality, Film production, Generative art, Language Model, Learning systems, Learning techniques, Natural language processing systems, Pipelines, Production pipelines, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2023
Lee, S.; Lee, H.; Lee, K.
Knowledge Generation Pipeline using LLM for Building 3D Object Knowledge Base Proceedings Article
In: Int. Conf. ICT Convergence, pp. 1303–1305, IEEE Computer Society, 2023, ISBN: 21621233 (ISSN); 979-835031327-7 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR
@inproceedings{lee_knowledge_2023,
  title     = {Knowledge Generation Pipeline using {LLM} for Building {3D} Object Knowledge Base},
  author    = {Lee, S. and Lee, H. and Lee, K.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85184593202&doi=10.1109%2fICTC58733.2023.10392933&partnerID=40&md5=b877638607a04e5a31a2d5723af6e11b},
  doi       = {10.1109/ICTC58733.2023.10392933},
  isbn      = {979-835031327-7},
  issn      = {21621233},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {Int. Conf. ICT Convergence},
  pages     = {1303--1305},
  publisher = {IEEE Computer Society},
  abstract  = {With the wide spread of XR(eXtended Reality) contents such as Metaverse and VR(Virtual Reality) / AR(Augmented Reality), the utilization and importance of 3D objects are increasing. In this paper, we describe a knowledge generation pipeline of 3D object for reuse of existing 3D objects and production of new 3D object using generative AI(Artificial Intelligence). 3D object knowledge includes not only the object itself data that are generated in object editing phase but the information for human to recognize and understand objects. The target 3D model for building knowledge is the space model of office for business Metaverse service and the model of objects composing the space. LLM(Large Language Model)-based multimodal AI was used to extract knowledge from 3D model in a systematic and automated way. We plan to expand the pipeline to utilize knowledge base for managing extracted knowledge and correcting errors occurred during the LLM process for the knowledge extraction. © 2023 IEEE.},
  keywords  = {3D modeling, 3D models, 3D object, 3d-modeling, Augmented Reality, Data Mining, Knowledge Base, Knowledge based systems, Knowledge generations, Language Model, Metaverse, Metaverses, Multi-modal, MultiModal AI, Multimodal artificial intelligence, Pipelines, Virtual Reality, XR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jacoby, D.; Xu, D.; Ribas, W.; Xu, M.; Liu, T.; Jeyaraman, V.; Wei, M.; Blois, E. D.; Coady, Y.
Efficient Cloud Pipelines for Neural Radiance Fields Proceedings Article
In: S., Chakrabarti; R., Paul (Ed.): IEEE Annu. Ubiquitous Comput., Electron. Mob. Commun. Conf., UEMCON, pp. 114–119, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 979-835030413-8 (ISBN).
Abstract | Links | BibTeX | Tags: Azure, Change detection, Cloud analytics, Cloud computing, Cloud-computing, Cluster computing, Containerization, Creatives, Geo-spatial, Multi-views, Neural radiance field, Neural Radiance Fields, Pipelines, User interfaces, Virtual production, Vision communities, Windows operating system
@inproceedings{jacoby_efficient_2023,
  title     = {Efficient Cloud Pipelines for Neural Radiance Fields},
  author    = {Jacoby, D. and Xu, D. and Ribas, W. and Xu, M. and Liu, T. and Jeyaraman, V. and Wei, M. and Blois, E. D. and Coady, Y.},
  editor    = {Chakrabarti, S. and Paul, R.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85179765347&doi=10.1109%2fUEMCON59035.2023.10316126&partnerID=40&md5=2640a2b033c9200560f93898a178dbbe},
  doi       = {10.1109/UEMCON59035.2023.10316126},
  isbn      = {979-835030413-8},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {IEEE Annu. Ubiquitous Comput., Electron. Mob. Commun. Conf., UEMCON},
  pages     = {114--119},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Since their introduction in 2020, Neural Radiance Fields (NeRFs) have taken the computer vision community by storm. They provide a multi-view representation of a scene or object that is ideal for eXtended Reality (XR) applications and for creative endeavors such as virtual production, as well as change detection operations in geospatial analytics. The computational cost of these generative AI models is quite high, however, and the construction of cloud pipelines to generate NeRFs is neccesary to realize their potential in client applications. In this paper, we present pipelines on a high performance academic computing cluster and compare it with a pipeline implemented on Microsoft Azure. Along the way, we describe some uses of NeRFs in enabling novel user interaction scenarios. © 2023 IEEE.},
  keywords  = {Azure, Change detection, Cloud analytics, Cloud computing, Cloud-computing, Cluster computing, Containerization, Creatives, Geo-spatial, Multi-views, Neural radiance field, Neural Radiance Fields, Pipelines, User interfaces, Virtual production, Vision communities, Windows operating system},
  pubstate  = {published},
  tppubtype = {inproceedings}
}