AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links, and BibTeX record for each paper.
2025
Behravan, M.; Gračanin, D.
From Voices to Worlds: Developing an AI-Powered Framework for 3D Object Generation in Augmented Reality Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 150–155, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833151484-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering
@inproceedings{behravan_voices_2025,
  title     = {From Voices to Worlds: Developing an {AI}-Powered Framework for {3D} Object Generation in {Augmented Reality}},
  author    = {Behravan, M. and Gračanin, D.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005153589&doi=10.1109%2fVRW66409.2025.00038&partnerID=40&md5=b8aaab4e2378cde3595d98d79266d371},
  doi       = {10.1109/VRW66409.2025.00038},
  isbn      = {979-833151484-6},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
  pages     = {150--155},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {This paper presents Matrix, an advanced AI-powered framework designed for real-time 3D object generation in Augmented Reality (AR) environments. By integrating a cutting-edge text-to-3D generative AI model, multilingual speech-to-text translation, and large language models (LLMs), the system enables seamless user interactions through spoken commands. The framework processes speech inputs, generates 3D objects, and provides object recommendations based on contextual understanding, enhancing AR experiences. A key feature of this framework is its ability to optimize 3D models by reducing mesh complexity, resulting in significantly smaller file sizes and faster processing on resource-constrained AR devices. Our approach addresses the challenges of high GPU usage, large model output sizes, and real-time system responsiveness, ensuring a smoother user experience. Moreover, the system is equipped with a pre-generated object repository, further reducing GPU load and improving efficiency. We demonstrate the practical applications of this framework in various fields such as education, design, and accessibility, and discuss future enhancements including image-to-3D conversion, environmental object detection, and multimodal support. The open-source nature of the framework promotes ongoing innovation and its utility across diverse industries. © 2025 IEEE.},
  keywords  = {3D modeling, 3D object, 3D Object Generation, 3D reconstruction, Augmented Reality, Cutting edges, Generative AI, Interactive computer systems, Language Model, Large language model, large language models, matrix, Multilingual speech interaction, Real- time, Speech enhancement, Speech interaction, Volume Rendering},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Stroinski, M.; Kwarciak, K.; Kowalewski, M.; Hemmerling, D.; Frier, W.; Georgiou, O.
Text-to-Haptics: Enhancing Multisensory Storytelling through Emotionally Congruent Midair Haptics Journal Article
In: Advanced Intelligent Systems, vol. 7, no. 4, 2025, ISSN: 26404567 (ISSN).
Abstract | Links | BibTeX | Tags: Audiovisual, Augmented Reality, Extended reality, Haptic interfaces, Haptics, Haptics interfaces, HMI, hybrid AI, Hybrid artificial intelligences, Metaverses, Mixed reality, Multisensory, Natural Language Processing, perception, Sentiment Analysis, Sound speech, Special issue and section, Speech enhancement, Virtual environments, Visual elements
@article{stroinski_text-haptics_2025,
  title     = {Text-to-Haptics: Enhancing Multisensory Storytelling through Emotionally Congruent Midair Haptics},
  author    = {Stroinski, M. and Kwarciak, K. and Kowalewski, M. and Hemmerling, D. and Frier, W. and Georgiou, O.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002269591&doi=10.1002%2faisy.202400758&partnerID=40&md5=a4c8ce7a01c9bc90d9805a81d34df982},
  doi       = {10.1002/aisy.202400758},
  issn      = {2640-4567},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Advanced Intelligent Systems},
  volume    = {7},
  number    = {4},
  abstract  = {In multisensory storytelling, the integration of touch, sound, speech, and visual elements plays a crucial role in enhancing the narrative immersion and audience engagement. In light of this, this article presents a scalable and intelligent hybrid artificial intelligence (AI) method that uses emotional text analysis for deciding when and what midair haptics to display alongside audiovisual content generated by latent stable diffusion methods. Then, a user study involving 40 participants is described, the results of which suggest that the proposed approach enhances the audience level of engagement as they experience a short AI-generated multisensory (audio–visual–haptic) story. © 2024 The Author(s). Advanced Intelligent Systems published by Wiley-VCH GmbH.},
  keywords  = {Audiovisual, Augmented Reality, Extended reality, Haptic interfaces, Haptics, Haptics interfaces, HMI, hybrid AI, Hybrid artificial intelligences, Metaverses, Mixed reality, Multisensory, Natural Language Processing, perception, Sentiment Analysis, Sound speech, Special issue and section, Speech enhancement, Virtual environments, Visual elements},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Liang, X.; Wang, Y.; Yan, F.; Ouyang, Z.; Hu, Y.; Luo, S.
Reborn of the White Bone Demon: Role-Playing Game Design Using Generative AI in XR Proceedings Article
In: S.N., Spencer (Ed.): Proc. - SIGGRAPH Asia Posters, SA, Association for Computing Machinery, Inc, 2024, ISBN: 979-840071138-1 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence techniques, Emotion Recognition, Game design, Gaming experiences, Real- time, Role-playing game, Speech emotion recognition, Speech enhancement, Speech recognition, Time based
@inproceedings{liang_reborn_2024,
  title     = {Reborn of the {White Bone Demon}: Role-Playing Game Design Using Generative {AI} in {XR}},
  author    = {Liang, X. and Wang, Y. and Yan, F. and Ouyang, Z. and Hu, Y. and Luo, S.},
  editor    = {Spencer, S. N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215520655&doi=10.1145%2f3681756.3697949&partnerID=40&md5=a255cbdfd881f70df82341875f16d546},
  doi       = {10.1145/3681756.3697949},
  isbn      = {979-840071138-1},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - SIGGRAPH Asia Posters, SA},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {This paper explores the application of Generative Artificial Intelligence (GenAI) techniques to the design of Role Playing Games (RPGs) in Extended Reality (XR) environments. We developed the game Reborn of the White Bone Demon, which utilizes AI speech emotion recognition technology to generate story lines and game assets in real-time based on the player's conversations with NPCs, enhancing the player's immersion and personalized experience, demonstrating the potential of GenAI in enhancing the XR gaming experience. © 2024 Copyright held by the owner/author(s).},
  keywords  = {Artificial intelligence techniques, Emotion Recognition, Game design, Gaming experiences, Real- time, Role-playing game, Speech emotion recognition, Speech enhancement, Speech recognition, Time based},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Christiansen, F. R.; Hollensberg, L. Nø.; Jensen, N. B.; Julsgaard, K.; Jespersen, K. N.; Nikolov, I.
Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options Proceedings Article
In: S.N., Spencer (Ed.): Proc. ACM Symp. Virtual Reality Softw. Technol. VRST, Association for Computing Machinery, 2024, ISBN: 979-840070535-9 (ISBN).
Abstract | Links | BibTeX | Tags: Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR
@inproceedings{christiansen_exploring_2024,
  title     = {Exploring Presence in Interactions with {LLM}-Driven {NPCs}: A Comparative Study of Speech Recognition and Dialogue Options},
  author    = {Christiansen, F. R. and Hollensberg, L. Nø. and Jensen, N. B. and Julsgaard, K. and Jespersen, K. N. and Nikolov, I.},
  editor    = {Spencer, S. N.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212512351&doi=10.1145%2f3641825.3687716&partnerID=40&md5=56ec6982b399fd97196ea73e7c659c31},
  doi       = {10.1145/3641825.3687716},
  isbn      = {979-840070535-9},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. ACM Symp. Virtual Reality Softw. Technol. VRST},
  publisher = {Association for Computing Machinery},
  abstract  = {Combining modern technologies like large-language models (LLMs), speech-to-text, and text-to-speech can enhance immersion in virtual reality (VR) environments. However, challenges exist in effectively implementing LLMs and educating users. This paper explores implementing LLM-powered virtual social actors and facilitating user communication. We developed a murder mystery game where users interact with LLM-based non-playable characters (NPCs) through interrogation, clue-gathering, and exploration. Two versions were tested: one using speech recognition and another with traditional dialog boxes. While both provided similar social presence, users felt more immersed with speech recognition but found it overwhelming, while the dialog version was more challenging. Slow NPC response times were a source of frustration, highlighting the need for faster generation or better masking for a seamless experience. © 2024 Owner/Author.},
  keywords  = {Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR},
  pubstate  = {published},
  tppubtype = {inproceedings}
}