AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.
2024
Guo, Y.; Hou, K.; Yan, Z.; Chen, H.; Xing, G.; Jiang, X.
Sensor2Scene: Foundation Model-Driven Interactive Realities Proceedings Article
In: Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys, pp. 13–19, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835036345-6 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization
@inproceedings{guo_sensor2scene_2024,
title = {Sensor2Scene: Foundation Model-Driven Interactive Realities},
author = {Y. Guo and K. Hou and Z. Yan and H. Chen and G. Xing and X. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199893762&doi=10.1109%2fFMSys62467.2024.00007&partnerID=40&md5=c3bf1739e8c1dc6227d61609ddc66910},
doi = {10.1109/FMSys62467.2024.00007},
isbn = {979-835036345-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys},
pages = {13–19},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) is acclaimed for its potential to bridge the physical and virtual worlds. Yet, current integration between these realms often lacks a deep under-standing of the physical environment and the subsequent scene generation that reflects this understanding. This research introduces Sensor2Scene, a novel system framework designed to enhance user interactions with sensor data through AR. At its core, an AI agent leverages large language models (LLMs) to decode subtle information from sensor data, constructing detailed scene descriptions for visualization. To enable these scenes to be rendered in AR, we decompose the scene creation process into tasks of text-to-3D model generation and spatial composition, allowing new AR scenes to be sketched from the descriptions. We evaluated our framework using an LLM evaluator based on five metrics on various datasets to examine the correlation between sensor readings and corresponding visualizations, and demonstrated the system's effectiveness with scenes generated from end-to-end. The results highlight the potential of LLMs to understand IoT sensor data. Furthermore, generative models can aid in transforming these interpretations into visual formats, thereby enhancing user interaction. This work not only displays the capabilities of Sensor2Scene but also lays a foundation for advancing AR with the goal of creating more immersive and contextually rich experiences. © 2024 IEEE.},
keywords = {3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Christiansen, F. R.; Hollensberg, L. Nø.; Jensen, N. B.; Julsgaard, K.; Jespersen, K. N.; Nikolov, I.
Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options Proceedings Article
In: S.N., Spencer (Ed.): Proc. ACM Symp. Virtual Reality Softw. Technol. VRST, Association for Computing Machinery, 2024, ISBN: 979-840070535-9 (ISBN).
Abstract | Links | BibTeX | Tags: Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR
@inproceedings{christiansen_exploring_2024,
title = {Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options},
author = {F. R. Christiansen and L. Nø. Hollensberg and N. B. Jensen and K. Julsgaard and K. N. Jespersen and I. Nikolov},
editor = {Spencer S.N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212512351&doi=10.1145%2f3641825.3687716&partnerID=40&md5=56ec6982b399fd97196ea73e7c659c31},
doi = {10.1145/3641825.3687716},
isbn = {979-840070535-9 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. ACM Symp. Virtual Reality Softw. Technol. VRST},
publisher = {Association for Computing Machinery},
abstract = {Combining modern technologies like large-language models (LLMs), speech-to-text, and text-to-speech can enhance immersion in virtual reality (VR) environments. However, challenges exist in effectively implementing LLMs and educating users. This paper explores implementing LLM-powered virtual social actors and facilitating user communication. We developed a murder mystery game where users interact with LLM-based non-playable characters (NPCs) through interrogation, clue-gathering, and exploration. Two versions were tested: one using speech recognition and another with traditional dialog boxes. While both provided similar social presence, users felt more immersed with speech recognition but found it overwhelming, while the dialog version was more challenging. Slow NPC response times were a source of frustration, highlighting the need for faster generation or better masking for a seamless experience. © 2024 Owner/Author.},
keywords = {Comparatives studies, Computer simulation languages, Economic and social effects, Immersive System, Immersive systems, Language Model, Large language model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality, VR},
pubstate = {published},
tppubtype = {inproceedings}
}