AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
2025
Häfner, P.; Eisenlohr, F.; Karande, A.; Grethler, M.; Mukherjee, A.; Tran, N.
Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 281–285, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833152157-8.
@inproceedings{hafner_leveraging_2025,
title = {Leveraging Virtual Prototypes for Training Data Collection in LLM-Based Voice User Interface Development for Machines},
author = {P. Häfner and F. Eisenlohr and A. Karande and M. Grethler and A. Mukherjee and N. Tran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000344182&doi=10.1109%2fAIxVR63409.2025.00054&partnerID=40&md5=05fe014eddba395881575bec5d96ce15},
doi = {10.1109/AIxVR63409.2025.00054},
isbn = {979-833152157-8},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {281–285},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Voice User Interfaces (VUIs) are becoming increasingly valuable in industrial applications, offering hands-free control in complex environments. However, developing and validating VUIs for such applications faces challenges, including limited access to physical prototypes and high testing costs. This paper presents a methodology that utilizes virtual reality (VR) prototypes to collect training data for large language model (LLM)-based VUIs, allowing early-stage voice control development before physical prototypes are accessible. Through an immersive Wizard-of-Oz (WoZ) method, participants interact with a virtual reality representation of a machine, generating realistic, scenario-based conversational data. This combined WoZ and VR approach enables high-quality data collection and iterative model training, offering an effective solution that can be applied across various types of machine. Preliminary findings demonstrate the viability of VR in generating diverse and robust data sets that closely simulate real-world dialogs for voice interactions in industrial settings. © 2025 IEEE.},
keywords = {Artificial intelligence, Behavioral Research, Data collection, Language Model, Large language model, large language models, Model-based OPC, Training data, User interface development, Virtual environments, Virtual Prototype, Virtual Prototyping, Virtual Reality, Voice User Interface, Voice User Interfaces, Wizard of Oz, Wizard-of-Oz Method},
pubstate = {published},
tppubtype = {inproceedings}
}
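The Wizard-of-Oz setup this paper describes reduces, at its core, to logging paired user/wizard turns together with the simulated machine state and replaying them as chat-format training examples. The Python sketch below illustrates that data path under stated assumptions; all names (DialogTurn, to_chat_example, the JSONL layout) are illustrative, not the authors' actual code.

```python
# Minimal sketch of Wizard-of-Oz logging during a VR session: each participant
# utterance and the hidden operator's response are recorded with scenario
# context, yielding chat-style LLM training examples. All names are assumptions.
import json
import time
from dataclasses import dataclass, asdict

@dataclass
class DialogTurn:
    session_id: str       # one VR session per participant
    scenario: str         # task the participant was asked to perform
    speaker: str          # "user" or "wizard" (the hidden operator)
    utterance: str        # transcribed speech
    machine_state: dict   # simulated machine state at the time of the turn
    timestamp: float

def log_turn(turn: DialogTurn, path: str = "woz_log.jsonl") -> None:
    """Append one turn to a JSONL corpus."""
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(asdict(turn)) + "\n")

def to_chat_example(user: DialogTurn, wizard: DialogTurn) -> dict:
    """Pair a user request with the wizard's response as one training example."""
    return {
        "messages": [
            {"role": "system", "content": f"Machine state: {json.dumps(user.machine_state)}"},
            {"role": "user", "content": user.utterance},
            {"role": "assistant", "content": wizard.utterance},
        ]
    }

# Example: one exchange captured during a hypothetical machine-control scenario.
u = DialogTurn("s01", "start_milling", "user", "Start the spindle at half speed.",
               {"spindle": "off"}, time.time())
w = DialogTurn("s01", "start_milling", "wizard", "Starting spindle at 50 percent.",
               {"spindle": "on"}, time.time())
log_turn(u); log_turn(w)
print(to_chat_example(u, w))
```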
2024
Jones, D.; Gračanin, D.; Azab, M.
Augmented Reality Research: Benefit or Detriment for Neurodiverse People Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 26–28, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9.
@inproceedings{jones_augmented_2024,
title = {Augmented Reality Research: Benefit or Detriment for Neurodiverse People},
author = {D. Jones and D. Gračanin and M. Azab},
editor = {Eck, U. and Sra, M. and Stefanucci, J. and Sugimoto, M. and Tatzgern, M. and Williams, I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214361441&doi=10.1109%2fISMAR-Adjunct64951.2024.00015&partnerID=40&md5=c2e684986face0f49335d711fecf58c2},
doi = {10.1109/ISMAR-Adjunct64951.2024.00015},
isbn = {979-833150691-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {26–28},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The intersection of technology and innovation has always been a double-edged sword for humanity, offering both profound benefits and potential risks. This paper examines the positive and negative impacts of augmented reality (AR) and generative artificial intelligence (GAI) on neurodiverse users (NDU). While AR, coupled with large language models (LLM), has the potential to revolutionize the diagnosis and training environments for NDUs, inherent biases in LLM training data, which predominantly reflects neurotypical user (NTU) content, pose significant risks. These biases can result in environments and interactions that are less accessible and potentially harmful to NDUs. The paper explores the implications of these biases, including the possibility of privacy invasion and the misuse of technology for diagnosing undiagnosed NDUs, leading to severe personal and professional consequences. The study advocates for industry-wide collaboration to mitigate these biases, develop NDU-specific datasets, and create secure AR frameworks that safeguard the neurodiverse population while enhancing their quality of life. © 2024 IEEE.},
keywords = {Anonymity, Attention Deficit, Augmented Reality, Benefit/risk, Cyber Attack, Cyber attacks, Cyber Defense, Cyber-attacks, Cyber-defense, Language Model, Model training, Potential risks, Privacy invasions, Quality of life, Training data},
pubstate = {published},
tppubtype = {inproceedings}
}
De La Torre, F.; Fang, C. M.; Huang, H.; Banburski-Fahey, A.; Fernandez, J. A.; Lanier, J.
LLMR: Real-time Prompting of Interactive Worlds using Large Language Models Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0.
@inproceedings{de_la_torre_llmr_2024,
title = {LLMR: Real-time Prompting of Interactive Worlds using Large Language Models},
author = {F. De La Torre and C. M. Fang and H. Huang and A. Banburski-Fahey and J. A. Fernandez and J. Lanier},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194848276&doi=10.1145%2f3613904.3642579&partnerID=40&md5=14969e96507a1f0110262021e5b1172d},
doi = {10.1145/3613904.3642579},
isbn = {979-840070330-0},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {We present Large Language Model for Mixed Reality (LLMR), a framework for the real-time creation and modification of interactive Mixed Reality experiences using LLMs. LLMR leverages novel strategies to tackle difficult cases where ideal training data is scarce, or where the design goal requires the synthesis of internal dynamics, intuitive analysis, or advanced interactivity. Our framework relies on text interaction and the Unity game engine. By incorporating techniques for scene understanding, task planning, self-debugging, and memory management, LLMR outperforms the standard GPT-4 by 4x in average error rate. We demonstrate LLMR's cross-platform interoperability with several example worlds, and evaluate it on a variety of creation and modification tasks to show that it can produce and edit diverse objects, tools, and scenes. Finally, we conducted a usability study (N=11) with a diverse set of participants, which revealed that they had positive experiences with the system and would use it again. © 2024 Copyright held by the owner/author(s)},
keywords = {Artificial intelligence, Computational Linguistics, Design goal, Interactive computer graphics, Interactive worlds, Internal dynamics, Language Model, Large language model, Mixed reality, Novel strategies, Real-time, Spatial Reasoning, Training data},
pubstate = {published},
tppubtype = {inproceedings}
}
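LLMR's self-debugging component, as the abstract describes it, amounts to a generate-execute-repair loop: the model proposes code, the runtime attempts it, and any error message is fed back for a repair attempt. The sketch below shows that control flow only; LLMR itself generates C# for the Unity engine, whereas this stand-in uses Python and a placeholder ask_llm callable that you would wire to your own model endpoint.

```python
# Hedged sketch of a generate-execute-repair loop in the spirit of LLMR's
# self-debugging stage. `ask_llm` is a placeholder, not a real API.
from typing import Callable

def generate_with_self_debug(task: str,
                             ask_llm: Callable[[str], str],
                             max_attempts: int = 3) -> str:
    """Ask the model for code, execute it, and retry with the error message."""
    prompt = f"Write Python that performs this scene edit:\n{task}\nCode only."
    for attempt in range(max_attempts):
        code = ask_llm(prompt)
        try:
            exec(compile(code, "<llm>", "exec"), {})  # throwaway namespace
            return code                                # success: keep this code
        except Exception as err:
            # Append the failure so the next attempt can repair it.
            prompt += f"\n\nAttempt {attempt + 1} failed with: {err!r}\nFix the code."
    raise RuntimeError(f"No working code after {max_attempts} attempts")

# Toy stand-in: a 'model' that fails once (syntax error), then succeeds.
replies = iter(["spawn_cube(", "print('cube spawned at origin')"])
code = generate_with_self_debug("spawn a cube at the origin", lambda p: next(replies))
print(code)
```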
Jayaraman, S.; Bhavya, R.; Srihari, V.; Rajam, V. Mary Anita
TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions Proceedings Article
In: IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037687-6.
@inproceedings{jayaraman_texavi_2024,
title = {TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions},
author = {S. Jayaraman and R. Bhavya and V. Srihari and V. Mary Anita Rajam},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215265234&doi=10.1109%2fCVMI61877.2024.10782691&partnerID=40&md5=8e20576af67b917ecfad83873a87ef29},
doi = {10.1109/CVMI61877.2024.10782691},
isbn = {979-835037687-6},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {While generative models such as text-to-image, large language models and text-to-video have seen significant progress, the extension to text-to-virtual-reality remains largely unexplored, due to a deficit in training data and the complexity of achieving realistic depth and motion in virtual environments. This paper proposes an approach to coalesce existing generative systems to form a stereoscopic virtual reality video from text. Carried out in three main stages, we start with a base text-to-image model that captures context from an input text. We then employ Stable Diffusion on the rudimentary image produced, to generate frames with enhanced realism and overall quality. These frames are processed with depth estimation algorithms to create left-eye and right-eye views, which are stitched side-by-side to create an immersive viewing experience. Such systems would be highly beneficial in virtual reality production, since filming and scene building often require extensive hours of work and post-production effort. We utilize image evaluation techniques, specifically Fréchet Inception Distance and CLIP Score, to assess the visual quality of frames produced for the video. These quantitative measures establish the proficiency of the proposed method. Our work highlights the exciting possibilities of using natural language-driven graphics in fields like virtual reality simulations. © 2024 IEEE.},
keywords = {Adversarial networks, Computer simulation languages, Deep learning, Depth Estimation, Depth perception, Diffusion Model, diffusion models, Digital elevation model, Generative adversarial networks, Generative model, Generative systems, Language Model, Motion capture, Stereo image processing, Text-to-image, Training data, Video analysis, Video-clips, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
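The final TexAVi stage, creating left-eye and right-eye views from a generated frame and its estimated depth and stitching them side by side, can be approximated with a simple depth-proportional disparity shift. The sketch below is a minimal illustration under that assumption, not the authors' implementation; real systems would also inpaint the disocclusions the shift leaves behind.

```python
# Minimal sketch: one RGB frame plus a normalized depth map -> side-by-side
# stereoscopic pair. Near pixels (depth ~ 1) are shifted more than far ones.
import numpy as np

def stereo_pair(frame: np.ndarray, depth: np.ndarray,
                max_disparity_px: int = 12) -> np.ndarray:
    """frame: (H, W, 3) uint8; depth: (H, W) in [0, 1], 1 = nearest.
    Returns the left and right views stitched side by side."""
    h, w, _ = frame.shape
    disparity = (depth * max_disparity_px).astype(int)   # per-pixel shift
    cols = np.arange(w)
    left = np.zeros_like(frame)
    right = np.zeros_like(frame)
    for y in range(h):
        left_cols = np.clip(cols + disparity[y], 0, w - 1)
        right_cols = np.clip(cols - disparity[y], 0, w - 1)
        left[y, left_cols] = frame[y, cols]
        right[y, right_cols] = frame[y, cols]
    return np.hstack([left, right])                      # side-by-side format

# Smoke test on a synthetic frame with a linear depth ramp.
rng = np.random.default_rng(0)
frame = rng.integers(0, 255, (4, 8, 3), dtype=np.uint8)
depth = np.tile(np.linspace(0, 1, 8), (4, 1))
print(stereo_pair(frame, depth).shape)  # (4, 16, 3)
```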
2023
Leng, Z.; Kwon, H.; Ploetz, T.
Generating Virtual On-body Accelerometer Data from Virtual Textual Descriptions for Human Activity Recognition Proceedings Article
In: ISWC - Proc. Int. Symp. Wearable Comput., pp. 39–43, Association for Computing Machinery, Inc, 2023, ISBN: 979-840070199-3.
@inproceedings{leng_generating_2023,
title = {Generating Virtual On-body Accelerometer Data from Virtual Textual Descriptions for Human Activity Recognition},
author = {Z. Leng and H. Kwon and T. Ploetz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85175788497&doi=10.1145%2f3594738.3611361&partnerID=40&md5=ddecaf6d81f71511c8152ca14f33cd7f},
doi = {10.1145/3594738.3611361},
isbn = {979-840070199-3},
year = {2023},
date = {2023-01-01},
booktitle = {ISWC - Proc. Int. Symp. Wearable Comput.},
pages = {39–43},
publisher = {Association for Computing Machinery, Inc},
abstract = {The development of robust, generalized models for human activity recognition (HAR) has been hindered by the scarcity of large-scale, labeled data sets. Recent work has shown that virtual IMU data extracted from videos using computer vision techniques can lead to substantial performance improvements when training HAR models combined with small portions of real IMU data. Inspired by recent advances in motion synthesis from textual descriptions and connecting Large Language Models (LLMs) to various AI models, we introduce an automated pipeline that first uses ChatGPT to generate diverse textual descriptions of activities. These textual descriptions are then used to generate 3D human motion sequences via a motion synthesis model, T2M-GPT, and later converted to streams of virtual IMU data. We benchmarked our approach on three HAR datasets (RealWorld, PAMAP2, and USC-HAD) and demonstrate that the use of virtual IMU training data generated using our new approach leads to significantly improved HAR model performance compared to only using real IMU data. Our approach contributes to the growing field of cross-modality transfer methods and illustrates how HAR models can be improved through the generation of virtual training data that do not require any manual effort. © 2023 Owner/Author.},
keywords = {Activity recognition, Computational Linguistics, E-Learning, Human activity recognition, Language Model, Large language model, large language models, Motion estimation, Motion Synthesis, On-body, Pattern recognition, Recognition models, Textual description, Training data, Virtual IMU Data, Virtual Reality, Wearable Sensors},
pubstate = {published},
tppubtype = {inproceedings}
}
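The last step of this pipeline, turning synthesized 3D motion into virtual IMU streams, can be sketched as twice differentiating a joint trajectory and subtracting gravity to obtain the specific force an accelerometer reports. The snippet below illustrates that idea under simplified assumptions (world-frame output, no sensor orientation or noise model), details the authors' actual tooling handles more carefully.

```python
# Hedged sketch: virtual accelerometer readings from a synthesized joint
# trajectory via finite-difference second derivatives plus gravity.
import numpy as np

GRAVITY = np.array([0.0, -9.81, 0.0])  # m/s^2, world frame, y-up

def virtual_accelerometer(positions: np.ndarray, fps: float = 30.0) -> np.ndarray:
    """positions: (T, 3) world-frame trajectory of one body joint (metres).
    Returns (T-2, 3) virtual accelerometer samples in the world frame."""
    dt = 1.0 / fps
    velocity = np.diff(positions, axis=0) / dt          # (T-1, 3)
    acceleration = np.diff(velocity, axis=0) / dt       # (T-2, 3)
    # An accelerometer measures specific force: motion acceleration minus
    # gravitational acceleration (at rest it reads +1 g pointing up).
    return acceleration - GRAVITY

# Example: a wrist joint oscillating along x, two seconds at 30 fps.
t = np.arange(60) / 30.0
wrist = np.stack([0.3 * np.sin(2 * np.pi * t),
                  np.full_like(t, 1.0),
                  np.zeros_like(t)], axis=1)
print(virtual_accelerometer(wrist)[:3])
```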