AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Casas, L.; Hannah, S.; Mitchell, K.
HoloJig: Interactive Spoken Prompt Specified Generative AI Environments Journal Article
In: IEEE Computer Graphics and Applications, vol. 45, no. 2, pp. 69–77, 2025, ISSN: 02721716 (ISSN).
Abstract | Links | BibTeX | Tags: 3-D rendering, Article, Collaborative workspace, customer experience, Economic and social effects, generative artificial intelligence, human, Immersive, Immersive environment, parallax, Real- time, simulation, Simulation training, speech, Time based, Virtual environments, Virtual Reality, Virtual reality experiences, Virtual spaces, VR systems
@article{casas_holojig_2025,
  title     = {{HoloJig}: Interactive Spoken Prompt Specified Generative {AI} Environments},
  author    = {Casas, L. and Hannah, S. and Mitchell, K.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001182100&doi=10.1109%2fMCG.2025.3553780&partnerID=40&md5=ec5dc44023314b6f9221169357d81dcd},
  doi       = {10.1109/MCG.2025.3553780},
  issn      = {0272-1716},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Computer Graphics and Applications},
  volume    = {45},
  number    = {2},
  pages     = {69--77},
  abstract  = {HoloJig offers an interactive, speech-to-virtual reality (VR), VR experience that generates diverse environments in real time based on live spoken descriptions. Unlike traditional VR systems that rely on prebuilt assets, HoloJig dynamically creates personalized and immersive virtual spaces with depth-based parallax 3-D rendering, allowing users to define the characteristics of their immersive environment through verbal prompts. This generative approach opens up new possibilities for interactive experiences, including simulations, training, collaborative workspaces, and entertainment. In addition to speech-to-VR environment generation, a key innovation of HoloJig is its progressive visual transition mechanism, which smoothly dissolves between previously generated and newly requested environments, mitigating the delay caused by neural computations. This feature ensures a seamless and continuous user experience, even as new scenes are being rendered on remote servers. © 1981-2012 IEEE.},
  keywords  = {3-D rendering, Article, Collaborative workspace, customer experience, Economic and social effects, generative artificial intelligence, human, Immersive, Immersive environment, parallax, Real- time, simulation, Simulation training, speech, Time based, Virtual environments, Virtual Reality, Virtual reality experiences, Virtual spaces, VR systems},
  pubstate  = {published},
  tppubtype = {article}
}
Alibrahim, Y.; Ibrahim, M.; Gurdayal, D.; Munshi, M.
AI speechbots and 3D segmentations in virtual reality improve radiology on-call training in resource-limited settings Journal Article
In: Intelligence-Based Medicine, vol. 11, 2025, ISSN: 26665212 (ISSN).
Abstract | Links | BibTeX | Tags: 3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition
@article{alibrahim_ai_2025,
  title     = {{AI} speechbots and {3D} segmentations in virtual reality improve radiology on-call training in resource-limited settings},
  author    = {Alibrahim, Y. and Ibrahim, M. and Gurdayal, D. and Munshi, M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001472313&doi=10.1016%2fj.ibmed.2025.100245&partnerID=40&md5=623a0ceaa07e5516a296420d25c3033b},
  doi       = {10.1016/j.ibmed.2025.100245},
  issn      = {2666-5212},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Intelligence-Based Medicine},
  volume    = {11},
  abstract  = {Objective: Evaluate the use of large-language model (LLM) speechbot tools and deep learning-assisted generation of 3D reconstructions when integrated in a virtual reality (VR) setting to teach radiology on-call topics to radiology residents. Methods: Three first year radiology residents in Guyana were enrolled in an 8-week radiology course that focused on preparation for on-call duties. The course, delivered via VR headsets with custom software integrating LLM-powered speechbots trained on imaging reports and 3D reconstructions segmented with the help of a deep learning model. Each session focused on a specific radiology area, employing a didactic and case-based learning approach, enhanced with 3D reconstructions and an LLM-powered speechbot. Post-session, residents reassessed their knowledge and provided feedback on their VR and LLM-powered speechbot experiences. Results/discussion: Residents found that the 3D reconstructions segmented semi-automatically by deep learning algorithms and AI-driven self-learning via speechbot was highly valuable. The 3D reconstructions, especially in the interventional radiology session, were helpful and the benefit is augmented by VR where navigating the models is seamless and perception of depth is pronounced. Residents also found conversing with the AI-speechbot seamless and was valuable in their post session self-learning. The major drawback of VR was motion sickness, which was mild and improved over time. Conclusion: AI-assisted VR radiology education could be used to develop new and accessible ways of teaching a variety of radiology topics in a seamless and cost-effective way. This could be especially useful in supporting radiology education remotely in regions which lack local radiology expertise. © 2025},
  keywords  = {3D segmentation, AI speechbots, Article, artificial intelligence chatbot, ChatGPT, computer assisted tomography, Deep learning, headache, human, Image segmentation, interventional radiology, Large language model, Likert scale, nausea, Proof of concept, prospective study, radiology, radiology on call training, resource limited setting, Teaching, Training, ultrasound, Virtual Reality, voice recognition},
  pubstate  = {published},
  tppubtype = {article}
}
Afzal, M. Z.; Ali, S. K. A.; Stricker, D.; Eisert, P.; Hilsmann, A.; Perez-Marcos, D.; Bianchi, M.; Crottaz-Herbette, S.; Ioris, R. De; Mangina, E.; Sanguineti, M.; Salaberria, A.; Lacalle, O. Lopez De; Garcia-Pablos, A.; Cuadros, M.
Next Generation XR Systems - Large Language Models Meet Augmented and Virtual Reality Journal Article
In: IEEE Computer Graphics and Applications, vol. 45, no. 1, pp. 43–55, 2025, ISSN: 02721716 (ISSN).
Abstract | Links | BibTeX | Tags: adult, Article, Augmented and virtual realities, Augmented Reality, Awareness, Context-Aware, human, Information Retrieval, Knowledge model, Knowledge reasoning, Knowledge retrieval, Language Model, Large language model, Mixed reality, neurorehabilitation, Position papers, privacy, Real- time, Reasoning, Situational awareness, Virtual environments, Virtual Reality
@article{afzal_next_2025,
  title     = {Next Generation {XR} Systems - Large Language Models Meet Augmented and Virtual Reality},
  author    = {Afzal, M. Z. and Ali, S. K. A. and Stricker, D. and Eisert, P. and Hilsmann, A. and Perez-Marcos, D. and Bianchi, M. and Crottaz-Herbette, S. and De Ioris, R. and Mangina, E. and Sanguineti, M. and Salaberria, A. and Lopez De Lacalle, O. and Garcia-Pablos, A. and Cuadros, M.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003598602&doi=10.1109%2fMCG.2025.3548554&partnerID=40&md5=b709a0c8cf47cc55a52cea73eb9ef15d},
  doi       = {10.1109/MCG.2025.3548554},
  issn      = {0272-1716},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Computer Graphics and Applications},
  volume    = {45},
  number    = {1},
  pages     = {43--55},
  abstract  = {Extended reality (XR) is evolving rapidly, offering new paradigms for human-computer interaction. This position paper argues that integrating large language models (LLMs) with XR systems represents a fundamental shift toward more intelligent, context-aware, and adaptive mixed-reality experiences. We propose a structured framework built on three key pillars: first, perception and situational awareness, second, knowledge modeling and reasoning, and third, visualization and interaction. We believe leveraging LLMs within XR environments enables enhanced situational awareness, real-time knowledge retrieval, and dynamic user interaction, surpassing traditional XR capabilities. We highlight the potential of this integration in neurorehabilitation, safety training, and architectural design while underscoring ethical considerations, such as privacy, transparency, and inclusivity. This vision aims to spark discussion and drive research toward more intelligent, human-centric XR systems. © 2025 IEEE.},
  keywords  = {adult, Article, Augmented and virtual realities, Augmented Reality, Awareness, Context-Aware, human, Information Retrieval, Knowledge model, Knowledge reasoning, Knowledge retrieval, Language Model, Large language model, Mixed reality, neurorehabilitation, Position papers, privacy, Real- time, Reasoning, Situational awareness, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Kim, Y.; Aamir, Z.; Singh, M.; Boorboor, S.; Mueller, K.; Kaufman, A. E.
Explainable XR: Understanding User Behaviors of XR Environments Using LLM-Assisted Analytics Framework Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2756–2766, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages
@article{kim_explainable_2025,
  title     = {Explainable {XR}: Understanding User Behaviors of {XR} Environments Using {LLM}-Assisted Analytics Framework},
  author    = {Kim, Y. and Aamir, Z. and Singh, M. and Boorboor, S. and Mueller, K. and Kaufman, A. E.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003815583&doi=10.1109%2fTVCG.2025.3549537&partnerID=40&md5=1085b698db06656985f80418cb37b773},
  doi       = {10.1109/TVCG.2025.3549537},
  issn      = {1077-2626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {2756--2766},
  abstract  = {We present Explainable XR, an end-to-end framework for analyzing user behavior in diverse eXtended Reality (XR) environments by leveraging Large Language Models (LLMs) for data interpretation assistance. Existing XR user analytics frameworks face challenges in handling cross-virtuality - AR, VR, MR - transitions, multi-user collaborative application scenarios, and the complexity of multimodal data. Explainable XR addresses these challenges by providing a virtuality-agnostic solution for the collection, analysis, and visualization of immersive sessions. We propose three main components in our framework: (1) A novel user data recording schema, called User Action Descriptor (UAD), that can capture the users' multimodal actions, along with their intents and the contexts; (2) a platform-agnostic XR session recorder, and (3) a visual analytics interface that offers LLM-assisted insights tailored to the analysts' perspectives, facilitating the exploration and analysis of the recorded XR session data. We demonstrate the versatility of Explainable XR by demonstrating five use-case scenarios, in both individual and collaborative XR applications across virtualities. Our technical evaluation and user studies show that Explainable XR provides a highly usable analytics solution for understanding user actions and delivering multifaceted, actionable insights into user behaviors in immersive environments. © 1995-2012 IEEE.},
  keywords  = {adult, Agnostic, Article, Assistive, Cross Reality, Data Analytics, Data collection, data interpretation, Data recording, Data visualization, Extended reality, human, Language Model, Large language model, large language models, Multi-modal, Multimodal Data Collection, normal human, Personalized assistive technique, Personalized Assistive Techniques, recorder, Spatio-temporal data, therapy, user behavior, User behaviors, Virtual addresses, Virtual environments, Virtual Reality, Visual analytics, Visual languages},
  pubstate  = {published},
  tppubtype = {article}
}
Stacchio, L.; Balloni, E.; Frontoni, E.; Paolanti, M.; Zingaretti, P.; Pierdicca, R.
MineVRA: Exploring the Role of Generative AI-Driven Content Development in XR Environments through a Context-Aware Approach Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 3602–3612, 2025, ISSN: 10772626 (ISSN).
Abstract | Links | BibTeX | Tags: adult, Article, Artificial intelligence, Computer graphics, Computer vision, Content Development, Contents development, Context-Aware, Context-aware approaches, Extended reality, female, Generative adversarial networks, Generative AI, generative artificial intelligence, human, Human-in-the-loop, Immersive, Immersive environment, male, Multi-modal, User need, Virtual environments, Virtual Reality
@article{stacchio_minevra_2025,
  title     = {{MineVRA}: Exploring the Role of Generative {AI}-Driven Content Development in {XR} Environments through a Context-Aware Approach},
  author    = {Stacchio, L. and Balloni, E. and Frontoni, E. and Paolanti, M. and Zingaretti, P. and Pierdicca, R.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003746367&doi=10.1109%2fTVCG.2025.3549160&partnerID=40&md5=70b162b574eebbb0cb71db871aa787e1},
  doi       = {10.1109/TVCG.2025.3549160},
  issn      = {1077-2626},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {IEEE Transactions on Visualization and Computer Graphics},
  volume    = {31},
  number    = {5},
  pages     = {3602--3612},
  abstract  = {The convergence of Artificial Intelligence (AI), Computer Vision (CV), Computer Graphics (CG), and Extended Reality (XR) is driving innovation in immersive environments. A key challenge in these environments is the creation of personalized 3D assets, traditionally achieved through manual modeling, a time-consuming process that often fails to meet individual user needs. More recently, Generative AI (GenAI) has emerged as a promising solution for automated, context-aware content generation. In this paper, we present MineVRA (Multimodal generative artificial iNtelligence for contExt-aware Virtual Reality Assets), a novel Human-In-The-Loop (HITL) XR framework that integrates GenAI to facilitate coherent and adaptive 3D content generation in immersive scenarios. To evaluate the effectiveness of this approach, we conducted a comparative user study analyzing the performance and user satisfaction of GenAI-generated 3D objects compared to those generated by Sketchfab in different immersive contexts. The results suggest that GenAI can significantly complement traditional 3D asset libraries, with valuable design implications for the development of human-centered XR environments. © 1995-2012 IEEE.},
  keywords  = {adult, Article, Artificial intelligence, Computer graphics, Computer vision, Content Development, Contents development, Context-Aware, Context-aware approaches, Extended reality, female, Generative adversarial networks, Generative AI, generative artificial intelligence, human, Human-in-the-loop, Immersive, Immersive environment, male, Multi-modal, User need, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Sheehy, L.; Bouchard, S.; Kakkar, A.; Hakim, R. El; Lhoest, J.; Frank, A.
Development and Initial Testing of an Artificial Intelligence-Based Virtual Reality Companion for People Living with Dementia in Long-Term Care Journal Article
In: Journal of Clinical Medicine, vol. 13, no. 18, 2024, ISSN: 20770383 (ISSN).
Abstract | Links | BibTeX | Tags: aged, Article, Artificial intelligence, cognitive decline, cognitive impairment, compassion, conversation, Dementia, Elderly, female, human, large language models, long term care, long-term care, major clinical study, male, program acceptability, program feasibility, reaction time, reminiscence, speech discrimination, very elderly, Virtual Reality
@article{sheehy_development_2024,
  title     = {Development and Initial Testing of an Artificial Intelligence-Based Virtual Reality Companion for People Living with Dementia in Long-Term Care},
  author    = {Sheehy, L. and Bouchard, S. and Kakkar, A. and El Hakim, R. and Lhoest, J. and Frank, A.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85205071099&doi=10.3390%2fjcm13185574&partnerID=40&md5=844732ff858a0d5feb0a95a54093ad4d},
  doi       = {10.3390/jcm13185574},
  issn      = {2077-0383},
  year      = {2024},
  date      = {2024-01-01},
  journal   = {Journal of Clinical Medicine},
  volume    = {13},
  number    = {18},
  abstract  = {Background/Objectives: Feelings of loneliness are common in people living with dementia (PLWD) in long-term care (LTC). The goals of this study were to describe the development of a novel virtual companion for PLWD living in LTC and assess its feasibility and acceptability. Methods: The computer-generated virtual companion, presented using a head-mounted virtual reality display, was developed in two stages. In Stage 1, the virtual companion asked questions designed to encourage conversation and reminiscence. In Stage 2, more powerful artificial intelligence tools allowed the virtual companion to engage users in nuanced discussions on any topic. PLWD in LTC tested the application at each stage to assess feasibility and acceptability. Results: Ten PLWD living in LTC participated in Stage 1 (4 men and 6 women; average 82 years old) and Stage 2 (2 men and 8 women; average 87 years old). Session lengths ranged from 0:00 to 5:30 min in Stage 1 and 0:00 to 53:50 min in Stage 2. Speech recognition issues and a limited repertoire of questions limited acceptance in Stage 1. Enhanced conversational ability in Stage 2 led to intimate and meaningful conversations with many participants. Many users found the head-mounted display heavy. There were no complaints of simulator sickness. The virtual companion was best suited to PLWD who could engage in reciprocal conversation. After Stage 2, response latency was identified as an opportunity for improvement in future versions. Conclusions: Virtual reality and artificial intelligence can be used to create a virtual companion that is acceptable and enjoyable to some PLWD living in LTC. Ongoing innovations in hardware and software will allow future iterations to provide more natural conversational interaction and an enhanced social experience. © 2024 by the authors.},
  keywords  = {aged, Article, Artificial intelligence, cognitive decline, cognitive impairment, compassion, conversation, Dementia, Elderly, female, human, large language models, long term care, long-term care, major clinical study, male, program acceptability, program feasibility, reaction time, reminiscence, speech discrimination, very elderly, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Hubal, R.
Rethinking some Virtual Human Applications Journal Article
In: Annual Review of CyberTherapy and Telemedicine, vol. 22, pp. 28–33, 2024, ISSN: 15548716 (ISSN).
Abstract | Links | BibTeX | Tags: Article, Artificial intelligence, character and application fidelity, ChatGPT, Consequential conversations, conversation, Engagement, human, Large language model, Learning, responsibility, responsive virtual humans, social competence, telehealth, Virtual Reality
@article{hubal_rethinking_2024,
  title     = {Rethinking some Virtual Human Applications},
  author    = {Hubal, R.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215435480&partnerID=40&md5=4526a7d54606ef0f1cc6234099eb4aae},
  issn      = {1554-8716},
  year      = {2024},
  date      = {2024-01-01},
  journal   = {Annual Review of CyberTherapy and Telemedicine},
  volume    = {22},
  pages     = {28--33},
  abstract  = {Increasingly realistic virtual environments incorporating virtual characters have been used to train or assess actual behavior, such as of people at risk, and identify reasons to remediate or intervene. Technology has improved so rapidly that today’s capabilities to create situations to focus training and intervention outshine past efforts. To name just a few current examples, tools like Unreal’s MetaHuman Creator for creating characters, Midjourney for creating environments, OpenAI’s ChatGPT for scripting, and GIFT for tutoring have enormous potential, as these tools promise to reduce simulation costs and increase realism. This paper, in contrast, discusses some movement in the other direction: Recent efforts suggest that increased realism may not always have resulting cost-benefit for training and assessment. Lessons learned and recommendations are presented to guide future developers. © 2024, Interactive Media Institute. All rights reserved.},
  keywords  = {Article, Artificial intelligence, character and application fidelity, ChatGPT, Consequential conversations, conversation, Engagement, human, Large language model, Learning, responsibility, responsive virtual humans, social competence, telehealth, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
2023
Vlasov, A. V.
GALA Inspired by Klimt's Art: Text-to-image Processing with Implementation in Interaction and Perception Studies: Library and Case Examples Journal Article
In: Annual Review of CyberTherapy and Telemedicine, vol. 21, pp. 200–205, 2023, ISSN: 15548716 (ISSN).
Abstract | Links | BibTeX | Tags: AIGC, applied research, art library, Article, Artificial intelligence, benchmarking, dataset, GALA, human, Human computer interaction, Image processing, Klimt, library, life satisfaction, neuropoem, Text-to-image, Virtual Reality, Wellbeing
@article{vlasov_gala_2023,
  title     = {{GALA} Inspired by {Klimt's} Art: Text-to-image Processing with Implementation in Interaction and Perception Studies: Library and Case Examples},
  author    = {Vlasov, A. V.},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85182461798&partnerID=40&md5=0c3f5f4214a46db51f46f0092495eb2b},
  issn      = {1554-8716},
  year      = {2023},
  date      = {2023-01-01},
  journal   = {Annual Review of CyberTherapy and Telemedicine},
  volume    = {21},
  pages     = {200--205},
  abstract  = {Objectives: (a) to develop a library with AI generated content (AIGC) based on a combinatorial scheme of prompting for interaction and perception research; (b) to show examples of AIGC implementation. The result is a public library for applied research in the cyber-psychological community (CYPSY). The Generative Art Library Abstractions (GALA) include images (Figures 1-2) based on the text-image model and inspired by the artwork of Gustav Klimt. They can be used for comparative analysis (benchmarking), end-to-end evaluation, and advanced design. This allows experimentation with complex human-computer interaction (HCI) architectures and visual communication systems, and provides creative design support for experimenting. Examples include: interactive perception of positively colored generative images; HCI dialogues using visual language; generated moods in a VR environment; brain-computer interface for HCI. Respectfully, these visualization resources are a valuable example of AIGC for next-generation R\&D. Any suggestions from the CYPSY community are welcome. © 2023, Interactive Media Institute. All rights reserved.},
  keywords  = {AIGC, applied research, art library, Article, Artificial intelligence, benchmarking, dataset, GALA, human, Human computer interaction, Image processing, Klimt, library, life satisfaction, neuropoem, Text-to-image, Virtual Reality, Wellbeing},
  pubstate  = {published},
  tppubtype = {article}
}