AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
2025
Tracy, K.; Spantidi, O.
Impact of GPT-Driven Teaching Assistants in VR Learning Environments Journal Article
In: IEEE Transactions on Learning Technologies, vol. 18, pp. 192–205, 2025, ISSN: 19391382 (ISSN), (Publisher: Institute of Electrical and Electronics Engineers Inc.).
@article{tracy_impact_2025,
title = {Impact of GPT-Driven Teaching Assistants in VR Learning Environments},
author = {K. Tracy and O. Spantidi},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001083336&doi=10.1109%2FTLT.2025.3539179&partnerID=40&md5=fc4deb58acaf5bac8f4805ef7035396d},
doi = {10.1109/TLT.2025.3539179},
issn = {19391382 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Learning Technologies},
volume = {18},
pages = {192–205},
abstract = {Virtual reality (VR) has emerged as a transformative educational tool, enabling immersive learning environments that promote student engagement and understanding of complex concepts. However, despite the growing adoption of VR in education, there remains a significant gap in research exploring how generative artificial intelligence (AI), such as generative pretrained transformer can further enhance these experiences by reducing cognitive load and improving learning outcomes. This study examines the impact of an AI-driven instructor assistant in VR classrooms on student engagement, cognitive load, knowledge retention, and performance. A total of 52 participants were divided into two groups experiencing a VR lesson on the bubble sort algorithm, one with only a prescripted virtual instructor (control group), and the other with the addition of an AI instructor assistant (experimental group). Statistical analysis of postlesson quizzes and cognitive load assessments was conducted using independent t-tests and analysis of variance (ANOVA), with the cognitive load being measured through a postexperiment questionnaire. The study results indicate that the experimental group reported significantly higher engagement compared to the control group. While the AI assistant did not significantly improve postlesson assessment scores, it enhanced conceptual knowledge transfer. The experimental group also demonstrated lower intrinsic cognitive load, suggesting the assistant reduced the perceived complexity of the material. Higher germane and general cognitive loads indicated that students were more invested in meaningful learning without feeling overwhelmed. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {Adversarial machine learning, Cognitive loads, Computer interaction, Contrastive Learning, Control groups, Experimental groups, Federated learning, Generative AI, Generative artificial intelligence (GenAI), human–computer interaction, Interactive learning environment, interactive learning environments, Learning efficacy, Learning outcome, learning outcomes, Student engagement, Teaching assistants, Virtual environments, Virtual Reality (VR)},
pubstate = {published},
tppubtype = {article}
}
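As an aside on the analysis reported in the abstract above: a between-groups comparison with an independent t-test and a one-way ANOVA can be sketched as follows. This is a minimal illustration with invented scores and group sizes; it is not the study's data or analysis code.

# Minimal sketch of the between-groups comparison described above: an
# independent-samples t-test and a one-way ANOVA over per-participant scores.
# All numbers are invented; this is not the study's data or analysis code.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
control = rng.normal(loc=70, scale=10, size=26)       # hypothetical quiz scores
experimental = rng.normal(loc=74, scale=10, size=26)  # hypothetical quiz scores

# Independent-samples t-test (Welch's variant, not assuming equal variances).
t_stat, t_p = stats.ttest_ind(control, experimental, equal_var=False)

# One-way ANOVA across the two groups; with two groups this is equivalent to
# the t-test, but it generalizes to additional conditions or load subscales.
f_stat, f_p = stats.f_oneway(control, experimental)

print(f"t = {t_stat:.2f}, p = {t_p:.3f}")
print(f"F = {f_stat:.2f}, p = {f_p:.3f}")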
Chen, J.; Wu, X.; Lan, T.; Li, B.
LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 2715–2724, 2025, ISSN: 10772626 (ISSN), (Publisher: IEEE Computer Society).
@article{chen_llmer_2025,
title = {LLMER: Crafting Interactive Extended Reality Worlds with JSON Data Generated by Large Language Models},
author = {J. Chen and X. Wu and T. Lan and B. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003825793&doi=10.1109%2FTVCG.2025.3549549&partnerID=40&md5=50597473616678390f143a33082a13d3},
doi = {10.1109/TVCG.2025.3549549},
issn = {10772626 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {2715–2724},
abstract = {The integration of Large Language Models (LLMs) like GPT-4 with Extended Reality (XR) technologies offers the potential to build truly immersive XR environments that interact with human users through natural language, e.g., generating and animating 3D scenes from audio inputs. However, the complexity of XR environments makes it difficult to accurately extract relevant contextual data and scene/object parameters from an overwhelming volume of XR artifacts. It leads to not only increased costs with pay-per-use models, but also elevated levels of generation errors. Moreover, existing approaches focusing on coding script generation are often prone to generation errors, resulting in flawed or invalid scripts, application crashes, and ultimately a degraded user experience. To overcome these challenges, we introduce LLMER, a novel framework that creates interactive XR worlds using JSON data generated by LLMs. Unlike prior approaches focusing on coding script generation, LLMER translates natural language inputs into JSON data, significantly reducing the likelihood of application crashes and processing latency. It employs a multi-stage strategy to supply only the essential contextual information adapted to the user's request and features multiple modules designed for various XR tasks. Our preliminary user study reveals the effectiveness of the proposed system, with over 80% reduction in consumed tokens and around 60% reduction in task completion time compared to state-of-the-art approaches. The analysis of users' feedback also illuminates a series of directions for further optimization. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: IEEE Computer Society},
keywords = {% reductions, 3D modeling, algorithm, Algorithms, Augmented Reality, Coding errors, Computer graphics, Computer interaction, computer interface, Computer simulation languages, Extended reality, generative artificial intelligence, human, Human users, human-computer interaction, Humans, Imaging, Immersive, Language, Language Model, Large language model, large language models, Metadata, Natural Language Processing, Natural language processing systems, Natural languages, procedures, Script generation, Spatio-temporal data, Three dimensional computer graphics, Three-Dimensional, three-dimensional imaging, User-Computer Interface, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
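The central mechanism summarized in the LLMER abstract above, translating natural language into JSON data that is validated before it reaches the XR runtime rather than generating executable scripts, can be illustrated with a small sketch. The JSON schema, the prompt, and the call_llm placeholder below are assumptions made for illustration, not LLMER's actual interface.

# Hedged sketch of the "natural language -> validated JSON -> XR action" idea.
# The JSON schema and the call_llm() stub are hypothetical, not LLMER's API.
import json

SYSTEM_PROMPT = (
    "Translate the user's request into JSON with keys "
    "'action', 'object', and 'params'. Output JSON only."
)

def call_llm(system: str, user: str) -> str:
    """Placeholder for an LLM call; replace with a real client."""
    return '{"action": "spawn", "object": "chair", "params": {"count": 2}}'

def parse_xr_command(user_request: str) -> dict:
    raw = call_llm(SYSTEM_PROMPT, user_request)
    try:
        cmd = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError(f"LLM did not return valid JSON: {raw!r}") from exc
    # Validate before handing the command to the XR runtime, so malformed
    # output is rejected instead of crashing the application.
    for key in ("action", "object", "params"):
        if key not in cmd:
            raise ValueError(f"missing key {key!r} in {cmd}")
    return cmd

print(parse_xr_command("Put two chairs next to the table"))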
Ding, S.; Chen, Y.
RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 131–136, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331514846 (ISBN).
@inproceedings{ding_rag-vr_2025,
title = {RAG-VR: Leveraging Retrieval-Augmented Generation for 3D Question Answering in VR Environments},
author = {S. Ding and Y. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005140593&doi=10.1109%2FVRW66409.2025.00034&partnerID=40&md5=0bd7d96a9bf05f93d17850cd3b380ff4},
doi = {10.1109/VRW66409.2025.00034},
isbn = {9798331514846 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {131–136},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Recent advances in large language models (LLMs) provide new opportunities for context understanding in virtual reality (VR). However, VR contexts are often highly localized and personalized, limiting the effectiveness of general-purpose LLMs. To address this challenge, we present RAG-VR, the first 3D question-answering system for VR that incorporates retrieval-augmented generation (RAG), which augments an LLM with external knowledge retrieved from a localized knowledge database to improve the answer quality. RAG-VR includes a pipeline for extracting comprehensive knowledge about virtual environments and user conditions for accurate answer generation. To ensure efficient retrieval, RAG-VR offloads the retrieval process to a nearby edge server and uses only essential information during retrieval. Moreover, we train the retriever to effectively distinguish among relevant, irrelevant, and hard-to-differentiate information in relation to questions. RAG-VR improves answer accuracy by 17.9%-41.8% and reduces end-to-end latency by 34.5%-47.3% compared with two baseline systems. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Ambient intelligence, Computational Linguistics, Computer interaction, Computing methodologies, Computing methodologies-Artificial intelligence-Natural language processing-Natural language generation, Computing methodology-artificial intelligence-natural language processing-natural language generation, Data handling, Formal languages, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Language Model, Language processing, Natural language generation, Natural language processing systems, Natural languages, Virtual Reality, Word processing},
pubstate = {published},
tppubtype = {inproceedings}
}
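Retrieval-augmented generation as outlined in the RAG-VR abstract, retrieving the most relevant entries from a localized knowledge base and prepending them to the question before the language model answers, can be sketched as follows. The toy bag-of-words similarity, the knowledge entries, and the prompt format are illustrative assumptions, not the paper's retriever or pipeline.

# Minimal retrieval-augmented generation sketch: embed knowledge entries,
# retrieve the most similar ones to a question, and build an augmented prompt.
# The bag-of-words "embedding" and the knowledge entries are illustrative only.
import math
from collections import Counter

KNOWLEDGE = [
    "The red key in the virtual kitchen opens the cabinet next to the stove.",
    "The user's avatar is currently standing in the library scene.",
    "The blue lever in the lab room activates the holographic display.",
]

def embed(text: str) -> Counter:
    return Counter(text.lower().split())

def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[t] * b[t] for t in a)
    norm = math.sqrt(sum(v * v for v in a.values())) * math.sqrt(sum(v * v for v in b.values()))
    return dot / norm if norm else 0.0

def retrieve(question: str, k: int = 2) -> list[str]:
    q = embed(question)
    ranked = sorted(KNOWLEDGE, key=lambda doc: cosine(q, embed(doc)), reverse=True)
    return ranked[:k]

def build_prompt(question: str) -> str:
    context = "\n".join(retrieve(question))
    return f"Context:\n{context}\n\nQuestion: {question}\nAnswer using only the context."

print(build_prompt("Which key opens the cabinet in the kitchen?"))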
Ozeki, R.; Yonekura, H.; Rizk, H.; Yamaguchi, H.
Cellular-based Indoor Localization with Adapted LLM and Label-aware Contrastive Learning Proceedings Article
In: pp. 138–145, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331586461 (ISBN).
@inproceedings{ozeki_cellular-based_2025,
title = {Cellular-based Indoor Localization with Adapted LLM and Label-aware Contrastive Learning},
author = {R. Ozeki and H. Yonekura and H. Rizk and H. Yamaguchi},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010820397&doi=10.1109%2FSMARTCOMP65954.2025.00070&partnerID=40&md5=9e15d9f4225f00cd57bedc511aad27d9},
doi = {10.1109/SMARTCOMP65954.2025.00070},
isbn = {9798331586461 (ISBN)},
year = {2025},
date = {2025-01-01},
pages = {138–145},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Accurate indoor positioning is essential for mobile computing, human-computer interaction, and next-generation smart environments, enabling applications in indoor navigation, augmented reality, personalized services, healthcare, and emergency response. Cellular signal fingerprinting has emerged as a widely adopted solution, with deep learning models achieving state-of-the-art performance. However, existing approaches face critical deployment challenges, including labor-intensive fingerprinting, sparse reference points, and missing RSS values caused by environmental interference, hardware variability, and dynamic signal fluctuations. These limitations hinder their scalability, adaptability, and real-world usability in complex indoor environments. To address these challenges, we present GPT2Loc a novel indoor localization framework that integrates LLM with label-aware contrastive learning, improving accuracy while reducing reliance on extensive fingerprinting. LLMs effectively extract meaningful spatial features from incomplete and noisy RSS data, enabling robust localization even in sparsely finger-printed areas. Our label-aware contrastive learning approach further enhances generalization by aligning latent representations with spatial relationships, allowing GPT2Loc to interpolate user locations in unseen areas and mitigate signal inconsistencies. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Cellular Network, Cellulars, Computer interaction, Contrastive Learning, Deep learning, Human computer interaction, Indoor Localization, Indoor Navigation, Indoor positioning, Indoor positioning systems, Language Model, Large language model, Learning systems, Mobile computing, Mobile-computing, Signal processing, Smart Environment, Wireless networks},
pubstate = {published},
tppubtype = {inproceedings}
}
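The label-aware contrastive objective mentioned in the abstract above, which pulls together representations of fingerprints whose ground-truth locations are close and pushes apart those that are far, might look roughly like the sketch below. The radius, the temperature, and the synthetic embeddings are assumptions for illustration; this is not the GPT2Loc loss.

# Rough sketch of a label-aware contrastive loss: fingerprints whose ground-truth
# positions lie within a radius are treated as positives for each anchor.
# Radius, temperature, and the random data are illustrative assumptions.
import numpy as np

def label_aware_contrastive_loss(embeddings, positions, radius=2.0, temperature=0.1):
    z = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    sim = z @ z.T / temperature                      # pairwise scaled cosine similarities
    dist = np.linalg.norm(positions[:, None, :] - positions[None, :, :], axis=-1)
    positives = (dist < radius) & ~np.eye(len(z), dtype=bool)

    losses = []
    for i in range(len(z)):
        if not positives[i].any():
            continue
        logits = np.delete(sim[i], i)                # exclude self-similarity from the denominator
        pos_logits = sim[i][positives[i]]
        log_denom = np.log(np.exp(logits).sum())
        # For each positive j: -log( exp(sim_ij) / sum_k exp(sim_ik) )
        losses.append(np.mean(log_denom - pos_logits))
    return float(np.mean(losses)) if losses else 0.0

rng = np.random.default_rng(1)
emb = rng.normal(size=(8, 16))         # hypothetical RSS-fingerprint embeddings
pos = rng.uniform(0, 10, size=(8, 2))  # hypothetical ground-truth 2D locations
print(label_aware_contrastive_loss(emb, pos))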
Li, Y.; Wang, S.; Sun, X.; Yang, L.; Zhu, T.; Chen, Y.; Zhao, K.; Zhao, Y.; Li, M.; Lc, R.
Reality as Imagined: Design and Evaluation of a TeleAbsence-Driven Extended Reality Experience for (Re) Interpreting Urban Cultural Heritage Narratives Across Time Journal Article
In: International Journal of Human-Computer Interaction, 2025, ISSN: 10447318 (ISSN); 15327590 (ISSN), (Publisher: Taylor and Francis Ltd.).
@article{li_reality_2025,
title = {Reality as Imagined: Design and Evaluation of a TeleAbsence-Driven Extended Reality Experience for (Re) Interpreting Urban Cultural Heritage Narratives Across Time},
author = {Y. Li and S. Wang and X. Sun and L. Yang and T. Zhu and Y. Chen and K. Zhao and Y. Zhao and M. Li and R. Lc},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105016721876&doi=10.1080%2F10447318.2025.2554296&partnerID=40&md5=1ecd1a643f4ba85ae08d549db04a8c9b},
doi = {10.1080/10447318.2025.2554296},
issn = {10447318 (ISSN); 15327590 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human-Computer Interaction},
abstract = {Visitors to Urban Cultural Heritage (UCH) often encounter official narratives but not the imaginations and relationships shaping its intangible aspects. Existing immersive experiences emphasize historical realities, overlooking personal and collective imaginations that shift with rapid development. To address this, we designed an Extended Reality (XR) experience around eight Hong Kong landmarks, enabling transitions between virtual and mixed-reality environments where users explore UCH narratives across past, present, and future. These narratives integrate (1) historical documentation with 360° visualizations and (2) images created in workshops supported by Generative AI tools. A mixed-method study with 24 participants examined their experiences and reflections. Results revealed deep immersion in both real and imagined worlds, as well as personal reinterpretations of UCH. This work demonstrates how XR can blend reality and imagination within one immersive experience and highlights design implications for archiving human imagination as an intangible form of cultural heritage. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Taylor and Francis Ltd.},
keywords = {Across time, Artificial intelligence, Computer interaction, Cultural heritages, Design and evaluations, Extended reality, Generative AI, Hong-kong, Human computer interaction, human–computer interaction, Immersive, Mixed reality, TeleAbsence, Urban cultural heritage narrative, Urban cultural heritage narratives},
pubstate = {published},
tppubtype = {article}
}
Casas, L.; Mitchell, K.
Structured Teaching Prompt Articulation for Generative-AI Role Embodiment with Augmented Mirror Video Displays Proceedings Article
In: Spencer, S. N. (Ed.): Proc.: VRCAI - ACM SIGGRAPH Int. Conf. Virtual-Reality Contin. Appl. Ind., Association for Computing Machinery, Inc, 2025, ISBN: 9798400713484 (ISBN).
@inproceedings{casas_structured_2025,
title = {Structured Teaching Prompt Articulation for Generative-AI Role Embodiment with Augmented Mirror Video Displays},
author = {L. Casas and K. Mitchell},
editor = {S. N. Spencer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85217997060&doi=10.1145%2F3703619.3706049&partnerID=40&md5=fb1b42dadbdc8ac44eeaafa93abc7f2c},
doi = {10.1145/3703619.3706049},
isbn = {9798400713484 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Proc.: VRCAI - ACM SIGGRAPH Int. Conf. Virtual-Reality Contin. Appl. Ind.},
publisher = {Association for Computing Machinery, Inc},
abstract = {We present a classroom enhanced with augmented reality video display in which students adopt snapshots of their corresponding virtual personas according to their teacher's live articulated spoken educational theme, linearly, such as historical figures, famous scientists, cultural icons, and laterally according to archetypal categories such as world dance styles. We define a structure of generative AI prompt guidance to assist teachers with focused specified visual role embodiment stylization. By leveraging role-based immersive embodiment, our proposed approach enriches pedagogical practices that prioritize experiential learning. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Artificial intelligence, Augmented Reality, Computer interaction, Contrastive Learning, Cultural icon, Experiential learning, Generative adversarial networks, Generative AI, human-computer interaction, Immersive, Pedagogical practices, Role-based, Teachers', Teaching, Video display, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
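A structured prompt of the kind outlined above, where the teacher's spoken theme is slotted into a fixed template that constrains the generated role embodiment, could be assembled along these lines. The field names and example values are illustrative assumptions, not the authors' prompt structure.

# Illustrative sketch of structured prompt articulation: the teacher's theme is
# slotted into fixed fields so the generated role embodiment stays on-topic.
# Field names and example values are assumptions, not the paper's template.
from dataclasses import dataclass

@dataclass
class TeachingPrompt:
    theme: str          # e.g. a historical figure or archetypal category
    role: str           # what each student's persona should embody
    style: str          # visual stylization constraints
    constraints: str    # classroom-appropriateness guardrails

    def articulate(self) -> str:
        return (
            f"Depict the student as {self.role} in the context of {self.theme}. "
            f"Stylize the result as {self.style}. {self.constraints}"
        )

prompt = TeachingPrompt(
    theme="famous scientists of the 19th century",
    role="a scientist presenting their key discovery",
    style="a painterly, classroom-appropriate portrait",
    constraints="Keep facial identity recognizable and avoid any unsafe content.",
)
print(prompt.articulate())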
Zhou, J.; Weber, R.; Wen, E.; Lottridge, D.
Real-Time Full-body Interaction with AI Dance Models: Responsiveness to Contemporary Dance Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 1177–1187, Association for Computing Machinery, 2025, ISBN: 9798400713064 (ISBN).
@inproceedings{zhou_real-time_2025,
title = {Real-Time Full-body Interaction with AI Dance Models: Responsiveness to Contemporary Dance},
author = {J. Zhou and R. Weber and E. Wen and D. Lottridge},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001922427&doi=10.1145%2F3708359.3712077&partnerID=40&md5=3eab32d7c5b4708f4005393b2db25291},
doi = {10.1145/3708359.3712077},
isbn = {9798400713064 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {1177–1187},
publisher = {Association for Computing Machinery},
abstract = {Interactive AI chatbots put the power of Large-Language Models (LLMs) into people's hands; it is this interactivity that fueled explosive worldwide influence. In the generative dance space, however, there are few deep-learning-based generative dance models built with interactivity in mind. The release of the AIST++ dance dataset in 2021 led to an uptick of capabilities in generative dance models. Whether these models could be adapted to support interactivity and how well this approach will work is not known. In this study, we explore the capabilities of existing generative dance models for motion-to-motion synthesis on real-time, full-body motion-captured contemporary dance data. We identify an existing model that we adapted to support interactivity: the Bailando++ model, which is trained on the AIST++ dataset and was modified to take music and a motion sequence as input parameters in an interactive loop. We worked with two professional contemporary choreographers and dancers to record and curate a diverse set of 203 motion-captured dance sequences as a set of "user inputs" captured through the Optitrack high-precision motion capture 3D tracking system. We extracted 17 quantitative movement features from the motion data using the well-established Laban Movement Analysis theory, which allowed for quantitative comparisons of inter-movement correlations, which we used for clustering input data and comparing input and output sequences. A total of 10 pieces of music were used to generate a variety of outputs using the adapted Bailando++ model. We found that, on average, the generated output motion achieved only moderate correlations to the user input, with some exceptions of movement and music pairs achieving high correlation. The high-correlation generated output sequences were deemed responsive and relevant co-creations in relation to the input sequences. We discuss implications for interactive generative dance agents, where the use of 3D joint coordinate data should be used over SMPL parameters for ease of real-time generation, and how the use of Laban Movement Analysis could be used to extract useful features and fine-tune deep-learning models. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D modeling, Chatbots, Computer interaction, Deep learning, Deep-Learning Dance Model, Design of Human-Computer Interaction, Digital elevation model, Generative AI, Input output programs, Input sequence, Interactivity, Motion capture, Motion tracking, Movement analysis, Output sequences, Problem oriented languages, Real- time, Text mining, Three dimensional computer graphics, User input, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
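The movement-feature comparison described in the abstract above, correlating features of the dancer's input motion with features of the generated output, can be illustrated briefly. The 17-dimensional feature vectors below are synthetic stand-ins, not the study's Laban Movement Analysis features.

# Sketch of comparing input and generated dance sequences via per-feature
# Pearson correlation. The feature vectors here are synthetic stand-ins for the
# 17 Laban Movement Analysis features used in the study.
import numpy as np

rng = np.random.default_rng(42)
n_sequences, n_features = 30, 17
input_features = rng.normal(size=(n_sequences, n_features))
# Hypothetical generated-output features: partially correlated with the input.
output_features = 0.5 * input_features + 0.5 * rng.normal(size=(n_sequences, n_features))

def per_feature_correlation(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Pearson correlation between matching columns of x and y."""
    return np.array([np.corrcoef(x[:, j], y[:, j])[0, 1] for j in range(x.shape[1])])

corr = per_feature_correlation(input_features, output_features)
print(f"mean correlation across features: {corr.mean():.2f}")
print(f"features with correlation > 0.7: {(corr > 0.7).sum()} of {len(corr)}")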
Leininger, P.; Weber, C. J.; Rothe, S.
Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 60–78, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
@inproceedings{leininger_understanding_2025,
title = {Understanding Creative Potential and Use Cases of AI-Generated Environments for Virtual Film Productions: Insights from Industry Professionals},
author = {P. Leininger and C. J. Weber and S. Rothe},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007976841&doi=10.1145%2F3706370.3727853&partnerID=40&md5=e74b2fa9e7644ddee1b51d3fc34b4af2},
doi = {10.1145/3706370.3727853},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {60–78},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual production (VP) is transforming filmmaking by integrating real-time digital elements with live-action footage, offering new creative possibilities and streamlined workflows. While industry experts recognize AI's potential to revolutionize VP, its practical applications and value across different production phases and user groups remain underexplored. Building on initial research into generative and data-driven approaches, this paper presents the first systematic pilot study evaluating three types of AI-generated 3D environments - Depth Mesh, 360° Panoramic Meshes, and Gaussian Splatting - through the participation of 15 filmmaking professionals from diverse roles. Unlike commonly used 2D AI-generated visuals, our approach introduces navigable 3D environments that offer greater control and flexibility, aligning more closely with established VP workflows. Through expert interviews and literature research, we developed evaluation criteria to assess their usefulness beyond concept development, extending to previsualization, scene exploration, and interdisciplinary collaboration. Our findings indicate that different environments cater to distinct production needs, from early ideation to detailed visualization. Gaussian Splatting proved effective for high-fidelity previsualization, while 360° Panoramic Meshes excelled in rapid concept ideation. Despite their promise, challenges such as limited interactivity and customization highlight areas for improvement. Our prototype, EnVisualAIzer, built in Unreal Engine 5, provides an accessible platform for diverse filmmakers to engage with AI-generated environments, fostering a more inclusive production process. By lowering technical barriers, these environments have the potential to make advanced VP tools more widely available. This study offers valuable insights into the evolving role of AI in VP and sets the stage for future research and development. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3-D environments, 3D reconstruction, 3D Scene Reconstruction, 3d scenes reconstruction, AI-generated 3d environment, AI-Generated 3D Environments, Computer interaction, Creative Collaboration, Creatives, Digital content creation, Digital Content Creation., Filmmaking workflow, Filmmaking Workflows, Gaussian distribution, Gaussian Splatting, Gaussians, Generative AI, Graphical user interface, Graphical User Interface (GUI), Graphical user interfaces, Human computer interaction, human-computer interaction, Human-Computer Interaction (HCI), Immersive, Immersive Storytelling, Interactive computer graphics, Interactive computer systems, Interactive media, Mesh generation, Previsualization, Real-Time Rendering, Splatting, Three dimensional computer graphics, Virtual production, Virtual Production (VP), Virtual Reality, Work-flows},
pubstate = {published},
tppubtype = {inproceedings}
}
Lin, J.; Wang, J.; Feng, P.; Zhang, X.; Yu, D.; Zhang, J.
AI-aided Automated AR-Assisted Assembly Instruction Authoring and Generation method Journal Article
In: Journal of Manufacturing Systems, vol. 83, pp. 405–423, 2025, ISSN: 02786125 (ISSN), (Publisher: Elsevier B.V.).
@article{lin_ai-aided_2025,
title = {AI-aided Automated AR-Assisted Assembly Instruction Authoring and Generation method},
author = {J. Lin and J. Wang and P. Feng and X. Zhang and D. Yu and J. Zhang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017229936&doi=10.1016%2Fj.jmsy.2025.08.019&partnerID=40&md5=7957487b03f997dce9b6600e75481319},
doi = {10.1016/j.jmsy.2025.08.019},
issn = {02786125 (ISSN)},
year = {2025},
date = {2025-01-01},
journal = {Journal of Manufacturing Systems},
volume = {83},
pages = {405–423},
abstract = {While Augmented Reality (AR) offers the potential to provide real-time guidance, one of the barriers to its adoption in industrial assembly is the lack of fast, no-code, intelligent methods for generating AR-assisted assembly programs. This paper proposes an AI-aided AR-Assisted Assembly Instruction Authoring and Generation method (ARAIAG) to address these challenges. ARAIAG allows engineers, without coding expertise, to intuitively design AR-assisted assembly instructions based on assembly demonstrations captured through RGBD cameras. Based on ARAIAG, we propose a new algorithm considering hand manipulation and model characteristics to achieve spatial registration for models, virtual-physical fusion, and assembly direction recognition. Additionally, we employed a novel human–computer interaction method and Large Language Model (LLM)-assisted content generation to achieve the automatic creation of interactive and instructive AR-assisted assembly programs. Through this approach, we streamline program development and enable more efficient AR-assisted assembly in dynamic manufacturing environments. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Elsevier B.V.},
keywords = {Ai-aided, Assembly, Assembly instructions, Assembly system, Assembly systems, Augmented Reality, Automatic programming, Computer aided instruction, Computer interaction, Generation method, Hand manipulation, Human computer interaction, human–computer interaction, Industrial assemblies, Intelligent method, Point cloud, Point-clouds, Real- time, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2024
Harinee, S.; Raja, R. Vimal; Mugila, E.; Govindharaj, I.; Sanjaykumar, V.; Ragavendhiran, T.
Elevating Medical Training: A Synergistic Fusion of AI and VR for Immersive Anatomy Learning and Practical Procedure Mastery Proceedings Article
In: Int. Conf. Syst., Comput., Autom. Netw., ICSCAN, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798331510022 (ISBN).
@inproceedings{harinee_elevating_2024,
title = {Elevating Medical Training: A Synergistic Fusion of AI and VR for Immersive Anatomy Learning and Practical Procedure Mastery},
author = {S. Harinee and R. Vimal Raja and E. Mugila and I. Govindharaj and V. Sanjaykumar and T. Ragavendhiran},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000334626&doi=10.1109%2FICSCAN62807.2024.10894451&partnerID=40&md5=ae7a491686ade8cebdc276f585a6f4f0},
doi = {10.1109/ICSCAN62807.2024.10894451},
isbn = {9798331510022 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Int. Conf. Syst., Comput., Autom. Netw., ICSCAN},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual reality with its 3D visualization have brought an overwhelming change in the face of medical education, especially for courses like human anatomy. The proposed virtual reality system to bring massive improvements in the education received by a medical student studying for their degree courses. The project puts forward the text-to-speech and speech-to-text aligned system that simplifies the usage of a chatbot empowered by OpenAI GPT-4 and allows pupils to vocally speak with Avatar, the set virtual assistant. Contrary to the current methodologies, the setup of virtual reality is powered by avatars and thus covers an enhanced virtual assistant environment. Avatars offer students the set of repeated practicing of medical procedures on it, and the real uniqueness in the proposed product. The developed virtual reality environment is enhanced over other current training techniques where a student should interact and immerse in three-dimensional human organs for visualization in three dimensions and hence get better knowledge of the subjects in greater depth. A virtual assistant guides the whole process, giving insights and support to help the student bridge the gap from theory to practice. Then, the system is essentially Knowledge based and Analysis based approach. The combination of generative AI along with embodied virtual agents has great potential when it comes to customized virtual conversation assistant for much wider range of applications. The study brings out the value of acquiring hands-on skills through simulated medical procedures and opens new frontiers of research and development in AI, VR, and medical education. In addition to assessing the effectiveness of such novel functionalities, the study also explores user experience related dimensions such as usability, task loading, and the sense of presence in proposed virtual medical environment. © 2025 Elsevier B.V., All rights reserved.},
keywords = {'current, Anatomy education, Anatomy educations, Computer interaction, Curricula, Embodied virtual assistant, Embodied virtual assistants, Generative AI, Human- Computer Interaction, Immersive, Intelligent virtual agents, Medical computing, Medical education, Medical procedure practice, Medical procedures, Medical training, Personnel training, Students, Teaching, Three dimensional computer graphics, Usability engineering, Virtual assistants, Virtual environments, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Geetha, S.; Aditya, G.; Reddy, M. Chetan; Nischith, G.
Human Interaction in Virtual and Mixed Reality Through Hand Tracking Proceedings Article
In: Proc. CONECCT - IEEE Int. Conf. Electron., Comput. Commun. Technol., Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350385922 (ISBN).
@inproceedings{geetha_human_2024,
title = {Human Interaction in Virtual and Mixed Reality Through Hand Tracking},
author = {S. Geetha and G. Aditya and M. Chetan Reddy and G. Nischith},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85205768661&doi=10.1109%2FCONECCT62155.2024.10677239&partnerID=40&md5=10a6cb2b19648071937ae24e789d05a4},
doi = {10.1109/CONECCT62155.2024.10677239},
isbn = {9798350385922 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. CONECCT - IEEE Int. Conf. Electron., Comput. Commun. Technol.},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper explores the potential and possibilities of hand tracking in virtual reality (VR) and mixed reality (MR), focusing on its role in human interaction dynamics. An application was designed in Unity leveraging the XR Interaction toolkit, within which various items across three important domains: daily life, education, and recreation, were crafted to demonstrate the versatility of hand tracking along with hand gesture-based shortcuts for interaction. Integration of elements in MR ensures that users can seamlessly enjoy virtual experiences while remaining connected to their physical surroundings. Precise hand tracking enables effortless interaction with the virtual space, enhancing presence and control with a user-friendly interface. Additionally, the paper explores the effectiveness of integrating hand tracking into education and training scenarios. A computer assembly simulation was created to demonstrate this, featuring component inspection and zoom capabilities along with a large language model (LLM) integrated with hand gestures to provide for interaction capabilities. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Computer interaction, Computer simulation languages, Daily lives, Digital elevation model, Hand gesture, hand tracking, Hand-tracking, human-computer interaction, Humaninteraction, Interaction dynamics, Mixed reality, Unity, User friendly interface, User interfaces, Virtual environments, Virtual Reality, Virtual spaces},
pubstate = {published},
tppubtype = {inproceedings}
}
Lee, L. -K.; Chan, E. H.; Tong, K. K. -L.; Wong, N. K. -H.; Wu, B. S. -Y.; Fung, Y. -C.; Fong, E. K. S.; U, U.; Wu, N. -I.
Utilizing Virtual Reality and Generative AI Chatbot for Job Interview Simulations Proceedings Article
In: Chui, K. T.; Hui, Y. K.; Yang, D.; Lee, L. -K.; Wong, L. -P.; Reynolds, B. L. (Ed.): Proc. - Int. Symp. Educ. Technol., ISET, pp. 209–212, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350361414 (ISBN).
@inproceedings{lee_utilizing_2024,
title = {Utilizing Virtual Reality and Generative AI Chatbot for Job Interview Simulations},
author = {L. -K. Lee and E. H. Chan and K. K. -L. Tong and N. K. -H. Wong and B. S. -Y. Wu and Y. -C. Fung and E. K. S. Fong and U. U and N. -I. Wu},
editor = {K. T. Chui and Y. K. Hui and D. Yang and L. -K. Lee and L. -P. Wong and B. L. Reynolds},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85206582338&doi=10.1109%2FISET61814.2024.00048&partnerID=40&md5=c099b8565f348c8bf250c0a9e62cf864},
doi = {10.1109/ISET61814.2024.00048},
isbn = {9798350361414 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - Int. Symp. Educ. Technol., ISET},
pages = {209–212},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Stress and anxiety experienced by interviewees, particularly fresh graduates, would significantly impact their performance in job interviews. Due to the increased affordability and user-friendliness of virtual reality (VR), VR has seen a surge in its application within the educational sector. This paper presents the design and implementation of a job interview simulation system, leveraging VR and a generative AI chatbot to provide an immersive environment for computer science graduates in Hong Kong. The system aims to help graduates practice and familiarize themselves with various real-world scenarios of a job interview in English, Mandarin, and Cantonese, tailored to the unique language requirements of Hong Kong's professional environment. The system comprises three core modules: a mock question and answer reading module, an AI speech analysis module, and a virtual interview module facilitated by the generative AI chatbot, ChatGPT. We anticipate that the proposed simulator will provide valuable insights to education practitioners on utilizing VR and generative AI for job interview training, extending beyond computer science graduates. © 2024 Elsevier B.V., All rights reserved.},
keywords = {chatbot, Chatbots, Computer interaction, Computer simulation languages, Generative adversarial networks, Generative AI, Hong-kong, Human computer interaction, ITS applications, Job interview simulation, Job interviews, Performance, Science graduates, User friendliness, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Jeong, E.; Kim, H.; Park, S.; Yoon, S.; Ahn, J.; Woo, W.
Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 205–208, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798331506919 (ISBN).
@inproceedings{jeong_function-adaptive_2024,
title = {Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts},
author = {E. Jeong and H. Kim and S. Park and S. Yoon and J. Ahn and W. Woo},
editor = {U. Eck and M. Sra and J. Stefanucci and M. Sugimoto and M. Tatzgern and I. Williams},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214379963&doi=10.1109%2FISMAR-Adjunct64951.2024.00050&partnerID=40&md5=45841ebd83189e4d3f3190dab9c1ba8c},
doi = {10.1109/ISMAR-Adjunct64951.2024.00050},
isbn = {9798331506919 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {205–208},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We propose an algorithm that extracts the most suitable affordances, interaction targets, and corresponding coordinates adaptively from 3D models of various artifacts based on their functional context for efficient authoring of XR content with artifacts. Traditionally, authoring AR scenes to convey artifact context required one-to-one manual work. Our approach leverages a Large Language Model (LLM) to extract interaction types, positions, and subjects based on the artifact's name and usage context. This enables templated XR experience creation, replacing repetitive manual labor. Consequently, our system streamlines the XR authoring process, making it more efficient and scalable. © 2025 Elsevier B.V., All rights reserved.},
keywords = {3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
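The affordance-extraction step described in the abstract above, where a large language model proposes interaction types and targets from an artifact's name and usage context, can be sketched as follows. The prompt, the expected JSON fields, and the query_llm placeholder are illustrative assumptions, not the authors' implementation.

# Hedged sketch of LLM-based affordance extraction for an augmented artifact.
# The prompt, JSON fields, and query_llm() placeholder are assumptions made
# for illustration; they are not the system described in the paper.
import json

PROMPT_TEMPLATE = (
    "Artifact: {name}\nUsage context: {context}\n"
    "List the most suitable interactions as JSON: a list of objects with keys "
    "'affordance', 'target_part', and 'position_hint'. Output JSON only."
)

def query_llm(prompt: str) -> str:
    """Placeholder for an LLM call; replace with a real client."""
    return json.dumps([
        {"affordance": "pour", "target_part": "spout", "position_hint": "above cup"},
        {"affordance": "grasp", "target_part": "handle", "position_hint": "side"},
    ])

def extract_affordances(name: str, context: str) -> list[dict]:
    raw = query_llm(PROMPT_TEMPLATE.format(name=name, context=context))
    affordances = json.loads(raw)
    # Keep only well-formed entries so downstream XR authoring templates
    # never receive partial records.
    required = {"affordance", "target_part", "position_hint"}
    return [a for a in affordances if required <= set(a)]

print(extract_affordances("ceramic teapot", "museum exhibit on tea ceremonies"))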