AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
2025
Song, T.; Liu, Z.; Zhao, R.; Fu, J.
ElderEase AR: Enhancing Elderly Daily Living with the Multimodal Large Language Model and Augmented Reality Proceedings Article
In: ICVRT - Proc. Int. Conf. Virtual Real. Technol., pp. 60–67, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1018-6.
Tags: Age-related, Assisted living, Augmented Reality, Augmented reality technology, Daily Life Support, Daily living, Daily-life supports, Elderly, Elderly users, Independent living, Independent living systems, Language Model, Modeling languages, Multi agent systems, Multi-modal, Multimodal large language model
@inproceedings{song_elderease_2025,
title = {ElderEase AR: Enhancing Elderly Daily Living with the Multimodal Large Language Model and Augmented Reality},
author = {T. Song and Z. Liu and R. Zhao and J. Fu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001924899&doi=10.1145%2f3711496.3711505&partnerID=40&md5=4df693735547b505172657a73359f3ca},
doi = {10.1145/3711496.3711505},
isbn = {979-8-4007-1018-6},
year = {2025},
date = {2025-01-01},
booktitle = {ICVRT - Proc. Int. Conf. Virtual Real. Technol.},
pages = {60–67},
publisher = {Association for Computing Machinery, Inc},
abstract = {Elderly individuals often face challenges in independent living due to age-related cognitive and physical decline. To address these issues, we propose an innovative Augmented Reality (AR) system, “ElderEase AR”, designed to assist elderly users in their daily lives by leveraging a Multimodal Large Language Model (MLLM). This system enables elderly users to capture images of their surroundings and ask related questions, providing context-aware feedback. We evaluated the system’s perceived ease-of-use and feasibility through a pilot study involving 30 elderly users, aiming to enhance their independence and quality of life. Our system integrates advanced AR technology with an intelligent agent trained on multimodal datasets. Through prompt engineering, the agent is tailored to respond in a manner that aligns with the speaking style of elderly users. Experimental results demonstrate high accuracy in object recognition and question answering, with positive feedback from user trials. Specifically, the system accurately identified objects in various environments and provided relevant answers to user queries. This study highlights the powerful potential of AR and AI technologies in creating support tools for the elderly. It suggests directions for future improvements and applications, such as enhancing the system’s adaptability to different user needs and expanding its functionality to cover more aspects of daily living. © 2024 Copyright held by the owner/author(s).},
keywords = {Age-related, Assisted living, Augmented Reality, Augmented reality technology, Daily Life Support, Daily living, Daily-life supports, Elderly, Elderly users, Independent living, Independent living systems, Language Model, Modeling languages, Multi agent systems, Multi-modal, Multimodal large language model},
pubstate = {published},
tppubtype = {inproceedings}
}
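Illustrative note (not from the paper): the capture-an-image-and-ask loop this abstract describes maps naturally onto a multimodal chat API. The sketch below assumes the OpenAI Python client, "gpt-4o" as a stand-in MLLM, and an invented persona prompt; the paper does not disclose its actual backend or prompts.

```python
# Minimal sketch of an ElderEase-style multimodal query (assumptions: OpenAI
# Python client and "gpt-4o" as a stand-in MLLM; the paper names neither).
import base64
from openai import OpenAI

client = OpenAI()

# Hypothetical persona prompt approximating the paper's prompt engineering
# for an elderly-friendly speaking style.
PERSONA = ("You are a patient assistant for elderly users. Answer in short, "
           "plain sentences and avoid technical jargon.")

def ask_about_surroundings(image_path: str, question: str) -> str:
    """Send a captured photo plus the user's question to a multimodal LLM."""
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode()
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": PERSONA},
            {"role": "user", "content": [
                {"type": "text", "text": question},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            ]},
        ],
    )
    return response.choices[0].message.content

# e.g. ask_about_surroundings("shelf.jpg", "Which of these bottles is my heart medicine?")
```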
Kai, W. -H.; Xing, K. -X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 0178-2789.
Tags: Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
title = {Video-driven musical composition using large language model with memory-augmented state space},
author = {W. -H. Kai and K. -X. Xing},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2fs00371-024-03606-w&partnerID=40&md5=7ea24f13614a9a24caf418c37a10bd8c},
doi = {10.1007/s00371-024-03606-w},
issn = {0178-2789},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {5},
pages = {3345–3357},
abstract = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, research on LLMs for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependencies and maintain high performance while further decreasing the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code is released at https://github.com/kai211233/S2L2-V2M. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space},
pubstate = {published},
tppubtype = {article}
}
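Illustrative note (not from the paper): the abstract's two key ingredients, continuous moving-window partitioning and a long-term memory bank, can be caricatured in a few lines of PyTorch. Window size, pooling, and bank capacity below are invented; the actual Enhanced Video Mamba is in the authors' linked repository.

```python
# Toy sketch of moving-window partitioning plus a FIFO long-term memory bank
# (shapes, mean-pooling, and capacity are invented for illustration; see
# https://github.com/kai211233/S2L2-V2M for the real Enhanced Video Mamba).
from collections import deque
import torch

def moving_windows(frames: torch.Tensor, size: int, stride: int):
    """Yield overlapping windows over a (T, D) sequence of frame features."""
    for start in range(0, frames.shape[0] - size + 1, stride):
        yield frames[start:start + size]

class MemoryBank:
    """Keeps pooled summaries of past windows; oldest entries are evicted."""
    def __init__(self, dim: int, capacity: int = 16):
        self.dim = dim
        self.bank = deque(maxlen=capacity)

    def write(self, window: torch.Tensor) -> None:
        self.bank.append(window.mean(dim=0))   # one summary vector per window

    def read(self) -> torch.Tensor:
        if not self.bank:                      # no history yet
            return torch.zeros(self.dim)
        return torch.stack(list(self.bank)).mean(dim=0)

frames = torch.randn(512, 256)                 # 512 frames, 256-d features (made up)
bank = MemoryBank(dim=256)
for window in moving_windows(frames, size=64, stride=32):
    context = bank.read()                      # history conditions the current window
    # ...a Mamba-style state-space block would consume (window, context) here...
    bank.write(window)
```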
Graziano, M.; Colucci Cante, L.; Di Martino, B.
Deploying Large Language Model on Cloud-Edge Architectures: A Case Study for Conversational Historical Characters Book Section
In: Lecture Notes on Data Engineering and Communications Technologies, vol. 250, pp. 196–205, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 2367-4512.
Tags: Agent based, Augmented Reality, Case-studies, Chatbots, Cloud computing architecture, Conversational Agents, EDGE architectures, Historical characters, Language Model, Modeling languages, Real time performance, WEB application, Web applications, Work analysis
@incollection{graziano_deploying_2025,
title = {Deploying Large Language Model on Cloud-Edge Architectures: A Case Study for Conversational Historical Characters},
author = {M. Graziano and L. Colucci Cante and B. Di Martino},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002995405&doi=10.1007%2f978-3-031-87778-0_19&partnerID=40&md5=c54e9ce66901050a05de68602e4a8266},
doi = {10.1007/978-3-031-87778-0_19},
issn = {2367-4512},
year = {2025},
date = {2025-01-01},
booktitle = {Lecture Notes on Data Engineering and Communications Technologies},
volume = {250},
pages = {196–205},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {This work analyzes the deployment of conversational agents based on large language models (LLMs) in cloud-edge architectures, placing emphasis on scalability, efficiency and real-time performance. Through a case study, we present a web application that allows users to interact with an augmented reality avatar that impersonates a historical character. The agent, powered by an LLM, delivers immersive and contextually coherent dialogues. We discuss the solutions adopted to manage latency and distribute the computational load between the cloud, which takes care of language processing, and the edge nodes, ensuring a smooth user experience. The results obtained demonstrate how careful design can optimize the use of LLMs in distributed environments, offering advanced and high-performance interactions even in applications with high reactivity and customization requirements. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Agent based, Augmented Reality, Case-studies, Chatbots, Cloud computing architecture, Conversational Agents, EDGE architectures, Historical characters, Language Model, Modeling languages, Real time performance, WEB application, Web applications, Work analysis},
pubstate = {published},
tppubtype = {incollection}
}
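Illustrative note (not from the chapter): the cloud-edge split described above, with language processing in the cloud and latency-sensitive avatar control on the edge, could be organized roughly as below. The endpoint, payload shape, and timeout are all assumptions, not the authors' deployment.

```python
# Hypothetical edge-side turn handler: heavy language processing is offloaded
# to a cloud LLM service while session state and AR-avatar control stay local.
# CLOUD_LLM_URL and the JSON payload are invented for illustration.
import requests

CLOUD_LLM_URL = "https://cloud.example.org/llm/chat"   # placeholder endpoint

def edge_handle_turn(session: dict, user_utterance: str) -> str:
    history = session.setdefault("history", [])
    history.append({"role": "user", "content": user_utterance})
    reply = requests.post(
        CLOUD_LLM_URL,
        json={"character": session["character"], "messages": history},
        timeout=5,   # bound round-trip latency so the edge can degrade gracefully
    ).json()["reply"]
    history.append({"role": "assistant", "content": reply})
    # Latency-sensitive work (avatar animation, lip sync) stays on the edge node.
    return reply

# session = {"character": "Leonardo da Vinci"}
# edge_handle_turn(session, "What are you painting today?")
```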
Guo, P.; Zhang, Q.; Tian, C.; Xue, W.; Feng, X.
Digital Human Techniques for Education Reform Proceedings Article
In: ICETM - Proc. Int. Conf. Educ. Technol. Manag., pp. 173–178, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1746-8.
Tags: Augmented Reality, Contrastive Learning, Digital elevation model, Digital human technique, Digital Human Techniques, Digital humans, Education Reform, Education reforms, Educational Technology, Express emotions, Federated learning, Human behaviors, Human form models, Human techniques, Immersive, Innovative technology, Modeling languages, Natural language processing systems, Teachers', Teaching, Virtual environments, Virtual humans
@inproceedings{guo_digital_2025,
title = {Digital Human Techniques for Education Reform},
author = {P. Guo and Q. Zhang and C. Tian and W. Xue and X. Feng},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001671326&doi=10.1145%2f3711403.3711428&partnerID=40&md5=dd96647315af9409d119f68f9cf4e980},
doi = {10.1145/3711403.3711428},
isbn = {979-8-4007-1746-8},
year = {2025},
date = {2025-01-01},
booktitle = {ICETM - Proc. Int. Conf. Educ. Technol. Manag.},
pages = {173–178},
publisher = {Association for Computing Machinery, Inc},
abstract = {The rapid evolution of artificial intelligence, big data, and generative AI models has ushered in significant transformations across various sectors, including education. Digital Human Technique, an innovative technology grounded in advanced computer science and artificial intelligence, is reshaping educational paradigms by enabling virtual humans to simulate human behavior, express emotions, and interact with users. This paper explores the application of Digital Human Technique in education reform, focusing on creating immersive, intelligent classroom experiences that foster meaningful interactions between teachers and students. We define Digital Human Technique and delve into its key technical components such as character modeling and rendering, natural language processing, computer vision, and augmented reality technologies. Our methodology involves analyzing the role of educational digital humans created through these technologies, assessing their impact on educational processes, and examining various application scenarios in educational reform. Results indicate that Digital Human Technique significantly enhances the learning experience by enabling personalized teaching, increasing engagement, and fostering emotional connections. Educational digital humans serve as virtual teachers, interactive learning aids, and facilitators of emotional interaction, effectively addressing the challenges of traditional educational methods. They also promote a deeper understanding of complex concepts through simulated environments and interactive digital content. © 2024 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Contrastive Learning, Digital elevation model, Digital human technique, Digital Human Techniques, Digital humans, Education Reform, Education reforms, Educational Technology, Express emotions, Federated learning, Human behaviors, Human form models, Human techniques, Immersive, Innovative technology, Modeling languages, Natural language processing systems, Teachers', Teaching, Virtual environments, Virtual humans},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Vasic, I.; Fill, H. -G.; Quattrini, R.; Pierdicca, R.
LLM-Aided Museum Guide: Personalized Tours Based on User Preferences Proceedings Article
In: De Paolis, L.T.; Arpaia, P.; Sacco, M. (Ed.): Lect. Notes Comput. Sci., pp. 249–262, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 0302-9743; ISBN: 978-3-031-71709-3.
Tags: Artificial intelligence techniques, Automated process, Cultural heritages, Extended reality, Language Model, Large language model, large language models, Modeling languages, Museum guide, User's preferences, Virtual environments, Virtual museum, Virtual museums, Virtual tour
@inproceedings{vasic_llm-aided_2024,
title = {LLM-Aided Museum Guide: Personalized Tours Based on User Preferences},
author = {I. Vasic and H. -G. Fill and R. Quattrini and R. Pierdicca},
editor = {De Paolis L.T. and Arpaia P. and Sacco M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85205127699&doi=10.1007%2f978-3-031-71710-9_18&partnerID=40&md5=fba73e38a432e0749b8e79197ef85310},
doi = {10.1007/978-3-031-71710-9_18},
issn = {0302-9743},
isbn = {978-3-031-71709-3},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15029 LNCS},
pages = {249–262},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {The rapid development of generative artificial intelligence (GenAI) techniques is a promising step toward automated processes in the field of cultural heritage (CH). The recent rise of powerful Large Language Models (LLMs) like ChatGPT has made them a commonly utilized tool for a wide range of tasks across various fields. In this paper, we introduce LLMs as a guide in the three-dimensional (3D) panoramic virtual tour of the Civic Art Gallery of Ascoli to enable visitors to express their interest and show them the requested content. The input to our algorithm is a user request in natural language. The processing tasks are performed with OpenAI’s Generative Pre-trained Transformer (GPT) 4o model. Requests are handled through OpenAI’s API. We demonstrate all the functionalities within a developed local web-based application. This novel approach solves the problem of generic guided tours in the museum and offers a more automated and personalized alternative. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence techniques, Automated process, Cultural heritages, Extended reality, Language Model, Large language model, large language models, Modeling languages, Museum guide, User's preferences, Virtual environments, Virtual museum, Virtual museums, Virtual tour},
pubstate = {published},
tppubtype = {inproceedings}
}
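Illustrative note: the abstract fixes two points of the pipeline, a natural-language visitor request and GPT-4o accessed through OpenAI's API; everything else in the sketch below (the catalogue entries and the JSON contract) is invented for illustration.

```python
# Rough sketch of the request-routing step: a visitor's free-form request is
# resolved against the tour's catalogue by GPT-4o via OpenAI's API (as in the
# paper); catalogue titles and the JSON reply format here are placeholders.
import json
from openai import OpenAI

client = OpenAI()
CATALOGUE = ["Hall 1: Renaissance altarpieces", "Hall 2: Baroque portraits",
             "Hall 3: 19th-century landscapes"]        # placeholder entries

def route_visitor_request(request_text: str) -> list[str]:
    """Return the catalogue entries that match the visitor's stated interest."""
    response = client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content":
                "You are a museum guide. Given the catalogue "
                f"{CATALOGUE}, reply with JSON {{\"matches\": [...]}} listing "
                "the entries that fit the visitor's interest."},
            {"role": "user", "content": request_text},
        ],
    )
    return json.loads(response.choices[0].message.content)["matches"]

# route_visitor_request("I only have 20 minutes and I love portraits")
```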
Omirgaliyev, R.; Kenzhe, D.; Mirambekov, S.
Simulating life: the application of generative agents in virtual environments Proceedings Article
In: IEEE AITU: Digit. Gener., Conf. Proc. - AITU, pp. 181–187, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-6437-8.
Tags: Artificial intelligence, Artificial intelligence agent, Artificial Intelligence Agents, Autonomous agents, Behavioral Research, Behaviour models, Computational Linguistics, Decision making, Dynamics, Dynamics simulation, Economic and social effects, Game Development, Game environment, Language Model, Large language model, large language models, Modeling languages, Social dynamic simulation, Social dynamics, Social Dynamics Simulation, Software design, Virtual Reality, Virtual Societies
@inproceedings{omirgaliyev_simulating_2024,
title = {Simulating life: the application of generative agents in virtual environments},
author = {R. Omirgaliyev and D. Kenzhe and S. Mirambekov},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199876250&doi=10.1109%2fIEEECONF61558.2024.10585387&partnerID=40&md5=70f8b598d10bec13c39d3506a15534a1},
doi = {10.1109/IEEECONF61558.2024.10585387},
isbn = {979-8-3503-6437-8},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE AITU: Digit. Gener., Conf. Proc. - AITU},
pages = {181–187},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This research explores the innovative integration of Large Language Models (LLMs) in game development, focusing on the autonomous creation, development, and governance of a virtual village by AI agents within a 2D game environment. The core of this study lies in observing and analyzing the interactions and societal development among AI agents, utilizing advanced algorithms for generative behavior modeling and dynamic skill tree learning. These AI agents are endowed with human-like decision-making capabilities, enabled by LLMs, allowing them to engage in complex social interactions and contribute to emergent societal structures within the game. The uniqueness of this project stems from its approach to simulating lifelike social dynamics in a virtual setting, thus addressing a gap in existing research and marking a significant contribution to the interdisciplinary fields of artificial intelligence and game development. By comparing AI-generated societal behaviors with human social interactions, the study delves into the potential of AI to mirror or enhance human social structures, offering a fresh perspective on the capabilities of AI in game development. This research not only aims to push the boundaries of AI applications in game development but also seeks to provide valuable insights into the potential for AI-driven simulations in studying complex social and behavioral dynamics. ©2024 IEEE.},
keywords = {Artificial intelligence, Artificial intelligence agent, Artificial Intelligence Agents, Autonomous agents, Behavioral Research, Behaviour models, Computational Linguistics, Decision making, Dynamics, Dynamics simulation, Economic and social effects, Game Development, Game environment, Language Model, Large language model, large language models, Modeling languages, Social dynamic simulation, Social dynamics, Social Dynamics Simulation, Software design, Virtual Reality, Virtual Societies},
pubstate = {published},
tppubtype = {inproceedings}
}
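Illustrative note (not from the paper): a toy version of the generative-agent loop this abstract describes, observe, prompt an LLM, act, with a dynamic skill tree. The `query_llm` stub and the skill-tree shape are invented; the paper does not specify its model or prompt format.

```python
# Toy generative-agent loop with dynamic skill-tree learning. `query_llm` is a
# stand-in for whatever LLM backend the authors used; SKILL_TREE is invented.
def query_llm(prompt: str) -> str:
    raise NotImplementedError("plug an LLM backend in here")

SKILL_TREE = {"forage": [], "farm": ["forage"], "trade": ["farm"]}

class Villager:
    def __init__(self, name: str):
        self.name = name
        self.skills = {"forage"}
        self.memory: list[tuple[str, str]] = []

    def step(self, observation: str) -> str:
        prompt = (f"You are {self.name} in a 2D village. Skills: {sorted(self.skills)}. "
                  f"Recent events: {self.memory[-3:]}. You see: {observation}. "
                  "Reply with a single next action.")
        action = query_llm(prompt)               # human-like decision making
        self.memory.append((observation, action))
        # Toy skill-tree rule: unlock any skill whose prerequisites are held.
        for skill, prereqs in SKILL_TREE.items():
            if all(p in self.skills for p in prereqs):
                self.skills.add(skill)
        return action
```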
Jeong, E.; Kim, H.; Park, S.; Yoon, S.; Ahn, J.; Woo, W.
Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 205–208, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3315-0691-9.
Tags: 3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics
@inproceedings{jeong_function-adaptive_2024,
title = {Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts},
author = {E. Jeong and H. Kim and S. Park and S. Yoon and J. Ahn and W. Woo},
editor = {Eck U. and Sra M. and Stefanucci J. and Sugimoto M. and Tatzgern M. and Williams I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214379963&doi=10.1109%2fISMAR-Adjunct64951.2024.00050&partnerID=40&md5=7222e0599a7e2aa0adaea38e4b9e13cc},
doi = {10.1109/ISMAR-Adjunct64951.2024.00050},
isbn = {979-8-3315-0691-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {205–208},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We propose an algorithm that extracts the most suitable affordances, interaction targets, and corresponding coordinates adaptively from 3D models of various artifacts based on their functional context for efficient authoring of XR content with artifacts. Traditionally, authoring AR scenes to convey artifact context required one-to-one manual work. Our approach leverages a Large Language Model (LLM) to extract interaction types, positions, and subjects based on the artifact's name and usage context. This enables templated XR experience creation, replacing repetitive manual labor. Consequently, our system streamlines the XR authoring process, making it more efficient and scalable. © 2024 IEEE.},
keywords = {3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
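Illustrative note: one plausible shape for the extraction step this abstract describes, prompting an LLM with the artifact's name and usage context and parsing structured affordances back. The JSON fields and the OpenAI "gpt-4o" backend are assumptions; the paper does not name its LLM.

```python
# Sketch of function-adaptive affordance extraction: artifact name and usage
# context go to an LLM, which returns structured affordance candidates.
# The JSON schema and the "gpt-4o" backend are assumptions, not the paper's.
import json
from openai import OpenAI

client = OpenAI()

def extract_affordances(artifact: str, usage_context: str) -> list[dict]:
    prompt = (
        f"Artifact: {artifact}. Usage context: {usage_context}. "
        "Return JSON of the form "
        '{"affordances": [{"interaction": "...", "target_part": "...", '
        '"position_hint": "...", "subject": "..."}]} describing how a person '
        "would use this artifact."
    )
    response = client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[{"role": "user", "content": prompt}],
    )
    return json.loads(response.choices[0].message.content)["affordances"]

# extract_affordances("celadon teapot", "tea-ceremony exhibit")
# -> e.g. [{"interaction": "grasp", "target_part": "handle", ...}, ...]
```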
Shabanijou, M.; Sharma, V.; Ray, S.; Lu, R.; Xiong, P.
Large Language Model Empowered Spatio-Visual Queries for Extended Reality Environments Proceedings Article
In: Ding, W.; Lu, C.-T.; Wang, F.; Di, L.; Wu, K.; Huan, J.; Nambiar, R.; Li, J.; Ilievski, F.; Baeza-Yates, R.; Hu, X. (Ed.): Proc. - IEEE Int. Conf. Big Data, BigData, pp. 5843–5846, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-6248-0.
Tags: 3D modeling, Digital elevation model, Emerging applications, Immersive environment, Language Model, Metaverses, Modeling languages, Natural language interfaces, Query languages, spatial data, Spatial queries, Structured Query Language, Technological advances, Users perspective, Virtual environments, Visual languages, Visual query
@inproceedings{shabanijou_large_2024,
title = {Large Language Model Empowered Spatio-Visual Queries for Extended Reality Environments},
author = {M. Shabanijou and V. Sharma and S. Ray and R. Lu and P. Xiong},
editor = {Ding W. and Lu C.-T. and Wang F. and Di L. and Wu K. and Huan J. and Nambiar R. and Li J. and Ilievski F. and Baeza-Yates R. and Hu X.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85218011140&doi=10.1109%2fBigData62323.2024.10825084&partnerID=40&md5=fdd78814b8e19830d1b8ecd4b33b0102},
doi = {10.1109/BigData62323.2024.10825084},
isbn = {979-8-3503-6248-0},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Big Data, BigData},
pages = {5843–5846},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {With the technological advances in creation and capture of 3D spatial data, new emerging applications are being developed. Digital Twins, metaverse and extended reality (XR) based immersive environments can be enriched by leveraging geocoded 3D spatial data. Unlike 2D spatial queries, queries involving 3D immersive environments need to take the query user's viewpoint into account. Spatio-visual queries return objects that are visible from the user's perspective. In this paper, we propose enhancing 3D spatio-visual queries with large language models (LLMs). These kinds of queries allow a user to interact with the visible objects using a natural language interface. We have implemented a proof-of-concept prototype and conducted a preliminary evaluation. Our results demonstrate the potential of truly interactive immersive environments. © 2024 IEEE.},
keywords = {3D modeling, Digital elevation model, Emerging applications, Immersive environment, Language Model, Metaverses, Modeling languages, Natural language interfaces, Query languages, spatial data, Spatial queries, Structured Query Language, Technological advances, Users perspective, Virtual environments, Visual languages, Visual query},
pubstate = {published},
tppubtype = {inproceedings}
}
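Illustrative note: a simplified version of the visibility step behind such spatio-visual queries, keep only objects inside the user's view cone, then hand that subset to an LLM as context. The geometry below ignores occlusion and the scene data is invented; the LLM call is only indicated in a comment.

```python
# Simplified spatio-visual filter: objects within `fov_deg` of the gaze
# direction and within range count as visible (no occlusion test). The scene,
# thresholds, and downstream prompt are invented for illustration.
import math

def visible(objects, eye, gaze, fov_deg=60.0, max_dist=50.0):
    """Return names of objects inside the user's viewing cone."""
    out = []
    gnorm = math.sqrt(sum(g * g for g in gaze))
    for name, pos in objects.items():
        v = tuple(p - e for p, e in zip(pos, eye))
        dist = math.sqrt(sum(c * c for c in v))
        if dist == 0 or dist > max_dist:
            continue
        cosang = sum(a * b for a, b in zip(v, gaze)) / (dist * gnorm)
        if cosang >= math.cos(math.radians(fov_deg / 2)):
            out.append(name)
    return out

scene = {"fountain": (3, 0, 10), "statue": (-20, 0, 2), "bench": (1, 0, 4)}
print(visible(scene, eye=(0, 0, 0), gaze=(0, 0, 1)))   # ['fountain', 'bench']
# An LLM prompt would then embed the visible subset, e.g.
# "Visible objects: ['fountain', 'bench']. Question: what is ahead of me?"
```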
Scott, A. J. S.; McCuaig, F.; Lim, V.; Watkins, W.; Wang, J.; Strachan, G.
Revolutionizing Nurse Practitioner Training: Integrating Virtual Reality and Large Language Models for Enhanced Clinical Education Proceedings Article
In: Strudwick, G.; Hardiker, N.R.; Rees, G.; Cook, R.; Cook, R.; Lee, Y.J. (Ed.): Stud. Health Technol. Informatics, pp. 671–672, IOS Press BV, 2024, ISSN: 0926-9630; ISBN: 978-1-64368-527-4.
Tags: 3D modeling, 3D models, 3d-modeling, adult, anamnesis, clinical decision making, clinical education, Clinical Simulation, Computational Linguistics, computer interface, Computer-Assisted Instruction, conference paper, Curriculum, Decision making, E-Learning, Education, Health care education, Healthcare Education, human, Humans, Language Model, Large language model, large language models, Mesh generation, Model animations, Modeling languages, nurse practitioner, Nurse Practitioners, Nursing, nursing education, nursing student, OSCE preparation, procedures, simulation, Teaching, therapy, Training, Training program, User-Computer Interface, Virtual Reality, Virtual reality training
@inproceedings{scott_revolutionizing_2024,
title = {Revolutionizing Nurse Practitioner Training: Integrating Virtual Reality and Large Language Models for Enhanced Clinical Education},
author = {A. J. S. Scott and F. McCuaig and V. Lim and W. Watkins and J. Wang and G. Strachan},
editor = {Strudwick G. and Hardiker N.R. and Rees G. and Cook R. and Cook R. and Lee Y.J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199593781&doi=10.3233%2fSHTI240272&partnerID=40&md5=90c7bd43ba978f942723e6cf1983ffb3},
doi = {10.3233/SHTI240272},
issn = {0926-9630},
isbn = {978-1-64368-527-4},
year = {2024},
date = {2024-01-01},
booktitle = {Stud. Health Technol. Informatics},
volume = {315},
pages = {671–672},
publisher = {IOS Press BV},
abstract = {This project introduces an innovative virtual reality (VR) training program for student Nurse Practitioners, incorporating advanced 3D modeling, animation, and Large Language Models (LLMs). Designed to simulate realistic patient interactions, the program aims to improve communication, history taking, and clinical decision-making skills in a controlled, authentic setting. This abstract outlines the methods, results, and potential impact of this cutting-edge educational tool on nursing education. © 2024 The Authors.},
keywords = {3D modeling, 3D models, 3d-modeling, adult, anamnesis, clinical decision making, clinical education, Clinical Simulation, Computational Linguistics, computer interface, Computer-Assisted Instruction, conference paper, Curriculum, Decision making, E-Learning, Education, Health care education, Healthcare Education, human, Humans, Language Model, Large language model, large language models, Mesh generation, Model animations, Modeling languages, nurse practitioner, Nurse Practitioners, Nursing, nursing education, nursing student, OSCE preparation, procedures, simulation, Teaching, therapy, Training, Training program, User-Computer Interface, Virtual Reality, Virtual reality training},
pubstate = {published},
tppubtype = {inproceedings}
}
Leong, C. W.; Jawahar, N.; Basheerabad, V.; Wörtwein, T.; Emerson, A.; Sivan, G.
Combining Generative and Discriminative AI for High-Stakes Interview Practice Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 94–96, Association for Computing Machinery, 2024, ISBN: 979-8-4007-0463-5.
Tags: AI systems, College admissions, Continuous improvements, End to end, Interactive computer graphics, Interactive dialog system, interactive dialogue systems, Language Model, Modeling languages, Multi-modal, Multimodal computing, Video interview, video interviews, Virtual avatar, Virtual environments, Virtual Reality
@inproceedings{leong_combining_2024,
title = {Combining Generative and Discriminative AI for High-Stakes Interview Practice},
author = {C. W. Leong and N. Jawahar and V. Basheerabad and T. Wörtwein and A. Emerson and G. Sivan},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85211135262&doi=10.1145%2f3686215.3688377&partnerID=40&md5=4f53f4466d43840510a36c125eeefa16},
doi = {10.1145/3686215.3688377},
isbn = {979-8-4007-0463-5},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {94–96},
publisher = {Association for Computing Machinery},
abstract = {We present a demo comprising an end-to-end AI pipeline for practicing video interviews for high-stakes scenarios (i.e., college admissions), with personalized, actionable feedback for continuous improvement of the user. Utilizing large language models (LLMs), we generate questions and responses for a virtual avatar interviewer. Our focus on key qualities - such as concise responses with low latency, empathy, and smooth topic navigation - led to a comparative evaluation of several prominent LLMs, each undergoing evolutionary development. We also discuss the integration of avatar technology to create an immersive, virtual environment for naturalistic dyadic conversations. © 2024 Owner/Author.},
keywords = {AI systems, College admissions, Continuous improvements, End to end, Interactive computer graphics, Interactive dialog system, interactive dialogue systems, Language Model, Modeling languages, Multi-modal, Multimodal computing, Video interview, video interviews, Virtual avatar, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
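Illustrative note: the generative/discriminative pairing this demo describes can be caricatured as one model playing the avatar interviewer while a separate scorer turns each answer into feedback. Both parts below are stand-ins, `next_question` is a stub (the paper compares several LLMs rather than fixing one), and the scorer is a trivial heuristic in place of a trained discriminative model.

```python
# Caricature of the demo's generative + discriminative split: an LLM-driven
# interviewer plus a per-turn answer scorer. `next_question` is a stub; the
# heuristic scorer merely stands in for a real discriminative feedback model.
def next_question(history: list[dict]) -> str:
    raise NotImplementedError("LLM backend: concise, empathetic follow-up question")

def score_answer(answer: str) -> dict:
    words = answer.split()
    return {  # placeholder heuristics, not the demo's actual feedback signals
        "conciseness": 1.0 if len(words) <= 120 else 120 / len(words),
        "filler_rate": sum(w.lower() in {"um", "uh", "like"} for w in words)
                       / max(len(words), 1),
    }

def interview_turn(history: list[dict], answer: str):
    history.append({"role": "candidate", "content": answer})
    feedback = score_answer(answer)          # actionable, per-turn feedback
    question = next_question(history)        # drives smooth topic navigation
    history.append({"role": "interviewer", "content": question})
    return question, feedback
```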
2023
Vincent, B.; Ayyar, K.
Roblox Generative AI in action Proceedings Article
In: Spencer, S.N. (Ed.): Proc. - SIGGRAPH Real-Time Live!, Association for Computing Machinery, Inc, 2023, ISBN: 979-8-4007-0158-0.
Tags: AI techniques, Complex model, Creation process, Education, Game, Games, Interactive computer graphics, Interactive objects, Lighting, Metaverse, Metaverses, Modeling, Modeling languages, Natural languages, Object and scenes, Pipeline, Real-Time Rendering, Rendering (computer graphics)
@inproceedings{vincent_roblox_2023,
title = {Roblox Generative AI in action},
author = {B. Vincent and K. Ayyar},
editor = {Spencer S.N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85167946022&doi=10.1145%2f3588430.3597250&partnerID=40&md5=61fda81c33eb3623240f7d14f51607b0},
doi = {10.1145/3588430.3597250},
isbn = {979-8-4007-0158-0},
year = {2023},
date = {2023-01-01},
booktitle = {Proc. - SIGGRAPH Real-Time Live!},
publisher = {Association for Computing Machinery, Inc},
abstract = {Roblox is investing in generative AI techniques to revolutionize the creation process on its platform. By leveraging natural language and other intuitive expressions of intent, creators can build interactive objects and scenes without complex modeling or coding. The use of AI image generation services and large language models aims to make creation faster and easier for every user on the platform. © 2023 Owner/Author.},
keywords = {AI techniques, Complex model, Creation process, Education, Game, Games, Interactive computer graphics, Interactive objects, Lighting, Metaverse, Metaverses, Modeling, Modeling languages, Natural languages, Object and scenes, Pipeline, Real-Time Rendering, Rendering (computer graphics)},
pubstate = {published},
tppubtype = {inproceedings}
}