AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2025
Tsai, Y. -J.; Liu, S. -T.; Hsu, S. -C.
The Development of an Interactive IoT Cross-Media Survey System and Real-Time Re-presentation of Mass Learning Proceedings Article
In: Wei, J.; Margetis, G. (Ed.): Lect. Notes Comput. Sci., pp. 145–157, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743, ISBN: 978-3-031-93060-7.
Abstract | Links | BibTeX | Tags: Cross-media, Data Re-presentation, Internet of Things, IoT Cross-Media System, IoT cross-medium system, Learning outcome, Learning systems, Mass Learning, Media systems, Smart phones, Smartphone, Smartphones, STEM with A, Survey System, Survey systems, Surveying, Tangible User Interface, Tangible user interfaces, User interfaces, Virtual Reality
@inproceedings{tsai_development_2025,
title = {The Development of an Interactive IoT Cross-Media Survey System and Real-Time Re-presentation of Mass Learning},
author = {Y. -J. Tsai and S. -T. Liu and S. -C. Hsu},
editor = {Wei, J. and Margetis, G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105008756188&doi=10.1007%2f978-3-031-93061-4_10&partnerID=40&md5=c487828eeacfdf18cf4e726e6ce28146},
doi = {10.1007/978-3-031-93061-4_10},
issn = {0302-9743},
isbn = {978-3-031-93060-7},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15823 LNCS},
pages = {145–157},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {In this study, we propose the Interactive IoT Cross-Media Survey System, integrating tangible interaction in a game-like manner with real-time data re-presentation. This system was implemented in the “STEM with A” Interactive Exploration Hall at National Tsing Hua University in 2020. It enabled participants to use their smartphones as tangible user interfaces to “scoop-up questions” from interactive sensing points within the exhibition areas. After completing the questions, participants could “pour-in” their responses and observe digital data re-presentation artworks generated from survey results, showcasing mass learning outcomes. Furthermore, the data re-presentation content was tailored to participants’ group characteristics, showing how their responses impact the group’s overall learning outcomes with each “pour-in response.” The study achieved several key outcomes: (1) transforming traditional surveys into a gamified survey system, enhancing participants’ engagement, (2) providing real-time, group-based data re-presentations, enabling participants to contribute to the group’s learning outcomes, and (3) implementing a grouping mechanism to foster collaboration within groups and healthy competition between them. This system provides flexible and customizable data re-presentation, making it suitable for diverse environments requiring real-time data-driven engagement. Future applications can integrate emerging technologies, such as generative AI to dynamically generate questions or virtual reality to offer immersive experiences. Additionally, data re-presentations can be designed as dynamic mass artistic creations, allowing participants to become co-creators of an evolving collective masterpiece. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Cross-media, Data Re-presentation, Internet of Things, IoT Cross-Media System, IoT cross-medium system, Learning outcome, Learning systems, Mass Learning, Media systems, Smart phones, Smartphone, Smartphones, STEM with A, Survey System, Survey systems, Surveying, Tangible User Interface, Tangible user interfaces, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
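The "scoop-up / pour-in" flow summarized in the abstract above can be pictured with a minimal, standard-library Python sketch. This is not code from the paper; the class and function names are hypothetical.

# Minimal sketch (not from the paper) of the event flow the abstract describes:
# smartphones "scoop up" questions at sensing points and later "pour in" answers,
# and the server recomputes the group-level re-presentation after every pour-in.
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class Question:
    qid: str
    text: str

class SurveyHub:
    def __init__(self, questions):
        self.questions = {q.qid: q for q in questions}
        self.responses = defaultdict(list)        # group -> list of (qid, answer)

    def scoop_up(self, sensing_point: str) -> Question:
        """Return the question attached to an interactive sensing point."""
        return self.questions[sensing_point]

    def pour_in(self, group: str, qid: str, answer: int) -> dict:
        """Store one answer and return the refreshed group summary
        that drives the real-time data re-presentation artwork."""
        self.responses[group].append((qid, answer))
        return self.group_summary(group)

    def group_summary(self, group: str) -> dict:
        per_question = defaultdict(list)
        for qid, answer in self.responses[group]:
            per_question[qid].append(answer)
        return {qid: sum(v) / len(v) for qid, v in per_question.items()}

hub = SurveyHub([Question("sensor-A", "How engaging was exhibit A? (1-5)")])
q = hub.scoop_up("sensor-A")
print(q.text)
print(hub.pour_in(group="blue", qid=q.qid, answer=4))   # {'sensor-A': 4.0}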
Carcangiu, A.; Manca, M.; Mereu, J.; Santoro, C.; Simeoli, L.; Spano, L. D.
Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation Proceedings Article
In: Santoro, C.; Schmidt, A.; Matera, M.; Bellucci, A. (Ed.): Lect. Notes Comput. Sci., pp. 59–79, Springer Science and Business Media Deutschland GmbH, 2025, ISSN: 0302-9743, ISBN: 978-3-031-95451-1.
Abstract | Links | BibTeX | Tags: Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces
@inproceedings{carcangiu_conversational_2025,
title = {Conversational Rule Creation in XR: User’s Strategies in VR and AR Automation},
author = {A. Carcangiu and M. Manca and J. Mereu and C. Santoro and L. Simeoli and L. D. Spano},
editor = {Santoro, C. and Schmidt, A. and Matera, M. and Bellucci, A.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105009012634&doi=10.1007%2f978-3-031-95452-8_4&partnerID=40&md5=67e2b8ca4bb2b508cd41548e3471705b},
doi = {10.1007/978-3-031-95452-8_4},
issn = {0302-9743},
isbn = {978-3-031-95451-1},
year = {2025},
date = {2025-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {15713 LNCS},
pages = {59–79},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Rule-based approaches allow users to customize XR environments. However, the current menu-based interfaces still create barriers for end-user developers. Chatbots based on Large Language Models (LLMs) have the potential to reduce the threshold needed for rule creation, but how users articulate their intentions through conversation remains under-explored. This work investigates how users express event-condition-action automation rules in Virtual Reality (VR) and Augmented Reality (AR) environments. Through two user studies, we show that the dialogues share consistent strategies across the interaction setting (keywords, difficulties in expressing conditions, task success), even if we registered different adaptations for each setting (verbal structure, event vs action first rules). Our findings are relevant for the design and implementation of chatbot-based support for expressing automations in an XR setting. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
keywords = {Automation, Chatbots, Condition, End-User Development, Extended reality, Human computer interaction, Immersive authoring, Language Model, Large language model, large language models, Rule, Rule-based approach, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
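The event-condition-action rule format that participants verbalize in these studies can be illustrated with a toy sketch. The keyword splitter below is only a stand-in for intuition; the paper itself studies LLM-based chatbots, and all names here are hypothetical.

# Illustrative sketch (not the authors' implementation) of the event-condition-action
# rule format that users verbalize, e.g.
# "when I enter the museum room, if the light is off, turn it on".
from dataclasses import dataclass

@dataclass
class ECARule:
    event: str       # what triggers the rule
    condition: str   # optional guard ("" means always)
    action: str      # what the XR environment should do

def parse_utterance(utterance: str) -> ECARule:
    """Very rough split of a 'when ..., if ..., then ...' sentence into rule parts."""
    text = utterance.lower().removeprefix("when ")
    event, _, rest = text.partition(", if ")
    if rest:
        condition, _, action = rest.partition(", ")
    else:
        condition = ""
        event, _, action = text.partition(", ")
    return ECARule(event.strip(), condition.strip(), action.strip())

rule = parse_utterance("When I enter the museum room, if the light is off, turn it on")
print(rule)   # ECARule(event='i enter the museum room', condition='the light is off', action='turn it on')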
Kurai, R.; Hiraki, T.; Hiroi, Y.; Hirao, Y.; Perusquía-Hernández, M.; Uchiyama, H.; Kiyokawa, K.
MagicCraft: Natural Language-Driven Generation of Dynamic and Interactive 3D Objects for Commercial Metaverse Platforms Journal Article
In: IEEE Access, vol. 13, pp. 132459–132474, 2025, ISSN: 2169-3536, (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: 3D models, 3D object, 3D Object Generation, 3d-modeling, AI-Assisted Design, Artificial intelligence, Behavioral Research, Content creation, Generative AI, Immersive, Metaverse, Metaverses, Natural language processing systems, Natural languages, Object oriented programming, Three dimensional computer graphics, user experience, User interfaces
@article{kurai_magiccraft_2025,
title = {MagicCraft: Natural Language-Driven Generation of Dynamic and Interactive 3D Objects for Commercial Metaverse Platforms},
author = {R. Kurai and T. Hiraki and Y. Hiroi and Y. Hirao and M. Perusquía-Hernández and H. Uchiyama and K. Kiyokawa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010187256&doi=10.1109%2FACCESS.2025.3587232&partnerID=40&md5=9b7a8115c62a8f9da4956dbbbb53dc4e},
doi = {10.1109/ACCESS.2025.3587232},
issn = {2169-3536},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {132459–132474},
abstract = {Metaverse platforms are rapidly evolving to provide immersive spaces for user interaction and content creation. However, the generation of dynamic and interactive 3D objects remains challenging due to the need for advanced 3D modeling and programming skills. To address this challenge, we present MagicCraft, a system that generates functional 3D objects from natural language prompts for metaverse platforms. MagicCraft uses generative AI models to manage the entire content creation pipeline: converting user text descriptions into images, transforming images into 3D models, predicting object behavior, and assigning necessary attributes and scripts. It also provides an interactive interface for users to refine generated objects by adjusting features such as orientation, scale, seating positions, and grip points. Implemented on Cluster, a commercial metaverse platform, MagicCraft was evaluated by 7 expert CG designers and 51 general users. Results show that MagicCraft significantly reduces the time and skill required to create 3D objects. Users with no prior experience in 3D modeling or programming successfully created complex, interactive objects and deployed them in the metaverse. Expert feedback highlighted the system's potential to improve content creation workflows and support rapid prototyping. By integrating AI-generated content into metaverse platforms, MagicCraft makes 3D content creation more accessible. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {3D models, 3D object, 3D Object Generation, 3d-modeling, AI-Assisted Design, Artificial intelligence, Behavioral Research, Content creation, Generative AI, Immersive, Metaverse, Metaverses, Natural language processing systems, Natural languages, Object oriented programming, Three dimensional computer graphics, user experience, User interfaces},
pubstate = {published},
tppubtype = {article}
}
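The staged pipeline the MagicCraft abstract outlines (prompt to image, image to 3D model, behavior prediction, attribute assignment) can be sketched as plain function stubs. None of these stubs correspond to MagicCraft's real models or APIs.

# A minimal, hypothetical sketch of the staged pipeline the abstract outlines
# (prompt -> image -> 3D model -> predicted behavior -> attributes/scripts).
from dataclasses import dataclass, field

@dataclass
class MetaverseObject:
    prompt: str
    image: str = ""
    mesh: str = ""
    behavior: str = ""
    attributes: dict = field(default_factory=dict)

def text_to_image(prompt): return f"image_for({prompt})"
def image_to_mesh(image): return f"mesh_from({image})"
def predict_behavior(prompt): return "sittable" if "chair" in prompt else "static"
def assign_attributes(behavior): return {"seat_height": 0.45} if behavior == "sittable" else {}

def generate(prompt: str) -> MetaverseObject:
    obj = MetaverseObject(prompt)
    obj.image = text_to_image(prompt)                 # stage 1: text -> image
    obj.mesh = image_to_mesh(obj.image)               # stage 2: image -> 3D model
    obj.behavior = predict_behavior(prompt)           # stage 3: behavior prediction
    obj.attributes = assign_attributes(obj.behavior)  # stage 4: attributes/scripts
    return obj

print(generate("a wooden chair with armrests"))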
Mendoza, A. P.; Barrios Quiroga, K. J.; Solano Celis, S. D.; Quintero M., C. G.
NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study Journal Article
In: IEEE Access, vol. 13, pp. 141461–141483, 2025, ISSN: 2169-3536, (Publisher: Institute of Electrical and Electronics Engineers Inc.).
Abstract | Links | BibTeX | Tags: Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality
@article{mendoza_naia_2025,
title = {NAIA: A Multi-Technology Virtual Assistant for Boosting Academic Environments—A Case Study},
author = {A. P. Mendoza and K. J. Barrios Quiroga and S. D. Solano Celis and C. G. Quintero M.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013598763&doi=10.1109%2FACCESS.2025.3597565&partnerID=40&md5=7ad6b037cfedb943fc026642c4854284},
doi = {10.1109/ACCESS.2025.3597565},
issn = {2169-3536},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {141461–141483},
abstract = {Virtual assistants have become essential tools for improving productivity and efficiency in various domains. This paper presents NAIA (Nimble Artificial Intelligence Assistant), an advanced multi-role and multi-task virtual assistant enhanced with artificial intelligence, designed to serve a university community case study. The system integrates AI technologies including Large Language Models (LLM), Computer Vision, and voice processing to create an immersive and efficient interaction through animated digital avatars. NAIA features five specialized roles: researcher, receptionist, personal skills trainer, personal assistant, and university guide, each equipped with specific capabilities to support different aspects of academic life. The system’s Computer Vision capabilities enable it to comment on users’ physical appearance and environment, enriching the interaction. Through natural language processing and voice interaction, NAIA aims to improve productivity and efficiency within the university environment while providing personalized assistance through a ubiquitous platform accessible across multiple devices. NAIA is evaluated through a user experience survey involving 30 participants with different demographic characteristics, which is the most widely accepted way for the community to evaluate this type of solution. Participants gave their feedback after using one role of NAIA for 30 minutes. The experiment showed that 90% of the participants considered NAIA-assisted tasks to be of higher quality and, on average, NAIA achieved a score of 4.27 out of 5 on user satisfaction. Participants particularly appreciated the assistant’s visual recognition, natural conversation flow, and user interaction capabilities. Results demonstrate NAIA’s capabilities and effectiveness across the five roles. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {Academic environment, Artificial intelligence, Case-studies, Computational Linguistics, Computer vision, Digital avatar, Digital avatars, Efficiency, Human computer interaction, Human-AI Interaction, Interactive computer graphics, Language Model, Large language model, large language model (LLM), Learning systems, Natural language processing systems, Personal digital assistants, Personnel training, Population statistics, Speech communication, Speech processing, Speech to text, speech to text (STT), Text to speech, text to speech (TTS), user experience, User interfaces, Virtual assistant, Virtual assistants, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
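The role specialization described in the NAIA abstract amounts to running the same speech-to-text, LLM, text-to-speech loop under a different system prompt per role. The sketch below is a hypothetical illustration with stub components, not NAIA's implementation.

# Hypothetical sketch of role specialization: one voice -> LLM -> voice turn,
# conditioned by a per-role system prompt. The speech and LLM calls are stand-ins.
ROLE_PROMPTS = {
    "researcher": "You help find and summarize academic literature.",
    "receptionist": "You greet visitors and answer questions about the campus.",
    "skills trainer": "You coach the user on presentation and soft skills.",
    "personal assistant": "You manage schedules and reminders.",
    "university guide": "You give directions and explain university services.",
}

def speech_to_text(audio: bytes) -> str:
    return "Where is the main library?"        # stub STT result

def llm_reply(system_prompt: str, user_text: str) -> str:
    return f"[{system_prompt[:20]}...] answering: {user_text}"   # stub LLM

def text_to_speech(text: str) -> bytes:
    return text.encode()                       # stub TTS

def handle_turn(role: str, audio: bytes) -> bytes:
    user_text = speech_to_text(audio)
    reply = llm_reply(ROLE_PROMPTS[role], user_text)
    return text_to_speech(reply)

print(handle_turn("university guide", b"...").decode())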
Liu, Y.; Li, Z.
Research on the Design System of Bamboo-Woven Products Based on Traditional Bamboo-Weaving Craft VR Experience Journal Article
In: Forest Products Journal, vol. 75, no. 3, pp. 238–250, 2025, ISSN: 0015-7473, (Publisher: Forest Products Society).
Abstract | Links | BibTeX | Tags: 3D modeling, Artificial intelligence, Bamboo, Design models, Design systems, evaluation, Experience design, Function evaluation, Human computer interaction, Learn+, Low-costs, Novel techniques, Product design, Product experience, Products, Reliability analysis, Systems analysis, Systems Engineering, Techniques, user experience, User interfaces, Virtual Reality, Virtual reality experiences, Weaving, Weaving technique, Woven products
@article{liu_research_2025,
title = {Research on the Design System of Bamboo-Woven Products Based on Traditional Bamboo-Weaving Craft VR Experience},
author = {Y. Liu and Z. Li},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105013654324&doi=10.13073%2FFPJ-D-25-00005&partnerID=40&md5=d03d78771ee8194ffc8450d259b5f129},
doi = {10.13073/FPJ-D-25-00005},
issn = {0015-7473},
year = {2025},
date = {2025-01-01},
journal = {Forest Products Journal},
volume = {75},
number = {3},
pages = {238–250},
abstract = {Virtual reality (VR) is a simulated experience capable of replicating or creating an entirely new environment. Through VR experience, designers can learn bamboo-weaving techniques at a low cost and showcase their design models of bamboo-woven products virtually, allowing these products to be put into production after experience and evaluation. This study introduces novel techniques to transform and innovate traditional bamboo-woven products to establish a comprehensive VR-based product experience design system. This system follows a pioneering pathway, including the following steps: VR weaving skill experience, generative artificial intelligence design (AIGC)–driven bamboo design creativity, 3D modeling technology support, and VR product evaluation. Moreover, the framework conducts user experience research from three dimensions: visual design, system function design, and human–computer interaction design. Usability assessments and statistical analysis were employed before and after the VR experience to assess the system’s reliability. The findings indicate that designers and users can remarkably use and evaluate the new system, offering a practical technical pathway for the modern design exploration of traditional bamboo products. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Forest Products Society},
keywords = {3D modeling, Artificial intelligence, Bamboo, Design models, Design systems, evaluation, Experience design, Function evaluation, Human computer interaction, Learn+, Low-costs, Novel techniques, Product design, Product experience, Products, Reliability analysis, Systems analysis, Systems Engineering, Techniques, user experience, User interfaces, Virtual Reality, Virtual reality experiences, Weaving, Weaving technique, Woven products},
pubstate = {published},
tppubtype = {article}
}
Ding, S.; Yalla, J. P.; Chen, Y.
Demo Abstract: RAG-Driven 3D Question Answering in Edge-Assisted Virtual Reality Proceedings Article
In: Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331543709 (ISBN).
Abstract | Links | BibTeX | Tags: Edge computing, Edge server, Interface states, Knowledge database, Language Model, Local knowledge, Office environments, Question Answering, Real- time, User interaction, User interfaces, Virtual environments, Virtual Reality, Virtual reality system, Virtual-reality environment
@inproceedings{ding_demo_2025,
title = {Demo Abstract: RAG-Driven 3D Question Answering in Edge-Assisted Virtual Reality},
author = {S. Ding and J. P. Yalla and Y. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017970015&doi=10.1109%2FINFOCOMWKSHPS65812.2025.11152992&partnerID=40&md5=0e079de018ae9c4a564b98c304a9ea6c},
doi = {10.1109/INFOCOMWKSHPS65812.2025.11152992},
isbn = {9798331543709 (ISBN)},
year = {2025},
date = {2025-01-01},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The rapid development of large language models (LLMs) has created new opportunities in 3D question answering (3D-QA) for virtual reality (VR). 3D-QA enhances user interaction by answering questions about virtual environments. However, performing 3D-QA in VR systems using LLM-based approaches is computation-intensive. Furthermore, general LLMs tend to generate inaccurate responses as they lack context-specific information in VR environments. To mitigate these limitations, we propose OfficeVR-QA, a 3D-QA framework for edge-assisted VR to alleviate the resource constraints of VR devices with the help of edge servers, demonstrated in a virtual office environment. To improve the accuracy of the generated answers, the edge server of OfficeVR-QA hosts retrieval-augmented generation (RAG) that augments LLMs with external knowledge retrieved from a local knowledge database extracted from VR environments and users. During an interactive demo, OfficeVR-QA will continuously update the local knowledge database in real time by transmitting participants' position and orientation data to the edge server, enabling adaptive responses to changes in the participants' states. Participants will navigate a VR office environment, interact with a VR user interface to ask questions, and observe the accuracy of dynamic responses based on their real-time state changes. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Edge computing, Edge server, Interface states, Knowledge database, Language Model, Local knowledge, Office environments, Question Answering, Real- time, User interaction, User interfaces, Virtual environments, Virtual Reality, Virtual reality system, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
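The edge-side loop sketched in the OfficeVR-QA abstract (stream tracking data, refresh a local knowledge base, retrieve nearby facts, answer with an LLM) can be illustrated roughly as follows. All data and function names are invented for illustration.

# Rough sketch (not the demo's code) of the edge-side loop: the headset streams
# position/orientation, the edge server refreshes the user state, retrieves the
# most relevant facts, and feeds them to an LLM stub.
import math

KNOWLEDGE = [
    {"pos": (0.0, 0.0), "fact": "The whiteboard lists this week's sprint tasks."},
    {"pos": (4.0, 1.0), "fact": "The printer in the corner is out of toner."},
    {"pos": (2.0, 5.0), "fact": "The meeting room is booked from 2 to 3 pm."},
]

def update_user_state(state: dict, position, orientation) -> None:
    """Called whenever the VR client transmits new tracking data."""
    state["position"], state["orientation"] = position, orientation

def retrieve(state: dict, k: int = 2):
    """Distance-based retrieval: prefer facts about objects near the user."""
    x, y = state["position"]
    ranked = sorted(KNOWLEDGE, key=lambda e: math.dist((x, y), e["pos"]))
    return [e["fact"] for e in ranked[:k]]

def answer(state: dict, question: str) -> str:
    context = " ".join(retrieve(state))
    return f"(LLM stub) Using context: {context} -> answer to: {question}"

state = {}
update_user_state(state, position=(3.5, 1.2), orientation=(0, 90, 0))
print(answer(state, "What is wrong with the device next to me?"))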
de Oliveira, E. A. Masasi; Sousa, R. T.; Bastos, A. A.; de Freitas Cintra, L. Martins; Filho, A. R. G. Galvão
Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation Proceedings Article
In: IMX - Proc. ACM Int. Conf. Interact. Media Experiences, pp. 437–440, Association for Computing Machinery, Inc, 2025, ISBN: 9798400713910 (ISBN).
Abstract | Links | BibTeX | Tags: Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages
@inproceedings{masasi_de_oliveira_immersive_2025,
title = {Immersive Virtual Museums with Spatially-Aware Retrieval-Augmented Generation},
author = {E. A. Masasi de Oliveira and R. T. Sousa and A. A. Bastos and L. Martins de Freitas Cintra and A. R. G. Galvão Filho},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105007979183&doi=10.1145%2F3706370.3731643&partnerID=40&md5=47a47f3408a0e6cb35c16dd6101a15b0},
doi = {10.1145/3706370.3731643},
isbn = {9798400713910 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {IMX - Proc. ACM Int. Conf. Interact. Media Experiences},
pages = {437–440},
publisher = {Association for Computing Machinery, Inc},
abstract = {Virtual Reality has significantly expanded possibilities for immersive museum experiences, overcoming traditional constraints such as space, preservation, and geographic limitations. However, existing virtual museum platforms typically lack dynamic, personalized, and contextually accurate interactions. To address this, we propose Spatially-Aware Retrieval-Augmented Generation (SA-RAG), an innovative framework integrating visual attention tracking with Retrieval-Augmented Generation systems and advanced Large Language Models. By capturing users' visual attention in real time, SA-RAG dynamically retrieves contextually relevant data, enhancing the accuracy, personalization, and depth of user interactions within immersive virtual environments. The system's effectiveness is initially demonstrated through our preliminary tests within a realistic VR museum implemented using Unreal Engine. Although promising, comprehensive human evaluations involving broader user groups are planned for future studies to rigorously validate SA-RAG's effectiveness, educational enrichment potential, and accessibility improvements in virtual museums. The framework also presents opportunities for broader applications in immersive educational and storytelling domains. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Association reactions, Behavioral Research, Generation systems, Geographics, Human computer interaction, Human engineering, Immersive, Information Retrieval, Interactive computer graphics, Language Model, Large language model, large language models, Museums, Retrieval-Augmented Generation, Search engines, Spatially aware, User interfaces, Virtual environments, Virtual museum, Virtual museum., Virtual Reality, Visual Attention, Visual languages},
pubstate = {published},
tppubtype = {inproceedings}
}
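The attention-aware retrieval idea behind SA-RAG can be pictured with a small sketch in which gaze dwell time boosts documents about the exhibit the visitor has looked at longest. This is an interpretation for illustration only, not the authors' code.

# Illustrative sketch of attention-aware retrieval: documents about the exhibit
# the visitor has gazed at longest are boosted before reaching a generator stub.
DOCS = {
    "amphora": "Greek amphora, 5th century BC, used to store wine and olive oil.",
    "mask": "Gold funerary mask attributed to a Mycenaean burial site.",
    "mosaic": "Roman floor mosaic depicting a chariot race.",
}

def rank_by_attention(dwell_seconds: dict, query: str):
    """Score = gaze dwell time + 1 for each query word found in the document text."""
    scores = {}
    for exhibit, text in DOCS.items():
        overlap = sum(word in text.lower() for word in query.lower().split())
        scores[exhibit] = dwell_seconds.get(exhibit, 0.0) + overlap
    return sorted(scores, key=scores.get, reverse=True)

def generate_answer(query: str, dwell_seconds: dict) -> str:
    top = rank_by_attention(dwell_seconds, query)[0]
    return f"(LLM stub) About the {top}: {DOCS[top]} This relates to '{query}'."

# The visitor has been looking at the funerary mask for 12 seconds.
print(generate_answer("what is this made of?", {"mask": 12.0, "amphora": 1.5}))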
Saddik, A. El; Ahmad, J.; Khan, M.; Abouzahir, S.; Gueaieb, W.
Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content Journal Article
In: ACM Transactions on Multimedia Computing, Communications and Applications, vol. 21, no. 7, pp. 1–43, 2025, ISSN: 1551-6857; 1551-6865, (Publisher: Association for Computing Machinery).
Abstract | Links | BibTeX | Tags: Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality
@article{el_saddik_unleashing_2025,
title = {Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content},
author = {A. El Saddik and J. Ahmad and M. Khan and S. Abouzahir and W. Gueaieb},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011860002&doi=10.1145%2F3713075&partnerID=40&md5=20064843ced240c42e9353d747672cb3},
doi = {10.1145/3713075},
issn = {1551-6857; 1551-6865},
year = {2025},
date = {2025-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications and Applications},
volume = {21},
number = {7},
pages = {1–43},
abstract = {The metaverse presents an emerging creative expression and collaboration frontier where generative artificial intelligence (GenAI) can play a pivotal role with its ability to generate multimodal content from simple prompts. These prompts allow the metaverse to interact with GenAI, where context information, instructions, input data, or even output indications constituting the prompt can come from within the metaverse. However, their integration poses challenges regarding interoperability, lack of standards, scalability, and maintaining a high-quality user experience. This article explores how GenAI can productively assist in enhancing creativity within the contexts of the metaverse and unlock new opportunities. We provide a technical, in-depth overview of the different generative models for image, video, audio, and 3D content within the metaverse environments. We also explore the bottlenecks, opportunities, and innovative applications of GenAI from the perspectives of end users, developers, service providers, and AI researchers. This survey commences by highlighting the potential of GenAI for enhancing the metaverse experience through dynamic content generation to populate massive virtual worlds. Subsequently, we shed light on the ongoing research practices and trends in multimodal content generation, enhancing realism and creativity and alleviating bottlenecks related to standardization, computational cost, privacy, and safety. Last, we share insights into promising research directions toward the integration of GenAI with the metaverse for creative enhancement, improved immersion, and innovative interactive applications. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2024
Cuervo-Rosillo, R.; Zarraonandia, T.; Díaz, P.
Using Generative AI to Support Non-Experts in the Creation of Immersive Experiences Proceedings Article
In: ACM Int. Conf. Proc. Ser., Association for Computing Machinery, 2024, ISBN: 979-840071764-2 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, End-Users, generative artificial intelligence, Immersive, immersive experience, Immersive Experiences, Natural languages, Speech commands, User interfaces, Virtual Reality
@inproceedings{cuervo-rosillo_using_2024,
title = {Using Generative AI to Support Non-Experts in the Creation of Immersive Experiences},
author = {R. Cuervo-Rosillo and T. Zarraonandia and P. Díaz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195422750&doi=10.1145%2f3656650.3656733&partnerID=40&md5=00d53df1d6b30acc6d281bb86ead73ab},
doi = {10.1145/3656650.3656733},
isbn = {979-840071764-2 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
publisher = {Association for Computing Machinery},
abstract = {This work focuses on exploring the use of Generative Artificial Intelligence to assist end-users in creating immersive experiences. We present a prototype that supports the creation and edition of virtual environments using speech commands expressed in natural language. © 2024 Owner/Author.},
keywords = {Artificial intelligence, End-Users, generative artificial intelligence, Immersive, immersive experience, Immersive Experiences, Natural languages, Speech commands, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Weng, S. C. -C.; Chiou, Y. -M.; Do, E. Y. -L.
Dream Mesh: A Speech-to-3D Model Generative Pipeline in Mixed Reality Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 345–349, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350372021 (ISBN).
Abstract | Links | BibTeX | Tags: 3D content, 3D modeling, 3D models, 3d-modeling, Augmented Reality, Digital assets, Generative AI, generative artificial intelligence, Intelligence models, Mesh generation, Mixed reality, Modeling, Speech-to-3D, Text modeling, Three dimensional computer graphics, User interfaces
@inproceedings{weng_dream_2024,
title = {Dream Mesh: A Speech-to-3D Model Generative Pipeline in Mixed Reality},
author = {S. C. -C. Weng and Y. -M. Chiou and E. Y. -L. Do},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187218106&doi=10.1109%2FAIxVR59861.2024.00059&partnerID=40&md5=d7631bff934fc436251c34c52b489539},
doi = {10.1109/AIxVR59861.2024.00059},
isbn = {9798350372021 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {345–349},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative Artificial Intelligence (AI) models have risen to prominence due to their unparalleled ability to craft and generate digital assets, encompassing text, images, audio, video, and 3D models. Leveraging the capabilities of diffusion models, such as Stable Diffusion and Instruct pix2pix, users can guide AI with specific prompts, streamlining the creative journey for graphic designers. However, the primary application of these models has been to graphic content within desktop interfaces, prompting professionals in interior and architectural design to seek more tailored solutions for their daily operations. To bridge this gap, Augmented Reality (AR) and Mixed Reality (MR) technologies offer a promising solution, transforming traditional 2D artworks into engaging 3D interactive realms. In this paper, we present "Dream Mesh," an MR tool that combines a Speech-to-3D generative workflow based on the DreamFusion model without relying on pre-existing 3D content libraries. This innovative system empowers users to express 3D content needs through natural language input, promising transformative potential in real-time 3D content creation and an enhanced MR user experience. © 2024 Elsevier B.V., All rights reserved.},
keywords = {3D content, 3D modeling, 3D models, 3d-modeling, Augmented Reality, Digital assets, Generative AI, generative artificial intelligence, Intelligence models, Mesh generation, Mixed reality, Modeling, Speech-to-3D, Text modeling, Three dimensional computer graphics, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
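The speech-to-3D flow the Dream Mesh abstract describes (transcribe, prompt a text-to-3D generator, anchor the mesh in the MR scene) can be outlined with placeholder stubs; nothing below is the authors' implementation.

# Hedged sketch of a speech-to-3D flow: speech is transcribed, the transcript
# becomes a text-to-3D prompt, and the resulting mesh is anchored in the MR scene.
def transcribe(audio: bytes) -> str:
    return "a small potted cactus for my desk"       # stand-in speech recognizer

def text_to_3d(prompt: str) -> dict:
    # Stand-in for a DreamFusion-style generator; returns a fake mesh handle.
    return {"mesh": f"<mesh:{prompt.replace(' ', '_')}>", "scale": 0.2}

def place_in_scene(mesh: dict, anchor: tuple) -> dict:
    """Attach the generated mesh to a real-world anchor chosen by the user."""
    return {**mesh, "anchor": anchor}

audio = b"..."                                       # microphone capture placeholder
prompt = transcribe(audio)
print(place_in_scene(text_to_3d(prompt), anchor=(0.3, 0.0, 1.1)))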
Venkatachalam, N.; Rayana, M.; Vignesh, S. Bala; Prathamesh, S.
Voice-Driven Panoramic Imagery: Real-Time Generative AI for Immersive Experiences Proceedings Article
In: Int. Conf. Intell. Data Commun. Technol. Internet Things, IDCIoT, pp. 1133–1138, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350327533 (ISBN).
Abstract | Links | BibTeX | Tags: Adaptive Visual Experience, First person, First-Person view, generative artificial intelligence, Generative Artificial Intelligence (AI), Image processing, Immersive, Immersive visual scene, Immersive Visual Scenes, Language processing, Natural Language Processing, Natural Language Processing (NLP), Natural language processing systems, Natural languages, Panoramic Images, Patient treatment, Personalized environment, Personalized Environments, Phobia Treatment, Prompt, prompts, Psychological intervention, Psychological Interventions, Real-Time Synthesis, User interaction, User interfaces, Virtual experience, Virtual Experiences, Virtual Reality, Virtual Reality (VR), Virtual-reality headsets, Visual experiences, Visual languages, Visual scene, Voice command, Voice commands, VR Headsets
@inproceedings{venkatachalam_voice-driven_2024,
title = {Voice-Driven Panoramic Imagery: Real-Time Generative AI for Immersive Experiences},
author = {N. Venkatachalam and M. Rayana and S. Bala Vignesh and S. Prathamesh},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190121845&doi=10.1109%2FIDCIoT59759.2024.10467441&partnerID=40&md5=867e723b20fb9fead7d1c55926af9642},
doi = {10.1109/IDCIoT59759.2024.10467441},
isbn = {9798350327533 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Int. Conf. Intell. Data Commun. Technol. Internet Things, IDCIoT},
pages = {1133–1138},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This research study introduces an innovative system that aims to synthesize 360-degree panoramic images in real time based on vocal prompts from the user, leveraging state-of-the-art Generative AI with a combination of advanced NLP models. The primary objective of this system is to transform spoken descriptions into immersive and interactive visual scenes, specifically designed to provide users with first-person field views. This cutting-edge technology has the potential to revolutionize the realm of virtual reality (VR) experiences, enabling users to effortlessly create and navigate through personalized environments. The fundamental goal of this system is to enable the generation of real-time images that are seamlessly compatible with VR headsets, offering a truly immersive and adaptive visual experience. Beyond its technological advancements, this research also highlights its significant potential for creating a positive social impact. One notable application lies in psychological interventions, particularly in the context of phobia treatment and therapeutic settings. Here, patients can safely confront and work through their fears within these synthesized environments, potentially offering new avenues for therapy. Furthermore, the system serves educational and entertainment purposes by bringing users' imaginations to life, providing an unparalleled platform for exploring the boundaries of virtual experiences. Overall, this research represents a promising stride towards a more immersive and adaptable future in VR technology, with the potential to enhance various aspects of human lives, from mental health treatment to entertainment and education. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Adaptive Visual Experience, First person, First-Person view, generative artificial intelligence, Generative Artificial Intelligence (AI), Image processing, Immersive, Immersive visual scene, Immersive Visual Scenes, Language processing, Natural Language Processing, Natural Language Processing (NLP), Natural language processing systems, Natural languages, Panoramic Images, Patient treatment, Personalized environment, Personalized Environments, Phobia Treatment, Prompt, prompts, Psychological intervention, Psychological Interventions, Real-Time Synthesis, User interaction, User interfaces, Virtual experience, Virtual Experiences, Virtual Reality, Virtual Reality (VR), Virtual-reality headsets, Visual experiences, Visual languages, Visual scene, Voice command, Voice commands, VR Headsets},
pubstate = {published},
tppubtype = {inproceedings}
}
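A rough illustration of the voice-to-panorama idea above: the spoken description is wrapped into an equirectangular prompt and handed to a generator stub that only records the request metadata. The prompt wording and parameters are assumptions, not details from the paper.

# Illustrative stub of turning a vocal description into a prompt for an
# equirectangular 360-degree image and packaging it for a VR viewer.
def build_panorama_prompt(spoken_description: str) -> str:
    return (f"equirectangular 360-degree panorama, first-person view, "
            f"photorealistic, {spoken_description}")

def generate_panorama(prompt: str, width: int = 4096, height: int = 2048) -> dict:
    # Stand-in for a text-to-image model constrained to a 2:1 panorama.
    return {"prompt": prompt, "width": width, "height": height, "projection": "equirectangular"}

spoken = "a quiet beach at sunset with palm trees"
panorama = generate_panorama(build_panorama_prompt(spoken))
print(panorama["projection"], panorama["width"], "x", panorama["height"])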
Gottsacker, M.; Bruder, G.; Welch, G. F.
rlty2rlty: Transitioning Between Realities with Generative AI Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1160–1161, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374490 (ISBN).
Abstract | Links | BibTeX | Tags: Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds
@inproceedings{gottsacker_rlty2rlty_2024,
title = {rlty2rlty: Transitioning Between Realities with Generative AI},
author = {M. Gottsacker and G. Bruder and G. F. Welch},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195556960&doi=10.1109%2FVRW62533.2024.00374&partnerID=40&md5=cef1bfa9489c71c9e134cd9dc2326b42},
doi = {10.1109/VRW62533.2024.00374},
isbn = {9798350374490 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1160–1161},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We present a system for visually transitioning a mixed reality (MR) user between two arbitrary realities (e.g., between two virtual worlds or between the real environment and a virtual world). The system uses artificial intelligence (AI) to generate a 360° video that transforms the user's starting environment to another environment, passing through a liminal space that could help them relax between tasks or prepare them for the ending environment. The video can then be viewed on an MR headset. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interactive computer graphics, Liminal spaces, Mixed / augmented reality, Mixed reality, Real environments, System use, User interfaces, Virtual worlds},
pubstate = {published},
tppubtype = {inproceedings}
}
Yin, Z.; Wang, Y.; Papatheodorou, T.; Hui, P.
Text2VRScene: Exploring the Framework of Automated Text-driven Generation System for VR Experience Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 701–711, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374025 (ISBN).
Abstract | Links | BibTeX | Tags: Automated systems, Automation, Digital contents, Generation systems, Generative model, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interaction techniques, Language Model, Natural language processing systems, Text input, User interfaces, Virtual Reality
@inproceedings{yin_text2vrscene_2024,
title = {Text2VRScene: Exploring the Framework of Automated Text-driven Generation System for VR Experience},
author = {Z. Yin and Y. Wang and T. Papatheodorou and P. Hui},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85191431035&doi=10.1109%2FVR58804.2024.00090&partnerID=40&md5=8d04e98b6579e58fb1c6293eac5fa7bc},
doi = {10.1109/VR58804.2024.00090},
isbn = {9798350374025 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {701–711},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {With the recent development of the Virtual Reality (VR) industry, the increasing number of VR users pushes the demand for the massive production of immersive and expressive VR scenes in related industries. However, creating expressive VR scenes involves the reasonable organization of various digital content to express a coherent and logical theme, which is time-consuming and labor-intensive. In recent years, Large Language Models (LLMs) such as ChatGPT 3.5 and generative models such as stable diffusion have emerged as powerful tools for comprehending natural language and generating digital contents such as text, code, images, and 3D objects. In this paper, we have explored how we can generate VR scenes from text by incorporating LLMs and various generative models into an automated system. To achieve this, we first identify the possible limitations of LLMs for an automated system and propose a systematic framework to mitigate them. Subsequently, we developed Text2VRScene, a VR scene generation system, based on our proposed framework with well-designed prompts. To validate the effectiveness of our proposed framework and the designed prompts, we carry out a series of test cases. The results show that the proposed framework contributes to improving the reliability of the system and the quality of the generated VR scenes. The results also illustrate the promising performance of the Text2VRScene in generating satisfying VR scenes with a clear theme regularized by our well-designed prompts. This paper ends with a discussion about the limitations of the current system and the potential of developing similar generation systems based on our framework. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Automated systems, Automation, Digital contents, Generation systems, Generative model, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Interaction paradigm, Interaction paradigms, Interaction techniques, Language Model, Natural language processing systems, Text input, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
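The kind of automated text-to-VR-scene pipeline Text2VRScene explores can be sketched as an LLM stub that must return a structured scene specification, which is validated and then expanded object by object with a text-to-3D stub. This is a generic illustration, not the authors' framework.

# Hedged sketch of an automated text -> VR-scene pipeline: an LLM stub returns a
# structured scene description, the system validates it, and each object is
# handed to a text-to-3D stub.
import json

def llm_scene_spec(theme: str) -> str:
    # Stand-in for a prompted LLM that must answer with strict JSON.
    return json.dumps({
        "theme": theme,
        "skybox": "starry night over a desert",
        "objects": [
            {"name": "campfire", "position": [0, 0, 2]},
            {"name": "telescope", "position": [1.5, 0, 1]},
        ],
    })

def text_to_3d(name: str) -> str:
    return f"<mesh:{name}>"                     # stand-in for a generative 3D model

def build_scene(theme: str) -> dict:
    spec = json.loads(llm_scene_spec(theme))    # parse; a real system would retry on bad JSON
    assert "objects" in spec and "skybox" in spec, "LLM output failed validation"
    return {
        "skybox": spec["skybox"],
        "objects": [(text_to_3d(o["name"]), o["position"]) for o in spec["objects"]],
    }

print(build_scene("stargazing in the desert"))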
Artizzu, V.; Carcangiu, A.; Manca, M.; Mattioli, A.; Mereu, J.; Paternò, F.; Santoro, C.; Simeoli, L.; Spano, L. D.
End-User Development for eXtended Reality using a multimodal Intelligent Conversational Agent Proceedings Article
In: Wang, N.; Bellucci, A.; Anthes, C.; Daeijavad, P.; Friedl-Knirsch, J.; Maurer, F.; Pointecker, F.; Spano, L. D. (Ed.): CEUR Workshop Proc., CEUR-WS, 2024, ISSN: 1613-0073.
Abstract | Links | BibTeX | Tags: Condition, Context, End-User Development, Event-condition-action, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Rule, rules, User interfaces
@inproceedings{artizzu_end-user_2024,
title = {End-User Development for eXtended Reality using a multimodal Intelligent Conversational Agent},
author = {V. Artizzu and A. Carcangiu and M. Manca and A. Mattioli and J. Mereu and F. Paternò and C. Santoro and L. Simeoli and L. D. Spano},
editor = {Wang, N. and Bellucci, A. and Anthes, C. and Daeijavad, P. and Friedl-Knirsch, J. and Maurer, F. and Pointecker, F. and Spano, L. D.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196077262&partnerID=40&md5=3d5f022f30a1f0e3e5e81133d07823b5},
issn = {1613-0073},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proc.},
volume = {3704},
publisher = {CEUR-WS},
abstract = {In the past years, both the research community and commercial products have proposed various solutions aiming to support end-user developers (EUDevs), namely users without extensive programming skills, to build and customize XR experiences. However, current tools may not fully eliminate the potential for user errors or misunderstandings. In this paper, we present EUD4XR, a methodology consisting of an intelligent conversational agent to provide contextual help, to EUDevs, during the authoring process. The key characteristics of this agent are its multimodality, comprehending the user’s voice, gaze, and pointing, combined with the environment status. Moreover, the agent could also demonstrate concepts, suggest components, and help explain errors further to reduce misunderstandings for end-user developers of VR/XR. © 2024 Copyright for this paper by its authors.},
keywords = {Condition, Context, End-User Development, Event-condition-action, Extended reality, Immersive authoring, Language Model, Large language model, Meta-design, multimodal input, Multimodal inputs, Rule, rules, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
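The multimodal context fusion EUD4XR relies on (speech plus gaze plus pointing plus environment status) can be illustrated by flattening those signals into a single agent prompt. The structure and field names below are hypothetical.

# Minimal sketch of multimodal context fusion: the agent's help request combines
# what the user said, what they look at, what they point at, and the scene state.
from dataclasses import dataclass

@dataclass
class XRContext:
    utterance: str          # speech recognized from the user
    gazed_object: str       # object currently under the user's gaze
    pointed_object: str     # object currently pointed at
    environment: dict       # status of the scene (rules, object states, ...)

def build_agent_prompt(ctx: XRContext) -> str:
    """Flatten the multimodal context into one prompt for the conversational agent."""
    return (
        f"User said: '{ctx.utterance}'. "
        f"They are looking at {ctx.gazed_object} and pointing at {ctx.pointed_object}. "
        f"Environment status: {ctx.environment}. "
        "Explain the error or suggest the next authoring step."
    )

ctx = XRContext(
    utterance="Why doesn't this rule fire?",
    gazed_object="lamp",
    pointed_object="motion sensor",
    environment={"rules": 3, "lamp": "off", "last_error": "condition never true"},
)
print(build_agent_prompt(ctx))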
Do, M. D.; Dahlem, N.; Paulus, M.; Krick, M.; Steffny, L.; Werth, D.
“Furnish Your Reality” - Intelligent Mobile AR Application for Personalized Furniture Proceedings Article
In: Wei, J.; Margetis, G. (Ed.): Lect. Notes Comput. Sci., pp. 196–210, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 0302-9743, ISBN: 978-3-031-60457-7.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Augmented reality applications, Electronic commerce, Generative AI, generative artificial intelligence, Human computer interaction, Human computer interfaces, LiDAR, Mobile augmented reality, Mobile human computer interface, Mobile Human Computer Interfaces, Personalized product design, Personalized products, Phygital customer journey, Physical environments, Product design, Recommender system, Recommender systems, Sales, User centered design, User interfaces, User-centered design
@inproceedings{do_furnish_2024,
title = {“Furnish Your Reality” - Intelligent Mobile AR Application for Personalized Furniture},
author = {M. D. Do and N. Dahlem and M. Paulus and M. Krick and L. Steffny and D. Werth},
editor = {Wei, J. and Margetis, G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196202642&doi=10.1007%2f978-3-031-60458-4_14&partnerID=40&md5=017510be06c286789867235cfd98bb36},
doi = {10.1007/978-3-031-60458-4_14},
issn = {0302-9743},
isbn = {978-3-031-60457-7},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14737 LNCS},
pages = {196–210},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Today’s online retailers are faced with the challenge of providing a convenient solution for their customers to browse through a wide range of products. Simultaneously, they must meet individual customer needs by creating unique, personalized, one-of-a-kind items. Technological advances in areas such as Augmented Reality (AR), Artificial Intelligence (AI) or sensors (e.g. LiDAR), have the potential to address these challenges by enhancing the customer experience in new ways. One option is to implement “phygital” commerce solutions, which combines the benefits of physical and digital environments to improve the customer journey. This work presents a concept for a mobile AR application that integrates LiDAR and an AI-powered recommender system to create a unique phygital customer journey in the context of furniture shopping. The combination of AR, LiDAR and AI enables an accurate immersive experience along with personalized product designs. This concept aims to deliver benefits in terms of usability, convenience, time savings and user experience, while bridging the gap between mass-produced and personalized products. The new possibilities for merging virtual with physical environments hold immense potential, but this work also highlights challenges for customers as well as for online platform providers and future researchers. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Artificial intelligence, Augmented Reality, Augmented reality applications, Electronic commerce, Generative AI, generative artificial intelligence, Human computer interaction, Human computer interfaces, LiDAR, Mobile augmented reality, Mobile human computer interface, Mobile Human Computer Interfaces, Personalized product design, Personalized products, Phygital customer journey, Physical environments, Product design, Recommender system, Recommender systems, Sales, User centered design, User interfaces, User-centered design},
pubstate = {published},
tppubtype = {inproceedings}
}
Krauss, C.; Bassbouss, L.; Upravitelev, M.; An, T. -S.; Altun, D.; Reray, L.; Balitzki, E.; Tamimi, T. El; Karagülle, M.
Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse Proceedings Article
In: Sottilare, R. A.; Schwarz, J. (Ed.): Lect. Notes Comput. Sci., pp. 219–238, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 0302-9743, ISBN: 978-3-031-60608-3.
Abstract | Links | BibTeX | Tags: 3D modeling, AI-assistant, AI-Assistants, Computational Linguistics, Computer aided instruction, Concept-based, E-Learning, Education, Interoperability, Language Model, Large language model, large language models, Learning Environments, Learning systems, Learning Technologies, Learning technology, LLM, Metaverse, Metaverses, Natural language processing systems, Proof of concept, User interfaces, Virtual assistants, Virtual Reality
@inproceedings{krauss_opportunities_2024,
title = {Opportunities and Challenges in Developing Educational AI-Assistants for the Metaverse},
author = {C. Krauss and L. Bassbouss and M. Upravitelev and T. -S. An and D. Altun and L. Reray and E. Balitzki and T. El Tamimi and M. Karagülle},
editor = {Sottilare, R. A. and Schwarz, J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196214138&doi=10.1007%2f978-3-031-60609-0_16&partnerID=40&md5=9a66876cb30e9e5d287a86e6cfa66e05},
doi = {10.1007/978-3-031-60609-0_16},
issn = {0302-9743},
isbn = {978-3-031-60608-3},
year = {2024},
date = {2024-01-01},
booktitle = {Lect. Notes Comput. Sci.},
volume = {14727 LNCS},
pages = {219–238},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {The paper explores the opportunities and challenges for metaverse learning environments with AI-Assistants based on Large Language Models. A proof of concept based on popular but proprietary technologies is presented that enables a natural language exchange between the user and an AI-based medical expert in a highly immersive environment based on the Unreal Engine. The answers generated by ChatGPT are not only played back lip-synchronously, but also visualized in the VR environment using a 3D model of a skeleton. Usability and user experience play a particularly important role in the development of the highly immersive AI-Assistant. The proof of concept serves to illustrate the opportunities and challenges that lie in the merging of large language models, metaverse applications and educational ecosystems, which are self-contained research areas. Development strategies, tools and interoperability standards will be presented to facilitate future developments in this triangle of tension. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {3D modeling, AI-assistant, AI-Assistants, Computational Linguistics, Computer aided instruction, Concept-based, E-Learning, Education, Interoperability, Language Model, Large language model, large language models, Learning Environments, Learning systems, Learning Technologies, Learning technology, LLM, Metaverse, Metaverses, Natural language processing systems, Proof of concept, User interfaces, Virtual assistants, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Asadi, A. R.; Appiah, J.; Muntaka, S. A.; Kropczynski, J.
Actions, Not Apps: Toward Using LLMs to Reshape Context Aware Interactions in Mixed Reality Systems Proceedings Article
In: Stephanidis, C.; Antona, M.; Ntoa, S.; Salvendy, G. (Ed.): Commun. Comput. Info. Sci., pp. 166–176, Springer Science and Business Media Deutschland GmbH, 2024, ISSN: 1865-0929, ISBN: 978-3-031-62109-3.
Abstract | Links | BibTeX | Tags: Computation theory, Context Aware System, Context-aware interaction, Context-aware systems, Decision making, Digital information, Flat-screens, Interaction Design, Language Model, Mixed reality, Mixed reality systems, User interaction, User interfaces, User perceptions
@inproceedings{asadi_actions_2024,
title = {Actions, Not Apps: Toward Using LLMs to Reshape Context Aware Interactions in Mixed Reality Systems},
author = {A. R. Asadi and J. Appiah and S. A. Muntaka and J. Kropczynski},
editor = {Stephanidis, C. and Antona, M. and Ntoa, S. and Salvendy, G.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196733497&doi=10.1007%2f978-3-031-62110-9_17&partnerID=40&md5=9cd702ff979c7f111a5172df8f155ddf},
doi = {10.1007/978-3-031-62110-9_17},
issn = {1865-0929},
isbn = {978-3-031-62109-3},
year = {2024},
date = {2024-01-01},
booktitle = {Commun. Comput. Info. Sci.},
volume = {2120 CCIS},
pages = {166–176},
publisher = {Springer Science and Business Media Deutschland GmbH},
abstract = {Mixed reality computing merges user perception of the environment with digital information. As we move from flatscreen computing toward head-mounted computing, the necessity for developing alternative interactions and user flows becomes more evident. Activity theory provides a holistic overview of user interactions and motives. In this work in progress, we propose Action Sandbox Workspace as an interaction framework for the future of MR systems by focusing on action-centric interactions rather than application-centric interactions, aiming to bridge the gap between user goals and system functionalities in everyday tasks. By integrating the ontology of actions, user intentions, and context and connecting it to spatial data mapping, this forward-looking framework aims to create a contextually adaptive user interaction environment. The recent development in large language models (LLMs) has made the implementation of this interaction flow feasible by enabling inference and decision-making based on text-based descriptions of a user’s state and intentions with data and actions users have access to. We propose this approach as a future direction for developing mixed reality platforms and integrating AI in interacting with computers. © The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.},
keywords = {Computation theory, Context Aware System, Context-aware interaction, Context-aware systems, Decision making, Digital information, Flat-screens, Interaction Design, Language Model, Mixed reality, Mixed reality systems, User interaction, User interfaces, User perceptions},
pubstate = {published},
tppubtype = {inproceedings}
}
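The action-centric interaction the paper argues for can be caricatured as follows: the user's state and intention are described in text and a model picks one action from a registry of available actions, instead of launching an application. The keyword matcher is a toy stand-in for the LLM inference the authors propose, and all names are hypothetical.

# Hypothetical sketch of action-centric interaction: describe the user's state and
# intention in text, then let a model choose one action from an action ontology.
ACTIONS = {
    "set_reminder": "create a reminder at a given time",
    "navigate_to": "show walking directions to a place",
    "share_document": "send a document to a contact",
}

def describe_state(user_state: dict, intention: str) -> str:
    return f"State: {user_state}. Intention: {intention}. Available actions: {list(ACTIONS)}"

def choose_action(description: str) -> str:
    """Stand-in for LLM-based inference over the textual state description."""
    if "remind" in description.lower():
        return "set_reminder"
    if "directions" in description.lower() or "get to" in description.lower():
        return "navigate_to"
    return "share_document"

desc = describe_state({"location": "office lobby", "time": "09:40"},
                      "remind me to call the lab at noon")
print(choose_action(desc))   # -> set_reminder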
Bayat, R.; Maio, E.; Fiorenza, J.; Migliorini, M.; Lamberti, F.
Exploring Methodologies to Create a Unified VR User-Experience in the Field of Virtual Museum Experiences Proceedings Article
In: IEEE Gaming, Entertain., Media Conf., GEM, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350374537 (ISBN).
Abstract | Links | BibTeX | Tags: Cultural heritages, Meta-museum, Meta-museums, Metaverse, Metaverses, Research frontiers, Research opportunities, user experience, User experience design, User interfaces, User-Experience Design, Users' experiences, Virtual avatar, Virtual machine, Virtual museum, Virtual Reality, Virtual reality experiences
@inproceedings{bayat_exploring_2024,
title = {Exploring Methodologies to Create a Unified VR User-Experience in the Field of Virtual Museum Experiences},
author = {R. Bayat and E. Maio and J. Fiorenza and M. Migliorini and F. Lamberti},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199517817&doi=10.1109%2FGEM61861.2024.10585452&partnerID=40&md5=ced2ae6561acc66c71806ccf609ac9d1},
doi = {10.1109/GEM61861.2024.10585452},
isbn = {9798350374537 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Gaming, Entertain., Media Conf., GEM},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The emergence of Virtual Reality (VR) and the metaverse has opened doors to new research opportunities and frontiers in User Experience (UX). Within the cultural heritage domain, one of the key concepts is that of the Virtual Museums (VMs), whose definition has been extended through time by many research works and applications. However, most of the studies performed so far focused on only one application, and studied its UX without taking into account the other VR experiences possibly available in the VM. The purpose of this work is to contribute to an optimal design for creating a unified UX across multiple VR experiences. More specifically, the research included the development of two applications, respectively a VM in a metaverse platform and a virtual learning workshop as an individual application. With this premise, the study will also consider two fundamental elements for an effective UX design: a Virtual Environment (VE) and an Intelligent Virtual Avatar (IVA). In particular, the latter was developed following current trends in generative AI, integrating an IVA powered by a Large Language Model (LLM). © 2024 Elsevier B.V., All rights reserved.},
keywords = {Cultural heritages, Meta-museum, Meta-museums, Metaverse, Metaverses, Research frontiers, Research opportunities, user experience, User experience design, User interfaces, User-Experience Design, Users' experiences, Virtual avatar, Virtual machine, Virtual museum, Virtual Reality, Virtual reality experiences},
pubstate = {published},
tppubtype = {inproceedings}
}
Guo, Y.; Hou, K.; Yan, Z.; Chen, H.; Xing, G.; Jiang, X.
Sensor2Scene: Foundation Model-Driven Interactive Realities Proceedings Article
In: Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys, pp. 13–19, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350363456 (ISBN).
Abstract | Links | BibTeX | Tags: 3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization
@inproceedings{guo_sensor2scene_2024,
title = {Sensor2Scene: Foundation Model-Driven Interactive Realities},
author = {Y. Guo and K. Hou and Z. Yan and H. Chen and G. Xing and X. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199893762&doi=10.1109%2FFMSys62467.2024.00007&partnerID=40&md5=26eefef9074cb0c6f6aa75572cbd78ed},
doi = {10.1109/FMSys62467.2024.00007},
isbn = {9798350363456 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Workshop Found. Model. Cyber-Phys. Syst. Internet Things, FMSys},
pages = {13–19},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) is acclaimed for its potential to bridge the physical and virtual worlds. Yet, current integration between these realms often lacks a deep understanding of the physical environment and the subsequent scene generation that reflects this understanding. This research introduces Sensor2Scene, a novel system framework designed to enhance user interactions with sensor data through AR. At its core, an AI agent leverages large language models (LLMs) to decode subtle information from sensor data, constructing detailed scene descriptions for visualization. To enable these scenes to be rendered in AR, we decompose the scene creation process into text-to-3D model generation and spatial composition, allowing new AR scenes to be sketched from the descriptions. We evaluated our framework using an LLM evaluator with five metrics on various datasets to examine the correlation between sensor readings and the corresponding visualizations, and demonstrated the system's effectiveness with scenes generated end-to-end. The results highlight the potential of LLMs to understand IoT sensor data. Furthermore, generative models can aid in transforming these interpretations into visual formats, thereby enhancing user interaction. This work not only displays the capabilities of Sensor2Scene but also lays a foundation for advancing AR with the goal of creating more immersive and contextually rich experiences. © 2024 Elsevier B.V., All rights reserved.},
keywords = {3D modeling, Augmented Reality, Computational Linguistics, Data integration, Data visualization, Foundation models, Generative model, Language Model, Large language model, large language models, Model-driven, Sensor Data Integration, Sensors data, Text-to-3d generative model, Text-to-3D Generative Models, Three dimensional computer graphics, User interaction, User Interaction in AR, User interaction in augmented reality, User interfaces, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Geetha, S.; Aditya, G.; Reddy, M. Chetan; Nischith, G.
Human Interaction in Virtual and Mixed Reality Through Hand Tracking Proceedings Article
In: Proc. CONECCT - IEEE Int. Conf. Electron., Comput. Commun. Technol., Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350385922 (ISBN).
Abstract | Links | BibTeX | Tags: Computer interaction, Computer simulation languages, Daily lives, Digital elevation model, Hand gesture, hand tracking, Hand-tracking, human-computer interaction, Humaninteraction, Interaction dynamics, Mixed reality, Unity, User friendly interface, User interfaces, Virtual environments, Virtual Reality, Virtual spaces
@inproceedings{geetha_human_2024,
title = {Human Interaction in Virtual and Mixed Reality Through Hand Tracking},
author = {S. Geetha and G. Aditya and M. Chetan Reddy and G. Nischith},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85205768661&doi=10.1109%2FCONECCT62155.2024.10677239&partnerID=40&md5=10a6cb2b19648071937ae24e789d05a4},
doi = {10.1109/CONECCT62155.2024.10677239},
isbn = {9798350385922 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. CONECCT - IEEE Int. Conf. Electron., Comput. Commun. Technol.},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper explores the potential of hand tracking in virtual reality (VR) and mixed reality (MR), focusing on its role in human interaction dynamics. An application was designed in Unity leveraging the XR Interaction Toolkit, within which various items across three key domains (daily life, education, and recreation) were crafted to demonstrate the versatility of hand tracking, along with hand gesture-based shortcuts for interaction. Integration of elements in MR ensures that users can seamlessly enjoy virtual experiences while remaining connected to their physical surroundings. Precise hand tracking enables effortless interaction with the virtual space, enhancing presence and control through a user-friendly interface. Additionally, the paper explores the effectiveness of integrating hand tracking into education and training scenarios. A computer assembly simulation was created to demonstrate this, featuring component inspection and zoom capabilities, along with a large language model (LLM) integrated with hand gestures to provide interaction capabilities. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Computer interaction, Computer simulation languages, Daily lives, Digital elevation model, Hand gesture, hand tracking, Hand-tracking, human-computer interaction, Humaninteraction, Interaction dynamics, Mixed reality, Unity, User friendly interface, User interfaces, Virtual environments, Virtual Reality, Virtual spaces},
pubstate = {published},
tppubtype = {inproceedings}
}
He, Z.; Li, S.; Song, Y.; Cai, Z.
Towards Building Condition-Based Cross-Modality Intention-Aware Human-AI Cooperation under VR Environment Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Action Generation, Building conditions, Condition, Critical challenges, Cross modality, Human-AI Cooperation, Information presentation, Intention Detection, Language Model, Multi-modal, Purchasing, User interfaces, Virtual Reality
@inproceedings{he_towards_2024,
title = {Towards Building Condition-Based Cross-Modality Intention-Aware Human-AI Cooperation under VR Environment},
author = {Z. He and S. Li and Y. Song and Z. Cai},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194829231&doi=10.1145%2f3613904.3642360&partnerID=40&md5=44d237a6e2a686af74ffb684ef887ab6},
doi = {10.1145/3613904.3642360},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {To address critical challenges in effectively identifying user intent and forming relevant information presentations and recommendations in VR environments, we propose an innovative condition-based multi-modal human-AI cooperation framework. It highlights intent tuples (intent, condition, intent prompt, action prompt) and a 2-Large-Language-Models (2-LLMs) architecture. This design utilizes “condition” as the core to describe tasks, dynamically match user interactions with intentions, and empower the generation of tailored multi-modal AI responses. The 2-LLMs architecture separates the roles of intent detection and action generation, decreasing prompt length and helping generate appropriate responses. We implemented a VR-based intelligent furniture purchasing system based on the proposed framework and conducted a three-phase comparative user study. The results conclusively demonstrate the system's superiority in time efficiency and accuracy, improved intention conveyance, effective product acquisition, and user satisfaction and cooperation preference. Our framework provides a promising approach towards personalized and efficient user experiences in VR. © 2024 Copyright held by the owner/author(s)},
keywords = {Action Generation, Building conditions, Condition, Critical challenges, Cross modality, Human-AI Cooperation, Information presentation, Intention Detection, Language Model, Multi-modal, Purchasing, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Su, X.; Koh, E.; Xiao, C.
SonifyAR: Context-Aware Sound Effect Generation in Augmented Reality Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 9798400703317 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality authoring, Authoring Tool, Context information, Context-Aware, Immersiveness, Iterative methods, Mixed reality, Real-world, Sound, Sound effects, User interfaces, Users' experiences
@inproceedings{su_sonifyar_2024,
title = {SonifyAR: Context-Aware Sound Effect Generation in Augmented Reality},
author = {X. Su and E. Koh and C. Xiao},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194146678&doi=10.1145%2F3613905.3650927&partnerID=40&md5=99fb9c578a8855982f5be47bdb2e45c4},
doi = {10.1145/3613905.3650927},
isbn = {9798400703317 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Sound plays a crucial role in enhancing user experience and immersiveness in Augmented Reality (AR). However, current AR authoring platforms lack support for creating sound effects that harmonize with both virtual and real-world contexts. In this work, we present SonifyAR, a novel system for generating context-aware sound effects in AR experiences. SonifyAR implements a Programming by Demonstration (PbD) AR authoring pipeline. We utilize computer vision models and a large language model (LLM) to generate text descriptions that incorporate context information about the user, the virtual object, and the real-world environment. This context information is then used to acquire sound effects through recommendation, generation, and retrieval methods. The acquired sound effects can be tested and assigned to AR events. Our user interface also provides the flexibility for users to iteratively explore and fine-tune the sound effects. We conducted a preliminary user study to demonstrate the effectiveness and usability of our system. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Augmented Reality, Augmented reality authoring, Authoring Tool, Context information, Context-Aware, Immersiveness, Iterative methods, Mixed reality, Real-world, Sound, Sound effects, User interfaces, Users' experiences},
pubstate = {published},
tppubtype = {inproceedings}
}
Park, G. W.; Panda, P.; Tankelevitch, L.; Rintel, S.
CoExplorer: Generative AI Powered 2D and 3D Adaptive Interfaces to Support Intentionality in Video Meetings Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 9798400703317 (ISBN).
Abstract | Links | BibTeX | Tags: Adaptive interface, Adaptive user interface, design, Effectiveness, Facilitation, Generative AI, Goal, Goals, Intent recognition, Meeting, meetings, planning, Probes, Speech recognition, User interfaces, Video conferencing, Videoconferencing, Virtual Reality, Windowing
@inproceedings{park_coexplorer_2024,
title = {CoExplorer: Generative AI Powered 2D and 3D Adaptive Interfaces to Support Intentionality in Video Meetings},
author = {G. W. Park and P. Panda and L. Tankelevitch and S. Rintel},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194194151&doi=10.1145%2F3613905.3650797&partnerID=40&md5=f499444bdeb2a2031b185434cd5548ea},
doi = {10.1145/3613905.3650797},
isbn = {9798400703317 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Current online meeting technologies lack holistic support for reducing the effort of planning and running meetings. We present CoExplorer2D and CoExplorerVR, generative AI (GenAI)-driven technology probes for exploring the transformative potential of GenAI to augment these aspects of meetings. Before the meeting, each system generates tools that allow synthesis and ranking of the attendees' key issues for discussion, along with the likely phases a meeting would require to cover these issues. During the meeting, the systems use speech recognition to generate 2D or VR window layouts with appropriate applications and files for each phase, and to recognize the attendees' progress through the meeting's phases. We argue that these probes show the potential of GenAI to reduce the effort required for planning and running meetings, providing participants with more engaging and effective meeting experiences. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Adaptive interface, Adaptive user interface, design, Effectiveness, Facilitation, Generative AI, Goal, Goals, Intent recognition, Meeting, meetings, planning, Probes, Speech recognition, User interfaces, Video conferencing, Videoconferencing, Virtual Reality, Windowing},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, X. B.; Li, J. N.; Kim, D.; Chen, X.; Du, R.
Human I/O: Towards a Unified Approach to Detecting Situational Impairments Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070330-0 (ISBN).
Abstract | Links | BibTeX | Tags: Augmented Reality, Computational Linguistics, Context awareness, Context- awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording
@inproceedings{liu_human_2024,
title = {Human I/O: Towards a Unified Approach to Detecting Situational Impairments},
author = {X. B. Liu and J. N. Li and D. Kim and X. Chen and R. Du},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194891045&doi=10.1145%2f3613904.3642065&partnerID=40&md5=01b3ece7c1bc2a758126fce88a15d14e},
doi = {10.1145/3613904.3642065},
isbn = {979-840070330-0 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Situationally Induced Impairments and Disabilities (SIIDs) can significantly hinder user experience in contexts such as poor lighting, noise, and multi-tasking. While prior research has introduced algorithms and systems to address these impairments, they predominantly cater to specific tasks or environments and fail to accommodate the diverse and dynamic nature of SIIDs. We introduce Human I/O, a unified approach to detecting a wide range of SIIDs by gauging the availability of human input/output channels. Leveraging egocentric vision, multimodal sensing and reasoning with large language models, Human I/O achieves a 0.22 mean absolute error and an 82% accuracy in availability prediction across 60 in-the-wild egocentric video recordings in 32 different scenarios. Furthermore, while the core focus of our work is on the detection of SIIDs rather than the creation of adaptive user interfaces, we showcase the efficacy of our prototype via a user study with 10 participants. Findings suggest that Human I/O significantly reduces effort and improves user experience in the presence of SIIDs, paving the way for more adaptive and accessible interactive systems in the future. © 2024 Copyright held by the owner/author(s)},
keywords = {Augmented Reality, Computational Linguistics, Context awareness, Context- awareness, In contexts, Language Model, Large language model, large language models, Multi tasking, Multimodal sensing, Situational impairment, situational impairments, Specific tasks, Unified approach, User interfaces, Users' experiences, Video recording},
pubstate = {published},
tppubtype = {inproceedings}
}
Geurts, E.; Warson, D.; Ruiz, G. Rovelo
Boosting Motivation in Sports with Data-Driven Visualizations in VR Proceedings Article
In: ACM Int. Conf. Proc. Ser., Association for Computing Machinery, 2024, ISBN: 979-840071764-2 (ISBN).
Abstract | Links | BibTeX | Tags: Artificial intelligence, Asynchronous social interaction, Cycling, Data driven, Dynamics, Extended reality, Group dynamics, Language Model, Large language model, large language models, Motivation, Natural language processing systems, Real-world, Real-world data, Social interactions, Sports, User interface, User interfaces, Virtual Reality, Visualization, Visualizations
@inproceedings{geurts_boosting_2024,
title = {Boosting Motivation in Sports with Data-Driven Visualizations in VR},
author = {E. Geurts and D. Warson and G. Rovelo Ruiz},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195387493&doi=10.1145%2f3656650.3656669&partnerID=40&md5=ec69e7abe61e572a94261ad6bbfed11c},
doi = {10.1145/3656650.3656669},
isbn = {979-840071764-2 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
publisher = {Association for Computing Machinery},
abstract = {In recent years, the integration of Artificial Intelligence (AI) has sparked revolutionary progress across diverse domains, with sports applications being no exception. At the same time, using real-world data sources, such as GPS, weather, and traffic data, offers opportunities to improve the overall user engagement and effectiveness of such applications. Despite the substantial advancements, including proven success in mobile applications, there remains an untapped potential in leveraging these technologies to boost motivation and enhance social group dynamics in Virtual Reality (VR) sports solutions. Our innovative approach focuses on harnessing the power of AI and real-world data to facilitate the design of such VR systems. To validate our methodology, we conducted an exploratory study involving 18 participants, evaluating our approach within the context of indoor VR cycling. By incorporating GPX files and omnidirectional video (real-world data), we recreated a lifelike cycling environment in which users can compete with simulated cyclists navigating a chosen (real-world) route. Considering the user's performance and interactions with other cyclists, our system employs AI-driven natural language processing tools to generate encouraging and competitive messages automatically. The outcome of our study reveals a positive impact on motivation, competition dynamics, and the perceived sense of group dynamics when using real performance data alongside automatically generated motivational messages. This underscores the potential of AI-driven enhancements in user interfaces to not only optimize performance but also foster a more engaging and supportive sports environment. © 2024 ACM.},
keywords = {Artificial intelligence, Asynchronous social interaction, Cycling, Data driven, Dynamics, Extended reality, Group dynamics, Language Model, Large language model, large language models, Motivation, Natural language processing systems, Real-world, Real-world data, Social interactions, Sports, User interface, User interfaces, Virtual Reality, Visualization, Visualizations},
pubstate = {published},
tppubtype = {inproceedings}
}