AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2025
Ding, S.; Yalla, J. P.; Chen, Y.
Demo Abstract: RAG-Driven 3D Question Answering in Edge-Assisted Virtual Reality (Proceedings Article)
In: Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331543709.
Tags: Edge computing, Edge server, Interface states, Knowledge database, Language Model, Local knowledge, Office environments, Question Answering, Real-time, User interaction, User interfaces, Virtual environments, Virtual Reality, Virtual reality system, Virtual-reality environment
@inproceedings{ding_demo_2025,
title = {Demo Abstract: RAG-Driven 3D Question Answering in Edge-Assisted Virtual Reality},
author = {S. Ding and J. P. Yalla and Y. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105017970015&doi=10.1109%2FINFOCOMWKSHPS65812.2025.11152992&partnerID=40&md5=0e079de018ae9c4a564b98c304a9ea6c},
doi = {10.1109/INFOCOMWKSHPS65812.2025.11152992},
isbn = {9798331543709},
year = {2025},
date = {2025-01-01},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The rapid development of large language models (LLMs) has created new opportunities in 3D question answering (3D-QA) for virtual reality (VR). 3D-QA enhances user interaction by answering questions about virtual environments. However, performing 3D-QA in VR systems using LLM-based approaches is computation-intensive. Furthermore, general LLMs tend to generate inaccurate responses as they lack context-specific information in VR environments. To mitigate these limitations, we propose OfficeVR-QA, a 3D-QA framework for edge-assisted VR to alleviate the resource constraints of VR devices with the help of edge servers, demonstrated in a virtual office environment. To improve the accuracy of the generated answers, the edge server of OfficeVR-QA hosts retrieval-augmented generation (RAG) that augments LLMs with external knowledge retrieved from a local knowledge database extracted from VR environments and users. During an interactive demo, OfficeVR-QA will continuously update the local knowledge database in real time by transmitting participants' position and orientation data to the edge server, enabling adaptive responses to changes in the participants' states. Participants will navigate a VR office environment, interact with a VR user interface to ask questions, and observe the accuracy of dynamic responses based on their real-time state changes. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Edge computing, Edge server, Interface states, Knowledge database, Language Model, Local knowledge, Office environments, Question Answering, Real-time, User interaction, User interfaces, Virtual environments, Virtual Reality, Virtual reality system, Virtual-reality environment},
pubstate = {published},
tppubtype = {inproceedings}
}
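The abstract above outlines an edge-hosted retrieval-augmented generation (RAG) pipeline: the edge server keeps a local knowledge database in sync with the participant's position and orientation and retrieves context-specific facts before prompting the LLM. The Python sketch below illustrates only that retrieve-then-prompt pattern under simplifying assumptions; every name in it (LocalKnowledgeBase, build_prompt, the bag-of-words embed) is hypothetical and does not reflect the OfficeVR-QA implementation.

# Minimal sketch of a RAG loop over a local VR knowledge base (hypothetical
# names; not the OfficeVR-QA implementation). Facts about the virtual office
# and the user's live pose are stored as text snippets; a question retrieves
# the top-k snippets by cosine similarity over a toy bag-of-words "embedding"
# and packs them into an LLM prompt.

import math
from collections import Counter

def embed(text: str) -> Counter:
    """Toy bag-of-words 'embedding'; a real system would use a neural encoder."""
    return Counter(text.lower().split())

def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[t] * b[t] for t in a)
    na = math.sqrt(sum(v * v for v in a.values()))
    nb = math.sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0

class LocalKnowledgeBase:
    def __init__(self):
        self.facts: list[str] = []

    def add(self, fact: str) -> None:
        self.facts.append(fact)

    def update_user_state(self, position, orientation) -> None:
        # Pose updates streamed from the headset become retrievable facts.
        self.add(f"user position {position} orientation {orientation}")

    def retrieve(self, question: str, k: int = 3) -> list[str]:
        q = embed(question)
        ranked = sorted(self.facts, key=lambda f: cosine(q, embed(f)), reverse=True)
        return ranked[:k]

def build_prompt(question: str, kb: LocalKnowledgeBase) -> str:
    # The assembled prompt would be sent to the LLM hosted on the edge server.
    context = "\n".join(kb.retrieve(question))
    return f"Answer using only this context:\n{context}\n\nQuestion: {question}"

kb = LocalKnowledgeBase()
kb.add("the printer is in the copy room next to the kitchen")
kb.update_user_state((2.0, 0.0, 5.5), (0.0, 90.0, 0.0))
print(build_prompt("Where is the nearest printer?", kb))

In the paper's setting, retrieval would run against richer scene and user-state descriptions on the edge server, and the assembled prompt would be answered by the hosted LLM rather than printed.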
2024
Zheng, P.; Li, C.; Fan, J.; Wang, L.
A vision-language-guided and deep reinforcement learning-enabled approach for unstructured human-robot collaborative manufacturing task fulfilment (Journal Article)
In: CIRP Annals, vol. 73, no. 1, pp. 341–344, 2024, ISSN: 0007-8506.
Tags: Collaboration task, Collaborative manufacturing, Deep learning, Helmet mounted displays, Human robots, Human-centric, Human-guided robot learning, Human-Robot Collaboration, Interface states, Manipulators, Manufacturing system, Manufacturing tasks, Mixed reality, Mixed reality head-mounted displays, Reinforcement Learning, Reinforcement learnings, Robot vision, Smart manufacturing
@article{zheng_vision-language-guided_2024,
title = {A vision-language-guided and deep reinforcement learning-enabled approach for unstructured human-robot collaborative manufacturing task fulfilment},
author = {P. Zheng and C. Li and J. Fan and L. Wang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190754943&doi=10.1016%2fj.cirp.2024.04.003&partnerID=40&md5=59c453e1931e912472e76b86b77a881b},
doi = {10.1016/j.cirp.2024.04.003},
issn = {0007-8506},
year = {2024},
date = {2024-01-01},
journal = {CIRP Annals},
volume = {73},
number = {1},
pages = {341–344},
abstract = {Human-Robot Collaboration (HRC) has emerged as a pivot in contemporary human-centric smart manufacturing scenarios. However, the fulfilment of HRC tasks in unstructured scenes brings many challenges to be overcome. In this work, mixed reality head-mounted display is modelled as an effective data collection, communication, and state representation interface/tool for HRC task settings. By integrating vision-language cues with large language model, a vision-language-guided HRC task planning approach is firstly proposed. Then, a deep reinforcement learning-enabled mobile manipulator motion control policy is generated to fulfil HRC task primitives. Its feasibility is demonstrated in several HRC unstructured manufacturing tasks with comparative results. © 2024 The Author(s)},
keywords = {Collaboration task, Collaborative manufacturing, Deep learning, Helmet mounted displays, Human robots, Human-centric, Human-guided robot learning, Human-Robot Collaboration, Interface states, Manipulators, Manufacturing system, Manufacturing tasks, Mixed reality, Mixed reality head-mounted displays, Reinforcement Learning, Reinforcement learnings, Robot vision, Smart manufacturing},
pubstate = {published},
tppubtype = {article}
}
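This entry couples vision-language-guided task planning with a deep reinforcement learning motion policy executed by a mobile manipulator. As a hedged illustration of that two-stage structure only, the sketch below substitutes a keyword-rule planner for the vision-language model and a toy tabular Q-learning loop for the deep RL controller; the primitive names and training parameters are invented for the example and are not the authors' method.

# Schematic sketch (not the paper's system): a stub planner maps a verbal HRC
# instruction to task primitives, and a toy tabular Q-learning policy stands in
# for the deep RL motion controller that executes each primitive.

import random

def plan_task(instruction: str) -> list[str]:
    """Stub task planner: keyword rules stand in for vision-language-guided planning."""
    primitives = []
    if "fetch" in instruction or "bring" in instruction:
        primitives += ["navigate_to_part", "grasp_part", "navigate_to_human", "hand_over"]
    return primitives

def train_reach_policy(goal: int = 4, size: int = 5, episodes: int = 500):
    """Toy 1-D 'reach the goal cell' task trained with tabular Q-learning."""
    q = [[0.0, 0.0] for _ in range(size)]  # actions: 0 = left, 1 = right
    for _ in range(episodes):
        s = 0
        for _ in range(20):
            # Epsilon-greedy action selection, then a standard Q-learning update.
            a = random.randrange(2) if random.random() < 0.1 else max((0, 1), key=lambda x: q[s][x])
            s2 = min(max(s + (1 if a == 1 else -1), 0), size - 1)
            r = 1.0 if s2 == goal else -0.01
            q[s][a] += 0.1 * (r + 0.9 * max(q[s2]) - q[s][a])
            s = s2
            if s == goal:
                break
    return q

policy = train_reach_policy()
print(plan_task("fetch the bracket and bring it to the operator"))
print("greedy action per cell:", [max((0, 1), key=lambda a: row[a]) for row in policy])

A deep RL controller would replace the lookup table with a neural policy over continuous manipulator and scene states, but the reward-driven update shown here illustrates the same underlying idea.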