AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Saengthongkam, S.; Ali, S.; Chokphantavee, S.; Chokphantavee, S.; Noisri, S.; Vanichchanunt, P.; Butcharoen, S.; Boontevee, S.; Phanomchoeng, G.; Deepaisarn, S.; Wuttisittikulkij, L.
AI-Powered Virtual Assistants in the Metaverse: Leveraging Retrieval-Augmented Generation for Smarter Interactions Proceedings Article
In: Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 9798331522230 (ISBN).
Abstract | Links | BibTeX | Tags: AI, Artificial intelligence, chatbot, Chatbots, Cosine similarity, Intelligent Agents, Load testing, Metaverse, Metaverses, On the spots, Performance, Search engines, Similarity scores, user experience, User query, User support, Users' satisfactions, Virtual assistants, Virtual Reality
@inproceedings{saengthongkam_ai-powered_2025,
  title     = {{AI}-Powered Virtual Assistants in the {Metaverse}: Leveraging Retrieval-Augmented Generation for Smarter Interactions},
  author    = {S. Saengthongkam and S. Ali and S. Chokphantavee and S. Chokphantavee and S. Noisri and P. Vanichchanunt and S. Butcharoen and S. Boontevee and G. Phanomchoeng and S. Deepaisarn and L. Wuttisittikulkij},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105014379689&doi=10.1109%2FECTI-CON64996.2025.11101141&partnerID=40&md5=3f81fb234377399184ad031c8aa65333},
  doi       = {10.1109/ECTI-CON64996.2025.11101141},
  isbn      = {9798331522230},
  year      = {2025},
  date      = {2025-01-01},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {The metaverse has evolved at an unprecedented pace, creating new demands for on the spot user support and more engaging digital encounters. This paper describes a chatbot system built for the NT metaverse that combines retrieval-based search with advanced generative AI methods to provide accurate, context-driven responses. At the core of our approach is Retrieval Augmented Generation (RAG), which adeptly interprets diverse user queries while sustaining high performance under concurrent usage, as evidenced by a cosine similarity score of 0.79. In addition to maintaining efficiency during load testing, the system manages compound queries with ease, enhancing user satisfaction in complex virtual environments. Although these results are promising, future upgrades such as integrating voice-based interactions, multilingual support, and adaptive learning could further expand the chatbot's utility. Overall, this study demonstrates the tangible benefits of AI-driven conversational agents in digital realms, laying the groundwork for richer, more intelligent user experiences in emerging metaverse platforms. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {AI, Artificial intelligence, chatbot, Chatbots, Cosine similarity, Intelligent Agents, Load testing, Metaverse, Metaverses, On the spots, Performance, Search engines, Similarity scores, user experience, User query, User support, Users' satisfactions, Virtual assistants, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lv, J.; Słowik, A.; Rani, S.; Kim, B. -G.; Chen, C. -M.; Kumari, S.; Li, K.; Lyu, X.; Jiang, H.
Multimodal Metaverse Healthcare: A Collaborative Representation and Adaptive Fusion Approach for Generative Artificial-Intelligence-Driven Diagnosis Journal Article
In: Research, vol. 8, 2025, ISSN: 20965168 (ISSN); 26395274 (ISSN), (Publisher: American Association for the Advancement of Science).
Abstract | Links | BibTeX | Tags: Adaptive fusion, Collaborative representations, Diagnosis, Electronic health record, Generative adversarial networks, Health care application, Healthcare environments, Immersive, Learning frameworks, Metaverses, Multi-modal, Multi-modal learning, Performance
@article{lv_multimodal_2025,
  title     = {Multimodal {Metaverse} Healthcare: A Collaborative Representation and Adaptive Fusion Approach for Generative Artificial-Intelligence-Driven Diagnosis},
  author    = {J. Lv and A. Słowik and S. Rani and B. -G. Kim and C. -M. Chen and S. Kumari and K. Li and X. Lyu and H. Jiang},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-86000613924&doi=10.34133%2Fresearch.0616&partnerID=40&md5=ce118b548f94bde494051760a217c33c},
  doi       = {10.34133/research.0616},
  issn      = {2096-5168; 2639-5274},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Research},
  volume    = {8},
  abstract  = {The metaverse enables immersive virtual healthcare environments, presenting opportunities for enhanced care delivery. A key challenge lies in effectively combining multimodal healthcare data and generative artificial intelligence abilities within metaverse-based healthcare applications, which is a problem that needs to be addressed. This paper proposes a novel multimodal learning framework for metaverse healthcare, MMLMH, based on collaborative intra- and intersample representation and adaptive fusion. Our framework introduces a collaborative representation learning approach that captures shared and modality-specific features across text, audio, and visual health data. By combining modality-specific and shared encoders with carefully formulated intrasample and intersample collaboration mechanisms, MMLMH achieves superior feature representation for complex health assessments. The framework’s adaptive fusion approach, utilizing attention mechanisms and gated neural networks, demonstrates robust performance across varying noise levels and data quality conditions. Experiments on metaverse healthcare datasets demonstrate MMLMH’s superior performance over baseline methods across multiple evaluation metrics. Longitudinal studies and visualization further illustrate MMLMH’s adaptability to evolving virtual environments and balanced performance across diagnostic accuracy, patient–system interaction efficacy, and data integration complexity. The proposed framework has a unique advantage in that a similar level of performance is maintained across various patient populations and virtual avatars, which could lead to greater personalization of healthcare experiences in the metaverse. MMLMH’s successful functioning in such complicated circumstances suggests that it can combine and process information streams from several sources. They can be successfully utilized in next-generation healthcare delivery through virtual reality. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: American Association for the Advancement of Science},
  keywords  = {Adaptive fusion, Collaborative representations, Diagnosis, Electronic health record, Generative adversarial networks, Health care application, Healthcare environments, Immersive, Learning frameworks, Metaverses, Multi-modal, Multi-modal learning, Performance},
  pubstate  = {published},
  tppubtype = {article}
}
Kai, W. -H.; Xing, K. -X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 01782789 (ISSN); 14322315 (ISSN), (Publisher: Springer Science and Business Media Deutschland GmbH).
Abstract | Links | BibTeX | Tags: 'current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
  title     = {Video-driven musical composition using large language model with memory-augmented state space},
  author    = {W. -H. Kai and K. -X. Xing},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2Fs00371-024-03606-w&partnerID=40&md5=71a40ea7584c5a5f210afc1c30aac468},
  doi       = {10.1007/s00371-024-03606-w},
  issn      = {0178-2789; 1432-2315},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Visual Computer},
  volume    = {41},
  number    = {5},
  pages     = {3345--3357},
  abstract  = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, the research work on LLms for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependency and maintaining high performance, while further decrease the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code released on https://github.com/kai211233/S2L2-V2M. © 2025 Elsevier B.V., All rights reserved.},
  note      = {Publisher: Springer Science and Business Media Deutschland GmbH},
  keywords  = {'current, Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space},
  pubstate  = {published},
  tppubtype = {article}
}
Lu, J.; Gao, J.; Feng, F.; He, Z.; Zheng, M.; Liu, K.; He, J.; Liao, B.; Xu, S.; Sun, K.; Mo, Y.; Peng, Q.; Luo, J.; Li, Q.; Lu, G.; Wang, Z.; Dong, J.; He, K.; Cheng, S.; Cao, J.; Jiao, H.; Zhang, P.; Ma, S.; Zhu, L.; Shi, C.; Zhang, Y.; Chen, Y.; Wang, W.; Zhu, S.; Li, X.; Wang, Q.; Liu, J.; Wang, C.; Lin, W.; Zhai, E.; Wu, J.; Liu, Q.; Fu, B.; Cai, D.
Alibaba Stellar: A New Generation RDMA Network for Cloud AI Proceedings Article
In: pp. 453–466, Association for Computing Machinery, Inc, 2025, ISBN: 9798400715242 (ISBN).
Abstract | Links | BibTeX | Tags: Access network, Cloud computing, Congestion control (communication), Containers, data center networking, Data center networkings, Language Model, Learning systems, Machine learning applications, Memory architecture, Network support, Network support for AI and machine learning application, network support for AI and machine learning applications, Performance, Program processors, Remote direct memory access, Stellars, Transport and congestion control, Virtual Reality, Virtualization
@inproceedings{lu_alibaba_2025,
  title     = {{Alibaba Stellar}: A New Generation {RDMA} Network for Cloud {AI}},
  author    = {J. Lu and J. Gao and F. Feng and Z. He and M. Zheng and K. Liu and J. He and B. Liao and S. Xu and K. Sun and Y. Mo and Q. Peng and J. Luo and Q. Li and G. Lu and Z. Wang and J. Dong and K. He and S. Cheng and J. Cao and H. Jiao and P. Zhang and S. Ma and L. Zhu and C. Shi and Y. Zhang and Y. Chen and W. Wang and S. Zhu and X. Li and Q. Wang and J. Liu and C. Wang and W. Lin and E. Zhai and J. Wu and Q. Liu and B. Fu and D. Cai},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105016208536&doi=10.1145%2F3718958.3750539&partnerID=40&md5=901fdd27c510072633f0390a0abfb653},
  doi       = {10.1145/3718958.3750539},
  isbn      = {9798400715242},
  year      = {2025},
  date      = {2025-01-01},
  pages     = {453--466},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {The rapid adoption of Large Language Models (LLMs) in cloud environments has intensified the demand for high-performance AI training and inference, where Remote Direct Memory Access (RDMA) plays a critical role. However, existing RDMA virtualization solutions, such as Single-Root Input/Output Virtualization (SR-IOV), face significant limitations in scalability, performance, and stability. These issues include lengthy container initialization times, hardware resource constraints, and inefficient traffic steering. To address these challenges, we propose Stellar, a new generation RDMA network for cloud AI. Stellar introduces three key innovations: Para-Virtualized Direct Memory Access (PVDMA) for on-demand memory pinning, extended Memory Translation Table (eMTT) for optimized GPU Direct RDMA (GDR) performance, and RDMA Packet Spray for efficient multi-path utilization. Deployed in our large-scale AI clusters, Stellar spins up virtual devices in seconds, reduces container initialization time by 15 times, and improves LLM training speed by up to 14%. Our evaluations demonstrate that Stellar significantly outperforms existing solutions, offering a scalable, stable, and high-performance RDMA network for cloud AI. © 2025 Elsevier B.V., All rights reserved.},
  keywords  = {Access network, Cloud computing, Congestion control (communication), Containers, data center networking, Data center networkings, Language Model, Learning systems, Machine learning applications, Memory architecture, Network support, Network support for AI and machine learning application, network support for AI and machine learning applications, Performance, Program processors, Remote direct memory access, Stellars, Transport and congestion control, Virtual Reality, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2024
Lee, L. -K.; Chan, E. H.; Tong, K. K. -L.; Wong, N. K. -H.; Wu, B. S. -Y.; Fung, Y. -C.; Fong, E. K. S.; U, U.; Wu, N. -I.
Utilizing Virtual Reality and Generative AI Chatbot for Job Interview Simulations Proceedings Article
In: Chui, K. T.; Hui, Y. K.; Yang, D.; Lee, L. -K.; Wong, L. -P.; Reynolds, B. L. (Ed.): Proc. - Int. Symp. Educ. Technol., ISET, pp. 209–212, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350361414 (ISBN).
Abstract | Links | BibTeX | Tags: chatbot, Chatbots, Computer interaction, Computer simulation languages, Generative adversarial networks, Generative AI, Hong-kong, Human computer interaction, ITS applications, Job interview simulation, Job interviews, Performance, Science graduates, User friendliness, Virtual environments, Virtual Reality
@inproceedings{lee_utilizing_2024,
  title     = {Utilizing Virtual Reality and Generative {AI} Chatbot for Job Interview Simulations},
  author    = {L. -K. Lee and E. H. Chan and K. K. -L. Tong and N. K. -H. Wong and B. S. -Y. Wu and Y. -C. Fung and E. K. S. Fong and U. U and N. -I. Wu},
  editor    = {K. T. Chui and Y. K. Hui and D. Yang and L. -K. Lee and L. -P. Wong and B. L. Reynolds},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85206582338&doi=10.1109%2FISET61814.2024.00048&partnerID=40&md5=c099b8565f348c8bf250c0a9e62cf864},
  doi       = {10.1109/ISET61814.2024.00048},
  isbn      = {9798350361414},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proc. - Int. Symp. Educ. Technol., ISET},
  pages     = {209--212},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  abstract  = {Stress and anxiety experienced by interviewees, particularly fresh graduates, would significantly impact their performance in job interviews. Due to the increased affordability and user-friendliness of virtual reality (VR), VR has seen a surge in its application within the educational sector. This paper presents the design and implementation of a job interview simulation system, leveraging VR and a generative AI chatbot to provide an immersive environment for computer science graduates in Hong Kong. The system aims to help graduates practice and familiarize themselves with various real-world scenarios of a job interview in English, Mandarin, and Cantonese, tailored to the unique language requirements of Hong Kong's professional environment. The system comprises three core modules: a mock question and answer reading module, an AI speech analysis module, and a virtual interview module facilitated by the generative AI chatbot, ChatGPT. We anticipate that the proposed simulator will provide valuable insights to education practitioners on utilizing VR and generative AI for job interview training, extending beyond computer science graduates. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {chatbot, Chatbots, Computer interaction, Computer simulation languages, Generative adversarial networks, Generative AI, Hong-kong, Human computer interaction, ITS applications, Job interview simulation, Job interviews, Performance, Science graduates, User friendliness, Virtual environments, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kang, Z.; Liu, Y.; Zheng, J.; Sun, Z.
Revealing the Difficulty in Jailbreak Defense on Language Models for Metaverse Proceedings Article
In: Gong, Q.; He, X. (Ed.): SocialMeta - Proc. Int. Workshop Soc. Metaverse Comput., Sens. Netw., Part: ACM SenSys, pp. 31–37, Association for Computing Machinery, Inc, 2024, ISBN: 9798400712999 (ISBN).
Abstract | Links | BibTeX | Tags: % reductions, Attack strategies, Computer simulation languages, Defense, Digital elevation model, Guard rails, Jailbreak, Language Model, Large language model, Metaverse Security, Metaverses, Natural languages, Performance, Virtual Reality
@inproceedings{kang_revealing_2024,
  title     = {Revealing the Difficulty in Jailbreak Defense on Language Models for {Metaverse}},
  author    = {Z. Kang and Y. Liu and J. Zheng and Z. Sun},
  editor    = {Q. Gong and X. He},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212189363&doi=10.1145%2F3698387.3699998&partnerID=40&md5=7a7b1260748719041c58ac9e22e79633},
  doi       = {10.1145/3698387.3699998},
  isbn      = {9798400712999},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {SocialMeta - Proc. Int. Workshop Soc. Metaverse Comput., Sens. Netw., Part: ACM SenSys},
  pages     = {31--37},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {Large language models (LLMs) have demonstrated exceptional capabilities in natural language processing tasks, fueling innovations in emerging areas such as the metaverse. These models enable dynamic virtual communities, enhancing user interactions and revolutionizing industries. However, their increasing deployment exposes vulnerabilities to jailbreak attacks, where adversaries can manipulate LLM-driven systems to generate harmful content. While various defense mechanisms have been proposed, their efficacy against diverse jailbreak techniques remains unclear. This paper addresses this gap by evaluating the performance of three popular defense methods (Backtranslation, Self-reminder, and Paraphrase) against different jailbreak attack strategies (GCG, BEAST, and Deepinception), while also utilizing three distinct models. Our findings reveal that while defenses are highly effective against optimization-based jailbreak attacks and reduce the attack success rate by 79% on average, they struggle in defending against attacks that alter attack motivations. Additionally, methods relying on self-reminding perform better when integrated with models featuring robust safety guardrails. For instance, Llama2-7b shows a 100% reduction in Attack Success Rate, while Vicuna-7b and Mistral-7b, lacking safety alignment, exhibit a lower average reduction of 65.8%. This study highlights the challenges in developing universal defense solutions for securing LLMs in dynamic environments like the metaverse. Furthermore, our study highlights that the three distinct models utilized demonstrate varying initial defense performance against different jailbreak attack strategies, underscoring the complexity of effectively securing LLMs. © 2024 Elsevier B.V., All rights reserved.},
  keywords  = {% reductions, Attack strategies, Computer simulation languages, Defense, Digital elevation model, Guard rails, Jailbreak, Language Model, Large language model, Metaverse Security, Metaverses, Natural languages, Performance, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2023
Si, J.; Song, J.; Woo, M.; Kim, D.; Lee, Y.; Kim, S.
Generative AI Models for Virtual Interviewers: Applicability and Performance Comparison Proceedings Article
In: IET. Conf. Proc., pp. 27–28, Institution of Engineering and Technology, 2023, ISSN: 27324494 (ISSN).
Abstract | Links | BibTeX | Tags: 3D Generation, College admissions, Digital elevation model, Effective practices, Generative AI, Job hunting, Metaverse, Metaverses, Performance, Performance comparison, Virtual environments, Virtual Interview, Virtual Reality
@inproceedings{si_generative_2023,
  title     = {Generative {AI} Models for Virtual Interviewers: Applicability and Performance Comparison},
  author    = {J. Si and J. Song and M. Woo and D. Kim and Y. Lee and S. Kim},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85203492324&doi=10.1049%2ficp.2024.0193&partnerID=40&md5=84eb48f6b51c941da9c77fa3aba46262},
  doi       = {10.1049/icp.2024.0193},
  issn      = {2732-4494},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {IET. Conf. Proc.},
  volume    = {2023},
  pages     = {27--28},
  publisher = {Institution of Engineering and Technology},
  abstract  = {Interviewing processes are considered crucial steps in job hunting or college admissions, and effective practice plays a significant role in successfully navigating these stages. Although various platforms have recently emerged for practicing virtual interviews, they often lack the tension and realism of actual interviews due to repetitive and formal content. This study aims to analyze and compare the performance of different generative AI models for creating a diverse set of virtual interviewers. Specifically, we examine the characteristics and applicability of each model, as well as the differences and advantages between them, and evaluate the performance of the generated virtual interviewers. Through this analysis, we aim to propose solutions for enhancing the practicality and efficiency of virtual interviews. © The Institution of Engineering & Technology 2023.},
  keywords  = {3D Generation, College admissions, Digital elevation model, Effective practices, Generative AI, Job hunting, Metaverse, Metaverses, Performance, Performance comparison, Virtual environments, Virtual Interview, Virtual Reality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}