AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops and books.
2024
Weerasinghe, K.; Janapati, S.; Ge, X.; Kim, S.; Iyer, S.; Stankovic, J. A.; Alemzadeh, H.
Real-Time Multimodal Cognitive Assistant for Emergency Medical Services Proceedings Article
In: Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI, pp. 85–96, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037025-6.
Tags: Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, Machine learning, Multi-modal, Real-time, Service protocols, Smart Health, Speech recognition, State of the art
@inproceedings{weerasinghe_real-time_2024,
title = {Real-Time Multimodal Cognitive Assistant for Emergency Medical Services},
author = {K. Weerasinghe and S. Janapati and X. Ge and S. Kim and S. Iyer and J. A. Stankovic and H. Alemzadeh},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197769304&doi=10.1109%2fIoTDI61053.2024.00012&partnerID=40&md5=a3b7cf14e46ecb2d4e49905fb845f2c9},
doi = {10.1109/IoTDI61053.2024.00012},
isbn = {979-835037025-6},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI},
pages = {85–96},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Emergency Medical Services (EMS) responders often operate under time-sensitive conditions, facing cognitive overload and inherent risks, requiring essential skills in critical thinking and rapid decision-making. This paper presents CognitiveEMS, an end-to-end wearable cognitive assistant system that can act as a collaborative virtual partner engaging in the real-time acquisition and analysis of multimodal data from an emergency scene and interacting with EMS responders through Augmented Reality (AR) smart glasses. CognitiveEMS processes the continuous streams of data in real-time and leverages edge computing to provide assistance in EMS protocol selection and intervention recognition. We address key technical challenges in real-time cognitive assistance by introducing three novel components: (i) a Speech Recognition model that is fine-tuned for real-world medical emergency conversations using simulated EMS audio recordings, augmented with synthetic data generated by large language models (LLMs); (ii) an EMS Protocol Prediction model that combines state-of-the-art (SOTA) tiny language models with EMS domain knowledge using graph-based attention mechanisms; (iii) an EMS Action Recognition module which leverages multimodal audio and video data and protocol predictions to infer the intervention/treatment actions taken by the responders at the incident scene. Our results show that for speech recognition we achieve superior performance compared to SOTA (WER of 0.290 vs. 0.618) on conversational data. Our protocol prediction component also significantly outperforms SOTA (top-3 accuracy of 0.800 vs. 0.200) and the action recognition achieves an accuracy of 0.727, while maintaining an end-to-end latency of 3.78s for protocol prediction on the edge and 0.31s on the server. © 2024 IEEE.},
keywords = {Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, Machine learning, Multi-modal, Real-time, Service protocols, Smart Health, Speech recognition, State of the art},
pubstate = {published},
tppubtype = {inproceedings}
}
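The three components described in this abstract form a sequential pipeline: streaming speech recognition feeds protocol prediction, which in turn conditions action recognition. As a rough illustration of that data flow only, here is a minimal Python sketch; every stage function is a hypothetical stub standing in for the paper's fine-tuned models, which are not reproduced here.

# Minimal sketch of a CognitiveEMS-style pipeline, per the abstract above:
# speech recognition -> EMS protocol prediction -> action recognition.
# All stage bodies are invented placeholders; only the chaining is illustrated.
from dataclasses import dataclass

@dataclass
class Assistance:
    transcript: str
    top3_protocols: list[str]
    action: str

def transcribe(audio_chunk: bytes) -> str:
    # Stand-in for the fine-tuned speech recognition model (WER 0.290 reported).
    return "patient unresponsive, not breathing"

def predict_protocols(transcript: str) -> list[str]:
    # Stand-in for the tiny-LM + domain-knowledge graph-attention predictor.
    return ["Cardiac Arrest", "Respiratory Distress", "General Trauma"]

def recognize_action(transcript: str, video_frame: bytes, protocols: list[str]) -> str:
    # Stand-in for the multimodal (audio + video + protocol) action recognizer.
    return "chest compressions"

def assist(audio_chunk: bytes, video_frame: bytes) -> Assistance:
    transcript = transcribe(audio_chunk)
    protocols = predict_protocols(transcript)
    action = recognize_action(transcript, video_frame, protocols)
    return Assistance(transcript, protocols, action)

print(assist(b"", b""))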
Park, G. W.; Panda, P.; Tankelevitch, L.; Rintel, S.
CoExplorer: Generative AI Powered 2D and 3D Adaptive Interfaces to Support Intentionality in Video Meetings Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2024, ISBN: 979-840070331-7.
Tags: Adaptive user interface, Design, Effectiveness, Facilitation, Generative AI, Goals, Intent recognition, Meetings, Planning, Probes, Speech recognition, User interfaces, Videoconferencing, Virtual Reality, Windowing
@inproceedings{park_coexplorer_2024,
title = {CoExplorer: Generative AI Powered 2D and 3D Adaptive Interfaces to Support Intentionality in Video Meetings},
author = {G. W. Park and P. Panda and L. Tankelevitch and S. Rintel},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85194194151&doi=10.1145%2f3613905.3650797&partnerID=40&md5=19d083f7aee201cc73fe0c2386f04bb6},
doi = {10.1145/3613905.3650797},
isbn = {979-840070331-7},
year = {2024},
date = {2024-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Current online meeting technologies lack holistic support for reducing the effort of planning and running meetings. We present CoExplorer2D and CoExplorerVR, generative AI (GenAI)-driven technology probes for exploring the transformative potential of GenAI to augment these aspects of meetings. Before the meeting, each system generates tools that allow synthesis and ranking of attendees' key issues for discussion, and the likely phases that a meeting would require to cover these issues. During the meeting, each system uses speech recognition to generate 2D or VR window layouts with appropriate applications and files for each phase, and recognizes the attendees' progress through the meeting's phases. We argue that these probes show the potential of GenAI to reduce the effort required for planning and running meetings, providing participants with more engaging and effective meeting experiences. © 2024 Association for Computing Machinery. All rights reserved.},
keywords = {Adaptive user interface, Design, Effectiveness, Facilitation, Generative AI, Goals, Intent recognition, Meetings, Planning, Probes, Speech recognition, User interfaces, Videoconferencing, Virtual Reality, Windowing},
pubstate = {published},
tppubtype = {inproceedings}
}
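To make the during-meeting behaviour concrete, the toy Python sketch below switches a window layout based on which pre-generated phase best matches the live transcript. The phase names, keywords, and window lists are invented examples, and keyword counting stands in for the probes' actual GenAI-based phase recognition.

# Toy sketch of phase-adaptive layouts in the spirit of CoExplorer: phases are
# generated before the meeting; the live transcript selects the active phase.
PHASES = [
    {"name": "issue ranking", "keywords": {"priorit", "rank", "agenda"},
     "windows": ["issue_board.app", "agenda.docx"]},
    {"name": "design review", "keywords": {"mockup", "design", "prototype"},
     "windows": ["figma.app", "spec.pdf"]},
    {"name": "wrap-up", "keywords": {"action item", "summary", "next step"},
     "windows": ["notes.app"]},
]

def current_phase(transcript_snippet: str) -> dict:
    # Score each phase by how many of its keywords occur in the snippet.
    text = transcript_snippet.lower()
    scores = [sum(k in text for k in p["keywords"]) for p in PHASES]
    return PHASES[max(range(len(PHASES)), key=scores.__getitem__)]

phase = current_phase("Let's rank the agenda items by priority first.")
print(phase["name"], "->", phase["windows"])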
Imamura, S.; Hiraki, H.; Rekimoto, J.
Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 588–590, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037449-0.
Tags: Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human-computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real-time, Speech recognition, Support systems, Virtual Reality
@inproceedings{imamura_serendipity_2024,
title = {Serendipity Wall: A Discussion Support System Using Real-Time Speech Recognition and Large Language Model},
author = {S. Imamura and H. Hiraki and J. Rekimoto},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195557406&doi=10.1109%2fVRW62533.2024.00113&partnerID=40&md5=22c393aa1ea99a9e64d382f1b56fb877},
doi = {10.1109/VRW62533.2024.00113},
isbn = {979-835037449-0},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {588–590},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Group discussions are important for exploring new ideas. One method to support discussions is presenting relevant keywords or images, but such methods tend not to take the context of the conversation into account. We therefore propose a system that advances group discussions by presenting related information in response to the ongoing conversation. As a specific example, this study addressed academic discussions among HCI researchers. During brainstorming sessions, the system continuously transcribes the dialogue and generates embedding vectors of the discussion. These vectors are matched against those of existing research articles to identify relevant studies, which the system then presents on a large display, summarized by an LLM. In a case study, the system broadened the topics of discussion and facilitated the acquisition of new knowledge. Because a larger display area is desirable in terms of information volume and size, virtual reality environments with headsets could also be suitable for this system, in addition to large displays. © 2024 IEEE.},
keywords = {Brainstorming sessions, Discussion support, Embeddings, Group discussions, Human-computer interaction (HCI), Human-centered computing, Language Model, Large displays, Real-time, Speech recognition, Support systems, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
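The retrieval step this abstract describes is a standard embedding-similarity match. A minimal sketch, assuming toy vectors and invented paper titles in place of a real sentence-embedding model and article index:

# Minimal sketch of the matching step: embed the rolling transcript, compare it
# against precomputed article embeddings by cosine similarity, surface the best hits.
import math

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b)))

article_index = {  # paper title -> toy embedding (a real system would precompute these)
    "Embodied brainstorming in VR": [0.9, 0.1, 0.3],
    "Tabletop discussion support": [0.2, 0.8, 0.5],
}

def related_articles(discussion_embedding: list[float], k: int = 1) -> list[str]:
    ranked = sorted(article_index,
                    key=lambda t: cosine(discussion_embedding, article_index[t]),
                    reverse=True)
    return ranked[:k]

print(related_articles([0.85, 0.2, 0.25]))  # -> ['Embodied brainstorming in VR']

In the paper's setting the article embeddings would be built offline from an existing research corpus, with an LLM summarizing the retrieved studies before they reach the shared display.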
Liang, X.; Wang, Y.; Yan, F.; Ouyang, Z.; Hu, Y.; Luo, S.
Reborn of the White Bone Demon: Role-Playing Game Design Using Generative AI in XR Proceedings Article
In: Spencer, S. N. (Ed.): Proc. - SIGGRAPH Asia Posters, SA, Association for Computing Machinery, Inc, 2024, ISBN: 979-840071138-1.
Tags: Artificial intelligence techniques, Emotion Recognition, Game design, Gaming experiences, Real-time, Role-playing game, Speech emotion recognition, Speech enhancement, Speech recognition, Time based
@inproceedings{liang_reborn_2024,
title = {Reborn of the White Bone Demon: Role-Playing Game Design Using Generative AI in XR},
author = {X. Liang and Y. Wang and F. Yan and Z. Ouyang and Y. Hu and S. Luo},
editor = {Spencer, S. N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215520655&doi=10.1145%2f3681756.3697949&partnerID=40&md5=a255cbdfd881f70df82341875f16d546},
doi = {10.1145/3681756.3697949},
isbn = {979-840071138-1},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - SIGGRAPH Asia Posters, SA},
publisher = {Association for Computing Machinery, Inc},
abstract = {This paper explores the application of Generative Artificial Intelligence (GenAI) techniques to the design of Role Playing Games (RPGs) in Extended Reality (XR) environments. We developed the game Reborn of the White Bone Demon, which uses AI speech emotion recognition to generate storylines and game assets in real time based on the player's conversations with NPCs. This enhances the player's immersion and personalized experience and demonstrates the potential of GenAI to enrich the XR gaming experience. © 2024 Copyright held by the owner/author(s).},
keywords = {Artificial intelligence techniques, Emotion Recognition, Game design, Gaming experiences, Real-time, Role-playing game, Speech emotion recognition, Speech enhancement, Speech recognition, Time based},
pubstate = {published},
tppubtype = {inproceedings}
}
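As a rough illustration of the emotion-driven branching the abstract describes, the toy sketch below maps a detected emotion label to the next story beat. The labels, branches, and the punctuation-based detector are all invented stand-ins for the game's actual speech emotion recognition and GenAI asset generation.

# Toy sketch: a speech-emotion label selects the next storyline branch.
STORY_BRANCHES = {
    "fear":  "The White Bone Demon senses hesitation and presses the attack.",
    "anger": "Your defiance provokes the demon into revealing its true form.",
    "calm":  "The demon, unsettled by your composure, offers an uneasy truce.",
}

def detect_emotion(utterance: str) -> str:
    # Stand-in for a real speech-emotion-recognition model over the audio signal.
    return "anger" if "!" in utterance else "calm"

def next_scene(player_utterance: str) -> str:
    return STORY_BRANCHES[detect_emotion(player_utterance)]

print(next_scene("I will never bow to you!"))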
Christiansen, F. R.; Hollensberg, L. Nø.; Jensen, N. B.; Julsgaard, K.; Jespersen, K. N.; Nikolov, I.
Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options Proceedings Article
In: Spencer, S. N. (Ed.): Proc. ACM Symp. Virtual Reality Softw. Technol. VRST, Association for Computing Machinery, 2024, ISBN: 979-840070535-9.
Tags: Comparative studies, Computer simulation languages, Economic and social effects, Immersive systems, Language Model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality (VR)
@inproceedings{christiansen_exploring_2024,
title = {Exploring Presence in Interactions with LLM-Driven NPCs: A Comparative Study of Speech Recognition and Dialogue Options},
author = {F. R. Christiansen and L. Nø. Hollensberg and N. B. Jensen and K. Julsgaard and K. N. Jespersen and I. Nikolov},
editor = {Spencer, S. N.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212512351&doi=10.1145%2f3641825.3687716&partnerID=40&md5=56ec6982b399fd97196ea73e7c659c31},
doi = {10.1145/3641825.3687716},
isbn = {979-840070535-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. ACM Symp. Virtual Reality Softw. Technol. VRST},
publisher = {Association for Computing Machinery},
abstract = {Combining modern technologies like large language models (LLMs), speech-to-text, and text-to-speech can enhance immersion in virtual reality (VR) environments. However, challenges exist in effectively implementing LLMs and educating users. This paper explores implementing LLM-powered virtual social actors and facilitating user communication. We developed a murder mystery game where users interact with LLM-based non-playable characters (NPCs) through interrogation, clue-gathering, and exploration. Two versions were tested: one using speech recognition and another with traditional dialogue boxes. Both provided similar social presence; users felt more immersed with speech recognition but found it overwhelming, whereas the dialogue version was more challenging. Slow NPC response times were a source of frustration, highlighting the need for faster generation or better masking for a seamless experience. © 2024 Owner/Author.},
keywords = {Comparative studies, Computer simulation languages, Economic and social effects, Immersive systems, Language Model, Large language models (LLM), Model-driven, Modern technologies, Non-playable character, NPC, Presence, Social Actors, Speech enhancement, Speech recognition, Text to speech, Virtual environments, Virtual Reality (VR)},
pubstate = {published},
tppubtype = {inproceedings}
}
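The speech-recognition condition this study tests is, in essence, a speech-to-text, LLM, text-to-speech loop whose end-to-end latency drives the response-time frustration the abstract notes. A minimal sketch under stated assumptions: all three stages are placeholder stubs and the persona is invented.

# Hedged sketch of the STT -> LLM -> TTS loop behind an LLM-driven NPC,
# timing the full turn since slow responses are what breaks presence.
import time

def speech_to_text(audio: bytes) -> str:
    # Stand-in for a real speech recognizer over the player's microphone audio.
    return "Where were you on the night of the murder?"

def npc_reply(utterance: str, persona: str) -> str:
    # Stand-in for an LLM call conditioned on the NPC's persona and mystery state.
    return f"({persona}) I was in the library all evening, I swear."

def text_to_speech(reply: str) -> bytes:
    # Stand-in for a TTS engine returning synthesized audio.
    return reply.encode()

def interrogate(audio: bytes, persona: str = "nervous butler") -> tuple[bytes, float]:
    start = time.perf_counter()
    question = speech_to_text(audio)
    reply = npc_reply(question, persona)
    spoken = text_to_speech(reply)
    latency = time.perf_counter() - start  # long turns would need masking behaviours
    return spoken, latency

audio_out, secs = interrogate(b"")
print(f"{secs:.4f}s", audio_out.decode())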