AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2025
Li, C.; Da, F.
Refined dense face alignment through image matching Journal Article
In: Visual Computer, vol. 41, no. 1, pp. 157–171, 2025, ISSN: 0178-2789.
@article{li_refined_2025,
title = {Refined dense face alignment through image matching},
author = {C. Li and F. Da},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187924785&doi=10.1007%2fs00371-024-03316-3&partnerID=40&md5=839834c6ff3320398d5ef75b055947cb},
doi = {10.1007/s00371-024-03316-3},
issn = {0178-2789},
year = {2025},
date = {2025-01-01},
journal = {Visual Computer},
volume = {41},
number = {1},
pages = {157–171},
abstract = {Face alignment is the foundation of building 3D avatars for virtual communication in the metaverse, human-computer interaction, AI-generated content, etc., and therefore, it is critical that face deformation is reflected precisely to better convey expression, pose and identity. However, misalignment exists in the currently best methods that fit a face model to a target image and can be easily captured by human perception, thus degrading the reconstruction quality. The main reason is that the widely used metrics for training, including the landmark re-projection loss, pixel-wise loss and perception-level loss, are insufficient to address the misalignment and suffer from ambiguity and local minima. To address misalignment, we propose an image MAtchinG-driveN dEnse geomeTrIC supervision (MAGNETIC). Specifically, we treat face alignment as a matching problem and establish pixel-wise correspondences between the target and rendered images. Then reconstructed facial points are guided towards their corresponding points on the target image, thus improving reconstruction. Synthesized image pairs are mixed up with face outliers to simulate the target and rendered images with ground-truth pixel-wise correspondences to enable the training of a robust prediction network. Compared with existing methods that turn to 3D scans for dense geometric supervision, our method reaches comparable shape reconstruction results with much lower effort. Experimental results on the NoW testset show that we reach the state-of-the-art among all self-supervised methods and even outperform methods using photo-realistic images. We also achieve comparable results with the state-of-the-art on the benchmark of Feng et al. Code will be available at: github.com/ChunLLee/ReconstructionFromMatching. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {3D Avatars, Alignment, Dense geometric supervision, Face alignment, Face deformations, Face reconstruction, Geometry, Human computer interaction, Image enhancement, Image matching, Image Reconstruction, Metaverses, Outlier mixup, Pixels, Rendered images, Rendering (computer graphics), State of the art, Statistics, Target images, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {article}
}
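The core idea in the abstract, treating alignment as a matching problem and pulling each reconstructed facial point toward its matched pixel in the target image, can be illustrated with a small loss term. The sketch below assumes PyTorch, an externally predicted correspondence field, and an outlier mask; all names and tensor shapes are illustrative assumptions, not the authors' released code (see their repository linked above).

import torch
import torch.nn.functional as F

def matching_supervision_loss(proj_pts, flow, valid):
    # proj_pts: (N, 2) 2D projections of the reconstructed facial points,
    #           in (x, y) pixel coordinates of the rendered image.
    # flow:     (H, W, 2) matched target-image coordinate for each rendered
    #           pixel, e.g. from a network trained with outlier mixup.
    # valid:    (H, W) boolean mask of reliable (non-outlier) matches.
    H, W, _ = flow.shape
    # Normalize point coordinates to [-1, 1] for grid_sample lookups.
    gx = 2.0 * proj_pts[:, 0] / (W - 1) - 1.0
    gy = 2.0 * proj_pts[:, 1] / (H - 1) - 1.0
    grid = torch.stack([gx, gy], dim=-1).view(1, 1, -1, 2)  # (1, 1, N, 2)
    flow_chw = flow.permute(2, 0, 1).unsqueeze(0)           # (1, 2, H, W)
    # Look up the matched target location under each projected point.
    target = F.grid_sample(flow_chw, grid, align_corners=True).view(2, -1).t()
    keep = F.grid_sample(valid[None, None].float(), grid,
                         align_corners=True).view(-1) > 0.5
    # L1 distance pulls each reconstructed point toward its match.
    return (proj_pts - target.detach()).abs().sum(dim=1)[keep].mean()

Minimizing such a term with respect to the face-model parameters behind proj_pts supplies dense geometric supervision without 3D scans, which is the trade-off the abstract emphasizes.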
2024
Weerasinghe, K.; Janapati, S.; Ge, X.; Kim, S.; Iyer, S.; Stankovic, J. A.; Alemzadeh, H.
Real-Time Multimodal Cognitive Assistant for Emergency Medical Services Proceedings Article
In: Proceedings of the ACM/IEEE Conference on Internet of Things Design and Implementation (IoTDI), pp. 85–96, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7025-6.
@inproceedings{weerasinghe_real-time_2024,
title = {Real-Time Multimodal Cognitive Assistant for Emergency Medical Services},
author = {K. Weerasinghe and S. Janapati and X. Ge and S. Kim and S. Iyer and J. A. Stankovic and H. Alemzadeh},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197769304&doi=10.1109%2fIoTDI61053.2024.00012&partnerID=40&md5=a3b7cf14e46ecb2d4e49905fb845f2c9},
doi = {10.1109/IoTDI61053.2024.00012},
isbn = {979-8-3503-7025-6},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the ACM/IEEE Conference on Internet of Things Design and Implementation (IoTDI)},
pages = {85–96},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Emergency Medical Services (EMS) responders often operate under time-sensitive conditions, facing cognitive overload and inherent risks, requiring essential skills in critical thinking and rapid decision-making. This paper presents CognitiveEMS, an end-to-end wearable cognitive assistant system that can act as a collaborative virtual partner engaging in the real-time acquisition and analysis of multimodal data from an emergency scene and interacting with EMS responders through Augmented Reality (AR) smart glasses. CognitiveEMS processes the continuous streams of data in real-time and leverages edge computing to provide assistance in EMS protocol selection and intervention recognition. We address key technical challenges in real-time cognitive assistance by introducing three novel components: (i) a Speech Recognition model that is fine-tuned for real-world medical emergency conversations using simulated EMS audio recordings, augmented with synthetic data generated by large language models (LLMs); (ii) an EMS Protocol Prediction model that combines state-of-the-art (SOTA) tiny language models with EMS domain knowledge using graph-based attention mechanisms; (iii) an EMS Action Recognition module which leverages multimodal audio and video data and protocol predictions to infer the intervention/treatment actions taken by the responders at the incident scene. Our results show that for speech recognition we achieve superior performance compared to SOTA (WER of 0.290 vs. 0.618) on conversational data. Our protocol prediction component also significantly outperforms SOTA (top-3 accuracy of 0.800 vs. 0.200) and the action recognition achieves an accuracy of 0.727, while maintaining an end-to-end latency of 3.78s for protocol prediction on the edge and 0.31s on the server. © 2024 IEEE.},
keywords = {Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, machine learning, Machine-learning, Multi-modal, Real-time, Service protocols, Smart Health, Speech recognition, State of the art},
pubstate = {published},
tppubtype = {inproceedings}
}
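The system description implies a three-stage streaming pipeline: speech recognition, protocol prediction, then action recognition. Below is a hypothetical skeleton of that data flow; the three models are passed in as stubs, since the abstract does not specify their interfaces, and the per-step timing mirrors the end-to-end latency figures the authors report.

import time
from dataclasses import dataclass
from typing import Callable, List

@dataclass
class CognitiveAssistantPipeline:
    transcribe: Callable[[bytes], str]                 # fine-tuned ASR model
    predict_protocols: Callable[[str], List[str]]      # tiny LM + knowledge graph
    recognize_action: Callable[[str, List[str], bytes], str]  # audio+video fusion

    def step(self, audio_chunk: bytes, video_frame: bytes) -> dict:
        # One real-time tick: transcribe the scene audio, rank candidate
        # EMS protocols, then infer the responder's current intervention.
        t0 = time.perf_counter()
        transcript = self.transcribe(audio_chunk)
        protocols = self.predict_protocols(transcript)  # e.g. top-3 candidates
        action = self.recognize_action(transcript, protocols, video_frame)
        return {
            "transcript": transcript,
            "protocols": protocols,
            "action": action,
            "latency_s": time.perf_counter() - t0,
        }

Whether a stage runs on the AR glasses, an edge device, or a server only changes which callable is remote; the abstract's 3.78 s (edge) vs. 0.31 s (server) protocol-prediction latencies reflect exactly that placement choice.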
Sahebnasi, M. J.; Farrokhimaleki, M.; Wang, N.; Zhao, R.; Maurer, F.
Exploring the Potential of Generative AI in Prototyping XR Applications Proceedings Article
In: Wang, N.; Bellucci, A.; Anthes, C.; Daeijavad, P.; Friedl-Knirsch, J.; Maurer, F.; Pointecker, F.; Spano, L. D. (Ed.): CEUR Workshop Proceedings, vol. 3704, CEUR-WS, 2024, ISSN: 1613-0073.
@inproceedings{sahebnasi_exploring_2024,
title = {Exploring the Potential of Generative AI in Prototyping XR Applications},
author = {M. J. Sahebnasi and M. Farrokhimaleki and N. Wang and R. Zhao and F. Maurer},
editor = {N. Wang and A. Bellucci and C. Anthes and P. Daeijavad and J. Friedl-Knirsch and F. Maurer and F. Pointecker and L. D. Spano},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85196093148&partnerID=40&md5=a6264c6add18b5cd0ff99a0d3b25b822},
issn = {1613-0073},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proceedings},
volume = {3704},
publisher = {CEUR-WS},
abstract = {This paper presents the initial stage of our research to develop a novel approach to streamline the prototyping of Extended Reality applications using generative AI models. We introduce a tool that leverages state-of-the-art generative AI techniques to facilitate the prototyping process, including 3D asset generation and scene composition. The tool allows users to verbally articulate their prototypes, which are then generated by an AI model. We aim to make the development of XR applications more efficient by empowering the designers to gather early feedback from users through rapidly developed prototypes. © 2024 Copyright for this paper by its authors.},
keywords = {AI techniques, Extended reality, generative artificial intelligence, Prototyping, Prototyping process, Scene composition, Software prototyping, State of the art},
pubstate = {published},
tppubtype = {inproceedings}
}
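The tool's verbal-articulation workflow suggests a pipeline from a spoken description to a structured scene specification that then drives asset generation and placement. The sketch below is a guess at that flow, not the authors' implementation: the JSON schema, prompt, and both callables are illustrative assumptions.

import json

SCENE_PROMPT = (
    'Convert the following description of an XR scene into JSON of the form '
    '{"objects": [{"name": "...", "position": [0.0, 0.0, 0.0], "scale": 1.0}]}. '
    'Description: '
)

def compose_scene(description, ask_llm, generate_asset):
    # ask_llm: callable(str) -> str, any text-generation backend.
    # generate_asset: callable(str) -> str, returns a path to a generated
    #                 3D asset (e.g. from a text-to-3D model).
    spec = json.loads(ask_llm(SCENE_PROMPT + description))
    scene = []
    for obj in spec["objects"]:
        scene.append({
            "asset": generate_asset(obj["name"]),
            "position": obj["position"],
            "scale": obj["scale"],
        })
    return scene

Producing a placeable scene specification first and generating meshes only afterwards keeps the loop fast enough for the rapid early-feedback cycle the paper targets.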