AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTex record for each paper.
2025
Pielage, L.; Schmidle, P.; Marschall, B.; Risse, B.
Interactive High-Quality Skin Lesion Generation using Diffusion Models for VR-based Dermatological Education Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 878–897, Association for Computing Machinery, 2025, ISBN: 979-840071306-4 (ISBN).
Abstract | Links | BibTeX | Tags: Deep learning, Dermatology, Diffusion Model, diffusion models, Digital elevation model, Generative AI, Graphical user interfaces, Guidance Strategies, Guidance strategy, Image generation, Image generations, Inpainting, Interactive Generation, Medical education, Medical Imaging, Simulation training, Skin lesion, Upsampling, Virtual environments, Virtual Reality
@inproceedings{pielage_interactive_2025,
title = {Interactive High-Quality Skin Lesion Generation using Diffusion Models for VR-based Dermatological Education},
author = {L. Pielage and P. Schmidle and B. Marschall and B. Risse},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001923208&doi=10.1145%2f3708359.3712101&partnerID=40&md5=639eec55b08a54ce813f7c1016c621e7},
doi = {10.1145/3708359.3712101},
isbn = {979-840071306-4 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {878–897},
publisher = {Association for Computing Machinery},
abstract = {Malignant melanoma is one of the most lethal forms of cancer when not detected early. As a result, cancer screening programs have been implemented internationally, all of which require visual inspection of skin lesions. Early melanoma detection is a crucial competence in medical and dermatological education, and it is primarily trained using 2D imagery. However, given the intrinsic 3D nature of skin lesions and the importance of incorporating additional contextual information about the patient (e.g., skin type, nearby lesions, etc.), this approach falls short of providing a comprehensive and scalable learning experience. A potential solution is the use of Virtual Reality (VR) scenarios, which can offer an effective strategy to train skin cancer screenings in a realistic 3D setting, thereby enhancing medical students' awareness of early melanoma detection. In this paper, we present a comprehensive pipeline and models for generating malignant melanomas and benign nevi, which can be utilized in VR-based medical training. We use diffusion models for the generation of skin lesions, which we have enhanced with various guiding strategies to give educators maximum flexibility in designing scenarios and seamlessly placing lesions on virtual agents. Additionally, we have developed a tool which comprises a graphical user interface (GUI) enabling the generation of new lesions and adapting existing ones using an intuitive and interactive inpainting strategy. The tool also offers a novel custom upsampling strategy to achieve a sufficient resolution required for diagnostic purposes. The generated skin lesions have been validated in a user study with trained dermatologists, confirming the overall high quality of the generated lesions and the utility for educational purposes. © 2025 Copyright held by the owner/author(s).},
keywords = {Deep learning, Dermatology, Diffusion Model, diffusion models, Digital elevation model, Generative AI, Graphical user interfaces, Guidance Strategies, Guidance strategy, Image generation, Image generations, Inpainting, Interactive Generation, Medical education, Medical Imaging, Simulation training, Skin lesion, Upsampling, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Sinha, Y.; Shanmugam, S.; Sahu, Y. K.; Mukhopadhyay, A.; Biswas, P.
Diffuse Your Data Blues: Augmenting Low-Resource Datasets via User-Assisted Diffusion Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 538–552, Association for Computing Machinery, 2025, ISBN: 979-840071306-4 (ISBN).
Abstract | Links | BibTeX | Tags: Data gathering, Detection models, Diffusion Model, diffusion models, Efficient Augmentation, Image Composition, Industrial context, Mixed reality, Object Detection, Objects detection, Synthetic Dataset, Synthetic datasets, Training objects
@inproceedings{sinha_diffuse_2025,
title = {Diffuse Your Data Blues: Augmenting Low-Resource Datasets via User-Assisted Diffusion},
author = {Y. Sinha and S. Shanmugam and Y. K. Sahu and A. Mukhopadhyay and P. Biswas},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001924293&doi=10.1145%2f3708359.3712163&partnerID=40&md5=c13cb6b2ef757546239de8b3ba93fb14},
doi = {10.1145/3708359.3712163},
isbn = {979-840071306-4 (ISBN)},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {538–552},
publisher = {Association for Computing Machinery},
abstract = {Mixed reality applications in industrial contexts necessitate extensive and varied datasets for training object detection models, yet actual data gathering may be obstructed by logistical or cost issues. This study investigates the implementation of generative AI methods to work on this issue for mixed reality applications, with an emphasis on assembly and disassembly tasks. The novel objects found in industrial settings are difficult to describe using words, making text-based models less effective. In this study, a diffusion model is used to generate images by combining novel objects with various backgrounds. The backgrounds are selected where object detection in specific applications has been ineffective. This approach efficiently produces a diverse range of training samples. We compare three approaches: traditional augmentation methods, GAN-based augmentation, and Diffusion-based augmentation. Results show that the diffusion model significantly improved detection metrics. For instance, applying diffusion models to the dataset containing mechanical components of a pneumatic cylinder raised the F1 Score from 69.77 to 84.21 and the mAP@50 from 76.48 to 88.77, resulting in an increase in object detection performance, with a 67% less dataset size compared to the traditional augmented dataset. The proposed image composition diffusion model and user-friendly interface further simplify dataset enrichment, proving effective for augmenting data and improving the robustness of detection models. © 2025 Copyright held by the owner/author(s).},
keywords = {Data gathering, Detection models, Diffusion Model, diffusion models, Efficient Augmentation, Image Composition, Industrial context, Mixed reality, Object Detection, Objects detection, Synthetic Dataset, Synthetic datasets, Training objects},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Jayaraman, S.; Bhavya, R.; Srihari, V.; Rajam, V. Mary Anita
TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions Proceedings Article
In: IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-835037687-6 (ISBN).
Abstract | Links | BibTeX | Tags: Adversarial networks, Computer simulation languages, Deep learning, Depth Estimation, Depth perception, Diffusion Model, diffusion models, Digital elevation model, Generative adversarial networks, Generative model, Generative systems, Language Model, Motion capture, Stereo image processing, Text-to-image, Training data, Video analysis, Video-clips, Virtual environments, Virtual Reality
@inproceedings{jayaraman_texavi_2024,
title = {TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions},
author = {S. Jayaraman and R. Bhavya and V. Srihari and V. Mary Anita Rajam},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215265234&doi=10.1109%2fCVMI61877.2024.10782691&partnerID=40&md5=8e20576af67b917ecfad83873a87ef29},
doi = {10.1109/CVMI61877.2024.10782691},
isbn = {979-835037687-6 (ISBN)},
year = {2024},
date = {2024-01-01},
booktitle = {IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {While generative models such as text-to-image, large language models and text-to-video have seen significant progress, the extension to text-to-virtual-reality remains largely unexplored, due to a deficit in training data and the complexity of achieving realistic depth and motion in virtual environments. This paper proposes an approach to coalesce existing generative systems to form a stereoscopic virtual reality video from text. Carried out in three main stages, we start with a base text-to-image model that captures context from an input text. We then employ Stable Diffusion on the rudimentary image produced, to generate frames with enhanced realism and overall quality. These frames are processed with depth estimation algorithms to create left-eye and right-eye views, which are stitched side-by-side to create an immersive viewing experience. Such systems would be highly beneficial in virtual reality production, since filming and scene building often require extensive hours of work and post-production effort. We utilize image evaluation techniques, specifically Fréchet Inception Distance and CLIP Score, to assess the visual quality of frames produced for the video. These quantitative measures establish the proficiency of the proposed method. Our work highlights the exciting possibilities of using natural language-driven graphics in fields like virtual reality simulations. © 2024 IEEE.},
keywords = {Adversarial networks, Computer simulation languages, Deep learning, Depth Estimation, Depth perception, Diffusion Model, diffusion models, Digital elevation model, Generative adversarial networks, Generative model, Generative systems, Language Model, Motion capture, Stereo image processing, Text-to-image, Training data, Video analysis, Video-clips, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}