AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops, and books.
2025
Liu, G.; Du, H.; Wang, J.; Niyato, D.; Kim, D. I.
Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse Journal Article
In: IEEE Transactions on Mobile Computing, vol. 24, no. 8, pp. 7389–7405, 2025, ISSN: 1536-1233 (Publisher: Institute of Electrical and Electronics Engineers Inc.).
@article{liu_contract-inspired_2025,
title = {Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse},
author = {G. Liu and H. Du and J. Wang and D. Niyato and D. I. Kim},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000066834&doi=10.1109%2FTMC.2025.3550815&partnerID=40&md5=f95abb0df00e3112fa2c15ee77eb41bc},
doi = {10.1109/TMC.2025.3550815},
issn = {1536-1233},
year  = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Mobile Computing},
volume = {24},
number = {8},
pages = {7389–7405},
abstract = {The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant challenges due to the limited computing resources of edge devices and the dynamic nature of wireless networks. This paper proposes a novel framework that integrates contract-inspired contest theory, Deep Reinforcement Learning (DRL), and GDMs to optimize image generation in these resource-constrained environments. The framework addresses the critical challenges of resource allocation and semantic data transmission quality by incentivizing edge devices to efficiently transmit high-quality semantic data, which is essential for creating realistic and immersive images. The use of contest and contract theory ensures that edge devices are motivated to allocate resources effectively, while DRL dynamically adjusts to network conditions, optimizing the overall image generation process. Experimental results demonstrate that the proposed approach not only improves the quality of generated images but also achieves superior convergence speed and stability compared to traditional methods. This makes the framework particularly effective for optimizing complex resource allocation tasks in mobile edge Metaverse applications, offering enhanced performance and efficiency in creating immersive virtual environments. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {Contest Theory, Deep learning, Deep reinforcement learning, Diffusion Model, Generative adversarial networks, Generative AI, High quality, Image generation, Image generations, Immersive technologies, Metaverses, Mobile edge computing, Reinforcement Learning, Reinforcement learnings, Resource allocation, Resources allocation, Semantic data, Virtual addresses, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Zhang, Z.; Wang, J.; Chen, J.; Fang, Z.; Jiang, C.; Han, Z.
A Priority-Aware AI-Generated Content Resource Allocation Method for Multi-UAV Aided Metaverse Proceedings Article
In: IEEE Wireless Commun. Networking Conf. WCNC, Institute of Electrical and Electronics Engineers Inc., 2025, ISSN: 1525-3511, ISBN: 979-835036836-9.
@inproceedings{zhang_priority-aware_2025,
title = {A Priority-Aware AI-Generated Content Resource Allocation Method for Multi-UAV Aided Metaverse},
author = {Z. Zhang and J. Wang and J. Chen and Z. Fang and C. Jiang and Z. Han},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006408540&doi=10.1109%2fWCNC61545.2025.10978443&partnerID=40&md5=69937c6fa9be1a038b28e7884dfe586b},
doi = {10.1109/WCNC61545.2025.10978443},
issn = {1525-3511},
isbn = {979-835036836-9},
year  = {2025},
date = {2025-01-01},
booktitle = {IEEE Wireless Commun. Networking Conf. WCNC},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {With the advancement of large model technologies, AI-generated content is gradually emerging as a mainstream method for content creation. The metaverse, as a key application scenario for next-generation communication technologies, heavily depends on advanced content generation technologies. Nevertheless, the diverse types of metaverse applications and their stringent real-time requirements constrain the full potential of AIGC technologies within this environment. To tackle this problem, we construct a priority-aware multi-UAV aided metaverse system and formulate it as a Markov decision process (MDP). We propose a diffusion-based reinforcement learning algorithm to solve the resource allocation problem and demonstrate its superiority through extensive comparison and ablation experiments. © 2025 IEEE.},
keywords = {Aerial vehicle, AI-generated content, AI-generated content (AIGC), Allocation methods, Content-resources, Diffusion Model, Drones, Metaverse, Metaverses, Priority-aware, Reinforcement Learning, Reinforcement learnings, Resource allocation, Resources allocation, Target drones, Unmanned aerial vehicle, Unmanned aerial vehicle (UAV)},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhang, Z.; Wang, J.; Chen, J.; Fu, H.; Tong, Z.; Jiang, C.
Diffusion-Based Reinforcement Learning for Cooperative Offloading and Resource Allocation in Multi-UAV Assisted Edge-Enabled Metaverse Journal Article
In: IEEE Transactions on Vehicular Technology, vol. 74, no. 7, pp. 11281–11293, 2025, ISSN: 0018-9545, 1939-9359 (Publisher: Institute of Electrical and Electronics Engineers Inc.).
@article{zhang_diffusion-based_2025,
title = {Diffusion-Based Reinforcement Learning for Cooperative Offloading and Resource Allocation in Multi-UAV Assisted Edge-Enabled Metaverse},
author = {Z. Zhang and J. Wang and J. Chen and H. Fu and Z. Tong and C. Jiang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85219108203&doi=10.1109%2FTVT.2025.3544879&partnerID=40&md5=b620573362df88291ef0bc7edc16247f},
doi = {10.1109/TVT.2025.3544879},
issn = {0018-9545; 1939-9359},
year  = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Vehicular Technology},
volume = {74},
number = {7},
pages = {11281–11293},
abstract = {As one of the typical applications of 6G, the metaverse, with its superior immersion and diversified services, has garnered widespread attention from both the global industry and academia. Simultaneously, the emergence of AI-generated content (AIGC), exemplified by ChatGPT, has revolutionized the means of content creation in the metaverse. Providing metaverse users with diversified AIGC services anytime and anywhere to meet the demand for immersive and blended virtual-real experiences in the physical world has become a major challenge in the development of the metaverse. Considering the flexibility and mobility of uncrewed aerial vehicles (UAVs), we innovatively incorporate multiple UAVs as one of the AIGC service providers and construct a multi-UAV assisted edge-enabled metaverse system in the context of AIGC-as-a-Service (AaaS) scenario. To solve the complex resource management and allocation problem in the aforementioned system, we formulate it as a Markov decision process (MDP) and propose utilizing the generative capabilities of the diffusion model in combination with the robust decision-making abilities of reinforcement learning to tackle these issues. In order to substantiate the efficacy of the proposed diffusion-based reinforcement learning framework, we propose a novel diffusion-based soft actor-critic algorithm for metaverse (Meta-DSAC). Subsequently, a series of experiments are executed and the simulation results empirically validate the proposed algorithm's comparative advantages of the ability to provide stable and substantial long-term rewards, as well as the enhanced capacity to model complex environments. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Institute of Electrical and Electronics Engineers Inc.},
keywords = {Aerial vehicle, Content creation, Content services, Contrastive Learning, Decision making, Deep learning, Deep reinforcement learning, Diffusion Model, Global industry, Helicopter services, Markov processes, Metaverse, Metaverses, Reinforcement Learning, Reinforcement learnings, Resource allocation, Resources allocation, Typical application, uncrewed aerial vehicle (UAV), Unmanned aerial vehicle, Unmanned aerial vehicles (UAV)},
pubstate = {published},
tppubtype = {article}
}
Sinha, Y.; Shanmugam, S.; Sahu, Y. K.; Mukhopadhyay, A.; Biswas, P.
Diffuse Your Data Blues: Augmenting Low-Resource Datasets via User-Assisted Diffusion Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 538–552, Association for Computing Machinery, 2025, ISBN: 9798400713064.
@inproceedings{sinha_diffuse_2025,
title = {Diffuse Your Data Blues: Augmenting Low-Resource Datasets via User-Assisted Diffusion},
author = {Y. Sinha and S. Shanmugam and Y. K. Sahu and A. Mukhopadhyay and P. Biswas},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001924293&doi=10.1145%2F3708359.3712163&partnerID=40&md5=8de7dcc94f2b2ad9e8d6de168ba05840},
doi = {10.1145/3708359.3712163},
isbn = {9798400713064},
year  = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {538–552},
publisher = {Association for Computing Machinery},
abstract = {Mixed reality applications in industrial contexts necessitate extensive and varied datasets for training object detection models, yet actual data gathering may be obstructed by logistical or cost issues. This study investigates the implementation of generative AI methods to address this issue for mixed reality applications, with an emphasis on assembly and disassembly tasks. The novel objects found in industrial settings are difficult to describe using words, making text-based models less effective. In this study, a diffusion model is used to generate images by combining novel objects with various backgrounds. The backgrounds are selected where object detection in specific applications has been ineffective. This approach efficiently produces a diverse range of training samples. We compare three approaches: traditional augmentation methods, GAN-based augmentation, and Diffusion-based augmentation. Results show that the diffusion model significantly improved detection metrics. For instance, applying diffusion models to the dataset containing mechanical components of a pneumatic cylinder raised the F1 Score from 69.77 to 84.21 and the mAP@50 from 76.48 to 88.77, resulting in an increase in object detection performance, with a 67% smaller dataset size compared to the traditional augmented dataset. The proposed image composition diffusion model and user-friendly interface further simplify dataset enrichment, proving effective for augmenting data and improving the robustness of detection models. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Data gathering, Detection models, Diffusion Model, diffusion models, Efficient Augmentation, Image Composition, Industrial context, Mixed reality, Object Detection, Objects detection, Synthetic Dataset, Synthetic datasets, Training objects},
pubstate = {published},
tppubtype = {inproceedings}
}
Pielage, L.; Schmidle, P.; Marschall, B.; Risse, B.
Interactive High-Quality Skin Lesion Generation using Diffusion Models for VR-based Dermatological Education Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 878–897, Association for Computing Machinery, 2025, ISBN: 9798400713064.
@inproceedings{pielage_interactive_2025,
title = {Interactive High-Quality Skin Lesion Generation using Diffusion Models for VR-based Dermatological Education},
author = {L. Pielage and P. Schmidle and B. Marschall and B. Risse},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001923208&doi=10.1145%2F3708359.3712101&partnerID=40&md5=d15d791e0d786bee2da91553da332ca3},
doi = {10.1145/3708359.3712101},
isbn = {9798400713064},
year  = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {878–897},
publisher = {Association for Computing Machinery},
abstract = {Malignant melanoma is one of the most lethal forms of cancer when not detected early. As a result, cancer screening programs have been implemented internationally, all of which require visual inspection of skin lesions. Early melanoma detection is a crucial competence in medical and dermatological education, and it is primarily trained using 2D imagery. However, given the intrinsic 3D nature of skin lesions and the importance of incorporating additional contextual information about the patient (e.g., skin type, nearby lesions, etc.), this approach falls short of providing a comprehensive and scalable learning experience. A potential solution is the use of Virtual Reality (VR) scenarios, which can offer an effective strategy to train skin cancer screenings in a realistic 3D setting, thereby enhancing medical students' awareness of early melanoma detection. In this paper, we present a comprehensive pipeline and models for generating malignant melanomas and benign nevi, which can be utilized in VR-based medical training. We use diffusion models for the generation of skin lesions, which we have enhanced with various guiding strategies to give educators maximum flexibility in designing scenarios and seamlessly placing lesions on virtual agents. Additionally, we have developed a tool which comprises a graphical user interface (GUI) enabling the generation of new lesions and adapting existing ones using an intuitive and interactive inpainting strategy. The tool also offers a novel custom upsampling strategy to achieve a sufficient resolution required for diagnostic purposes. The generated skin lesions have been validated in a user study with trained dermatologists, confirming the overall high quality of the generated lesions and the utility for educational purposes. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Deep learning, Dermatology, Diffusion Model, diffusion models, Digital elevation model, Generative AI, Graphical user interfaces, Guidance Strategies, Guidance strategy, Image generation, Image generations, Inpainting, Interactive Generation, Medical education, Medical Imaging, Simulation training, Skin lesion, Upsampling, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Oh, S.; Jung, M.; Kim, T.
EnvMat: A Network for Simultaneous Generation of PBR Maps and Environment Maps from a Single Image Journal Article
In: Electronics (Switzerland), vol. 14, no. 13, 2025, ISSN: 2079-9292 (Publisher: Multidisciplinary Digital Publishing Institute (MDPI)).
@article{oh_envmat_2025,
title = {EnvMat: A Network for Simultaneous Generation of PBR Maps and Environment Maps from a Single Image},
author = {S. Oh and M. Jung and T. Kim},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105010306182&doi=10.3390%2Felectronics14132554&partnerID=40&md5=a6e24d71cb6f1e632ee2415b99f68c0e},
doi = {10.3390/electronics14132554},
issn = {2079-9292},
year  = {2025},
date = {2025-01-01},
journal = {Electronics (Switzerland)},
volume = {14},
number = {13},
abstract = {Generative neural networks have expanded from text and image generation to creating realistic 3D graphics, which are critical for immersive virtual environments. Physically Based Rendering (PBR)—crucial for realistic 3D graphics—depends on PBR maps, environment (env) maps for lighting, and camera viewpoints. Current research mainly generates PBR maps separately, often using fixed env maps and camera poses. This limitation reduces visual consistency and immersion in 3D spaces. Addressing this, we propose EnvMat, a diffusion-based model that simultaneously generates PBR and env maps. EnvMat uses two Variational Autoencoders (VAEs) for map reconstruction and a Latent Diffusion UNet. Experimental results show that EnvMat surpasses the existing methods in preserving visual accuracy, as validated through metrics like L-PIPS, MS-SSIM, and CIEDE2000. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Multidisciplinary Digital Publishing Institute (MDPI)},
keywords = {3D graphics, Auto encoders, Cameras, Diffusion, Diffusion Model, Environment maps, generative artificial intelligence, Image understanding, Latent diffusion model, latent diffusion models, Metaverse, Metaverses, Neural Networks, Physically based rendering, physically based rendering (PBR), Rendering (computer graphics), Tellurium compounds, Three dimensional computer graphics, Variational Autoencoder, Variational Autoencoders (VAEs), Variational techniques, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {article}
}
El Saddik, A.; Ahmad, J.; Khan, M.; Abouzahir, S.; Gueaieb, W.
Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content Journal Article
In: ACM Transactions on Multimedia Computing, Communications and Applications, vol. 21, no. 7, pp. 1–43, 2025, ISSN: 1551-6857, 1551-6865 (Publisher: Association for Computing Machinery).
@article{el_saddik_unleashing_2025,
title = {Unleashing Creativity in the Metaverse: Generative AI and Multimodal Content},
author = {A. El Saddik and J. Ahmad and M. Khan and S. Abouzahir and W. Gueaieb},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011860002&doi=10.1145%2F3713075&partnerID=40&md5=20064843ced240c42e9353d747672cb3},
doi = {10.1145/3713075},
issn = {1551-6857; 1551-6865},
year  = {2025},
date = {2025-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications and Applications},
volume = {21},
number = {7},
pages = {1–43},
abstract = {The metaverse presents an emerging creative expression and collaboration frontier where generative artificial intelligence (GenAI) can play a pivotal role with its ability to generate multimodal content from simple prompts. These prompts allow the metaverse to interact with GenAI, where context information, instructions, input data, or even output indications constituting the prompt can come from within the metaverse. However, their integration poses challenges regarding interoperability, lack of standards, scalability, and maintaining a high-quality user experience. This article explores how GenAI can productively assist in enhancing creativity within the contexts of the metaverse and unlock new opportunities. We provide a technical, in-depth overview of the different generative models for image, video, audio, and 3D content within the metaverse environments. We also explore the bottlenecks, opportunities, and innovative applications of GenAI from the perspectives of end users, developers, service providers, and AI researchers. This survey commences by highlighting the potential of GenAI for enhancing the metaverse experience through dynamic content generation to populate massive virtual worlds. Subsequently, we shed light on the ongoing research practices and trends in multimodal content generation, enhancing realism and creativity and alleviating bottlenecks related to standardization, computational cost, privacy, and safety. Last, we share insights into promising research directions toward the integration of GenAI with the metaverse for creative enhancement, improved immersion, and innovative interactive applications. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Association for Computing Machinery},
keywords = {Adversarial networks, Artificial intelligence, Content generation, Context information, Creatives, Diffusion Model, diffusion models, Generative adversarial networks, Generative AI, Human engineering, Information instructions, Interactive computer graphics, Interactive computer systems, Interactive devices, Interoperability, Metaverse, Metaverses, Multi-modal, multimodal, Simple++, Three dimensional computer graphics, user experience, User interfaces, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Xi, Z.; Yao, Z.; Huang, J.; Lu, Z.-Q.; Yan, H.; Mu, T.-J.; Wang, Z.; Xu, Q.-C.
TerraCraft: City-scale generative procedural modeling with natural languages Journal Article
In: Graphical Models, vol. 141, 2025, ISSN: 1524-0703 (Publisher: Elsevier Inc.).
@article{xi_terracraft_2025,
title = {TerraCraft: City-scale generative procedural modeling with natural languages},
author = {Z. Xi and Z. Yao and J. Huang and Z.-Q. Lu and H. Yan and T.-J. Mu and Z. Wang and Q.-C. Xu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105012397682&doi=10.1016%2Fj.gmod.2025.101285&partnerID=40&md5=15a84050280e5015b1f7b1ef40c62100},
doi = {10.1016/j.gmod.2025.101285},
issn = {1524-0703},
year  = {2025},
date = {2025-01-01},
journal = {Graphical Models},
volume = {141},
abstract = {Automated generation of large-scale 3D scenes presents a significant challenge due to the resource-intensive training and datasets required. This is in sharp contrast to the 2D counterparts that have become readily available due to their superior speed and quality. However, prior work in 3D procedural modeling has demonstrated promise in generating high-quality assets using the combination of algorithms and user-defined rules. To leverage the best of both 2D generative models and procedural modeling tools, we present TerraCraft, a novel framework for generating geometrically high-quality 3D city-scale scenes. By utilizing Large Language Models (LLMs), TerraCraft can generate city-scale 3D scenes from natural text descriptions. With its intuitive operation and powerful capabilities, TerraCraft enables users to easily create geometrically high-quality scenes readily for various applications, such as virtual reality and game design. We validate TerraCraft's effectiveness through extensive experiments and user studies, showing its superior performance compared to existing baselines. © 2025 Elsevier B.V., All rights reserved.},
note = {Publisher: Elsevier Inc.},
keywords = {3D scene generation, 3D scenes, algorithm, Automation, City layout, City scale, data set, Diffusion Model, Game design, Geometry, High quality, Language, Language Model, Large datasets, Large language model, LLMs, Modeling languages, Natural language processing systems, Procedural modeling, Procedural models, Scene Generation, Three dimensional computer graphics, three-dimensional modeling, urban area, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2024
You, F.; Du, H.; Kang, J.; Ni, W.; Niyato, D.; Jamalipour, A.
Generative AI-aided Reinforcement Learning for Computation Offloading and Privacy Protection in VR-based Multi-Access Edge Computing Proceedings Article
In: Proc. - IEEE Smart World Congr., SWC - IEEE Ubiquitous Intell. Comput., Auton. Trusted Comput., Digit. Twin, Metaverse, Priv. Comput. Data Secur., Scalable Comput. Commun., pp. 2209–2214, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798331520861.
@inproceedings{you_generative_2024,
title = {Generative AI-aided Reinforcement Learning for Computation Offloading and Privacy Protection in VR-based Multi-Access Edge Computing},
author = {F. You and H. Du and J. Kang and W. Ni and D. Niyato and A. Jamalipour},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002235341&doi=10.1109%2FSWC62898.2024.00337&partnerID=40&md5=cdf21b7feaa62a76506f96acee2f25c2},
doi = {10.1109/SWC62898.2024.00337},
isbn = {9798331520861},
year  = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Smart World Congr., SWC - IEEE Ubiquitous Intell. Comput., Auton. Trusted Comput., Digit. Twin, Metaverse, Priv. Comput. Data Secur., Scalable Comput. Commun.},
pages = {2209–2214},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {The rapid growth of Artificial Intelligence-Generated Content (AIGC) services has led to increased mobile user participation in related computations and interactions. This development has enabled AI-generated characters to interact with Virtual Reality (VR) users in real time, making the VR experience more interactive and personalized. In this paper, we consider an MEC system where VR users engage in AIGC services, focusing on Generative Diffusion Model (GDM)-based image generation tasks. Specifically, VR users initiate requests for computing resources, while computation offloading distributes the processing load across the MEC system. To manage AIGC edge computation offloading and cloudlet-VR user connections jointly, a Data Center Operator (DCO) employs a centralized Proximal Policy Optimization (PPO) algorithm. To protect VR users' privacy while preserving PPO functionality, we employ the Generative Diffusion Model (GDM), specifically the Denoising Diffusion Implicit Model (DDIM), which first introduces noise to the PPO state, then conducts a denoising process to recover the state information. We further employ Inverse Reinforcement Learning (IRL) to infer rewards for the recovered states, using expert demonstrations trained by the PPO. The similarity between PPO-generated rewards and IRL-inferred rewards is then computed. Simulation results demonstrate that our proposed approach successfully achieves computation offloading while protecting VR users' privacy within the PPO centralized management framework. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Computation offloading, Content services, Differential privacy, Diffusion Model, Edge computing, Generative adversarial networks, Generative diffusion model, generative diffusion models, Inverse problems, Multi-access edge computing, Multiaccess, Policy optimization, Proximal policy optimization, Reinforcement Learning, User privacy, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Si, J.; Yang, S.; Song, J.; Son, S.; Lee, S.; Kim, D.; Kim, S.
Generating and Integrating Diffusion Model-Based Panoramic Views for Virtual Interview Platform Proceedings Article
In: IEEE Int. Conf. Artif. Intell. Eng. Technol., IICAIET, pp. 343–348, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350389692.
@inproceedings{si_generating_2024,
title = {Generating and Integrating Diffusion Model-Based Panoramic Views for Virtual Interview Platform},
author = {J. Si and S. Yang and J. Song and S. Son and S. Lee and D. Kim and S. Kim},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85209663031&doi=10.1109%2FIICAIET62352.2024.10730450&partnerID=40&md5=a8baef1851b8ad9b37e4df4e4b1735e2},
doi = {10.1109/IICAIET62352.2024.10730450},
isbn = {9798350389692},
year  = {2024},
date = {2024-01-01},
booktitle = {IEEE Int. Conf. Artif. Intell. Eng. Technol., IICAIET},
pages = {343–348},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This paper presents a new approach to improve virtual interview platforms in education, which are gaining significant attention. This study aims to simplify the complex manual process of equipment setup to enhance the realism and reliability of virtual interviews. To this end, this study proposes a method for automatically constructing 3D virtual interview environments using diffusion technology in generative AI. In this research, we exploit a diffusion model capable of generating high-quality panoramic images. We generate images of interview rooms capable of delivering immersive interview experiences via refined text prompts. The resulting imagery is then reconstituted into 3D VR content utilizing the Unity engine, facilitating enhanced interaction and engagement within virtual environments. This research compares and analyzes various methods presented in related research and proposes a new process for efficiently constructing 360-degree virtual environments. When participants wore an Oculus Quest 2 and experienced the virtual environment created using the proposed method, they reported a high sense of immersion similar to that of an actual interview environment. © 2024 Elsevier B.V., All rights reserved.},
keywords = {AI, Deep learning, Diffusion, Diffusion Model, Diffusion technology, Digital elevation model, High quality, Manual process, Model-based OPC, New approaches, Panorama, Panoramic views, Virtual environments, Virtual Interview, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Jayaraman, S.; Bhavya, R.; Srihari, V.; Rajam, V. Mary Anita
TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions Proceedings Article
In: IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 9798350376876.
@inproceedings{jayaraman_texavi_2024,
title = {TexAVi: Generating Stereoscopic VR Video Clips from Text Descriptions},
author = {S. Jayaraman and R. Bhavya and V. Srihari and V. Mary Anita Rajam},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215265234&doi=10.1109%2FCVMI61877.2024.10782691&partnerID=40&md5=21e6ecfcc0710c036ba93e39b5fcd30d},
doi = {10.1109/CVMI61877.2024.10782691},
isbn = {9798350376876},
year  = {2024},
date = {2024-01-01},
booktitle = {IEEE Int. Conf. Comput. Vis. Mach. Intell., CVMI},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {While generative models such as text-to-image, large language models and text-to-video have seen significant progress, the extension to text-to-virtual-reality remains largely unexplored, due to a deficit in training data and the complexity of achieving realistic depth and motion in virtual environments. This paper proposes an approach to coalesce existing generative systems to form a stereoscopic virtual reality video from text. Carried out in three main stages, we start with a base text-to-image model that captures context from an input text. We then employ Stable Diffusion on the rudimentary image produced, to generate frames with enhanced realism and overall quality. These frames are processed with depth estimation algorithms to create left-eye and right-eye views, which are stitched side-by-side to create an immersive viewing experience. Such systems would be highly beneficial in virtual reality production, since filming and scene building often require extensive hours of work and post-production effort. We utilize image evaluation techniques, specifically Fréchet Inception Distance and CLIP Score, to assess the visual quality of frames produced for the video. These quantitative measures establish the proficiency of the proposed method. Our work highlights the exciting possibilities of using natural language-driven graphics in fields like virtual reality simulations. © 2025 Elsevier B.V., All rights reserved.},
keywords = {Adversarial networks, Computer simulation languages, Deep learning, Depth Estimation, Depth perception, Diffusion Model, diffusion models, Digital elevation model, Generative adversarial networks, Generative model, Generative systems, Language Model, Motion capture, Stereo image processing, Text-to-image, Training data, Video analysis, Video-clips, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Si, J.; Yang, S.; Kim, D.; Kim, S.
Metaverse Interview Room Creation With Virtual Interviewer Generation Using Diffusion Model Proceedings Article
In: Proc. IEEE Asia-Pacific Conf. Comput. Sci. Data Eng., CSDE, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 9798350341072.
@inproceedings{si_metaverse_2023,
title = {Metaverse Interview Room Creation With Virtual Interviewer Generation Using Diffusion Model},
author = {J. Si and S. Yang and D. Kim and S. Kim},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190586380&doi=10.1109%2FCSDE59766.2023.10487677&partnerID=40&md5=4e7c26e9b1dd272b9df8f57fcb10d079},
doi = {10.1109/CSDE59766.2023.10487677},
isbn = {9798350341072},
year  = {2023},
date = {2023-01-01},
booktitle = {Proc. IEEE Asia-Pacific Conf. Comput. Sci. Data Eng., CSDE},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Virtual interviews are an effective way to respond quickly to the changing trends of our time and adapt flexibly to the hiring processes of various organizations. Through this method, applicants have the opportunity to practice their interview skills and receive feedback, greatly aiding their job preparation. Additionally, experiencing a virtual interview environment that is similar to an actual one enables them to adapt more easily to a variety of new interview situations. This paper delves deeply into the virtual interview environment implemented by combining cutting-edge metaverse technology and generative AI. Specifically, it focuses on creating an environment utilizing realistic Diffusion models to generate interviewers, enabling the provision of scenarios that are similar to actual interviews. © 2024 Elsevier B.V., All rights reserved.},
keywords = {Changing trends, Cutting edges, Diffusion, Diffusion Model, Generative AI, Hiring process, Interview skills, It focus, Metaverse, Metaverses, Unity, Virtual Interview, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}