AHCI RESEARCH GROUP
Publications
Papers published in international journals, proceedings of conferences, workshops, and books.
OUR RESEARCH
Scientific Publications
2025
Azzarelli, A.; Anantrasirichai, N.; Bull, D. R.
Intelligent Cinematography: a review of AI research for cinematographic production Journal Article
In: Artificial Intelligence Review, vol. 58, no. 4, 2025, ISSN: 0269-2821.
@article{azzarelli_intelligent_2025,
title = {Intelligent Cinematography: a review of AI research for cinematographic production},
author = {A. Azzarelli and N. Anantrasirichai and D. R. Bull},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85217373428&doi=10.1007%2fs10462-024-11089-3&partnerID=40&md5=360923b5ba8f63b6edfa1b7fd135c926},
doi = {10.1007/s10462-024-11089-3},
issn = {0269-2821},
year = {2025},
date = {2025-01-01},
journal = {Artificial Intelligence Review},
volume = {58},
number = {4},
abstract = {This paper offers the first comprehensive review of artificial intelligence (AI) research in the context of real camera content acquisition for entertainment purposes and is aimed at both researchers and cinematographers. Addressing the lack of review papers in the field of intelligent cinematography (IC) and the breadth of related computer vision research, we present a holistic view of the IC landscape while providing technical insight, important for experts across disciplines. We provide technical background on generative AI, object detection, automated camera calibration and 3-D content acquisition, with references to assist non-technical readers. The application sections categorize work in terms of four production types: General Production, Virtual Production, Live Production and Aerial Production. Within each application section, we (1) sub-classify work according to research topic and (2) describe the trends and challenges relevant to each type of production. In the final chapter, we address the greater scope of IC research and summarize the significant potential of this area to influence the creative industries sector. We suggest that work relating to virtual production has the greatest potential to impact other mediums of production, driven by the growing interest in LED volumes/stages for in-camera virtual effects (ICVFX) and automated 3-D capture for virtual modeling of real world scenes and actors. We also address ethical and legal concerns regarding the use of creative AI that impact on artists, actors, technologists and the general public. © The Author(s) 2025.},
keywords = {Artificial intelligence research, Computer vision, Content acquisition, Creative industries, Holistic view, machine learning, Machine-learning, Mergers and acquisitions, Review papers, Three dimensional computer graphics, Video applications, Video processing, Video processing and applications, Virtual production, Virtual Reality, Vision research},
pubstate = {published},
tppubtype = {article}
}
Suzuki, R.; Gonzalez-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-8-4007-1395-8.
@inproceedings{suzuki_everyday_2025,
title = {Everyday AR through AI-in-the-Loop},
author = {R. Suzuki and M. Gonzalez-Franco and M. Sra and D. Lindlbauer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2f3706599.3706741&partnerID=40&md5=56b5e447819dde7aa4a29f8e3899e535},
doi = {10.1145/3706599.3706741},
isbn = {979-8-4007-1395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, H.; Xie, Y.; Kasneci, E.
PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education Journal Article
In: International Journal of Human-Computer Interaction, 2025, ISSN: 1044-7318.
@article{gao_pervrml_2025,
title = {PerVRML: ChatGPT-Driven Personalized VR Environments for Machine Learning Education},
author = {H. Gao and Y. Xie and E. Kasneci},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005776517&doi=10.1080%2f10447318.2025.2504188&partnerID=40&md5=c2c59be3d20d02c6df7750c2330c8f6d},
doi = {10.1080/10447318.2025.2504188},
issn = {1044-7318},
year = {2025},
date = {2025-01-01},
journal = {International Journal of Human-Computer Interaction},
abstract = {The advent of large language models (LLMs) such as ChatGPT has demonstrated significant potential for advancing educational technologies. Recently, growing interest has emerged in integrating ChatGPT with virtual reality (VR) to provide interactive and dynamic learning environments. This study explores the effectiveness of ChatGPT-driven VR in facilitating machine learning education through PerVRML. PerVRML incorporates a ChatGPT-powered avatar that provides real-time assistance and uses LLMs to personalize learning paths based on various sensor data from VR. A between-subjects design was employed to compare two learning modes: personalized and non-personalized. Quantitative data were collected from assessments, user experience surveys, and interaction metrics. The results indicate that while both learning modes supported learning effectively, ChatGPT-powered personalization significantly improved learning outcomes and had distinct impacts on user feedback. These findings underscore the potential of ChatGPT-enhanced VR to deliver adaptive and personalized educational experiences. © 2025 Taylor & Francis Group, LLC.},
keywords = {Backpropagation, ChatGPT, Curricula, Educational robots, Immersive learning, Interactive learning, Language Model, Large language model, large language models, Learning mode, Machine learning education, Machine-learning, Personalized learning, Support vector machines, Teaching, Virtual Reality, Virtual-reality environment, Virtualization},
pubstate = {published},
tppubtype = {article}
}
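A note on mechanics: the sensor-driven personalization described in the PerVRML abstract above reduces to summarizing VR telemetry into an LLM prompt that selects the next learning module. The sketch below illustrates that idea in Python; the telemetry fields, module names, and prompt wording are illustrative assumptions, not the paper's actual schema.

# Illustrative sketch of the sensor-driven personalization idea in the
# PerVRML entry above: VR telemetry is summarized into an LLM prompt that
# selects the learner's next module. Telemetry fields, module names, and
# prompt wording are assumptions for illustration, not the paper's schema.
def personalization_prompt(telemetry: dict, modules: list[str]) -> list[dict]:
    summary = (
        f"quiz score {telemetry['quiz_score']:.0%}, "
        f"gaze time on formula panel {telemetry['gaze_formula_s']} s, "
        f"controller interactions {telemetry['interactions']}"
    )
    return [
        {"role": "system",
         "content": "You are a VR tutor for a machine learning course. "
                    "Reply with exactly one module name from the given list."},
        {"role": "user",
         "content": f"Learner state: {summary}. Modules: {', '.join(modules)}."},
    ]

messages = personalization_prompt(
    {"quiz_score": 0.45, "gaze_formula_s": 12, "interactions": 80},
    ["SVM intuition", "Backpropagation basics", "Hands-on decision trees"],
)
print(messages[1]["content"])  # ready to send to any chat-completion LLM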
Ademola, A.; Sinclair, D.; Koniaris, B.; Hannah, S.; Mitchell, K.
NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction Journal Article
In: Computers and Graphics, vol. 129, 2025, ISSN: 0097-8493.
@article{ademola_neft-net_2025,
title = {NeFT-Net: N-window extended frequency transformer for rhythmic motion prediction},
author = {A. Ademola and D. Sinclair and B. Koniaris and S. Hannah and K. Mitchell},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105006724723&doi=10.1016%2fj.cag.2025.104244&partnerID=40&md5=08fd0792837332404ec9acdd16f608bf},
doi = {10.1016/j.cag.2025.104244},
issn = {0097-8493},
year = {2025},
date = {2025-01-01},
journal = {Computers and Graphics},
volume = {129},
abstract = {Advancements in prediction of human motion sequences are critical for enabling online virtual reality (VR) users to dance and move in ways that accurately mirror real-world actions, delivering a more immersive and connected experience. However, latency in networked motion tracking remains a significant challenge, disrupting engagement and necessitating predictive solutions to achieve real-time synchronization of remote motions. To address this issue, we propose a novel approach leveraging a synthetically generated dataset based on supervised foot anchor placement timings for rhythmic motions, ensuring periodicity and reducing prediction errors. Our model integrates a discrete cosine transform (DCT) to encode motion, refine high-frequency components, and smooth motion sequences, mitigating jittery artifacts. Additionally, we introduce a feed-forward attention mechanism designed to learn from N-window pairs of 3D key-point pose histories for precise future motion prediction. Quantitative and qualitative evaluations on the Human3.6M dataset highlight significant improvements in mean per joint position error (MPJPE) metrics, demonstrating the superiority of our technique over state-of-the-art approaches. We further introduce novel result pose visualizations through the use of generative AI methods. © 2025 The Authors},
keywords = {Cosine transforms, Discrete cosine transforms, Human motions, Immersive, machine learning, Machine-learning, Motion analysis, Motion prediction, Motion processing, Motion sequences, Motion tracking, Real-world, Rendering, Rendering (computer graphics), Rhythmic motion, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
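Both building blocks named in the NeFT-Net abstract above, DCT-based motion encoding and the MPJPE metric, are standard, and a minimal Python sketch of each follows. The array shapes, joint count, and frequency cutoff are illustrative assumptions, not the paper's configuration.

# Minimal sketch of DCT-based motion smoothing and the MPJPE metric
# referenced in the NeFT-Net entry above. Shapes and the cutoff are
# illustrative assumptions, not the paper's actual configuration.
import numpy as np
from scipy.fft import dct, idct

def dct_smooth(motion, keep=16):
    # motion: (T, J*3) array of T frames of J 3-D key-points.
    # Encode each coordinate trajectory with a type-II DCT along time,
    # zero out high-frequency coefficients, and decode back.
    coeffs = dct(motion, axis=0, norm="ortho")
    coeffs[keep:] = 0.0                      # drop jitter-prone high frequencies
    return idct(coeffs, axis=0, norm="ortho")

def mpjpe(pred, target):
    # Mean per joint position error: average Euclidean distance between
    # predicted and ground-truth joints. pred, target: (T, J, 3) arrays.
    return np.linalg.norm(pred - target, axis=-1).mean()

rng = np.random.default_rng(0)
seq = rng.standard_normal((64, 17 * 3))      # 64 frames, 17 joints (Human3.6M-style)
smooth = dct_smooth(seq)
print(mpjpe(smooth.reshape(64, 17, 3), seq.reshape(64, 17, 3)))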
2024
Chaccour, C.; Saad, W.; Debbah, M.; Poor, H. V.
Joint Sensing, Communication, and AI: A Trifecta for Resilient THz User Experiences Journal Article
In: IEEE Transactions on Wireless Communications, vol. 23, no. 9, pp. 11444–11460, 2024, ISSN: 1536-1276.
@article{chaccour_joint_2024,
title = {Joint Sensing, Communication, and AI: A Trifecta for Resilient THz User Experiences},
author = {C. Chaccour and W. Saad and M. Debbah and H. V. Poor},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85190170739&doi=10.1109%2fTWC.2024.3382192&partnerID=40&md5=da12c6f31faacaa08118b26e4570843f},
doi = {10.1109/TWC.2024.3382192},
issn = {1536-1276},
year = {2024},
date = {2024-01-01},
journal = {IEEE Transactions on Wireless Communications},
volume = {23},
number = {9},
pages = {11444–11460},
abstract = {In this paper a novel joint sensing, communication, and artificial intelligence (AI) framework is proposed so as to optimize extended reality (XR) experiences over terahertz (THz) wireless systems. Within this framework, active reconfigurable intelligent surfaces (RISs) are incorporated as pivotal elements, serving as enhanced base stations in the THz band to enhance Line-of-Sight (LoS) communication. The proposed framework consists of three main components. First, a tensor decomposition framework is proposed to extract unique sensing parameters for XR users and their environment by exploiting the THz channel sparsity. Essentially, the THz band's quasi-opticality is exploited and the sensing parameters are extracted from the uplink communication signal, thereby allowing for the use of the same waveform, spectrum, and hardware for both communication and sensing functionalities. Then, the Cramér-Rao lower bound is derived to assess the accuracy of the estimated sensing parameters. Second, a non-autoregressive multi-resolution generative AI framework integrated with an adversarial transformer is proposed to predict missing and future sensing information. The proposed framework offers robust and comprehensive historical sensing information and anticipatory forecasts of future environmental changes, which are generalizable to fluctuations in both known and unforeseen user behaviors and environmental conditions. Third, a multi-agent deep recurrent hysteretic Q-neural network is developed to control the handover policy of RIS subarrays, leveraging the informative nature of sensing information to minimize handover cost, maximize the individual quality of personal experiences (QoPEs), and improve the robustness and resilience of THz links. Simulation results show a high generalizability of the proposed unsupervised generative artificial intelligence (AI) framework to fluctuations in user behavior and velocity, leading to a 61% improvement in instantaneous reliability compared to schemes with known channel state information. © 2002-2012 IEEE.},
keywords = {Artificial intelligence, artificial intelligence (AI), Behavioral Research, Channel state information, Computer hardware, Cramer-Rao bounds, Extended reality (XR), Hardware, Joint sensing and communication, Learning systems, machine learning, machine learning (ML), Machine-learning, Multi agent systems, reliability, Resilience, Sensor data fusion, Tera Hertz, Terahertz, terahertz (THz), Terahertz communication, Wireless communications, Wireless sensor networks, X reality},
pubstate = {published},
tppubtype = {article}
}
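For reference, the Cramér-Rao lower bound invoked in the abstract above takes the following generic form (the paper derives a problem-specific version for the THz sensing parameters; this is the textbook statement): for any unbiased estimator \hat{\theta} of a parameter vector \theta from observations \mathbf{y},

\operatorname{cov}(\hat{\theta}) \succeq \mathbf{I}(\theta)^{-1},
\qquad
[\mathbf{I}(\theta)]_{ij} = -\,\mathbb{E}\!\left[\frac{\partial^{2} \ln p(\mathbf{y};\theta)}{\partial \theta_{i}\,\partial \theta_{j}}\right],

where \mathbf{I}(\theta) is the Fisher information matrix of the received signal and \succeq denotes the positive semi-definite ordering.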
Gujar, P.; Paliwal, G.; Panyam, S.
Generative AI and the Future of Interactive and Immersive Advertising Proceedings Article
In: Rivas-Lalaleo, D.; Maita, S.L.S. (Eds.): ETCM - Ecuador Tech. Chapters Meet., Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-9158-9.
@inproceedings{gujar_generative_2024,
title = {Generative AI and the Future of Interactive and Immersive Advertising},
author = {P. Gujar and G. Paliwal and S. Panyam},
editor = {Rivas-Lalaleo, D. and Maita, S. L. S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85211805262&doi=10.1109%2fETCM63562.2024.10746166&partnerID=40&md5=179c5ceeb28ed72e809748322535c7ad},
doi = {10.1109/ETCM63562.2024.10746166},
isbn = {979-8-3503-9158-9},
year = {2024},
date = {2024-01-01},
booktitle = {ETCM - Ecuador Tech. Chapters Meet.},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative AI is revolutionizing interactive and immersive advertising by enabling more personalized, engaging experiences through advanced technologies like VR, AR, and MR. This transformation is reshaping how advertisers create, deliver, and optimize content, allowing for two-way communication and blurring lines between digital and physical worlds. AI enhances user engagement through predictive analytics, real-time adaptation, and natural language processing, while also optimizing ad placement and personalization. Future trends include integration with emerging technologies like 5G and IoT, fully immersive experiences, and hyper-personalization. However, challenges such as privacy concerns, transparency issues, and ethical considerations must be addressed. As AI continues to evolve, it promises to create unprecedented opportunities for brands to connect with audiences in meaningful ways, potentially blurring the line between advertising and interactive entertainment. The industry must proactively address these challenges to ensure AI-driven advertising enhances user experiences while respecting privacy and maintaining trust. © 2024 IEEE.},
keywords = {Ad Creation, Adversarial machine learning, Advertising Technology (AdTech), Advertizing, Advertizing technology, Augmented Reality, Augmented Reality (AR), Generative adversarial networks, Generative AI, Immersive, Immersive Advertising, Immersive advertizing, Interactive Advertising, Interactive advertizing, machine learning, Machine-learning, Marketing, Mixed reality, Mixed Reality (MR), Personalization, Personalizations, User Engagement, Virtual environments, Virtual Reality, Virtual Reality (VR)},
pubstate = {published},
tppubtype = {inproceedings}
}
Weerasinghe, K.; Janapati, S.; Ge, X.; Kim, S.; Iyer, S.; Stankovic, J. A.; Alemzadeh, H.
Real-Time Multimodal Cognitive Assistant for Emergency Medical Services Proceedings Article
In: Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI, pp. 85–96, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7025-6.
@inproceedings{weerasinghe_real-time_2024,
title = {Real-Time Multimodal Cognitive Assistant for Emergency Medical Services},
author = {K. Weerasinghe and S. Janapati and X. Ge and S. Kim and S. Iyer and J. A. Stankovic and H. Alemzadeh},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197769304&doi=10.1109%2fIoTDI61053.2024.00012&partnerID=40&md5=a3b7cf14e46ecb2d4e49905fb845f2c9},
doi = {10.1109/IoTDI61053.2024.00012},
isbn = {979-8-3503-7025-6},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - ACM/IEEE Conf. Internet-of-Things Des. Implement., IoTDI},
pages = {85–96},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Emergency Medical Services (EMS) responders often operate under time-sensitive conditions, facing cognitive overload and inherent risks, requiring essential skills in critical thinking and rapid decision-making. This paper presents CognitiveEMS, an end-to-end wearable cognitive assistant system that can act as a collaborative virtual partner engaging in the real-time acquisition and analysis of multimodal data from an emergency scene and interacting with EMS responders through Augmented Reality (AR) smart glasses. CognitiveEMS processes the continuous streams of data in real-time and leverages edge computing to provide assistance in EMS protocol selection and intervention recognition. We address key technical challenges in real-time cognitive assistance by introducing three novel components: (i) a Speech Recognition model that is fine-tuned for real-world medical emergency conversations using simulated EMS audio recordings, augmented with synthetic data generated by large language models (LLMs); (ii) an EMS Protocol Prediction model that combines state-of-the-art (SOTA) tiny language models with EMS domain knowledge using graph-based attention mechanisms; (iii) an EMS Action Recognition module which leverages multimodal audio and video data and protocol predictions to infer the intervention/treatment actions taken by the responders at the incident scene. Our results show that for speech recognition we achieve superior performance compared to SOTA (WER of 0.290 vs. 0.618) on conversational data. Our protocol prediction component also significantly outperforms SOTA (top-3 accuracy of 0.800 vs. 0.200) and the action recognition achieves an accuracy of 0.727, while maintaining an end-to-end latency of 3.78s for protocol prediction on the edge and 0.31s on the server. © 2024 IEEE.},
keywords = {Artificial intelligence, Augmented Reality, Cognitive Assistance, Computational Linguistics, Decision making, Domain knowledge, Edge computing, Emergency medical services, Forecasting, Graphic methods, Language Model, machine learning, Machine-learning, Multi-modal, Real- time, Service protocols, Smart Health, Speech recognition, State of the art},
pubstate = {published},
tppubtype = {inproceedings}
}
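The WER figures quoted in the abstract above (0.290 vs. 0.618) follow the standard definition, word-level edit distance divided by reference length; a minimal self-contained sketch:

# Minimal sketch of the standard word error rate (WER) used in the
# CognitiveEMS entry above: Levenshtein distance over words divided
# by the number of words in the reference transcript.
def wer(reference: str, hypothesis: str) -> float:
    ref, hyp = reference.split(), hypothesis.split()
    # d[i][j] = edit distance between ref[:i] and hyp[:j]
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            sub = d[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1])
            d[i][j] = min(sub, d[i - 1][j] + 1, d[i][j - 1] + 1)
    return d[len(ref)][len(hyp)] / len(ref)

print(wer("administer epinephrine now", "administer epinephrine"))  # 0.333...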
Liu, M.; M'Hiri, F.
Beyond Traditional Teaching: Large Language Models as Simulated Teaching Assistants in Computer Science Proceedings Article
In: SIGCSE - Proc. ACM Tech. Symp. Comput. Sci. Educ., pp. 743–749, Association for Computing Machinery, Inc, 2024, ISBN: 979-8-4007-0423-9.
@inproceedings{liu_beyond_2024,
title = {Beyond Traditional Teaching: Large Language Models as Simulated Teaching Assistants in Computer Science},
author = {M. Liu and F. M'Hiri},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85189289344&doi=10.1145%2f3626252.3630789&partnerID=40&md5=44ec79c8f005f4551c820c61f5b5d435},
doi = {10.1145/3626252.3630789},
isbn = {979-8-4007-0423-9},
year = {2024},
date = {2024-01-01},
booktitle = {SIGCSE - Proc. ACM Tech. Symp. Comput. Sci. Educ.},
volume = {1},
pages = {743–749},
publisher = {Association for Computing Machinery, Inc},
abstract = {As the prominence of Large Language Models (LLMs) grows in various sectors, their potential in education warrants exploration. In this study, we investigate the feasibility of employing GPT-3.5 from OpenAI as an LLM teaching assistant (TA) or a virtual TA in computer science (CS) courses. The objective is to enhance the accessibility of CS education while maintaining academic integrity by refraining from providing direct solutions to current-semester assignments. Targeting Foundations of Programming (COMP202), an undergraduate course that introduces students to programming with Python, we have developed a virtual TA using the LangChain framework, known for integrating language models with diverse data sources and environments. The virtual TA assists students with their code and clarifies complex concepts. For homework questions, it is designed to guide students with hints rather than giving out direct solutions. We assessed its performance first through a qualitative evaluation, then a survey-based comparative analysis, using a mix of questions commonly asked on the COMP202 discussion board and questions created by the authors. Our preliminary results indicate that the virtual TA outperforms human TAs on clarity and engagement, matching them on accuracy when the question is non-assignment-specific, for which human TAs still proved more reliable. These findings suggest that while virtual TAs, leveraging the capabilities of LLMs, hold great promise for making the CS education experience more accessible and engaging, their optimal use necessitates human supervision. We conclude by identifying several directions that could be explored in future implementations. © 2024 ACM.},
keywords = {Adaptive teaching, ChatGPT, Computational Linguistics, CS education, E-Learning, Education computing, Engineering education, GPT, Language Model, LLM, machine learning, Machine-learning, Novice programmer, novice programmers, Openai, Programming, Python, Students, Teaching, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
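The hint-only behaviour described in the abstract above is, at bottom, prompt design. A minimal sketch of that guardrail follows, using the OpenAI Python client directly (the authors built their virtual TA on the LangChain framework; the prompt wording here is an assumption for illustration):

# Sketch of the hint-not-solution guardrail described in the entry above.
# The paper used GPT-3.5 via the LangChain framework; this illustrative
# version calls the OpenAI client directly, and the prompt text is assumed.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

SYSTEM_PROMPT = (
    "You are a teaching assistant for COMP202, an introductory Python course. "
    "Explain concepts and point out bugs, but for homework questions give "
    "hints and guiding questions only; never write the solution code."
)

def ask_virtual_ta(student_question: str) -> str:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": student_question},
        ],
    )
    return response.choices[0].message.content

print(ask_virtual_ta("My for loop over a dict only gives me keys. Why?"))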
Cronin, I.
Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications Book
Apress Media LLC, 2024, ISBN: 979-8-8688-0282-9; 979-8-8688-0281-2.
@book{cronin_understanding_2024,
title = {Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications},
author = {I. Cronin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001777571&doi=10.1007%2f979-8-8688-0282-9&partnerID=40&md5=c0714ff3e1ad755596426ea092b830d6},
doi = {10.1007/979-8-8688-0282-9},
isbn = {979-8-8688-0282-9; 979-8-8688-0281-2},
year = {2024},
date = {2024-01-01},
publisher = {Apress Media LLC},
series = {Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications},
abstract = {This guide covers the fundamental technical principles and various business applications of Generative AI for planning, developing, and evaluating AI-driven products. It equips you with the knowledge you need to harness the potential of Generative AI for enhancing business creativity and productivity. The book is organized into three sections: text-based, senses-based, and rationale-based. Each section provides an in-depth exploration of the specific methods and applications of Generative AI. In the text-based section, you will find detailed discussions on designing algorithms to automate and enhance written communication, including insights into the technical aspects of transformer-based Natural Language Processing (NLP) and chatbot architecture, such as GPT-4, Claude 2, Google Bard, and others. The senses-based section offers a glimpse into the algorithms and data structures that underpin visual, auditory, and multisensory experiences, including NeRF, 3D Gaussian Splatting, Stable Diffusion, AR and VR technologies, and more. The rationale-based section illuminates the decision-making capabilities of AI, with a focus on machine learning and data analytics techniques that empower applications such as simulation models, agents, and autonomous systems. In summary, this book serves as a guide for those seeking to navigate the dynamic landscape of Generative AI. Whether you’re a seasoned AI professional or a business leader looking to harness the power of creative automation, these pages offer a roadmap to leverage Generative AI for your organization’s success. © 2024 by Irena Cronin.},
keywords = {Artificial intelligence, Augmented Reality, Autonomous system, Autonomous systems, Business applications, Computer vision, Decision making, Gaussian Splatting, Gaussians, Generative AI, Language processing, Learning algorithms, Learning systems, machine learning, Machine-learning, Natural Language Processing, Natural Language Processing (NLP), Natural language processing systems, Natural languages, Splatting},
pubstate = {published},
tppubtype = {book}
}
Federico, G.; Carrara, F.; Amato, G.; Di Benedetto, M.
Spatio-Temporal 3D Reconstruction from Frame Sequences and Feature Points Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 52–64, Association for Computing Machinery, 2024, ISBN: 979-8-4007-1794-9.
@inproceedings{federico_spatio-temporal_2024,
title = {Spatio-Temporal 3D Reconstruction from Frame Sequences and Feature Points},
author = {G. Federico and F. Carrara and G. Amato and M. Di Benedetto},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85203128613&doi=10.1145%2f3672406.3672415&partnerID=40&md5=2a0dc51baa15f0dcd7f9d2cca708ec15},
doi = {10.1145/3672406.3672415},
isbn = {979-8-4007-1794-9},
year = {2024},
date = {2024-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {52–64},
publisher = {Association for Computing Machinery},
abstract = {Reconstructing a large real environment is a fundamental task to promote eXtended Reality adoption in industrial and entertainment fields. However, the short range of depth cameras, the sparsity of LiDAR sensors, and the huge computational cost of Structure-from-Motion pipelines prevent scene replication in near real time. To overcome these limitations, we introduce a spatio-temporal diffusion neural architecture, a generative AI technique that fuses temporal information (i.e., a short temporally-ordered list of color photographs, like sparse frames of a video stream) with an approximate spatial resemblance of the explored environment. Our aim is to modify an existing 3D diffusion neural model to produce a Signed Distance Field volume from which a 3D mesh representation can be extracted. Our results show that the hallucination approach of diffusion models is an effective methodology where a fast reconstruction is a crucial target. © 2024 Owner/Author.},
keywords = {3D reconstruction, Adversarial machine learning, Artificial intelligence, Color motion pictures, Color photography, Contrastive Learning, De-noising, Deep learning, Denoising Diffusion Probabilistic Model, Frame features, machine learning, Machine-learning, Probabilistic models, Signed Distance Field, Signed distance fields, Spatio-temporal, Video Reconstruction, Video streaming},
pubstate = {published},
tppubtype = {inproceedings}
}
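The final step the abstract above mentions, extracting a 3D mesh from a Signed Distance Field volume, is conventionally done with marching cubes at the zero level set. A minimal sketch using scikit-image, with a synthetic sphere SDF standing in for the diffusion model's output:

# Minimal sketch of the SDF-to-mesh step mentioned in the entry above:
# run marching cubes at the zero level set of a signed distance volume.
# The sphere SDF here is synthetic, standing in for a generated volume.
import numpy as np
from skimage.measure import marching_cubes

# Build a 64^3 signed distance field of a sphere of radius 0.5.
grid = np.linspace(-1.0, 1.0, 64)
x, y, z = np.meshgrid(grid, grid, grid, indexing="ij")
sdf = np.sqrt(x**2 + y**2 + z**2) - 0.5      # negative inside, positive outside

# The zero iso-surface of the SDF is the reconstructed surface.
verts, faces, normals, values = marching_cubes(sdf, level=0.0)
print(verts.shape, faces.shape)              # (N, 3) vertices, (M, 3) triangles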
Otoum, Y.; Gottimukkala, N.; Kumar, N.; Nayak, A.
Machine Learning in Metaverse Security: Current Solutions and Future Challenges Journal Article
In: ACM Computing Surveys, vol. 56, no. 8, 2024, ISSN: 0360-0300.
@article{otoum_machine_2024,
title = {Machine Learning in Metaverse Security: Current Solutions and Future Challenges},
author = {Y. Otoum and N. Gottimukkala and N. Kumar and A. Nayak},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85193466017&doi=10.1145%2f3654663&partnerID=40&md5=b35485c5f2e943ec105ea11a80712cbe},
doi = {10.1145/3654663},
issn = {0360-0300},
year = {2024},
date = {2024-01-01},
journal = {ACM Computing Surveys},
volume = {56},
number = {8},
abstract = {The Metaverse, positioned as the next frontier of the Internet, has the ambition to forge a virtual shared realm characterized by immersion, hyper-spatiotemporal dynamics, and self-sustainability. Recent technological strides in AI, Extended Reality, 6G, and blockchain propel the Metaverse closer to realization, gradually transforming it from science fiction into an imminent reality. Nevertheless, the extensive deployment of the Metaverse faces substantial obstacles, primarily stemming from its potential to infringe on privacy and be susceptible to security breaches, whether inherent in its underlying technologies or arising from the evolving digital landscape. Metaverse security provisioning is poised to confront various foundational challenges owing to its distinctive attributes, encompassing immersive realism, hyper-spatiotemporality, sustainability, and heterogeneity. This article undertakes a comprehensive study of the security and privacy challenges facing the Metaverse, leveraging machine learning models for this purpose. In particular, our focus centers on an innovative distributed Metaverse architecture characterized by interactions across 3D worlds. Subsequently, we conduct a thorough review of the existing cutting-edge measures designed for Metaverse systems while also delving into the discourse surrounding security and privacy threats. As we contemplate the future of Metaverse systems, we outline directions for open research pursuits in this evolving landscape. © 2024 Copyright held by the owner/author(s). Publication rights licensed to ACM.},
keywords = {'current, Block-chain, Blockchain, digital twin, E-Learning, Extended reality, Future challenges, Generative AI, machine learning, Machine-learning, Metaverse Security, Metaverses, Security and privacy, Spatio-temporal dynamics, Sustainable development},
pubstate = {published},
tppubtype = {article}
}
Michael, Z.; Gemeinhardt, J.; Moritz, K.
Interactive WebXR Hypertext Storytelling for Cultural Heritage Proceedings Article
In: Atzenbeck, C.; Rubart, J. (Eds.): Proc. Workshop Hum. Factors Hypertext, Hum. - Assoc. ACM Conf. Hypertext Soc. Media, HT, Association for Computing Machinery, Inc, 2024, ISBN: 979-8-4007-1120-6.
@inproceedings{michael_interactive_2024,
title = {Interactive WebXR Hypertext Storytelling for Cultural Heritage},
author = {Z. Michael and J. Gemeinhardt and K. Moritz},
editor = {Atzenbeck, C. and Rubart, J.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85211623904&doi=10.1145%2f3679058.3688635&partnerID=40&md5=60aad5a9a95e52c3fff51ebb6f670bd6},
doi = {10.1145/3679058.3688635},
isbn = {979-8-4007-1120-6},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. Workshop Hum. Factors Hypertext, Hum. - Assoc. ACM Conf. Hypertext Soc. Media, HT},
publisher = {Association for Computing Machinery, Inc},
abstract = {We are presenting our approach for interactive cultural heritage storytelling in WebXR. Therefore, we are describing our scenes’ structure consisting of (stylized) photospheres of the historic locations, 3D models of 3D-scanned historic artifacts and animated 2D textures of historic characters generated with a machine learning toolset. The result is a platform-independent web-application in an immersive interactive WebXR environment running in browsers on PCs, tablets, phones and XR headsets thanks to the underlying software based on the open-source framework A-Frame. Our paper describes the process, the results and the limitations in detail. The resulting application, designed for the Fichtelgebirge region in Upper Franconia, Germany, offers users an immersive digital time travel experience in the virtual space and within a museum setting connecting real artifacts and virtual stories. © 2024 Copyright held by the owner/author(s).},
keywords = {2D textures, 3D modeling, 3D models, 3d-modeling, Cultural heritage, Cultural heritages, Extended reality (XR), Generative AI, History, HTTP, Hypertext, Hypertext systems, Immersive, Machine-learning, Open source software, Open systems, Scene structure, Three dimensional computer graphics, Virtual environments, Virtual Reality, Web browsers},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Yeo, J. Q.; Wang, Y.; Tanary, S.; Cheng, J.; Lau, M.; Ng, A. B.; Guan, F.
AICRID: AI-Empowered CR For Interior Design Proceedings Article
In: Bruder, G.; Olivier, A.H.; Cunningham, A.; Peng, E.Y.; Grubert, J.; Williams, I. (Eds.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 837–841, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 979-8-3503-2891-2.
@inproceedings{yeo_aicrid_2023,
title = {AICRID: AI-Empowered CR For Interior Design},
author = {J. Q. Yeo and Y. Wang and S. Tanary and J. Cheng and M. Lau and A. B. Ng and F. Guan},
editor = {Bruder, G. and Olivier, A. H. and Cunningham, A. and Peng, E. Y. and Grubert, J. and Williams, I.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180375829&doi=10.1109%2fISMAR-Adjunct60411.2023.00184&partnerID=40&md5=b14d89dbd38a4dfe3f85b90800d42e78},
doi = {10.1109/ISMAR-Adjunct60411.2023.00184},
isbn = {979-8-3503-2891-2},
year = {2023},
date = {2023-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {837–841},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Augmented Reality (AR) technologies have been utilized for interior design for years. Normally 3D furniture models need to be created manually or by scanning with specialized devices and this is usually a costly process. Additionally, users need controllers or hands for manipulating the virtual furniture, which may lead to fatigue during prolonged use. Artificial Intelligence (AI) technologies have made it possible to generate 3D models from texts, images or both and show potential to automate interactions through the user's voice. We propose a novel design, AICRID for short, which aims to automate the 3D model generation and to facilitate the interactions for interior design AR by leveraging AI technologies. Specifically, our design will allow the users to directly generate 3D furniture models with generative AI, enabling them to directly interact with the virtual objects through their voices. © 2023 IEEE.},
keywords = {3D modeling, 3D models, 3d-modeling, Architectural design, Artificial intelligence, Artificial intelligence technologies, Augmented Reality, Augmented reality technology, Interior Design, Interior designs, machine learning, Machine-learning, Model generation, Novel design, Text images, User need, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
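Architecturally, the voice-driven workflow in the abstract above chains speech recognition to a text-to-3D generator. The skeleton below shows only the wiring; transcribe() and generate_3d_model() are hypothetical placeholders, as the paper does not publish an API:

# Illustrative skeleton of the AICRID pipeline described above: transcribe a
# voice command, then hand the text to a text-to-3D generator. transcribe()
# and generate_3d_model() are hypothetical placeholders, not the paper's API.
def transcribe(audio_clip: bytes) -> str:
    """Speech-to-text; stand-in for any ASR service."""
    raise NotImplementedError

def generate_3d_model(prompt: str) -> str:
    """Text-to-3D; returns a path to a generated mesh asset."""
    raise NotImplementedError

def handle_voice_command(audio_clip: bytes) -> str:
    prompt = transcribe(audio_clip)          # e.g. "a low oak coffee table"
    asset_path = generate_3d_model(prompt)   # e.g. "assets/oak_table.glb"
    return asset_path                        # loaded into the AR scene by the client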