AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
2025
Raj, P.
Generative AI for the enterprise metaverse system engineering Book Section
In: Engineering the Metaverse: Enabling technologies, platforms and use cases, pp. 97–118, Institution of Engineering and Technology, 2025, ISBN: 978-183953881-0; 978-183953880-3.
Abstract | Links | BibTeX | Tags: Computer vision, Cybersecurity, Digital avatars, Immersive and interactive experience, Internet of things (IoT)
@incollection{raj_generative_2025,
title = {Generative AI for the enterprise metaverse system engineering},
author = {P. Raj},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-86000473202&doi=10.1049%2fPBPC070E_ch6&partnerID=40&md5=274a94afa9c33ac10902194859e85e81},
doi = {10.1049/PBPC070E_ch6},
isbn = {978-183953881-0; 978-183953880-3},
year = {2025},
date = {2025-01-01},
booktitle = {Engineering the Metaverse: Enabling technologies, platforms and use cases},
pages = {97–118},
publisher = {Institution of Engineering and Technology},
abstract = {With digitization and digitalization technologies flourishing, the digital era is dawning upon us influentially. Every tangible entity becomes digitized to participate in mainstream computing fluidly. When digital entities interact with one another purposefully, a massive amount of multi-structured data gets produced, collected, cleansed, and stocked for posterior data analytics to extract hidden insights, patterns, and other knowledge bases. Artificial intelligence (AI), the most popular digital transformation technology on the planet Earth, is succulently capable of making sense out of accumulating digital datasets. Knowledge discovery is disseminated to the concerned systems to make them aware of the power of knowledge-enabled systems. Notably, there are several illuminating and insightful improvisations (for example, multimodal generative AI) in the AI space. Blockchain, Web 3.0, the IoT, edge computing, cloud-native computing, immersive technologies such as virtual, augmented, mixed, and extended realities (VR, AR, MR and XR), multimodal LLMs for crafting 3D models for a variety of things including buildings, landscapes, etc., digital twins for complicated physical systems, processes and spaces, 5G communication, non-fungible tokens (NFTs), simulation tools, etc. are blending to establish 3D virtual environments to captivate users. Besides the consumer metaverse, enterprise metaverse systems emerge and evolve fast to bring delectable and decisive automation, acceleration, and augmentation to businesses. In this chapter, we have dug deep and dealt with the enterprise metaverse at length to educate and empower our esteemed readers. © The Institution of Engineering and Technology and its licensors 2024.},
keywords = {Computer vision, Cybersecurity, Digital avatars, Immersive and interactive experience, Internet of things (IoT)},
pubstate = {published},
tppubtype = {incollection}
}
Oskooei, A. Rafiei; Aktaş, M. S.; Keleş, M.
Seeing the Sound: Multilingual Lip Sync for Real-Time Face-to-Face Translation † Journal Article
In: Computers, vol. 14, no. 1, 2025, ISSN: 2073-431X.
Abstract | Links | BibTeX | Tags: Computer vision, Deep learning, face-to-face translation, Generative AI, human–computer interaction, lip synchronization, talking head generation
@article{rafiei_oskooei_seeing_2025,
title = {Seeing the Sound: Multilingual Lip Sync for Real-Time Face-to-Face Translation †},
author = {A. Rafiei Oskooei and M. S. Aktaş and M. Keleş},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85215974883&doi=10.3390%2fcomputers14010007&partnerID=40&md5=f4d244e3e1cba572d2a3beb9c0895d32},
doi = {10.3390/computers14010007},
issn = {2073-431X},
year = {2025},
date = {2025-01-01},
journal = {Computers},
volume = {14},
number = {1},
abstract = {Imagine a future where language is no longer a barrier to real-time conversations, enabling instant and lifelike communication across the globe. As cultural boundaries blur, the demand for seamless multilingual communication has become a critical technological challenge. This paper addresses the lack of robust solutions for real-time face-to-face translation, particularly for low-resource languages, by introducing a comprehensive framework that not only translates language but also replicates voice nuances and synchronized facial expressions. Our research tackles the primary challenge of achieving accurate lip synchronization across culturally diverse languages, filling a significant gap in the literature by evaluating the generalizability of lip sync models beyond English. Specifically, we develop a novel evaluation framework combining quantitative lip sync error metrics and qualitative assessments by human observers. This framework is applied to assess two state-of-the-art lip sync models with different architectures for Turkish, Persian, and Arabic languages, using a newly collected dataset. Based on these findings, we propose and implement a modular system that integrates language-agnostic lip sync models with neural networks to deliver a fully functional face-to-face translation experience. Inference Time Analysis shows this system achieves highly realistic, face-translated talking heads in real time, with a throughput as low as 0.381 s. This transformative framework is primed for deployment in immersive environments such as VR/AR, Metaverse ecosystems, and advanced video conferencing platforms. It offers substantial benefits to developers and businesses aiming to build next-generation multilingual communication systems for diverse applications. While this work focuses on three languages, its modular design allows scalability to additional languages. 
However, further testing in broader linguistic and cultural contexts is required to confirm its universal applicability, paving the way for a more interconnected and inclusive world where language ceases to hinder human connection. © 2024 by the authors.},
keywords = {Computer vision, Deep learning, face-to-face translation, Generative AI, human–computer interaction, lip synchronization, talking head generation},
pubstate = {published},
tppubtype = {article}
}
Vachha, C.; Kang, Y.; Dive, Z.; Chidambaram, A.; Gupta, A.; Jun, E.; Hartmann, B.
Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071394-1.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface
@inproceedings{vachha_dreamcrafter_2025,
title = {Dreamcrafter: Immersive Editing of 3D Radiance Fields Through Flexible, Generative Inputs and Outputs},
author = {C. Vachha and Y. Kang and Z. Dive and A. Chidambaram and A. Gupta and E. Jun and B. Hartmann},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005725679&doi=10.1145%2f3706598.3714312&partnerID=40&md5=68cf2a08d3057fd9756e25d53959872b},
doi = {10.1145/3706598.3714312},
isbn = {979-840071394-1},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {Authoring 3D scenes is a central task for spatial computing applications. Competing visions for lowering existing barriers are (1) focus on immersive, direct manipulation of 3D content or (2) leverage AI techniques that capture real scenes (3D Radiance Fields such as, NeRFs, 3D Gaussian Splatting) and modify them at a higher level of abstraction, at the cost of high latency. We unify the complementary strengths of these approaches and investigate how to integrate generative AI advances into real-time, immersive 3D Radiance Field editing. We introduce Dreamcrafter, a VR-based 3D scene editing system that: (1) provides a modular architecture to integrate generative AI algorithms; (2) combines different levels of control for creating objects, including natural language and direct manipulation; and (3) introduces proxy representations that support interaction during high-latency operations. We contribute empirical findings on control preferences and discuss how generative AI interfaces beyond text input enhance creativity in scene editing and world building. © 2025 Copyright held by the owner/author(s).},
keywords = {3D modeling, 3D scenes, AI assisted creativity tool, Animation, Computer vision, Direct manipulation, Drawing (graphics), Gaussian Splatting, Gaussians, Generative AI, Graphic, Graphics, High level languages, Immersive, Interactive computer graphics, Splatting, Three dimensional computer graphics, Virtual Reality, Worldbuilding interface},
pubstate = {published},
tppubtype = {inproceedings}
}
Azzarelli, A.; Anantrasirichai, N.; Bull, D. R.
Intelligent Cinematography: a review of AI research for cinematographic production Journal Article
In: Artificial Intelligence Review, vol. 58, no. 4, 2025, ISSN: 0269-2821.
Abstract | Links | BibTeX | Tags: Artificial intelligence research, Computer vision, Content acquisition, Creative industries, Holistic view, machine learning, Machine-learning, Mergers and acquisitions, Review papers, Three dimensional computer graphics, Video applications, Video processing, Video processing and applications, Virtual production, Virtual Reality, Vision research
@article{azzarelli_intelligent_2025,
title = {Intelligent Cinematography: a review of AI research for cinematographic production},
author = {A. Azzarelli and N. Anantrasirichai and D. R. Bull},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85217373428&doi=10.1007%2fs10462-024-11089-3&partnerID=40&md5=360923b5ba8f63b6edfa1b7fd135c926},
doi = {10.1007/s10462-024-11089-3},
issn = {0269-2821},
year = {2025},
date = {2025-01-01},
journal = {Artificial Intelligence Review},
volume = {58},
number = {4},
abstract = {This paper offers the first comprehensive review of artificial intelligence (AI) research in the context of real camera content acquisition for entertainment purposes and is aimed at both researchers and cinematographers. Addressing the lack of review papers in the field of intelligent cinematography (IC) and the breadth of related computer vision research, we present a holistic view of the IC landscape while providing technical insight, important for experts across disciplines. We provide technical background on generative AI, object detection, automated camera calibration and 3-D content acquisition, with references to assist non-technical readers. The application sections categorize work in terms of four production types: General Production, Virtual Production, Live Production and Aerial Production. Within each application section, we (1) sub-classify work according to research topic and (2) describe the trends and challenges relevant to each type of production. In the final chapter, we address the greater scope of IC research and summarize the significant potential of this area to influence the creative industries sector. We suggest that work relating to virtual production has the greatest potential to impact other mediums of production, driven by the growing interest in LED volumes/stages for in-camera virtual effects (ICVFX) and automated 3-D capture for virtual modeling of real world scenes and actors. We also address ethical and legal concerns regarding the use of creative AI that impact on artists, actors, technologists and the general public. © The Author(s) 2025.},
keywords = {Artificial intelligence research, Computer vision, Content acquisition, Creative industries, Holistic view, machine learning, Machine-learning, Mergers and acquisitions, Review papers, Three dimensional computer graphics, Video applications, Video processing, Video processing and applications, Virtual production, Virtual Reality, Vision research},
pubstate = {published},
tppubtype = {article}
}
Suzuki, R.; Gonzalez-Franco, M.; Sra, M.; Lindlbauer, D.
Everyday AR through AI-in-the-Loop Proceedings Article
In: Conf Hum Fact Comput Syst Proc, Association for Computing Machinery, 2025, ISBN: 979-840071395-8.
Abstract | Links | BibTeX | Tags: Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization
@inproceedings{suzuki_everyday_2025,
title = {Everyday AR through AI-in-the-Loop},
author = {R. Suzuki and M. Gonzalez-Franco and M. Sra and D. Lindlbauer},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005752990&doi=10.1145%2f3706599.3706741&partnerID=40&md5=56b5e447819dde7aa4a29f8e3899e535},
doi = {10.1145/3706599.3706741},
isbn = {979-840071395-8},
year = {2025},
date = {2025-01-01},
booktitle = {Conf Hum Fact Comput Syst Proc},
publisher = {Association for Computing Machinery},
abstract = {This workshop brings together experts and practitioners from augmented reality (AR) and artificial intelligence (AI) to shape the future of AI-in-the-loop everyday AR experiences. With recent advancements in both AR hardware and AI capabilities, we envision that everyday AR—always-available and seamlessly integrated into users’ daily environments—is becoming increasingly feasible. This workshop will explore how AI can drive such everyday AR experiences. We discuss a range of topics, including adaptive and context-aware AR, generative AR content creation, always-on AI assistants, AI-driven accessible design, and real-world-oriented AI agents. Our goal is to identify the opportunities and challenges in AI-enabled AR, focusing on creating novel AR experiences that seamlessly blend the digital and physical worlds. Through the workshop, we aim to foster collaboration, inspire future research, and build a community to advance the research field of AI-enhanced AR. © 2025 Copyright held by the owner/author(s).},
keywords = {Augmented Reality, Augmented reality content, Augmented reality hardware, Computer vision, Content creation, Context-Aware, Generative AI, generative artificial intelligence, Human-AI Interaction, Human-artificial intelligence interaction, Language Model, Large language model, large language models, machine learning, Machine-learning, Mixed reality, Virtual Reality, Virtualization},
pubstate = {published},
tppubtype = {inproceedings}
}
Stacchio, L.; Balloni, E.; Frontoni, E.; Paolanti, M.; Zingaretti, P.; Pierdicca, R.
MineVRA: Exploring the Role of Generative AI-Driven Content Development in XR Environments through a Context-Aware Approach Journal Article
In: IEEE Transactions on Visualization and Computer Graphics, vol. 31, no. 5, pp. 3602–3612, 2025, ISSN: 1077-2626.
Abstract | Links | BibTeX | Tags: adult, Article, Artificial intelligence, Computer graphics, Computer vision, Content Development, Contents development, Context-Aware, Context-aware approaches, Extended reality, female, Generative adversarial networks, Generative AI, generative artificial intelligence, human, Human-in-the-loop, Immersive, Immersive environment, male, Multi-modal, User need, Virtual environments, Virtual Reality
@article{stacchio_minevra_2025,
title = {MineVRA: Exploring the Role of Generative AI-Driven Content Development in XR Environments through a Context-Aware Approach},
author = {L. Stacchio and E. Balloni and E. Frontoni and M. Paolanti and P. Zingaretti and R. Pierdicca},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105003746367&doi=10.1109%2fTVCG.2025.3549160&partnerID=40&md5=70b162b574eebbb0cb71db871aa787e1},
doi = {10.1109/TVCG.2025.3549160},
issn = {1077-2626},
year = {2025},
date = {2025-01-01},
journal = {IEEE Transactions on Visualization and Computer Graphics},
volume = {31},
number = {5},
pages = {3602–3612},
abstract = {The convergence of Artificial Intelligence (AI), Computer Vision (CV), Computer Graphics (CG), and Extended Reality (XR) is driving innovation in immersive environments. A key challenge in these environments is the creation of personalized 3D assets, traditionally achieved through manual modeling, a time-consuming process that often fails to meet individual user needs. More recently, Generative AI (GenAI) has emerged as a promising solution for automated, context-aware content generation. In this paper, we present MineVRA (Multimodal generative artificial iNtelligence for contExt-aware Virtual Reality Assets), a novel Human-In-The-Loop (HITL) XR framework that integrates GenAI to facilitate coherent and adaptive 3D content generation in immersive scenarios. To evaluate the effectiveness of this approach, we conducted a comparative user study analyzing the performance and user satisfaction of GenAI-generated 3D objects compared to those generated by Sketchfab in different immersive contexts. The results suggest that GenAI can significantly complement traditional 3D asset libraries, with valuable design implications for the development of human-centered XR environments. © 1995-2012 IEEE.},
keywords = {adult, Article, Artificial intelligence, Computer graphics, Computer vision, Content Development, Contents development, Context-Aware, Context-aware approaches, Extended reality, female, Generative adversarial networks, Generative AI, generative artificial intelligence, human, Human-in-the-loop, Immersive, Immersive environment, male, Multi-modal, User need, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
Banafa, A.
Artificial intelligence in action: Real-world applications and innovations Book
River Publishers, 2025, ISBN: 978-877004619-0; 978-877004620-6.
Abstract | Links | BibTeX | Tags: 5G, Affective Computing, AGI, AI, AI alignments, AI Ethics, AI hallucinations, AI hype, AI models, Alexa, ANI, ASI, Augmented Reality, Autoencoders, Autonomic computing, Autonomous Cars, Autoregressive models, Big Data, Big Data Analytics, Bitcoin, Blockchain, C3PO, Casual AI, Causal reasoning, ChatGPT, Cloud computing, Collective AI, Compression engines, Computer vision, Conditional Automation, Convolutional neural networks (CNNs), Cryptocurrency, Cybersecurity, Deceptive AI, Deep learning, Digital transformation, Driver Assistance, Driverless Cars, Drones, Elon Musk, Entanglement, Environment and sustainability, Ethereum, Explainable AI, Facebook, Facial Recognition, Feedforward Neural Networks, Fog Computing, Full Automation, Future of AI, General AI, Generative Adversarial Networks (GANs), Generative AI, Google, Green AI, High Automation, Hybrid Blockchain, IEEE, Industrial Internet of Things (IIoT), Internet of things (IoT), Jarvis, Java, JavaScript, Long Short-Term Memory Networks, LTE, machine learning, Microsoft, MultiModal AI, Narrow AI, Natural disasters, Natural Language Generation (NLG), Natural Language Processing (NLP), NetFlix, Network Security, Neural Networks, Nuclear, Nuclear AI, NYTimes, Objective-driven AI, Open Source, Partial Automation, PayPal, Perfect AI, Private Blockchain, Private Cloud Computing, Programming languages, Python, Quantum Communications, Quantum Computing, Quantum Cryptography, Quantum internet, Quantum Machine Learning (QML), R2D2, Reactive machines, limited memory, Recurrent Neural Networks, Responsible AI, Robots, Sci-Fi movies, Self-Aware, Semiconductors, Sensate AI, Siri, Small Data, Smart Contracts, Hybrid Cloud Computing, Smart Devices, Sovereign AI, Super AI, Superposition, TensorFlow, Theory of Mind, Thick Data, Twitter, Variational Autoencoders (VAEs), Virtual Reality, Voice user interface (VUI), Wearable computing devices (WCD), Wearable Technology, Wi-Fi, XAI, Zero-Trust Model
@book{banafa_artificial_2025,
title = {Artificial intelligence in action: Real-world applications and innovations},
author = {A. Banafa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105000403587&partnerID=40&md5=4b0d94be48194a942b22bef63f36d3bf},
isbn = {978-877004619-0; 978-877004620-6},
year = {2025},
date = {2025-01-01},
publisher = {River Publishers},
series = {Artificial Intelligence in Action: Real-World Applications and Innovations},
abstract = {This comprehensive book dives deep into the current landscape of AI, exploring its fundamental principles, development challenges, potential risks, and the cutting-edge breakthroughs that are propelling it forward. Artificial intelligence (AI) is rapidly transforming industries and societies worldwide through groundbreaking innovations and real-world applications. Starting with the core concepts, the book examines the various types of AI systems, generative AI models, and the complexities of machine learning. It delves into the programming languages driving AI development, data pipelines, model creation and deployment processes, while shedding light on issues like AI hallucinations and the intricate path of machine unlearning. The book then showcases the remarkable real-world applications of AI across diverse domains. From preventing job displacement and promoting environmental sustainability, to enhancing disaster response, drone technology, and even nuclear energy innovation, it highlights how AI is tackling complex challenges and driving positive change. The book also explores the double-edged nature of AI, recognizing its tremendous potential while cautioning about the risks of misuse, unintended consequences, and the urgent need for responsible development practices. It examines the intersection of AI and fields like operating system design, warfare, and semiconductor technology, underscoring the wide-ranging implications of this transformative force. As the quest for artificial general intelligence (AGI) and superintelligent AI systems intensifies, the book delves into cutting-edge research, emerging trends, and the pursuit of multimodal, explainable, and causally aware AI systems. It explores the symbiotic relationship between AI and human creativity, the rise of user-friendly "casual AI," and the potential of AI to tackle open-ended tasks. 
This is an essential guide for understanding the profound impact of AI on our world today and its potential to shape our future. From the frontiers of innovation to the challenges of responsible development, this book offers a comprehensive and insightful exploration of the remarkable real-world applications and innovations driving the AI revolution. © 2025 River Publishers. All rights reserved.},
keywords = {5G, Affective Computing, AGI, AI, AI alignments, AI Ethics, AI hallucinations, AI hype, AI models, Alexa, ANI, ASI, Augmented Reality, Autoencoders, Autonomic computing, Autonomous Cars, Autoregressive models, Big Data, Big Data Analytics, Bitcoin, Blockchain, C3PO, Casual AI, Causal reasoning, ChatGPT, Cloud computing, Collective AI, Compression engines, Computer vision, Conditional Automation, Convolutional neural networks (CNNs), Cryptocurrency, Cybersecurity, Deceptive AI, Deep learning, Digital transformation, Driver Assistance, Driverless Cars, Drones, Elon Musk, Entanglement, Environment and sustainability, Ethereum, Explainable AI, Facebook, Facial Recognition, Feedforward Neural Networks, Fog Computing, Full Automation, Future of AI, General AI, Generative Adversarial Networks (GANs), Generative AI, Google, Green AI, High Automation, Hybrid Blockchain, IEEE, Industrial Internet of Things (IIoT), Internet of things (IoT), Jarvis, Java, JavaScript, Long Short-Term Memory Networks, LTE, machine learning, Microsoft, MultiModal AI, Narrow AI, Natural disasters, Natural Language Generation (NLG), Natural Language Processing (NLP), NetFlix, Network Security, Neural Networks, Nuclear, Nuclear AI, NYTimes, Objective-driven AI, Open Source, Partial Automation, PayPal, Perfect AI, Private Blockchain, Private Cloud Computing, Programming languages, Python, Quantum Communications, Quantum Computing, Quantum Cryptography, Quantum internet, Quantum Machine Learning (QML), R2D2, Reactive machines, limited memory, Recurrent Neural Networks, Responsible AI, Robots, Sci-Fi movies, Self-Aware, Semiconductors, Sensate AI, Siri, Small Data, Smart Contracts, Hybrid Cloud Computing, Smart Devices, Sovereign AI, Super AI, Superposition, TensorFlow, Theory of Mind, Thick Data, Twitter, Variational Autoencoders (VAEs), Virtual Reality, Voice user interface (VUI), Wearable computing devices (WCD), Wearable Technology, Wi-Fi, XAI, Zero-Trust Model},
pubstate = {published},
tppubtype = {book}
}
2024
Jeong, E.; Kim, H.; Park, S.; Yoon, S.; Ahn, J.; Woo, W.
Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts Proceedings Article
In: Eck, U.; Sra, M.; Stefanucci, J.; Sugimoto, M.; Tatzgern, M.; Williams, I. (Ed.): Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct, pp. 205–208, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-833150691-9.
Abstract | Links | BibTeX | Tags: 3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics
@inproceedings{jeong_function-adaptive_2024,
title = {Function-Adaptive Affordance Extraction from 3D Objects Using LLM for Interaction Authoring with Augmented Artifacts},
author = {E. Jeong and H. Kim and S. Park and S. Yoon and J. Ahn and W. Woo},
editor = {U. Eck and M. Sra and J. Stefanucci and M. Sugimoto and M. Tatzgern and I. Williams},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214379963&doi=10.1109%2fISMAR-Adjunct64951.2024.00050&partnerID=40&md5=7222e0599a7e2aa0adaea38e4b9e13cc},
doi = {10.1109/ISMAR-Adjunct64951.2024.00050},
isbn = {979-833150691-9},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Symp. Mixed Augment. Real. Adjunct, ISMAR-Adjunct},
pages = {205–208},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {We propose an algorithm that extracts the most suitable affordances, interaction targets, and corresponding coordinates adaptively from 3D models of various artifacts based on their functional context for efficient authoring of XR content with artifacts. Traditionally, authoring AR scenes to convey artifact context required one-to-one manual work. Our approach leverages a Large Language Model (LLM) to extract interaction types, positions, and subjects based on the artifact's name and usage context. This enables templated XR experience creation, replacing repetitive manual labor. Consequently, our system streamlines the XR authoring process, making it more efficient and scalable. © 2024 IEEE.},
keywords = {3D modeling, Applied computing, Art and humanity, Artificial intelligence, Arts and humanities, Augmented Reality, Computer interaction, Computer vision, Computing methodologies, computing methodology, Human computer interaction, Human computer interaction (HCI), Human-centered computing, Humanities computing, Interaction paradigm, Interaction paradigms, Language processing, Mixed / augmented reality, Mixed reality, Modeling languages, Natural Language Processing, Natural language processing systems, Natural languages, Three dimensional computer graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Cronin, I.
Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications Book
Apress Media LLC, 2024, ISBN: 979-886880282-9; 979-886880281-2.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Augmented Reality, Autonomous system, Autonomous systems, Business applications, Computer vision, Decision making, Gaussian Splatting, Gaussians, Generative AI, Language processing, Learning algorithms, Learning systems, machine learning, Machine-learning, Natural Language Processing, Natural Language Processing (NLP), Natural language processing systems, Natural languages, Splatting
@book{cronin_understanding_2024,
title = {Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications},
author = {I. Cronin},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001777571&doi=10.1007%2f979-8-8688-0282-9&partnerID=40&md5=c0714ff3e1ad755596426ea092b830d6},
doi = {10.1007/979-8-8688-0282-9},
isbn = {979-886880282-9; 979-886880281-2},
year = {2024},
date = {2024-01-01},
publisher = {Apress Media LLC},
series = {Understanding Generative AI Business Applications: A Guide to Technical Principles and Real-World Applications},
abstract = {This guide covers the fundamental technical principles and various business applications of Generative AI for planning, developing, and evaluating AI-driven products. It equips you with the knowledge you need to harness the potential of Generative AI for enhancing business creativity and productivity. The book is organized into three sections: text-based, senses-based, and rationale-based. Each section provides an in-depth exploration of the specific methods and applications of Generative AI. In the text-based section, you will find detailed discussions on designing algorithms to automate and enhance written communication, including insights into the technical aspects of transformer-based Natural Language Processing (NLP) and chatbot architecture, such as GPT-4, Claude 2, Google Bard, and others. The senses-based section offers a glimpse into the algorithms and data structures that underpin visual, auditory, and multisensory experiences, including NeRF, 3D Gaussian Splatting, Stable Diffusion, AR and VR technologies, and more. The rationale-based section illuminates the decision-making capabilities of AI, with a focus on machine learning and data analytics techniques that empower applications such as simulation models, agents, and autonomous systems. In summary, this book serves as a guide for those seeking to navigate the dynamic landscape of Generative AI. Whether you’re a seasoned AI professional or a business leader looking to harness the power of creative automation, these pages offer a roadmap to leverage Generative AI for your organization’s success. © 2024 by Irena Cronin.},
keywords = {Artificial intelligence, Augmented Reality, Autonomous system, Autonomous systems, Business applications, Computer vision, Decision making, Gaussian Splatting, Gaussians, Generative AI, Language processing, Learning algorithms, Learning systems, machine learning, Machine-learning, Natural Language Processing, Natural Language Processing (NLP), Natural language processing systems, Natural languages, Splatting},
pubstate = {published},
tppubtype = {book}
}
2023
Wang, Z.; Joshi, A.; Zhang, G.; Ren, W.; Jia, F.; Sun, X.
Elevating Perception: Unified Recognition Framework and Vision-Language Pre-Training Using Three-Dimensional Image Reconstruction Proceedings Article
In: Proc. - Int. Conf. Artif. Intell., Human-Comput. Interact. Robot., AIHCIR, pp. 592–596, Institute of Electrical and Electronics Engineers Inc., 2023, ISBN: 979-835036036-3.
Abstract | Links | BibTeX | Tags: 3D Model LLM, 3D modeling, 3D models, 3D Tech, 3d-modeling, Augmented Reality, Character recognition, Component, Computer aided design, Computer vision, Continuous time systems, Data handling, Generative AI, Image enhancement, Image Reconstruction, Image to Text Generation, Medical Imaging, Pattern recognition, Pre-training, Reconstructive Training, Text generations, Three dimensional computer graphics, Virtual Reality
@inproceedings{wang_elevating_2023,
title = {Elevating Perception: Unified Recognition Framework and Vision-Language Pre-Training Using Three-Dimensional Image Reconstruction},
author = {Z. Wang and A. Joshi and G. Zhang and W. Ren and F. Jia and X. Sun},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85192837757&doi=10.1109%2fAIHCIR61661.2023.00105&partnerID=40&md5=0fe17cc622a9aa90e88b8c3e6a3bed3b},
doi = {10.1109/AIHCIR61661.2023.00105},
isbn = {979-835036036-3 (ISBN)},
year = {2023},
date = {2023-01-01},
booktitle = {Proc. - Int. Conf. Artif. Intell., Human-Comput. Interact. Robot., AIHCIR},
pages = {592–596},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {This research project explores a paradigm shift in perceptual enhancement by integrating a Unified Recognition Framework and Vision-Language Pre-Training in three-dimensional image reconstruction. Through the synergy of advanced algorithms from computer vision and language processing, the project seeks to enhance the precision and depth of perception in reconstructed images. This innovative approach holds the potential to revolutionize fields such as medical imaging, virtual reality, and computer-aided design, providing a comprehensive perspective on the intersection of multimodal data processing and perceptual advancement. The anticipated research outcomes are expected to significantly contribute to the evolution of technologies that rely on accurate and contextually rich three-dimensional reconstructions. Moreover, the research aims to reduce the constant need for new datasets by improving pattern recognition through 3D image patterning on backpropagation. This continuous improvement of vectors is envisioned to enhance the efficiency and accuracy of pattern recognition, contributing to the optimization of perceptual systems over time. © 2023 IEEE.},
keywords = {3D Model LLM, 3D modeling, 3D models, 3D Tech, 3d-modeling, Augmented Reality, Character recognition, Component, Computer aided design, Computer vision, Continuous time systems, Data handling, Generative AI, Image enhancement, Image Reconstruction, Image to Text Generation, Medical Imaging, Pattern recognition, Pre-training, Reconstructive Training, Text generations, Three dimensional computer graphics, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Banafa, A.
Transformative AI: Responsible, Transparent, and Trustworthy AI Systems Book
River Publishers, 2023, ISBN: 978-877004018-1 (ISBN); 978-877004019-8 (ISBN).
Abstract | Links | BibTeX | Tags: 5G, Affective Computing, AI, AI Ethics, Alexa, Augmented Reality, Autoencoders, Autonomic Computing, Autonomous Cars, Autoregressive models, Big Data, Big Data Analytics, Bitcoin, Blockchain, C3PO, ChatGPT, Cloud computing, CNN, Computer vision, Conditional Automation, Convolutional Neural Networks, Cryptocurrency, Cybersecurity, Deep learning, Digital transformation, Driver Assistance, Driverless Cars, Entanglement, Environment and Sustainability, Ethereum, Explainable AI, Facebook, Facial Recognition, Feedforward Neural Networks, Fog Computing, Full Automation, General AI, Generative Adversarial Networks (GANs), Generative AI, Google, High Automation, Hybrid Blockchain, Hybrid Cloud Computing, IEEE, IIoT, Industrial Internet of Things, Internet of Things, IoT, Jarvis, Limited Memory, Long Short-Term Memory Networks, LTE, Machine Learning, Microsoft, Narrow AI, Natural Language Generation (NLG), Natural Language Processing (NLP), NetFlix, Network Security, Neural Networks, NYTimes, Open Source, Partial Automation, PayPal, Private Blockchain, Private Cloud Computing, Quantum Communications, Quantum Computing, Quantum Cryptography, Quantum Internet, Quantum Machine Learning (QML), R2D2, Reactive Machines, Recurrent Neural Networks, Robots, Sci-Fi movies, Self-Aware, Siri, Small Data, Smart Contracts, Smart Devices, Super AI, Superposition, Theory of Mind, Thick Data, Twitter, Variational Autoencoders (VAEs), Virtual Reality, Voice User Interface, VUI, Wearable Computing Devices (WCD), Wearable Technology, Wi-Fi, Zero-Trust Model
@book{banafa_transformative_2023,
title = {Transformative AI: Responsible, Transparent, and Trustworthy AI Systems},
author = {A. Banafa},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85180544759&partnerID=40&md5=c1fcd00f4b40e16156d9877185f66554},
isbn = {978-877004018-1 (ISBN); 978-877004019-8 (ISBN)},
year = {2023},
date = {2023-01-01},
publisher = {River Publishers},
series = {Transformative AI: Responsible, Transparent, and Trustworthy AI Systems},
abstract = {Transformative AI provides a comprehensive overview of the latest trends, challenges, applications, and opportunities in the field of Artificial Intelligence. The book covers the state of the art in AI research, including machine learning, natural language processing, computer vision, and robotics, and explores how these technologies are transforming various industries and domains, such as healthcare, finance, education, and entertainment. The book also addresses the challenges that come with the widespread adoption of AI, including ethical concerns, bias, and the impact on jobs and society. It provides insights into how to mitigate these challenges and how to design AI systems that are responsible, transparent, and trustworthy. The book offers a forward-looking perspective on the future of AI, exploring the emerging trends and applications that are likely to shape the next decade of AI innovation. It also provides practical guidance for businesses and individuals on how to leverage the power of AI to create new products, services, and opportunities. Overall, the book is an essential read for anyone who wants to stay ahead of the curve in the rapidly evolving field of Artificial Intelligence and understand the impact that this transformative technology will have on our lives in the coming years. © 2024 River Publishers. All rights reserved.},
keywords = {5G, Affective Computing, AI, AI Ethics, Alexa, Augmented Reality, Autoencoders, Autonomic Computing, Autonomous Cars, Autoregressive models, Big Data, Big Data Analytics, Bitcoin, Blockchain, C3PO, ChatGPT, Cloud computing, CNN, Computer vision, Conditional Automation, Convolutional Neural Networks, Cryptocurrency, Cybersecurity, Deep learning, Digital transformation, Driver Assistance, Driverless Cars, Entanglement, Environment and Sustainability, Ethereum, Explainable AI, Facebook, Facial Recognition, Feedforward Neural Networks, Fog Computing, Full Automation, General AI, Generative Adversarial Networks (GANs), Generative AI, Google, High Automation, Hybrid Blockchain, Hybrid Cloud Computing, IEEE, IIoT, Industrial Internet of Things, Internet of Things, IoT, Jarvis, Limited Memory, Long Short-Term Memory Networks, LTE, Machine Learning, Microsoft, Narrow AI, Natural Language Generation (NLG), Natural Language Processing (NLP), NetFlix, Network Security, Neural Networks, NYTimes, Open Source, Partial Automation, PayPal, Private Blockchain, Private Cloud Computing, Quantum Communications, Quantum Computing, Quantum Cryptography, Quantum Internet, Quantum Machine Learning (QML), R2D2, Reactive Machines, Recurrent Neural Networks, Robots, Sci-Fi movies, Self-Aware, Siri, Small Data, Smart Contracts, Smart Devices, Super AI, Superposition, Theory of Mind, Thick Data, Twitter, Variational Autoencoders (VAEs), Virtual Reality, Voice User Interface, VUI, Wearable Computing Devices (WCD), Wearable Technology, Wi-Fi, Zero-Trust Model},
pubstate = {published},
tppubtype = {book}
}