AHCI RESEARCH GROUP
Publications
Papers published in international journals, conference and workshop proceedings, and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links, and BibTeX record for each paper.
2025
Guo, H.; Liu, Z.; Tang, C.; Zhang, X.
An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning Journal Article
In: IEEE Access, vol. 13, pp. 81450–81461, 2025, ISSN: 2169-3536.
Abstract | Links | BibTeX | Tags: Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour
@article{guo_interactive_2025,
title = {An Interactive Framework for Personalized Navigation Based on Metacosmic Cultural Tourism and Large Model Fine-Tuning},
author = {H. Guo and Z. Liu and C. Tang and X. Zhang},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105004059236&doi=10.1109%2fACCESS.2025.3565359&partnerID=40&md5=45d328831c5795fa31e7e033299912b5},
doi = {10.1109/ACCESS.2025.3565359},
issn = {2169-3536},
year = {2025},
date = {2025-01-01},
journal = {IEEE Access},
volume = {13},
pages = {81450–81461},
abstract = {With the wide application of large language models (LLMs) and the rapid growth of metaverse tourism demand, the digital tour and personalized interaction of historical sites have become the key to improving users’ digital travel experience. Creating an environment where users can access rich cultural information and enjoy personalized, immersive experiences is a crucial issue in the field of digital cultural travel. To this end, we propose a tourism information multimodal generation personalized question-answering interactive framework TIGMI (Tourism Information Generation and Multimodal Interaction) based on LLM fine-tuning, which aims to provide a richer and more in-depth experience for virtual tours of historical monuments. Taking Qutan Temple as an example, the framework combines LLM, retrieval augmented generation (RAG), and auto-prompting engineering techniques to retrieve accurate information related to the historical monument from external knowledge bases and seamlessly integrates it into the generated content. This integration mechanism ensures the accuracy and relevance of the generated answers. Through TIGMI’s LLM-driven command interaction mechanism in the 3D digital scene of Qutan Temple, users are able to interact with the building and scene environment in a personalized and real-time manner, successfully integrating historical and cultural information with modern digital technology. This integration significantly enhances the naturalness of interaction and personalizes the user experience, thereby improving user immersion and information acquisition efficiency. Evaluation results show that TIGMI excels in question-answering and multimodal interactions, significantly enhancing the depth and breadth of services provided by the personalized virtual tour. We conclude by addressing the limitations of TIGMI and briefly discuss how future research will focus on further improving the accuracy and user satisfaction of the generated content to adapt to the dynamically changing tourism environment. © 2013 IEEE.},
keywords = {Cultural informations, Digital Cultural Heritage, Digital cultural heritages, Digital guide, Fine tuning, fine-tuning, Historical monuments, Language Model, Large language model, Leisure, Metacosmic cultural tourism, Multimodal Interaction, Tourism, Virtual tour},
pubstate = {published},
tppubtype = {article}
}
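The TIGMI framework described above follows the standard retrieval-augmented generation (RAG) pattern: fetch monument-related facts from an external knowledge base, splice them into the prompt, and let the fine-tuned LLM answer. A minimal Python sketch of that loop is given below; every name in it (knowledge_base, retrieve, call_llm) and the sample facts are illustrative assumptions, not the authors' implementation.

from dataclasses import dataclass

@dataclass
class Passage:
    source: str
    text: str

# Toy external knowledge base about the monument (assumed content).
knowledge_base = [
    Passage("history", "Qutan Temple was founded in 1393 during the Ming dynasty."),
    Passage("architecture", "The temple complex follows a Han-Tibetan hybrid layout."),
]

def retrieve(query: str, kb: list, k: int = 2) -> list:
    # Rank passages by naive word overlap; a production RAG system
    # would use dense embeddings instead.
    words = set(query.lower().split())
    return sorted(kb, key=lambda p: -len(words & set(p.text.lower().split())))[:k]

def call_llm(prompt: str) -> str:
    # Stand-in for the fine-tuned large language model endpoint.
    return "(model output)"

def answer(query: str) -> str:
    # Splice the retrieved facts into the prompt so the generated
    # answer stays grounded in the knowledge base.
    context = "\n".join(f"[{p.source}] {p.text}" for p in retrieve(query, knowledge_base))
    prompt = ("Answer the visitor using only the facts below.\n"
              f"Facts:\n{context}\n\nQuestion: {query}\nAnswer:")
    return call_llm(prompt)

print(answer("When was Qutan Temple founded?"))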
Chen, J.; Grubert, J.; Kristensson, P. O.
Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR, pp. 206–216, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-833153645-9.
Abstract | Links | BibTeX | Tags: 3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality
@inproceedings{chen_analyzing_2025,
title = {Analyzing Multimodal Interaction Strategies for LLM-Assisted Manipulation of 3D Scenes},
author = {J. Chen and J. Grubert and P. O. Kristensson},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105002716635&doi=10.1109%2fVR59515.2025.00045&partnerID=40&md5=306aa7fbb3dad0aa9d43545f3c7eb9ea},
doi = {10.1109/VR59515.2025.00045},
isbn = {979-833153645-9},
year = {2025},
date = {2025-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces, VR},
pages = {206–216},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {As more applications of large language models (LLMs) for 3D content in immersive environments emerge, it is crucial to study user behavior to identify interaction patterns and potential barriers to guide the future design of immersive content creation and editing systems which involve LLMs. In an empirical user study with 12 participants, we combine quantitative usage data with post-experience questionnaire feedback to reveal common interaction patterns and key barriers in LLM-assisted 3D scene editing systems. We identify opportunities for improving natural language interfaces in 3D design tools and propose design recommendations. Through an empirical study, we demonstrate that LLM-assisted interactive systems can be used productively in immersive environments. © 2025 IEEE.},
keywords = {3D modeling, 3D reconstruction, 3D scene editing, 3D scenes, Computer simulation languages, Editing systems, Immersive environment, Interaction pattern, Interaction strategy, Language Model, Large language model, large language models, Multimodal Interaction, Scene editing, Three dimensional computer graphics, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {inproceedings}
}
Xing, Y.; Ban, J.; Hubbard, T. D.; Villano, M.; Gómez-Zará, D.
Immersed in my Ideas: Using Virtual Reality and LLMs to Visualize Users’ Ideas and Thoughts Proceedings Article
In: Int Conf Intell User Interfaces Proc IUI, pp. 60–65, Association for Computing Machinery, 2025, ISBN: 979-840071409-2.
Abstract | Links | BibTeX | Tags: 3-D environments, 3D modeling, Computer simulation languages, Creativity, Idea Generation, Immersive, Interactive virtual reality, Language Model, Large language model, Multimodal Interaction, Reflection, Text Visualization, Think aloud, Virtual environments, Virtual Reality, Visualization
@inproceedings{xing_immersed_2025,
title = {Immersed in my Ideas: Using Virtual Reality and LLMs to Visualize Users’ Ideas and Thoughts},
author = {Y. Xing and J. Ban and T. D. Hubbard and M. Villano and D. Gómez-Zará},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001675169&doi=10.1145%2f3708557.3716330&partnerID=40&md5=20fb0623d2a1fff92282116b01fac4f3},
doi = {10.1145/3708557.3716330},
isbn = {979-840071409-2},
year = {2025},
date = {2025-01-01},
booktitle = {Int Conf Intell User Interfaces Proc IUI},
pages = {60–65},
publisher = {Association for Computing Machinery},
abstract = {We introduce the Voice Interactive Virtual Reality Annotation (VIVRA), an application that employs Large Language Models to facilitate brainstorming and idea exploration in an immersive 3D environment. As users think aloud to brainstorm and ideate, the application automatically detects, summarizes, suggests, and connects their ideas in real time. The experience brings participants into a room where their ideas emerge as interactive objects that embody the topics detected from their ideas. We evaluated the effectiveness of VIVRA in an exploratory study with 29 participants, followed by a user study with 10 participants comparing the application with other visualizations. Our results show that VIVRA helped participants reflect and think more about their ideas, serving as a valuable tool for personal exploration. We discuss the potential benefits and applications, highlighting the benefits of combining immersive 3D spaces and LLMs to explore, learn, and reflect on ideas. © 2025 Copyright held by the owner/author(s).},
keywords = {3-D environments, 3D modeling, Computer simulation languages, Creativity, Idea Generation, Immersive, Interactive virtual reality, Language Model, Large language model, Multimodal Interaction, Reflection, Text Visualization, Think aloud, Virtual environments, Virtual Reality, Visualization},
pubstate = {published},
tppubtype = {inproceedings}
}
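The VIVRA pipeline sketched in the abstract (detect, summarize, and connect ideas while the user thinks aloud) reduces to a small event loop over transcript chunks. A hedged Python sketch follows; summarize_chunk and related stand in for LLM calls the paper does not specify.

ideas = []  # each entry: {"label": str, "links": [indices of related ideas]}

def summarize_chunk(chunk: str) -> str:
    # Placeholder for an LLM call such as "compress this spoken
    # fragment into a 3-5 word idea label"; here we just truncate.
    return chunk.split(".")[0][:40]

def related(a: str, b: str) -> bool:
    # Toy relatedness test; the real system could ask the LLM itself.
    return len(set(a.lower().split()) & set(b.lower().split())) >= 2

def on_transcript_chunk(chunk: str) -> None:
    # Called as speech recognition emits each think-aloud fragment.
    label = summarize_chunk(chunk)
    links = [i for i, idea in enumerate(ideas) if related(label, idea["label"])]
    ideas.append({"label": label, "links": links})
    # In the VR scene, a new interactive object embodying `label`
    # would be spawned here and visually connected to linked objects.

on_transcript_chunk("Maybe the app could remind users to drink water.")
on_transcript_chunk("A reminder could adapt to how much water users drank.")
print(ideas)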
Dongye, X.; Weng, D.; Jiang, H.; Tian, Z.; Bao, Y.; Chen, P.
Personalized decision-making for agents in face-to-face interaction in virtual reality Journal Article
In: Multimedia Systems, vol. 31, no. 1, 2025, ISSN: 0942-4962.
Abstract | Links | BibTeX | Tags: Decision making, Decision-making process, Decisions makings, Design frameworks, Face-to-face interaction, Feed-back based, Fine tuning, Human-agent interaction, Human–agent interaction, Integrated circuit design, Intelligent virtual agents, Language Model, Large language model, Multi agent systems, Multimodal Interaction, Virtual environments, Virtual Reality
@article{dongye_personalized_2025,
title = {Personalized decision-making for agents in face-to-face interaction in virtual reality},
author = {X. Dongye and D. Weng and H. Jiang and Z. Tian and Y. Bao and P. Chen},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85212947825&doi=10.1007%2fs00530-024-01591-7&partnerID=40&md5=d969cd926fdfd241399f2f96dbf42907},
doi = {10.1007/s00530-024-01591-7},
issn = {0942-4962},
year = {2025},
date = {2025-01-01},
journal = {Multimedia Systems},
volume = {31},
number = {1},
abstract = {Intelligent agents for face-to-face interaction in virtual reality are expected to make decisions and provide appropriate feedback based on the user’s multimodal interaction inputs. Designing the agent’s decision-making process poses a significant challenge owing to the limited availability of multimodal interaction decision-making datasets and the complexities associated with providing personalized interaction feedback to diverse users. To overcome these challenges, we propose a novel design framework that involves generating and labeling symbolic interaction data, pre-training a small-scale real-time decision-making network, collecting personalized interaction data within interactions, and fine-tuning the network using personalized data. We develop a prototype system to demonstrate our design framework, which utilizes interaction distances, head orientations, and hand postures as inputs in virtual reality. The agent is capable of delivering personalized feedback from different users. We evaluate the proposed design framework by demonstrating the utilization of large language models for data labeling, emphasizing reliability and robustness. Furthermore, we evaluate the incorporation of personalized data fine-tuning for decision-making networks within our design framework, highlighting its importance in improving the user interaction experience. The design principles of this framework can be further explored and applied to various domains involving virtual agents. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
keywords = {Decision making, Decision-making process, Decisions makings, Design frameworks, Face-to-face interaction, Feed-back based, Fine tuning, Human-agent interaction, Human–agent interaction, Integrated circuit design, Intelligent virtual agents, Language Model, Large language model, Multi agent systems, Multimodal Interaction, Virtual environments, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
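The design framework above proceeds in stages: generate and label symbolic interaction data with an LLM, pre-train a small real-time decision network, then fine-tune it on personalized data collected during interaction. A toy PyTorch sketch of the two training stages, under an assumed feature layout (interaction distance, head orientation, hand posture) and network size; random tensors stand in for the datasets:

import torch
import torch.nn as nn

N_FEATURES = 8   # e.g. distance (1) + head orientation (3) + hand posture one-hot (4)
N_ACTIONS = 5    # feedback behaviors the virtual agent can choose among

net = nn.Sequential(
    nn.Linear(N_FEATURES, 32), nn.ReLU(),
    nn.Linear(32, N_ACTIONS),
)

def train(model, xs, ys, epochs=10, lr=1e-3):
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    for _ in range(epochs):
        opt.zero_grad()
        loss_fn(model(xs), ys).backward()
        opt.step()

# Stage 1: pre-train on a large batch of LLM-labeled symbolic interactions.
pre_x = torch.randn(512, N_FEATURES)
pre_y = torch.randint(0, N_ACTIONS, (512,))
train(net, pre_x, pre_y)

# Stage 2: fine-tune on a few personalized samples from one user, with
# a smaller learning rate so the pre-trained policy is not forgotten.
user_x = torch.randn(32, N_FEATURES)
user_y = torch.randint(0, N_ACTIONS, (32,))
train(net, user_x, user_y, epochs=5, lr=1e-4)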
2022
Wang, A.; Gao, Z.; Lee, L. H.; Braud, T.; Hui, P.
Decentralized, not Dehumanized in the Metaverse: Bringing Utility to NFTs through Multimodal Interaction Proceedings Article
In: ACM Int. Conf. Proc. Ser., pp. 662–667, Association for Computing Machinery, 2022, ISBN: 978-145039390-4.
Abstract | Links | BibTeX | Tags: AI-generated art, Arts computing, Behavioral Research, Computation theory, Continuum mechanics, Decentralised, Human behaviors, Interaction, Multi-modal, multimodal, Multimodal Interaction, NFTs, Non-fungible token, Text-to-image, The metaverse
@inproceedings{wang_decentralized_2022,
title = {Decentralized, not Dehumanized in the Metaverse: Bringing Utility to NFTs through Multimodal Interaction},
author = {A. Wang and Z. Gao and L. H. Lee and T. Braud and P. Hui},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85142799074&doi=10.1145%2f3536221.3558176&partnerID=40&md5=f9dee1e9e60afc71c4533cbdee0b98a7},
doi = {10.1145/3536221.3558176},
isbn = {978-145039390-4},
year = {2022},
date = {2022-01-01},
booktitle = {ACM Int. Conf. Proc. Ser.},
pages = {662–667},
publisher = {Association for Computing Machinery},
abstract = {User Interaction for NFTs (Non-fungible Tokens) is gaining increasing attention. Although NFTs have been traditionally single-use and monolithic, recent applications aim to connect multimodal interaction with human behavior. This paper reviews the related technological approaches and business practices in NFT art. We highlight that multimodal interaction is a currently under-studied issue in mainstream NFT art, and conjecture that multimodal interaction is a crucial enabler for decentralization in the NFT community. We present a continuum theory and propose a framework combining a bottom-up approach with AI multimodal process. Through this framework, we put forward integrating human behavior data into generative NFT units, as "multimodal interactive NFT." Our work displays the possibilities of NFTs in the art world, beyond the traditional 2D and 3D static content. © 2022 ACM.},
keywords = {AI-generated art, Arts computing, Behavioral Research, Computation theory, Continuum mechanics, Decentralised, Human behaviors, Interaction, Multi-modal, multimodal, Multimodal Interaction, NFTs, Non-fungible token, Text-to-image, The metaverse},
pubstate = {published},
tppubtype = {inproceedings}
}
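The paper's closing proposal, integrating human behavior data into generative NFT units as a "multimodal interactive NFT", is conceptual; the Python sketch below is a purely speculative illustration of what such a unit's metadata might contain, with the schema and the behavior-to-parameter mapping entirely assumed.

import json, hashlib

def render_params(behavior: dict) -> dict:
    # Map captured multimodal behavior signals to generative-art
    # parameters (toy mapping, assumed for illustration only).
    return {
        "palette_seed": int(hashlib.sha256(behavior["voice_sample"].encode()).hexdigest(), 16) % 256,
        "stroke_density": round(behavior["gesture_energy"], 2),
        "focus_point": behavior["gaze_xy"],
    }

behavior = {"voice_sample": "hello", "gesture_energy": 0.73, "gaze_xy": [0.4, 0.6]}

token_metadata = {
    "name": "Multimodal Interactive NFT #1",
    "interaction": behavior,                      # captured multimodal inputs
    "generative_params": render_params(behavior), # parameters the artwork is rendered from
}
print(json.dumps(token_metadata, indent=2))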
2021
Augello, Agnese; Infantino, Ignazio; Pilato, Giovanni; Vitale, Gianpaolo
Site experience enhancement and perspective in cultural heritage fruition—a survey on new technologies and methodologies based on a “four-pillars” approach Journal Article
In: Future Internet, vol. 13, no. 4, 2021, ISSN: 1999-5903.
Abstract | Links | BibTeX | Tags: Augmented Reality, Cultural heritage, Multimodal Interaction, User Behavior Analysis
@article{augello_site_2021,
title = {Site experience enhancement and perspective in cultural heritage fruition—a survey on new technologies and methodologies based on a “four-pillars” approach},
author = {Agnese Augello and Ignazio Infantino and Giovanni Pilato and Gianpaolo Vitale},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85104788117&doi=10.3390%2ffi13040092&partnerID=40&md5=fad920aac766f790ed2b3a37a98dcd85},
doi = {10.3390/fi13040092},
issn = {1999-5903},
year = {2021},
date = {2021-01-01},
journal = {Future Internet},
volume = {13},
number = {4},
abstract = {This paper deals with innovative fruition modalities of cultural heritage sites. Based on two ongoing experiments, four pillars are considered, that is, User Localization, Multimodal Interaction, User Understanding and Gamification. A survey of the existing literature regarding one or more issues related to the four pillars is proposed. It aims to put in evidence the exploitation of these contributions to cultural heritage. It is discussed how a cultural site can be enriched, extended and transformed into an intelligent multimodal environment in this perspective. This new augmented environment can focus on the visitor, analyze his activity and behavior, and make his experience more satisfying, fulfilling and unique. After an in-depth overview of the existing technologies and methodologies for the fruition of cultural interest sites, the two experiments are described in detail and the authors’ vision of the future is proposed. © 2021 by the authors. Licensee MDPI, Basel, Switzerland.},
keywords = {Augmented Reality, Cultural heritage, Multimodal Interaction, User Behavior Analysis},
pubstate = {published},
tppubtype = {article}
}
2009
Sorce, Salvatore; Augello, Agnese; Santangelo, Antonella; Genco, Alessandro; Gentile, Antonio; Gaglio, Salvatore; Pilato, Giovanni
An RFID framework for multimodal service provision Proceedings Article
In: Proceedings of the International Conference on Complex, Intelligent and Software Intensive Systems, CISIS 2009, pp. 730–735, 2009, ISBN: 978-0-7695-3575-3.
Abstract | Links | BibTeX | Tags: Common Sense Reasoning, Context awareness, Conversational Agents, Human computer interaction, Multimodal Interaction, Ontologies, RFID Technology
@inproceedings{sorce_rfid_2009,
title = {An RFID framework for multimodal service provision},
author = {Salvatore Sorce and Agnese Augello and Antonella Santangelo and Alessandro Genco and Antonio Gentile and Salvatore Gaglio and Giovanni Pilato},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-70349743844&doi=10.1109%2fCISIS.2009.168&partnerID=40&md5=5667642a4b78f7ba9959dd18cc832673},
doi = {10.1109/CISIS.2009.168},
isbn = {978-0-7695-3575-3},
year = {2009},
date = {2009-01-01},
booktitle = {Proceedings of the International Conference on Complex, Intelligent and Software Intensive Systems, CISIS 2009},
pages = {730–735},
abstract = {In recent years there has been a growing interest toward the development of pervasive and context-aware services, and RFID technology played a relevant role in the context sensing task. We propose the use of RFID technology together with a conversational agent in order to implement a multimodal information retrieval service we call SensorMesh. The information acquired from RFID tags about the nearest point of interest is processed by the conversational agent that carries on a more natural interaction with the user, also exploiting a common sense ontology. The service is accessible using a multimodal browser on Personal Digital Assistants (PDAs); the browser allows the user to interact with the conversational agent by means of spoken language instead of the traditional, keyboard- (or stylus-) based input systems. The resulting system offers a more natural interaction with respect to traditional prerecorded, audio-visual services, and it is particularly suitable for non-technology-skilled users. © 2009 IEEE.},
keywords = {Common Sense Reasoning, Context awareness, Conversational Agents, Human computer interaction, Multimodal Interaction, Ontologies, RFID Technology},
pubstate = {published},
tppubtype = {inproceedings}
}
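The SensorMesh flow described above is essentially: RFID read, point-of-interest lookup, then a reply from the conversational agent grounded in the tag's record. A minimal Python sketch under assumed names (POI_BY_TAG, agent_reply, and the sample tag data); the actual system used an ALICE-style agent that also consulted a common-sense ontology.

# Assumed mapping from RFID tag IDs to point-of-interest records.
POI_BY_TAG = {
    "04A31F92": {"name": "main portal", "info": "Baroque portal, rebuilt in the 18th century."},
}

def agent_reply(poi: dict, question: str) -> str:
    # Placeholder for the conversational agent's answer generation,
    # grounded in the point-of-interest record.
    return f"You are at the {poi['name']}. {poi['info']}"

def on_rfid_read(tag_id: str, question: str) -> str:
    poi = POI_BY_TAG.get(tag_id)
    if poi is None:
        return "No point of interest is registered for this tag."
    return agent_reply(poi, question)

print(on_rfid_read("04A31F92", "What am I looking at?"))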
Gentile, Antonio; Santangelo, Antonella; Sorce, Salvatore; Augello, Agnese; Pilato, Giovanni; Genco, Alessandro; Gaglio, Salvatore
Exploiting multimodality for intelligent mobile access to pervasive services in cultural heritage sites Book
IGI Global, 2009, ISBN: 978-1-60566-978-6.
Abstract | Links | BibTeX | Tags: Cultural heritage, Multimodal Interaction
@book{gentile_exploiting_2009,
title = {Exploiting multimodality for intelligent mobile access to pervasive services in cultural heritage sites},
author = {Antonio Gentile and Antonella Santangelo and Salvatore Sorce and Agnese Augello and Giovanni Pilato and Alessandro Genco and Salvatore Gaglio},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84883720089&doi=10.4018%2f978-1-60566-978-6.ch009&partnerID=40&md5=0a985b1b28d8119a91cfdf3aecf55b14},
doi = {10.4018/978-1-60566-978-6.ch009},
isbn = {978-1-60566-978-6},
year = {2009},
date = {2009-01-01},
publisher = {IGI Global},
abstract = {In this chapter the role of multimodality in intelligent, mobile guides for cultural heritage environments is discussed. Multimodal access to information contents enables the creation of systems with a higher degree of accessibility and usability. A multimodal interaction may involve several human interaction modes, such as sight, touch and voice to navigate contents, or gestures to activate controls. We first start our discussion by presenting a timeline of cultural heritage system evolution, spanning from 2001 to 2008, which highlights design issues such as intelligence and context-awareness in providing information. Then, multimodal access to contents is discussed, along with problems and corresponding solutions; an evaluation of several reviewed systems is also presented. Lastly, a case study multimodal framework termed MAGA is described, which combines intelligent conversational agents with speech recognition/synthesis technology in a framework employing RFID based location and Wi-Fi based data exchange. © 2010, IGI Global.},
keywords = {Cultural heritage, Multimodal Interaction},
pubstate = {published},
tppubtype = {book}
}
2007
Sorce, Salvatore; Augello, Agnese; Santangelo, Antonella; Pilato, Giovanni; Gentile, Antonio; Genco, Alessandro; Gaglio, Salvatore
A multimodal guide for the augmented campus Proceedings Article
In: Proceedings ACM SIGUCCS User Services Conference, pp. 325–331, 2007, ISBN: 978-1-59593-634-9.
Abstract | Links | BibTeX | Tags: Human computer interaction, Information Retrieval, Multimodal Interaction
@inproceedings{sorce_multimodal_2007,
title = {A multimodal guide for the augmented campus},
author = {Salvatore Sorce and Agnese Augello and Antonella Santangelo and Giovanni Pilato and Antonio Gentile and Alessandro Genco and Salvatore Gaglio},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-41149136230&doi=10.1145%2f1294046.1294123&partnerID=40&md5=629ffe213d018d4e0f6f8383f8eb7ecb},
doi = {10.1145/1294046.1294123},
isbn = {978-1-59593-634-9},
year = {2007},
date = {2007-01-01},
booktitle = {Proceedings ACM SIGUCCS User Services Conference},
pages = {325–331},
abstract = {The use of Personal Digital Assistants (PDAs) with ad-hoc built-in information retrieval and auto-localization functionalities can help people navigate an environment in a more natural manner compared to traditional audio/visual pre-recorded guides. In this work we propose and discuss a user-friendly, multi-modal guide system for pervasive context-aware service provision within augmented environments. The proposed system is adaptable to the user needs of mobility within a given environment; it is usable on different mobile devices and in particular on PDAs, which are used as advanced adaptive HEI (human-environment interaction) interfaces. An information retrieval service is provided that is easily accessible through spoken language interaction in cooperation with an auto-localization service. The interaction is enabled by speech recognition and synthesis technologies, and by a ChatBot system, endowed with common sense reasoning capabilities to properly interpret user speech and provide him with the requested information. This interaction mode turns out to be more natural, and users are required to have only basic skills on the use of PDAs. The auto-localization service relies on an RFID-based framework, which resides partly in the mobile side of the entire system (PDAs), and partly in the environment side. In particular, RFID technology allows the system to provide users with context-related information. An implemented case study is shown that illustrates service provision in an augmented environment within university campus settings (termed "Augmented Campus"). Lastly, a discussion about user experiences while using trial services within the Augmented Campus is given. © Copyright 2007 ACM.},
keywords = {Human computer interaction, Information Retrieval, Multimodal Interaction},
pubstate = {published},
tppubtype = {inproceedings}
}
Santangelo, Antonella; Augello, Agnese; Sorce, Salvatore; Pilato, Giovanni; Gentile, Antonio; Genco, Alessandro; Gaglio, Salvatore
A virtual shopper customer assistant in pervasive environments Journal Article
In: Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 4805 LNCS, no. PART 1, pp. 447–456, 2007, ISSN: 0302-9743.
Abstract | Links | BibTeX | Tags: Chatbots, Human computer interaction, Knowledge Representation, Multimodal Interaction, Pervasive Systems
@article{santangelo_virtual_2007,
title = {A virtual shopper customer assistant in pervasive environments},
author = {Antonella Santangelo and Agnese Augello and Salvatore Sorce and Giovanni Pilato and Antonio Gentile and Alessandro Genco and Salvatore Gaglio},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-38349004552&doi=10.1007%2f978-3-540-76888-3_67&partnerID=40&md5=4db3497dab2382d2cff3cf691d2868ec},
doi = {10.1007/978-3-540-76888-3_67},
issn = {0302-9743},
year = {2007},
date = {2007-01-01},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {4805 LNCS},
number = {PART 1},
pages = {447–456},
abstract = {In this work we propose a smart, human-like PDA-based personal shopper assistant. The system is able to understand the user needs through a spoken natural language interaction and then stores the preferences of the potential customer. Subsequently the personal shopper suggests the most suitable items and shops that match the user profile. The interaction is given by automatic speech recognition and text-to-speech technologies; localization is allowed by the use of Wireless technologies, while the interaction is performed by an Alice-based chat-bot endowed with reasoning capabilities. Besides, being implemented on a PDA, the personal shopper satisfies the user needs of mobility and it is also usable on different mobile devices. © Springer-Verlag Berlin Heidelberg 2007.},
keywords = {Chatbots, Human computer interaction, Knowledge Representation, Multimodal Interaction, Pervasive Systems},
pubstate = {published},
tppubtype = {article}
}
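The matching step in the personal-shopper abstract (preferences gathered through spoken dialogue, then items and shops ranked against the resulting profile) is a simple profile-scoring problem. A toy Python sketch with an assumed profile schema and scoring rule:

# Profile filled in from the spoken-dialogue front end (assumed schema).
profile = {"category": "shoes", "max_price": 80, "style": "casual"}

catalog = [
    {"shop": "A", "item": "leather boots", "category": "shoes", "price": 120, "style": "formal"},
    {"shop": "B", "item": "canvas sneakers", "category": "shoes", "price": 55, "style": "casual"},
]

def score(item: dict, profile: dict) -> int:
    # Weight category matches highest; bools count as 0/1 in Python.
    return (2 * (item["category"] == profile["category"])
            + (item["price"] <= profile["max_price"])
            + (item["style"] == profile["style"]))

# The assistant would read out the best matches in ranked order.
for item in sorted(catalog, key=lambda i: -score(i, profile)):
    print(item["shop"], item["item"], score(item, profile))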
Augello, Agnese; Santangelo, Antonella; Sorce, Salvatore; Pilato, Giovanni; Gentile, Antonio; Genco, Alessandro; Gaglio, Salvatore
A multimodal interaction guide for pervasive services access Proceedings Article
In: 2007 IEEE International Conference on Pervasive Services, ICPS, pp. 250–256, 2007, ISBN: 1-4244-1326-5, 978-1-4244-1326-3.
Abstract | Links | BibTeX | Tags: Common Sense Reasoning, Conversational Agents, Cultural heritage, IVA, Multimodal Interaction, Pervasive Systems
@inproceedings{augello_multimodal_2007,
title = {A multimodal interaction guide for pervasive services access},
author = {Agnese Augello and Antonella Santangelo and Salvatore Sorce and Giovanni Pilato and Antonio Gentile and Alessandro Genco and Salvatore Gaglio},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-52249087540&doi=10.1109%2fPERSER.2007.4283923&partnerID=40&md5=88f7a02ce52c6c47fd2c827f4e77205d},
doi = {10.1109/PERSER.2007.4283923},
isbn = {1-4244-1326-5, 978-1-4244-1326-3},
year = {2007},
date = {2007-01-01},
booktitle = {2007 IEEE International Conference on Pervasive Services, ICPS},
pages = {250–256},
abstract = {A pervasive, multimodal virtual guide for a cultural heritage site tour is illustrated. The guide is based on the integration of different technologies such as conversational agents, commonsense reasoning knowledge bases, multimodal interfaces and self-location detection systems. The aim of the work is to offer a more natural, context sensitive access to information with respect to traditional audio/visual pre-recorded guides. A prototype has been developed and implemented on a Qtek 9090 with Windows Mobile 2003 in order to deal with the "Museo Archeologico Regionale di Agrigento" domain. © 2007 IEEE.},
keywords = {Common Sense Reasoning, Conversational Agents, Cultural heritage, IVA, Multimodal Interaction, Pervasive Systems},
pubstate = {published},
tppubtype = {inproceedings}
}
2006
Augello, Agnese; Santangelo, Antonella; Gentile, Antonio; Gaglio, Salvatore; Pilato, Giovanni
A multimodal chat-bot based information technology system Proceedings Article
In: 4th International Industrial Simulation Conference 2006, ISC 2006, pp. 86–90, EUROSIS, 2006.
Abstract | Links | BibTeX | Tags: Cultural heritage, Human computer interaction, IVA, Multimodal Interaction
@inproceedings{augello_multimodal_2006,
title = {A multimodal chat-bot based information technology system},
author = {Agnese Augello and Antonella Santangelo and Antonio Gentile and Salvatore Gaglio and Giovanni Pilato},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84898452841&partnerID=40&md5=a28454fccefbd112bd0ab92b6b23d502},
year = {2006},
date = {2006-01-01},
booktitle = {4th International Industrial Simulation Conference 2006, ISC 2006},
pages = {86–90},
publisher = {EUROSIS},
abstract = {The proposed system integrates chat-bot and speech recognition technologies in order to build a versatile, user-friendly virtual assistant guide with information retrieval capabilities. The system is adaptable to the user needs of mobility, being also usable on different devices (e.g., PDAs, smartphones). The system has been implemented on a Qtek 9090 with Windows Mobile 2003, and a simulation for the cultural heritage domain is presented here.},
keywords = {Cultural heritage, Human computer interaction, IVA, Multimodal Interaction},
pubstate = {published},
tppubtype = {inproceedings}
}