AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Kai, W.-H.; Xing, K.-X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 0178-2789.
Abstract | Links | BibTeX | Tags: Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
  title      = {Video-driven musical composition using large language model with memory-augmented state space},
  author     = {Kai, W.-H. and Xing, K.-X.},
  url        = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2fs00371-024-03606-w&partnerID=40&md5=7ea24f13614a9a24caf418c37a10bd8c},
  doi        = {10.1007/s00371-024-03606-w},
  issn       = {0178-2789},
  year       = {2025},
  date       = {2025-01-01},
  journal    = {The Visual Computer},
  volume     = {41},
  number     = {5},
  pages      = {3345--3357},
  abstract   = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, the research work on LLms for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependency and maintaining high performance, while further decrease the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code released on https://github.com/kai211233/S2L2-V2M. © The Author(s), under exclusive licence to Springer-Verlag GmbH Germany, part of Springer Nature 2024.},
  keywords   = {Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space},
  pubstate   = {published},
  tppubtype  = {article}
}
Hu, Y.-H.; Matsumoto, A.; Ito, K.; Narumi, T.; Kuzuoka, H.; Amemiya, T.
Avatar Motion Generation Pipeline for the Metaverse via Synthesis of Generative Models of Text and Video Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 767–771, Institute of Electrical and Electronics Engineers Inc., 2025, ISBN: 979-8-3315-1484-6.
Abstract | Links | BibTeX | Tags: Ambient intelligence, Design and evaluation methods, Distributed computer systems, Human-centered computing, Language Model, Metaverses, Processing capability, Text-processing, Treemap, Treemaps, Visualization, Visualization design and evaluation method, Visualization design and evaluation methods, Visualization designs, Visualization technique, Visualization techniques
@inproceedings{hu_avatar_2025,
  title      = {Avatar Motion Generation Pipeline for the Metaverse via Synthesis of Generative Models of Text and Video},
  author     = {Hu, Y.-H. and Matsumoto, A. and Ito, K. and Narumi, T. and Kuzuoka, H. and Amemiya, T.},
  url        = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105005158851&doi=10.1109%2fVRW66409.2025.00155&partnerID=40&md5=2bc9a6390e1cf710206835722ca8dbbf},
  doi        = {10.1109/VRW66409.2025.00155},
  isbn       = {979-8-3315-1484-6},
  year       = {2025},
  date       = {2025-01-01},
  booktitle  = {2025 IEEE Conference on Virtual Reality and 3D User Interfaces Abstracts and Workshops (VRW)},
  pages      = {767--771},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  abstract   = {Efforts to integrate AI avatars into the metaverse to enhance interactivity have progressed in both research and commercial domains. AI avatars in the metaverse are expected to exhibit not only verbal responses but also avatar motions, such as non-verbal gestures, to enable seamless communication with users. Large Language Models (LLMs) are known for their advanced text processing capabilities, such as user input, avatar actions, and even entire virtual environments as text, making them a promising approach for planning avatar motions. However, generating the avatar motions solely from the textual information often requires extensive training data whereas the configuration is very challenging, with results that often lack diversity and fail to match user expectations. On the other hand, AI technologies for generating videos have progressed to the point where they can depict diverse and natural human movements based on prompts. Therefore, this paper introduces a novel pipeline, TVMP, that synthesizes LLMs with advanced text processing capabilities and video generation models with the ability to generate videos containing a variety of motions. The pipeline first generates videos from text input, then estimates the motions from the generated videos, and lastly exports the estimated motion data into the avatars in the metaverse. Feedback on the TVMP prototype suggests further refinement is needed, such as speed control, display of the progress, and direct edition for contextual relevance and usability enhancements. The proposed method enables AI avatars to perform highly adaptive and diverse movements to fulfill user expectations and contributes to developing a more immersive metaverse. © 2025 IEEE.},
  keywords   = {Ambient intelligence, Design and evaluation methods, Distributed computer systems, Human-centered computing, Language Model, Metaverses, Processing capability, Text-processing, Treemap, Treemaps, Visualization, Visualization design and evaluation method, Visualization design and evaluation methods, Visualization designs, Visualization technique, Visualization techniques},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2018
Di Napoli, Claudia; Valentino, Marco; Sabatucci, Luca; Cossentino, Massimo
Adaptive Workflows of Home-Care Services Proceedings Article
In: 2018 IEEE 27th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE), pp. 3–8, IEEE, 2018.
Abstract | Links | BibTeX | Tags: AAL for the Elderly, Assistive robotics, Distributed computer systems, Dynamic workflow, Health care application
@inproceedings{dinapoliAdaptiveWorkflowsHomecare2018,
  title      = {Adaptive Workflows of Home-Care Services},
  author     = {Di Napoli, Claudia and Valentino, Marco and Sabatucci, Luca and Cossentino, Massimo},
  doi        = {10.1109/WETICE.2018.00008},
  year       = {2018},
  date       = {2018-01-01},
  booktitle  = {2018 IEEE 27th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE)},
  pages      = {3--8},
  publisher  = {IEEE},
  abstract   = {With the increased number of elderly people in developed countries, assistive robotics is gaining more attention allowing to support home care assistance. Here, assistive robotics is adopted to monitor the activities of daily living (ADL) of patients with mild neurological disorders to limit the human monitoring, usually representing a burden for family members. In order to improve the effectiveness and user acceptance level of the robotic system, a middleware layer, able to automatically generate monitoring plans for home care patients, is proposed. The plans are generated as workflow of services, each one representing a monitoring task that can be executed by different devices, including humans, in different ways. We show that a service-oriented approach allows generating adaptive monitoring plans for patients with different levels of neurological disorders, taking into account the dynamic nature of their personality profiles, as well as of the environment they live in.},
  keywords   = {AAL for the Elderly, Assistive robotics, Distributed computer systems, Dynamic workflow, Health care application},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Di Napoli, Claudia; Valentino, Marco; Sabatucci, Luca; Cossentino, Massimo
Adaptive workflows of home-care services Proceedings Article
In: 2018 IEEE 27th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE), pp. 3–8, IEEE, 2018.
Abstract | Links | BibTeX | Tags: AAL for the Elderly, Assistive robotics, Distributed computer systems, Dynamic workflow, Health care application
@inproceedings{di_napoli_adaptive_2018,
  title      = {Adaptive Workflows of Home-Care Services},
  author     = {Di Napoli, Claudia and Valentino, Marco and Sabatucci, Luca and Cossentino, Massimo},
  doi        = {10.1109/WETICE.2018.00008},
  year       = {2018},
  date       = {2018-01-01},
  booktitle  = {2018 IEEE 27th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises (WETICE)},
  pages      = {3--8},
  publisher  = {IEEE},
  abstract   = {With the increased number of elderly people in developed countries, assistive robotics is gaining more attention allowing to support home care assistance. Here, assistive robotics is adopted to monitor the activities of daily living (ADL) of patients with mild neurological disorders to limit the human monitoring, usually representing a burden for family members. In order to improve the effectiveness and user acceptance level of the robotic system, a middleware layer, able to automatically generate monitoring plans for home care patients, is proposed. The plans are generated as workflow of services, each one representing a monitoring task that can be executed by different devices, including humans, in different ways. We show that a service-oriented approach allows generating adaptive monitoring plans for patients with different levels of neurological disorders, taking into account the dynamic nature of their personality profiles, as well as of the environment they live in.},
  keywords   = {AAL for the Elderly, Assistive robotics, Distributed computer systems, Dynamic workflow, Health care application},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2017
Di Napoli, Claudia; Sabatucci, Luca; Cossentino, Massimo; Rossi, Silvia
Generating and Instantiating Abstract Workflows with QoS User Requirements. Proceedings Article
In: ICAART (1), pp. 276–283, 2017.
Abstract | Links | BibTeX | Tags: Automatic service composition, Distributed computer systems, Dynamic workflow, Multiagent negotiation, Quality of Service
@inproceedings{dinapoliGeneratingInstantiatingAbstract2017,
  title      = {Generating and Instantiating Abstract Workflows with {QoS} User Requirements},
  author     = {Di Napoli, Claudia and Sabatucci, Luca and Cossentino, Massimo and Rossi, Silvia},
  doi        = {10.5220/0006203902760283},
  year       = {2017},
  date       = {2017-01-01},
  booktitle  = {ICAART (1)},
  pages      = {276--283},
  abstract   = {The growing availability of services accessible through the network makes it possible to build complex applications resulting from their composition that are usually characterized also by non-functional properties, known as Quality of Service (QoS). To exploit the full potential of service technology, automatic QoS-based composition of services is crucial. In this work a framework for automatic service composition is presented that relies on planning and service negotiation techniques for addressing both functional and non-functional requirements. The proposed approach allows for dynamic service composition and QoS attributes, and it can be applied when services are provided in the contest of a competitive market of service providers without knowledge disclosure.},
  keywords   = {Automatic service composition, Distributed computer systems, Dynamic workflow, Multiagent negotiation, Quality of Service},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Di Napoli, Claudia; Sabatucci, Luca; Cossentino, Massimo; Rossi, Silvia
Generating and Instantiating Abstract Workflows with QoS User Requirements. Proceedings Article
In: ICAART (1), pp. 276–283, 2017.
Abstract | Links | BibTeX | Tags: Automatic service composition, Distributed computer systems, Dynamic workflow, Multiagent negotiation, Quality of Service
@inproceedings{di_napoli_generating_2017,
  title      = {Generating and Instantiating Abstract Workflows with {QoS} User Requirements},
  author     = {Di Napoli, Claudia and Sabatucci, Luca and Cossentino, Massimo and Rossi, Silvia},
  doi        = {10.5220/0006203902760283},
  year       = {2017},
  date       = {2017-01-01},
  booktitle  = {ICAART (1)},
  pages      = {276--283},
  abstract   = {The growing availability of services accessible through the network makes it possible to build complex applications resulting from their composition that are usually characterized also by non-functional properties, known as Quality of Service (QoS). To exploit the full potential of service technology, automatic QoS-based composition of services is crucial. In this work a framework for automatic service composition is presented that relies on planning and service negotiation techniques for addressing both functional and non-functional requirements. The proposed approach allows for dynamic service composition and QoS attributes, and it can be applied when services are provided in the contest of a competitive market of service providers without knowledge disclosure.},
  keywords   = {Automatic service composition, Distributed computer systems, Dynamic workflow, Multiagent negotiation, Quality of Service},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}