AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
How to
You can use the tag cloud to select only the papers dealing with specific research topics.
You can expand the Abstract, Links and BibTeX record for each paper.
2025
Kai, W. -H.; Xing, K. -X.
Video-driven musical composition using large language model with memory-augmented state space Journal Article
In: Visual Computer, vol. 41, no. 5, pp. 3345–3357, 2025, ISSN: 0178-2789; 1432-2315, (Publisher: Springer Science and Business Media Deutschland GmbH).
Abstract | Links | BibTeX | Tags: Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space, State-space
@article{kai_video-driven_2025,
  title    = {Video-driven musical composition using large language model with memory-augmented state space},
  author   = {W. -H. Kai and K. -X. Xing},
  url      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105001073242&doi=10.1007%2Fs00371-024-03606-w&partnerID=40&md5=71a40ea7584c5a5f210afc1c30aac468},
  doi      = {10.1007/s00371-024-03606-w},
  issn     = {0178-2789},
  year     = {2025},
  date     = {2025-01-01},
  journal  = {Visual Computer},
  volume   = {41},
  number   = {5},
  pages    = {3345--3357},
  abstract = {The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. However, the research work on LLMs for music inspiration is still in its infancy. To fill the gap in this field and break through the dilemma that LLMs can only understand short videos with limited frames, we propose a large language model with state space for long-term video-to-music generation. To capture long-range dependency and maintaining high performance, while further decrease the computing cost, our overall network includes the Enhanced Video Mamba, which incorporates continuous moving window partitioning and local feature augmentation, and a long-term memory bank that captures and aggregates historical video information to mitigate information loss in long sequences. This framework achieves both subquadratic-time computation and near-linear memory complexity, enabling effective long-term video-to-music generation. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. Our code released on https://github.com/kai211233/S2L2-V2M.},
  note     = {Publisher: Springer Science and Business Media Deutschland GmbH. Online ISSN: 1432-2315},
  keywords = {Associative storage, Augmented Reality, Augmented state space, Computer simulation languages, Computer system recovery, Distributed computer systems, HTTP, Language Model, Large language model, Long-term video-to-music generation, Mamba, Memory architecture, Memory-augmented, Modeling languages, Music, Musical composition, Natural language processing systems, Object oriented programming, Performance, Problem oriented languages, State space},
  pubstate = {published},
  tppubtype = {article}
}
Lu, J.; Gao, J.; Feng, F.; He, Z.; Zheng, M.; Liu, K.; He, J.; Liao, B.; Xu, S.; Sun, K.; Mo, Y.; Peng, Q.; Luo, J.; Li, Q.; Lu, G.; Wang, Z.; Dong, J.; He, K.; Cheng, S.; Cao, J.; Jiao, H.; Zhang, P.; Ma, S.; Zhu, L.; Shi, C.; Zhang, Y.; Chen, Y.; Wang, W.; Zhu, S.; Li, X.; Wang, Q.; Liu, J.; Wang, C.; Lin, W.; Zhai, E.; Wu, J.; Liu, Q.; Fu, B.; Cai, D.
Alibaba Stellar: A New Generation RDMA Network for Cloud AI Proceedings Article
In: pp. 453–466, Association for Computing Machinery, Inc, 2025, ISBN: 979-8-4007-1524-2.
Abstract | Links | BibTeX | Tags: Access network, Cloud computing, Congestion control (communication), Containers, data center networking, Data center networkings, Language Model, Learning systems, Machine learning applications, Memory architecture, Network support, Network support for AI and machine learning application, network support for AI and machine learning applications, Performance, Program processors, Remote direct memory access, Stellars, Transport and congestion control, Virtual Reality, Virtualization
@inproceedings{lu_alibaba_2025,
  title     = {{Alibaba} {Stellar}: A New Generation {RDMA} Network for Cloud {AI}},
  author    = {J. Lu and J. Gao and F. Feng and Z. He and M. Zheng and K. Liu and J. He and B. Liao and S. Xu and K. Sun and Y. Mo and Q. Peng and J. Luo and Q. Li and G. Lu and Z. Wang and J. Dong and K. He and S. Cheng and J. Cao and H. Jiao and P. Zhang and S. Ma and L. Zhu and C. Shi and Y. Zhang and Y. Chen and W. Wang and S. Zhu and X. Li and Q. Wang and J. Liu and C. Wang and W. Lin and E. Zhai and J. Wu and Q. Liu and B. Fu and D. Cai},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105016208536&doi=10.1145%2F3718958.3750539&partnerID=40&md5=901fdd27c510072633f0390a0abfb653},
  doi       = {10.1145/3718958.3750539},
  isbn      = {979-8-4007-1524-2},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proceedings of the {ACM} {SIGCOMM} 2025 Conference},
  pages     = {453--466},
  publisher = {Association for Computing Machinery, Inc},
  abstract  = {The rapid adoption of Large Language Models (LLMs) in cloud environments has intensified the demand for high-performance AI training and inference, where Remote Direct Memory Access (RDMA) plays a critical role. However, existing RDMA virtualization solutions, such as Single-Root Input/Output Virtualization (SR-IOV), face significant limitations in scalability, performance, and stability. These issues include lengthy container initialization times, hardware resource constraints, and inefficient traffic steering. To address these challenges, we propose Stellar, a new generation RDMA network for cloud AI. Stellar introduces three key innovations: Para-Virtualized Direct Memory Access (PVDMA) for on-demand memory pinning, extended Memory Translation Table (eMTT) for optimized GPU Direct RDMA (GDR) performance, and RDMA Packet Spray for efficient multi-path utilization. Deployed in our large-scale AI clusters, Stellar spins up virtual devices in seconds, reduces container initialization time by 15 times, and improves LLM training speed by up to 14%. Our evaluations demonstrate that Stellar significantly outperforms existing solutions, offering a scalable, stable, and high-performance RDMA network for cloud AI.},
  keywords  = {Access network, Cloud computing, Congestion control (communication), Containers, Data center networking, Language Model, Learning systems, Machine learning applications, Memory architecture, Network support, Network support for AI and machine learning applications, Performance, Program processors, Remote direct memory access, Stellars, Transport and congestion control, Virtual Reality, Virtualization},
  pubstate  = {published},
  tppubtype = {inproceedings}
}