AHCI RESEARCH GROUP
Publications
Papers published in international journals,
proceedings of conferences, workshops and books.
OUR RESEARCH
Scientific Publications
2024
Weid, M.; Khezrian, N.; Mana, A. P.; Farzinnejad, F.; Grubert, J.
GenDeck: Towards a HoloDeck with Text-to-3D Model Generation Proceedings Article
In: Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW, pp. 1188–1189, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7449-0.
Abstract | Links | BibTeX | Tags: 3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extended Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity
@inproceedings{weid_gendeck_2024,
title = {GenDeck: Towards a HoloDeck with Text-to-3D Model Generation},
author = {M. Weid and N. Khezrian and A. P. Mana and F. Farzinnejad and J. Grubert},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85195600251&doi=10.1109%2fVRW62533.2024.00388&partnerID=40&md5=6dab0cc05259fa2dbe0a2b3806e569af},
doi = {10.1109/VRW62533.2024.00388},
isbn = {979-8-3503-7449-0},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Conf. Virtual Real. 3D User Interfaces Abstr. Workshops, VRW},
pages = {1188–1189},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative Artificial Intelligence has the potential to substantially transform the way 3D content for Extended Reality applications is produced. Specifically, the development of text-to-3D and image-to-3D generators with increasing visual fidelity and decreasing computational costs is thriving quickly. Within this work, we present GenDeck, a proof-of-concept application to experience text-to-3D model generation inside an immersive Virtual Reality environment. © 2024 IEEE.},
keywords = {3D content, 3D modeling, 3D models, 3d-modeling, Computational costs, Extended Reality, Human computer interaction, Immersive virtual reality, Knowledge Work, Model generation, Proof of concept, Three dimensional computer graphics, Virtual Reality, Visual fidelity},
pubstate = {published},
tppubtype = {inproceedings}
}
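The abstract only outlines GenDeck at a high level, but the loop it describes (a prompt entered inside VR, a text-to-3D generation step, and placement of the result in the immersive scene) can be illustrated with a short sketch. All names below, including the generator stub and the scene type, are hypothetical placeholders and not the paper's implementation.

# Illustrative sketch only: GenDeck's VR front end and text-to-3D backend are
# not detailed in the abstract; every name here is a hypothetical placeholder.
from dataclasses import dataclass, field
from typing import List, Tuple

@dataclass
class Mesh:
    vertices: List[Tuple[float, float, float]]
    faces: List[Tuple[int, int, int]]

@dataclass
class VRScene:
    objects: List[Tuple[str, Mesh]] = field(default_factory=list)

    def place(self, name: str, mesh: Mesh) -> None:
        # In a real system this would spawn the mesh at the user's cursor.
        self.objects.append((name, mesh))

def generate_mesh_from_text(prompt: str) -> Mesh:
    # Stand-in for a text-to-3D backend (e.g. a diffusion-based generator);
    # a unit triangle is returned so the sketch runs end to end.
    return Mesh(vertices=[(0, 0, 0), (1, 0, 0), (0, 1, 0)], faces=[(0, 1, 2)])

def handle_prompt_in_vr(scene: VRScene, prompt: str) -> None:
    mesh = generate_mesh_from_text(prompt)  # slow step, asynchronous in practice
    scene.place(prompt, mesh)

scene = VRScene()
handle_prompt_in_vr(scene, "a low-poly wooden chair")
print(len(scene.objects))  # 1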
Weng, S. C. -C.; Chiou, Y. -M.; Do, E. Y. -L.
Dream Mesh: A Speech-to-3D Model Generative Pipeline in Mixed Reality Proceedings Article
In: Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR, pp. 345–349, Institute of Electrical and Electronics Engineers Inc., 2024, ISBN: 979-8-3503-7202-1.
Abstract | Links | BibTeX | Tags: 3D content, 3D modeling, 3D models, 3d-modeling, Augmented Reality, Digital assets, Generative AI, generative artificial intelligence, Intelligence models, Mesh generation, Mixed reality, Modeling, Speech-to-3D, Text modeling, Three dimensional computer graphics, User interfaces
@inproceedings{weng_dream_2024,
title = {Dream Mesh: A Speech-to-3D Model Generative Pipeline in Mixed Reality},
author = {S. C. -C. Weng and Y. -M. Chiou and E. Y. -L. Do},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85187218106&doi=10.1109%2fAIxVR59861.2024.00059&partnerID=40&md5=5bfe206e841f23de6458f88a0824bd4d},
doi = {10.1109/AIxVR59861.2024.00059},
isbn = {979-8-3503-7202-1},
year = {2024},
date = {2024-01-01},
booktitle = {Proc. - IEEE Int. Conf. Artif. Intell. Ext. Virtual Real., AIxVR},
pages = {345–349},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
abstract = {Generative Artificial Intelligence (AI) models have risen to prominence due to their unparalleled ability to craft and generate digital assets, encompassing text, images, audio, video, and 3D models. Leveraging the capabilities of diffusion models, such as Stable Diffusion and Instruct pix2pix, users can guide AI with specific prompts, streamlining the creative journey for graphic designers. However, the primary application of these models has been to graphic content within desktop interfaces, prompting professionals in interior and architectural design to seek more tailored solutions for their daily operations. To bridge this gap, Augmented Reality (AR) and Mixed Reality (MR) technologies offer a promising solution, transforming traditional 2D artworks into engaging 3D interactive realms. In this paper, we present "Dream Mesh," an MR tool that combines a Speech-to-3D generative workflow based on the DreamFusion model without relying on pre-existing 3D content libraries. This innovative system empowers users to express 3D content needs through natural language input, promising transformative potential in real-time 3D content creation and an enhanced MR user experience. © 2024 IEEE.},
keywords = {3D content, 3D modeling, 3D models, 3d-modeling, Augmented Reality, Digital assets, Generative AI, generative artificial intelligence, Intelligence models, Mesh generation, Mixed reality, Modeling, Speech-to-3D, Text modeling, Three dimensional computer graphics, User interfaces},
pubstate = {published},
tppubtype = {inproceedings}
}
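The speech-to-3D workflow described above decomposes naturally into three stages: speech recognition, prompt-conditioned mesh generation (DreamFusion-style in the paper), and anchoring of the result in the MR scene. The sketch below wires those stages together with stubbed components; it assumes nothing about the authors' actual implementation, and every function name is illustrative.

# Hypothetical sketch of a speech-to-3D pipeline; the ASR, generator, and
# MR anchoring below are stubs, not the Dream Mesh implementation.
from typing import List, Tuple

Vec3 = Tuple[float, float, float]

def transcribe(audio: bytes) -> str:
    # Placeholder for an ASR model turning microphone audio into a text prompt.
    return "a small potted plant"

def text_to_mesh(prompt: str) -> List[Vec3]:
    # Placeholder for a DreamFusion-style text-to-3D generator; returns
    # vertex positions only, to keep the sketch minimal.
    return [(0.0, 0.0, 0.0), (0.1, 0.0, 0.0), (0.0, 0.1, 0.0)]

def anchor_in_mr(vertices: List[Vec3], gaze_point: Vec3) -> List[Vec3]:
    # Translate the generated geometry to where the user is looking.
    gx, gy, gz = gaze_point
    return [(x + gx, y + gy, z + gz) for x, y, z in vertices]

def speech_to_3d(audio: bytes, gaze_point: Vec3) -> List[Vec3]:
    prompt = transcribe(audio)
    return anchor_in_mr(text_to_mesh(prompt), gaze_point)

placed = speech_to_3d(b"<audio frames>", gaze_point=(0.0, 1.2, 0.5))
print(placed[0])  # first vertex, translated to the gaze point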
He, K.; Yao, K.; Zhang, Q.; Yu, J.; Liu, L.; Xu, L.
DressCode: Autoregressively Sewing and Generating Garments from Text Guidance Journal Article
In: ACM Transactions on Graphics, vol. 43, no. 4, 2024, ISSN: 0730-0301.
Abstract | Links | BibTeX | Tags: 3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On
@article{he_dresscode_2024,
title = {DressCode: Autoregressively Sewing and Generating Garments from Text Guidance},
author = {K. He and K. Yao and Q. Zhang and J. Yu and L. Liu and L. Xu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199257820&doi=10.1145%2f3658147&partnerID=40&md5=8996e62e4d9dabb5a7034f8bf4df5a43},
doi = {10.1145/3658147},
issn = {0730-0301},
year = {2024},
date = {2024-01-01},
journal = {ACM Transactions on Graphics},
volume = {43},
number = {4},
abstract = {Apparel's significant role in human appearance underscores the importance of garment digitalization for digital human creation. Recent advances in 3D content creation are pivotal for digital human creation. Nonetheless, garment generation from text guidance is still nascent. We introduce a text-driven 3D garment generation framework, DressCode, which aims to democratize design for novices and offer immense potential in fashion design, virtual try-on, and digital human creation. We first introduce SewingGPT, a GPT-based architecture integrating cross-attention with text-conditioned embedding to generate sewing patterns with text guidance. We then tailor a pre-trained Stable Diffusion to generate tile-based Physically-based Rendering (PBR) textures for the garments. By leveraging a large language model, our framework generates CG-friendly garments through natural language interaction. It also facilitates pattern completion and texture editing, streamlining the design process through user-friendly interaction. This framework fosters innovation by allowing creators to freely experiment with designs and incorporate unique elements into their work. With comprehensive evaluations and comparisons with other state-of-the-art methods, our method showcases superior quality and alignment with input prompts. User studies further validate our high-quality rendering results, highlighting its practical utility and potential in production settings. Copyright © 2024 held by the owner/author(s).},
keywords = {3D content, 3d garments, autoregressive model, Autoregressive modelling, Content creation, Digital humans, Embeddings, Fashion design, Garment generation, Interactive computer graphics, Sewing pattern, sewing patterns, Textures, Virtual Reality, Virtual Try-On},
pubstate = {published},
tppubtype = {article}
}
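SewingGPT is described as a GPT-style autoregressive model whose decoder cross-attends to text-conditioned embeddings while emitting sewing-pattern tokens. A minimal PyTorch sketch of that shape follows; the vocabulary size, dimensions, text encoder, and greedy decoding are invented for illustration and do not reproduce the actual SewingGPT architecture or tokenization.

# Minimal sketch of a decoder that autoregressively emits pattern tokens
# while cross-attending to text embeddings; sizes and sampling are illustrative.
import torch
import torch.nn as nn

VOCAB, D_MODEL, BOS = 512, 256, 0  # hypothetical token vocabulary and start token

class PatternDecoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.tok = nn.Embedding(VOCAB, D_MODEL)
        layer = nn.TransformerDecoderLayer(d_model=D_MODEL, nhead=8, batch_first=True)
        self.dec = nn.TransformerDecoder(layer, num_layers=4)
        self.head = nn.Linear(D_MODEL, VOCAB)

    def forward(self, tokens, text_memory):
        # Causal self-attention over pattern tokens, cross-attention to the text memory.
        mask = nn.Transformer.generate_square_subsequent_mask(tokens.size(1))
        h = self.dec(self.tok(tokens), text_memory, tgt_mask=mask)
        return self.head(h)

@torch.no_grad()
def generate(model, text_memory, max_len=16):
    tokens = torch.full((1, 1), BOS, dtype=torch.long)
    for _ in range(max_len):
        logits = model(tokens, text_memory)
        nxt = logits[:, -1].argmax(dim=-1, keepdim=True)  # greedy decoding
        tokens = torch.cat([tokens, nxt], dim=1)
    return tokens

model = PatternDecoder()
text_memory = torch.randn(1, 10, D_MODEL)  # stand-in for text-conditioned embeddings
print(generate(model, text_memory).shape)  # torch.Size([1, 17])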
2023
Feng, Y.; Zhu, H.; Peng, D.; Peng, X.; Hu, P.
RONO: Robust Discriminative Learning with Noisy Labels for 2D-3D Cross-Modal Retrieval Proceedings Article
In: Proc IEEE Comput Soc Conf Comput Vision Pattern Recognit, pp. 11610–11619, IEEE Computer Society, 2023, ISSN: 1063-6919.
Abstract | Links | BibTeX | Tags: 3D content, 3D data, 3D modeling, Adversarial machine learning, Contrastive Learning, Cross-modal, Discriminative learning, Federated learning, Heterogeneous structures, Learning mechanism, Learning performance, Metaverses, Multi-modal learning, Noisy labels, Spatio-temporal data
@inproceedings{feng_rono_2023,
title = {RONO: Robust Discriminative Learning with Noisy Labels for 2D-3D Cross-Modal Retrieval},
author = {Y. Feng and H. Zhu and D. Peng and X. Peng and P. Hu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85170845124&doi=10.1109%2fCVPR52729.2023.01117&partnerID=40&md5=2eee285207ff3ea8e774480e29d96ec1},
doi = {10.1109/CVPR52729.2023.01117},
issn = {1063-6919},
year = {2023},
date = {2023-01-01},
booktitle = {Proc IEEE Comput Soc Conf Comput Vision Pattern Recognit},
volume = {2023-June},
pages = {11610–11619},
publisher = {IEEE Computer Society},
abstract = {Recently, with the advent of Metaverse and AI Generated Content, cross-modal retrieval becomes popular with a burst of 2D and 3D data. However, this problem is challenging given the heterogeneous structure and semantic discrepancies. Moreover, imperfect annotations are ubiquitous given the ambiguous 2D and 3D content, thus inevitably producing noisy labels to degrade the learning performance. To tackle the problem, this paper proposes a robust 2D-3D retrieval framework (RONO) to robustly learn from noisy multimodal data. Specifically, one novel Robust Discriminative Center Learning mechanism (RDCL) is proposed in RONO to adaptively distinguish clean and noisy samples for respectively providing them with positive and negative optimization directions, thus mitigating the negative impact of noisy labels. Besides, we present a Shared Space Consistency Learning mechanism (SSCL) to capture the intrinsic information inside the noisy data by minimizing the cross-modal and semantic discrepancy between common space and label space simultaneously. Comprehensive mathematical analyses are given to theoretically prove the noise tolerance of the proposed method. Furthermore, we conduct extensive experiments on four 3D-model multimodal datasets to verify the effectiveness of our method by comparing it with 15 state-of-the-art methods. © 2023 IEEE.},
keywords = {3D content, 3D data, 3D modeling, Adversarial machine learning, Contrastive Learning, Cross-modal, Discriminative learning, Federated learning, Heterogeneous structures, Learning mechanism, Learning performance, Metaverses, Multi-modal learning, Noisy labels, Spatio-temporal data},
pubstate = {published},
tppubtype = {inproceedings}
}
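The two mechanisms in the RONO abstract (RDCL, which pulls samples it judges clean toward their class centers and pushes suspected-noisy samples away, and SSCL, which keeps the 2D and 3D embeddings of the same instance consistent) can be hinted at with a toy loss. The clean/noisy split below uses a simple small-distance heuristic purely for illustration; it is not the paper's actual criterion, loss, or weighting.

# Toy illustration of center-based robust learning plus cross-modal alignment;
# the clean/noisy split and loss terms are assumptions, not RONO's formulation.
import torch
import torch.nn.functional as F

def toy_rono_loss(feat_2d, feat_3d, labels, centers):
    # Distance of each (2D, 3D) pair's fused feature to its labelled class center.
    fused = 0.5 * (feat_2d + feat_3d)
    dist = ((fused - centers[labels]) ** 2).sum(dim=1)

    # Small-distance heuristic: treat the closer half as clean, the rest as noisy.
    clean = dist <= dist.median()

    # Clean samples are attracted to their center; suspected-noisy samples are
    # repelled (clamped so the repulsion term stays bounded).
    center_loss = dist[clean].mean() + F.relu(4.0 - dist[~clean]).mean()

    # Cross-modal consistency: matching 2D and 3D embeddings should agree.
    consistency = F.mse_loss(feat_2d, feat_3d)
    return center_loss + consistency

feat_2d = torch.randn(8, 32)
feat_3d = torch.randn(8, 32)
labels = torch.randint(0, 4, (8,))
centers = torch.randn(4, 32)
print(toy_rono_loss(feat_2d, feat_3d, labels, centers))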