diff --git a/Slides-handout_version.ipynb b/Slides-handout_version.ipynb index 4b2fad0..6b7d0b2 100644 --- a/Slides-handout_version.ipynb +++ b/Slides-handout_version.ipynb @@ -2429,7 +2429,12 @@ "id": "19c5aa13-1eb3-479c-b0ae-672fa2c4676d", "metadata": {}, "source": [ - "\\small General multi-task diffusion models:\n", + "\\small Regional generation:\n", + "\n", + "- [\\color{SkyBlue}{Mastering Text-to-Image Diffusion: Recaptioning, Planning, and Generating with Multimodal LLMs}](https://arxiv.org/abs/2401.11708) by Yang et al. [2024]\n", + "- [\\color{SkyBlue}{Training-free Regional Prompting for Diffusion Transformers}](https://arxiv.org/abs/2411.02395) by Chen et al. [2024]\n", + "\n", + "General multi-task diffusion models:\n", "\n", "- [\\color{SkyBlue}{OmniGen: Unified Image Generation}](https://arxiv.org/abs/2409.11340) by Xiao et al. [2024]" ] diff --git a/Slides.ipynb b/Slides.ipynb index c08fdb8..0f3259e 100644 --- a/Slides.ipynb +++ b/Slides.ipynb @@ -2512,7 +2512,12 @@ "id": "bd52f057-cdfa-4d4d-b6ca-90a3c1ab9665", "metadata": {}, "source": [ - "\\small General multi-task diffusion models:\n", + "\\small Regional generation:\n", + "\n", + "- [\\color{SkyBlue}{Mastering Text-to-Image Diffusion: Recaptioning, Planning, and Generating with Multimodal LLMs}](https://arxiv.org/abs/2401.11708) by Yang et al. [2024]\n", + "- [\\color{SkyBlue}{Training-free Regional Prompting for Diffusion Transformers}](https://arxiv.org/abs/2411.02395) by Chen et al. [2024]\n", + "\n", + "General multi-task diffusion models:\n", "\n", "- [\\color{SkyBlue}{OmniGen: Unified Image Generation}](https://arxiv.org/abs/2409.11340) by Xiao et al. 
[2024]" ] diff --git a/docs/Slides-handout_version.pdf b/docs/Slides-handout_version.pdf index bff16f0..c1741c8 100644 Binary files a/docs/Slides-handout_version.pdf and b/docs/Slides-handout_version.pdf differ diff --git a/docs/Slides.pdf b/docs/Slides.pdf index 39ace9b..0cf32ca 100644 Binary files a/docs/Slides.pdf and b/docs/Slides.pdf differ diff --git a/references_slides.bib b/references_slides.bib index 0a7210e..553f4b7 100644 --- a/references_slides.bib +++ b/references_slides.bib @@ -857,6 +857,25 @@ @misc{wang2024rectifieddiffusionstraightnessneed url={https://arxiv.org/abs/2410.07303}, } +@inproceedings{ +yang2024mastering, +title={Mastering Text-to-Image Diffusion: Recaptioning, Planning, and Generating with Multimodal {LLM}s}, +author={Ling Yang and Zhaochen Yu and Chenlin Meng and Minkai Xu and Stefano Ermon and Bin Cui}, +booktitle={Forty-first International Conference on Machine Learning}, +year={2024}, +url={https://openreview.net/forum?id=DgLFkAPwuZ} +} + +@misc{chen2024trainingfreeregionalpromptingdiffusion, + title={Training-free Regional Prompting for Diffusion Transformers}, + author={Anthony Chen and Jianjin Xu and Wenzhao Zheng and Gaole Dai and Yida Wang and Renrui Zhang and Haofan Wang and Shanghang Zhang}, + year={2024}, + eprint={2411.02395}, + archivePrefix={arXiv}, + primaryClass={cs.CV}, + url={https://arxiv.org/abs/2411.02395}, +} + @misc{xiao2024omnigenunifiedimagegeneration, title={OmniGen: Unified Image Generation}, author={Shitao Xiao and Yueze Wang and Junjie Zhou and Huaying Yuan and Xingrun Xing and Ruiran Yan and Shuting Wang and Tiejun Huang and Zheng Liu},