-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
kubinka0505
committed
Jan 2, 2024
0 parents
commit 136e196
Showing
24 changed files
with
8,053 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"name": "Music Generation - AudioCraft", | ||
"collapsed_sections": [ | ||
"pcKlg7MAdaBl" | ||
], | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
}, | ||
"accelerator": "GPU", | ||
"gpuClass": "standard" | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"<font color=DeepSkyBlue size=7> 🎶 **Music Generation** ➕ **- *AudioCraft***</font>" | ||
], | ||
"metadata": { | ||
"id": "SV_zN6b_8UK9" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"cellView": "form", | ||
"id": "WO1EH3sEbcdM" | ||
}, | ||
"source": [ | ||
"#@title # <font color=\"lime\">**1.**</font> Install requirements. 📥\n", | ||
"#@markdown - Modules\n", | ||
"#@markdown - [**`AudioCraft`**](https://github.com/facebookresearch/audiocraft) - Audio generation model 🎶\n", | ||
"\n", | ||
"from tensorflow import config\n", | ||
"GPU = config.list_physical_devices(\"GPU\")\n", | ||
"GPU = GPU[0].device_type == \"GPU\" if GPU else 0\n", | ||
"\n", | ||
"if not GPU:\n", | ||
"\traise SystemExit(\"GPU unavailable!\\nThe continuation is pointless.\")\n", | ||
"\n", | ||
"#-=-=-=-#\n", | ||
"\n", | ||
"print('Installing dependencies for \"AudioCraft\"...')\n", | ||
"!pip install -U git+https://github.com/facebookresearch/audiocraft#egg=audiocraft --quiet --exists-action i | grep -v \"already\"\n", | ||
"\n", | ||
"import torch, torchaudio\n", | ||
"from audiocraft.models import musicgen\n", | ||
"from audiocraft.utils.notebook import display_audio" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"#@title # <font color=gold>**2.**</font> Configure. ⚙️\n", | ||
"Model_Name = \"Medium\" #@param [\"Small\", \"Medium\", \"Melody\", \"Large\"]\n", | ||
"Model_Name = Model_Name.lower()\n", | ||
"\n", | ||
"#@markdown Notices:\n", | ||
"#@markdown - ### ⛔ <font color=\"red\">**Can't be less than `2` prompts</font>. (RAM failure)**\n", | ||
"#@markdown - ### 🙊 **Unable to generate realistic vocals**.\n", | ||
"#@markdown - ### 🇬🇧 Has been trained with **English descriptions <font color=red>and will not</font>** perform as well in other languages.\n", | ||
"#@markdown - ### 🎶 <font color=gold>Does not perform **equally well for all music styles and cultures**</font>.\n", | ||
"#@markdown - 📉 <font color=gold>**Sometimes generates end of songs, collapsing to silence**</font>.\n", | ||
"#@markdown - 🤷♂️ It is sometimes difficult to assess what types of text descriptions provide the best generations.<br>Prompt engineering may be required to obtain satisfying results.\n", | ||
"\n", | ||
"if not \"musicgen\" in locals():\n", | ||
"\traise SystemExit(\"Requirements were not installed!\\nPlease run the first cell.\")\n", | ||
"\n", | ||
"Model = musicgen.MusicGen.get_pretrained(Model_Name, device = \"cuda\")" | ||
], | ||
"metadata": { | ||
"cellView": "form", | ||
"id": "jY8jPyEncCWs" | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# <font color=\"lime\">**3.**</font> Generate. 🎶\n", | ||
"### ℹ️ For good quality results, **provide as much details as possible.**" | ||
], | ||
"metadata": { | ||
"id": "pcKlg7MAdaBl" | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Text to audio 📝" | ||
], | ||
"metadata": { | ||
"id": "Qh52G6fBdlCz" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"#@markdown ## Audio Prompts 📝\n", | ||
"#@markdown Can be empty.\n", | ||
"Description_1 = \"Crazy EDM, heavy bang\" #@param {\"type\": \"string\"}\n", | ||
"Description_2 = \"Classic Reggae track with an electronic guitar solo\" #@param {\"type\": \"string\"}\n", | ||
"Description_3 = \"LoFi electro chill with organic samples\" #@param {\"type\": \"string\"}\n", | ||
"Description_4 = \"Rock with saturated guitars, a heavy bass line and crazy drum break and fills\" #@param {\"type\": \"string\"}\n", | ||
"Description_5 = \"Earthy tones, environmentally conscious, ukulele-infused, harmonic, breezy, easygoing, organic instrumentation, gentle grooves\" #@param {\"type\": \"string\"}\n", | ||
"Remove_Duplicated_Descriptions = False #@param {\"type\": \"boolean\"}\n", | ||
"\n", | ||
"#@markdown <br>\n", | ||
"\n", | ||
"#@markdown ## Generation parameters\n", | ||
"#@markdown #### <font color=red>**For developers only!**</font>\n", | ||
"Use_ArgMax_Decoding = False #@param {type: \"boolean\"}\n", | ||
"Top_P = 250 #@param {type: \"slider\", min: 100, max: 500.0, step: 1}\n", | ||
"Top_K = 0.0 #@param {type: \"slider\", min: 0.0, max: 10.0, step: 0.1}\n", | ||
"SoftMax_Temperature = 1.0 #@param {type: \"slider\", min: 1.0, max: 10.0, step: 0.1}\n", | ||
"CFG_Coefficient = 3.0 #@param {type: \"slider\", min: 1.0, max: 10.0, step: 0.1}\n", | ||
"Use_2_CFG = False #@param {type: \"boolean\"}\n", | ||
"\n", | ||
"#@markdown <br>\n", | ||
"\n", | ||
"#@markdown ## Music related settings\n", | ||
"#@markdown Experimental, do not rely.\n", | ||
"BPM = 135 #@param {type: \"slider\", min: 60, max: 170, step: 1}\n", | ||
"BPM = f\"{BPM} BPM\", f\"{BPM} Beats Per Minute\"\n", | ||
"Time_Signature = \"4/4\" #@param [\"3/4\", \"3/8\", \"4/4\", \"6/8\"]\n", | ||
"Time_Signature = f\"{Time_Signature} Time Signature\"\n", | ||
"Duration_Seconds = 25 #@param {type: \"slider\", min: 1.0, max: 60.0, step: 0.1}\n", | ||
"Target_Key_Note = \"None\" #@param [\"C\", \"C♯ (D♭)\", \"D\", \"D♯ (E♭)\", \"E\", \"F\", \"F♯ (G♭)\", \"G\", \"G♯ (A♭)\", \"A\", \"A♯ (B♭)\", \"B\", \"None\"]\n", | ||
"Target_Key_Mode = \"Major\" #@param [\"Major\", \"Minor\", \"Dorian\", \"Phyrgian\", \"Mixolydian\"]\n", | ||
"if Target_Key_Note == \"None\": Target_Key_Note = \"\"\n", | ||
"Target_Key = \"in {0} {1} key scale\".format(Target_Key_Note, Target_Key_Mode)\n", | ||
"\n", | ||
"#-=-=-=-#\n", | ||
"\n", | ||
"if not \"Model\" in locals():\n", | ||
"\traise SystemExit(\"Model was not downloaded!\\nPlease run the previous cell.\")\n", | ||
"\n", | ||
"Descriptions_ = Description_1, Description_2, Description_3, Description_4, Description_5\n", | ||
"Descriptions = [\", \".join((Prompt.strip(), *BPM, Time_Signature, Target_Key)) for Prompt in Descriptions_ if Prompt]\n", | ||
"\n", | ||
"if Remove_Duplicated_Descriptions:\n", | ||
"\tDescriptions = list(set(Descriptions))\n", | ||
"if len(Descriptions) < 2:\n", | ||
"\traise SystemExit(\"Amount of prompts have to be > 1.\")\n", | ||
"\n", | ||
"Model.set_generation_params(\n", | ||
"\t0 if Use_ArgMax_Decoding else 1,\n", | ||
"\tTop_K, Top_P, SoftMax_Temperature,\n", | ||
"\tDuration_Seconds, CFG_Coefficient, Use_2_CFG\n", | ||
")\n", | ||
"\n", | ||
"#-=-=-=-#\n", | ||
"\n", | ||
"print(\"Final prompts:\")\n", | ||
"print(\"\\t\" + \"\\n\\t\".join(Descriptions))\n", | ||
"print(\"\\n\" + \"━\" * 64 + \"\\n\")\n", | ||
"\n", | ||
"try:\n", | ||
"\tResult = Model.generate(Descriptions, progress = 1)\n", | ||
"\tdisplay_audio(Result, 32E3)\n", | ||
"except KeyboardInterrupt:\n", | ||
"\traise SystemExit(\"Cancelled.\")" | ||
], | ||
"metadata": { | ||
"cellView": "form", | ||
"id": "r19GIsZNdmO0" | ||
} | ||
} | ||
] | ||
} |
Oops, something went wrong.