diff --git a/DOCKER_README.md b/DOCKER_README.md new file mode 100644 index 0000000..62a32b6 --- /dev/null +++ b/DOCKER_README.md @@ -0,0 +1,72 @@ +# Docker +The Docker image currently works on Windows and Linux, optionally supporting NVIDIA GPUs. + +## General Remarks +- The resulting Docker image is 22 GB in size. Building might require even more disk space temporarily. +- Build time depends on your hardware and internet connection. Expect it to take at least 10 minutes. +- The Docker build: + - Downloads XTTS as default TTS engine + - Enables RVC by default + - Downloads all supported RVC models + - Enables deepspeed by default +- Starting the Docker image should take only a few seconds because all of these steps were already executed during the build. + +## Docker for Linux + +### Ubuntu Specific Setup for GPUs +1. Make sure the latest NVIDIA drivers are installed: `sudo ubuntu-drivers install` +1. Install Docker in your preferred way. One way to do it is to follow the official documentation [here](https://docs.docker.com/engine/install/ubuntu/#uninstall-old-versions). + - Start by uninstalling the old versions + - Follow the "apt" repository installation method + - Check that everything is working with the "hello-world" container +1. If you get an error message saying that the GPU cannot be used when launching the Docker container, you might have to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). + - Install with the "apt" method + - Run the docker configuration command + ```sudo nvidia-ctk runtime configure --runtime=docker``` + - Restart docker + +## Docker for Windows (WSL2) +### Windows Specific Setup for GPUs +> Make sure your NVIDIA drivers are up to date: https://www.nvidia.com/download/index.aspx +1. Install WSL2 in PowerShell with `wsl --install` and restart +2. Open PowerShell, type ```ubuntu``` and press Enter. This should drop you into a WSL2 shell +3. 
Remove the outdated NVIDIA CUDA signing key: `sudo apt-key del 7fa2af80` +4. Download CUDA toolkit keyring: `wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb` +5. Install keyring: `sudo dpkg -i cuda-keyring_1.1-1_all.deb` +6. Update package list: `sudo apt-get update` +7. Install CUDA toolkit: `sudo apt-get -y install cuda-toolkit-12-4` +8. Install Docker Desktop using WSL2 as the backend +9. Restart +10. If you wish to monitor the terminal remotely via SSH, follow [this guide](https://www.hanselman.com/blog/how-to-ssh-into-wsl2-on-windows-10-from-an-external-machine). +11. Open PowerShell, type ```ubuntu```, [then follow below](#building-and-running-in-docker) + +## Building and Running in Docker + +1. Open a terminal (or Ubuntu WSL) and go to the directory where you cloned the repo +2. Build the image with `./docker-build.sh` +3. Start the container with `./docker-start.sh` +4. Visit `http://localhost:7851/` or, from another machine, `http://<host-ip>:7851` + +## Arguments for building and starting docker +There are various arguments to customize the build and start of the docker image. + +### Arguments for `docker-build.sh` +- `--tts_model` lets you choose the TTS model that is used by default. Valid values are `piper`, `vits`, `xtts`. Defaults to `xtts`. + - Example: `docker-build.sh --tts_model piper` +- `--tag` lets you choose the docker tag. Defaults to `latest`. + - Example: `docker-build.sh --tag mytag` + +### Arguments for `docker-start.sh` +- `--config` lets you choose a config JSON file which can be a subset of `confignew.json`. This allows you to change only a + few values and leave the rest as defined in the default `confignew.json` file. + - Example: `docker-start.sh --config /my/config/file.json` with content `{"branding": "My Brand "}` will just change + the branding in `confignew.json`. +- `--voices` lets you add voices for the TTS engine in WAV format. You have to specify the folder containing all + voice files. 
+ - Example: `docker-start.sh --voices /my/voices/dir` +- `--rvc_voices` similar to `--voices`, this option lets you pick the folder containing the RVC models. + - Example: `docker-start.sh --rvc_voices /my/rvc/voices/dir` +- `--no_ui` lets you skip exposing port 7852 for the gradio interface. Note that you still have to set `launch_gradio` + to `false` via JSON file passed to `--config`. +- Since the above options only address the most important use cases, you can pass additional arbitrary `docker run` options + to `docker-start.sh`. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..69ac137 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,138 @@ +FROM continuumio/miniconda3:24.7.1-0 + +# Argument to choose the model: piper, vits, xtts +ARG TTS_MODEL="xtts" +ENV TTS_MODEL=$TTS_MODEL + +SHELL ["/bin/bash", "-l", "-c"] +ENV SHELL=/bin/bash +ENV HOST=0.0.0.0 +ENV DEBIAN_FRONTEND=noninteractive +ENV CUDA_DOCKER_ARCH=all +ENV GRADIO_SERVER_NAME="0.0.0.0" + +RUN < start_alltalk.sh +#!/usr/bin/env bash +source ~/.bashrc + +# Merging config from docker_confignew.json into confignew.json: +jq -s '.[0] * .[1] * .[2]' confignew.json docker_default_config.json docker_confignew.json > confignew.json.tmp +mv confignew.json.tmp confignew.json + +conda activate alltalk +python script.py +EOF + cat << EOF > start_finetune.sh +#!/usr/bin/env bash +source ~/.bashrc +export TRAINER_TELEMETRY=0 +conda activate alltalk +python finetune.py +EOF + cat << EOF > start_diagnostics.sh +#!/usr/bin/env bash +source ~/.bashrc +conda activate alltalk +python diagnostics.py +EOF + chmod +x start_alltalk.sh + chmod +x start_environment.sh + chmod +x start_finetune.sh + chmod +x start_diagnostics.sh +EOR + +COPY . . 
+
+# Create script to execute firstrun.py ("$@" is quoted so that every argument reaches firstrun.py as one word):
+RUN echo $'#!/usr/bin/env bash \n\
+source ~/.bashrc \n\
+conda activate alltalk \n\
+python ./system/config/firstrun.py "$@"' > ./start_firstrun.sh
+
+RUN chmod +x start_firstrun.sh
+RUN ./start_firstrun.sh --tts_model "$TTS_MODEL"
+
+RUN mkdir -p /alltalk/outputs
+RUN mkdir -p /root/.triton/autotune
+
+# Enabling deepspeed for all models:
+RUN find . -name model_settings.json -exec sed -i -e 's/"deepspeed_enabled": false/"deepspeed_enabled": true/g' {} \;
+
+# Downloading all RVC models (NOTE(review): the heredoc opener and the command that writes /tmp/rvc_files.txt appear to be missing from the next instruction in this copy of the patch - TODO restore from the original commit):
+RUN < /tmp/rvc_files.txt
+    xargs -n 1 curl --create-dirs --output-dir models/rvc_base -LO < /tmp/rvc_files.txt
+    rm -f /tmp/rvc_files.txt
+EOR
+
+## Start alltalk (exec form without an "sh -c" wrapper, so the start script itself is the container's main process):
+ENTRYPOINT ["./start_alltalk.sh"]
diff --git a/docker-build.sh b/docker-build.sh
new file mode 100755
index 0000000..bb60d10
--- /dev/null
+++ b/docker-build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Builds the alltalk_beta Docker image from the current directory.
+#
+# Usage: docker-build.sh [--tts_model MODEL] [--tag TAG]
+#   --tts_model MODEL  passed to the build as build arg TTS_MODEL (default: xtts)
+#   --tag TAG          tag of the resulting alltalk_beta image (default: latest)
+
+# Stop at the first failing command so that a failed build is never
+# followed by the success message at the end of this script:
+set -e
+
+TTS_MODEL=xtts
+DOCKER_TAG=latest
+
+# Exits with an error unless the option in $1 is followed by a value.
+require_value() {
+  if [ "$#" -lt 2 ]; then
+    printf '%s\n' "Missing value for argument ($1)"
+    exit 1
+  fi
+}
+
+# Parse arguments
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --tts_model)
+      require_value "$@"
+      TTS_MODEL="$2"
+      shift
+      ;;
+    --tag)
+      require_value "$@"
+      DOCKER_TAG="$2"
+      shift
+      ;;
+    *)
+      printf '%s\n' "Invalid argument ($1)"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+echo "Starting docker build process using TTS model '${TTS_MODEL}' and docker tag '${DOCKER_TAG}'"
+
+# Values are quoted so that spaces or glob characters cannot split them into several arguments:
+docker buildx \
+  build \
+  --build-arg "TTS_MODEL=${TTS_MODEL}" \
+  -t "alltalk_beta:${DOCKER_TAG}" \
+  .
+
+echo "Docker build process finished"
\ No newline at end of file
diff --git a/docker-start.sh b/docker-start.sh
new file mode 100755
index 0000000..d76486a
--- /dev/null
+++ b/docker-start.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# Starts the alltalk_beta container.
+#
+# Usage: docker-start.sh [--config FILE] [--voices DIR] [--rvc_voices DIR]
+#                        [--tag TAG] [--no_ui] [DOCKER_RUN_OPTION...]
+#   --config FILE     mounted into the container as /alltalk/docker_confignew.json
+#   --voices DIR      mounted into the container as /alltalk/voices
+#   --rvc_voices DIR  mounted into the container as /alltalk/models/rvc_voices
+#   --tag TAG         tag of the alltalk_beta image to run (default: latest)
+#   --no_ui           do not publish port 7852
+# Every other argument is handed to `docker run` unchanged.
+
+ALLTALK_DIR="/alltalk"
+DOCKER_TAG=latest
+WITH_UI=true
+declare -a ADDITIONAL_ARGS=()
+
+# Exits with an error unless the option in $1 is followed by a value.
+require_value() {
+  if [ "$#" -lt 2 ]; then
+    printf '%s\n' "Missing value for argument ($1)"
+    exit 1
+  fi
+}
+
+# Parse arguments
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --config)
+      require_value "$@"
+      CONFIG="$2"
+      shift
+      ;;
+    --voices)
+      require_value "$@"
+      VOICES="$2"
+      shift
+      ;;
+    --rvc_voices)
+      require_value "$@"
+      RVC_VOICES="$2"
+      shift
+      ;;
+    --tag)
+      require_value "$@"
+      DOCKER_TAG="$2"
+      shift
+      ;;
+    --no_ui)
+      WITH_UI=false
+      ;;
+    *)
+      # Allow to pass arbitrary arguments to docker as well to be flexible.
+      # Quoted so that an argument containing spaces stays a single argument:
+      ADDITIONAL_ARGS+=( "$1" )
+      ;;
+  esac
+  shift
+done
+
+# Compose docker arguments based on user input to the script:
+declare -a DOCKER_ARGS=()
+
+if [[ -n $CONFIG ]]; then
+  # Mount the config file to docker_confignew.json:
+  DOCKER_ARGS+=( -v "${CONFIG}:${ALLTALK_DIR}/docker_confignew.json" )
+fi
+
+if [[ -n $VOICES ]]; then
+  DOCKER_ARGS+=( -v "${VOICES}:${ALLTALK_DIR}/voices" )
+fi
+
+if [[ -n $RVC_VOICES ]]; then
+  DOCKER_ARGS+=( -v "${RVC_VOICES}:${ALLTALK_DIR}/models/rvc_voices" )
+fi
+
+if [ "$WITH_UI" = true ] ; then
+  DOCKER_ARGS+=( -p 7852:7852 )
+fi
+
+docker run \
+  --rm \
+  -it \
+  -p 7851:7851 \
+  --gpus=all \
+  --name alltalk \
+  "${DOCKER_ARGS[@]}" \
+  "${ADDITIONAL_ARGS[@]}" \
+  "alltalk_beta:${DOCKER_TAG}" &> /dev/stdout
\ No newline at end of file
diff --git a/docker_confignew.json b/docker_confignew.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/docker_confignew.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/docker_default_config.json b/docker_default_config.json
new file mode 100644
index 0000000..e14efdf
--- /dev/null
+++ b/docker_default_config.json
@@ -0,0 +1,7 @@
+{
+  "delete_output_wavs": "1",
+  "rvc_settings": {
+    "rvc_enabled": true,
+    "f0method": "rmvpe"
+  }
+}
\ No newline at end of file