diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index fa5375c0ae29..000000000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Trigger Build And Publish Docker Images - -on: - push: - branches: - - master - paths: - - docker/** - - ".github/workflows/docker-publish.yml" - -jobs: - build-publish: - name: Trigger Build and Push Docker images to Docker Hub - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - # TODO diff --git a/docker/build.sh b/docker/build.sh index 4c4e35b7e970..f3b460a39436 100644 --- a/docker/build.sh +++ b/docker/build.sh @@ -91,6 +91,5 @@ echo "Show installed packages:" docker run --rm -i pytorchignite/${image_name}:${image_tag} pip list echo "Test pytorchignite/${image_name}:${image_tag}" -# python test_image.py pytorchignite/${image_name}:${image_tag} -docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} python test_image.py pytorchignite/${image_name}:${image_tag} +docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} /bin/bash -c "python test_image.py pytorchignite/${image_name}:${image_tag}" echo "OK" \ No newline at end of file diff --git a/docker/hvd/Dockerfile.hvd-apex b/docker/hvd/Dockerfile.hvd-apex index a465abdaba24..30f5b8a55fec 100644 --- a/docker/hvd/Dockerfile.hvd-apex +++ b/docker/hvd/Dockerfile.hvd-apex @@ -6,8 +6,6 @@ ARG PTH_VERSION # 1/Building apex with pytorch:*-devel FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-hvd-builder -ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6" -ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST ENV CUDA_HOME=/usr/local/cuda # Install git @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \ git clone https://github.com/NVIDIA/apex $tmp_apex_path && \ cd $tmp_apex_path && \ pip install packaging && \ - pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . + pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" . ARG HVD_VERSION @@ -30,6 +28,9 @@ RUN apt-get update && apt-get install -y git && \ git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \ conda install -y cmake nccl -c conda-forge && \ cd /horovod && \ + # temporary -std=c++17 fix + sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \ + sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \ HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \ rm -rf /var/lib/apt/lists/* diff --git a/docker/hvd/Dockerfile.hvd-apex-nlp b/docker/hvd/Dockerfile.hvd-apex-nlp index a8f51988baa7..6379490c4966 100644 --- a/docker/hvd/Dockerfile.hvd-apex-nlp +++ b/docker/hvd/Dockerfile.hvd-apex-nlp @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-apex:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk + nltk \ + torchtext diff --git a/docker/hvd/Dockerfile.hvd-apex-vision b/docker/hvd/Dockerfile.hvd-apex-vision index 44ada46417af..59a1e273da0a 100644 --- a/docker/hvd/Dockerfile.hvd-apex-vision +++ b/docker/hvd/Dockerfile.hvd-apex-vision @@ -1,15 +1,6 @@ # Dockerfile.hvd-apex-vision FROM pytorchignite/hvd-apex:latest -# Install opencv dependencies -RUN apt-get update && \ - apt-get -y install --no-install-recommends libglib2.0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgl1-mesa-glx && \ - rm -rf /var/lib/apt/lists/* - # Ignite vision dependencies RUN pip install --upgrade --no-cache-dir albumentations \ image-dataset-viz \ diff --git a/docker/hvd/Dockerfile.hvd-base b/docker/hvd/Dockerfile.hvd-base index 3bdec5efdc43..3312a77e34d4 100644 --- a/docker/hvd/Dockerfile.hvd-base +++ b/docker/hvd/Dockerfile.hvd-base @@ -12,6 +12,9 @@ RUN apt-get update && apt-get install -y git && \ git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \ conda install -y cmake nccl -c conda-forge && \ cd /horovod && \ + # temporary -std=c++17 fix + sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \ + sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \ HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \ rm -rf /var/lib/apt/lists/* diff --git a/docker/hvd/Dockerfile.hvd-nlp b/docker/hvd/Dockerfile.hvd-nlp index 84da0230b9e9..db4ca4c3ebc1 100644 --- a/docker/hvd/Dockerfile.hvd-nlp +++ b/docker/hvd/Dockerfile.hvd-nlp @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-base:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk + nltk \ + torchtext diff --git a/docker/hvd/Dockerfile.hvd-vision b/docker/hvd/Dockerfile.hvd-vision index c9737ea4e165..b6d7a65681e0 100644 --- a/docker/hvd/Dockerfile.hvd-vision +++ b/docker/hvd/Dockerfile.hvd-vision @@ -1,15 +1,6 @@ # Dockerfile.hvd-vision FROM pytorchignite/hvd-base:latest -# Install opencv dependencies -RUN apt-get update && \ - apt-get -y install --no-install-recommends libglib2.0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgl1-mesa-glx && \ - rm -rf /var/lib/apt/lists/* - # Ignite vision dependencies RUN pip install --upgrade --no-cache-dir albumentations \ image-dataset-viz \ diff --git a/docker/main/Dockerfile.apex b/docker/main/Dockerfile.apex index d39445071646..debf8f304eac 100644 --- a/docker/main/Dockerfile.apex +++ b/docker/main/Dockerfile.apex @@ -6,8 +6,6 @@ ARG PTH_VERSION # 1/Building apex with pytorch:*-devel FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-builder -ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6" -ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST ENV CUDA_HOME=/usr/local/cuda # Install git @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \ git clone https://github.com/NVIDIA/apex $tmp_apex_path && \ cd $tmp_apex_path && \ pip install packaging && \ - pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . + pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" . # 2/ Build the runtime image FROM pytorch/pytorch:${PTH_VERSION}-runtime diff --git a/docker/main/Dockerfile.apex-nlp b/docker/main/Dockerfile.apex-nlp index b9be5acd6d9f..ad7507df777c 100644 --- a/docker/main/Dockerfile.apex-nlp +++ b/docker/main/Dockerfile.apex-nlp @@ -4,4 +4,5 @@ FROM pytorchignite/apex:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk + nltk \ + torchtext diff --git a/docker/main/Dockerfile.apex-vision b/docker/main/Dockerfile.apex-vision index d5c8bb79f4aa..724d828ef19e 100644 --- a/docker/main/Dockerfile.apex-vision +++ b/docker/main/Dockerfile.apex-vision @@ -1,15 +1,6 @@ # Dockerfile.apex-vision FROM pytorchignite/apex:latest -# Install opencv dependencies -RUN apt-get update && \ - apt-get -y install --no-install-recommends libglib2.0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgl1-mesa-glx && \ - rm -rf /var/lib/apt/lists/* - # Ignite vision dependencies RUN pip install --upgrade --no-cache-dir albumentations \ image-dataset-viz \ diff --git a/docker/main/Dockerfile.nlp b/docker/main/Dockerfile.nlp index ca8a9f1e26a9..e5ef45cfef0f 100644 --- a/docker/main/Dockerfile.nlp +++ b/docker/main/Dockerfile.nlp @@ -4,4 +4,5 @@ FROM pytorchignite/base:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk + nltk \ + torchtext diff --git a/docker/main/Dockerfile.vision b/docker/main/Dockerfile.vision index b9a6611a998e..a5adce81917f 100644 --- a/docker/main/Dockerfile.vision +++ b/docker/main/Dockerfile.vision @@ -1,15 +1,6 @@ # Dockerfile.vision FROM pytorchignite/base:latest -# Install opencv dependencies -RUN apt-get update && \ - apt-get -y install --no-install-recommends libglib2.0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgl1-mesa-glx && \ - rm -rf /var/lib/apt/lists/* - # Ignite vision dependencies RUN pip install --upgrade --no-cache-dir albumentations \ image-dataset-viz \ diff --git a/docker/msdp/Dockerfile.msdp-apex b/docker/msdp/Dockerfile.msdp-apex index 32dce0fea00f..d26b679532b2 100644 --- a/docker/msdp/Dockerfile.msdp-apex +++ b/docker/msdp/Dockerfile.msdp-apex @@ -6,8 +6,6 @@ ARG PTH_VERSION # 1/Building apex with pytorch:*-devel FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-msdp-builder -ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6" -ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST ENV CUDA_HOME=/usr/local/cuda # Install git @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \ git clone https://github.com/NVIDIA/apex $tmp_apex_path && \ cd $tmp_apex_path && \ pip install packaging && \ - pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . + pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" . # For pip --use-feature option RUN python -m pip install --upgrade pip diff --git a/docker/msdp/Dockerfile.msdp-apex-nlp b/docker/msdp/Dockerfile.msdp-apex-nlp index bb8110f8b8a1..cfdedfb01d52 100644 --- a/docker/msdp/Dockerfile.msdp-apex-nlp +++ b/docker/msdp/Dockerfile.msdp-apex-nlp @@ -4,4 +4,5 @@ FROM pytorchignite/msdp-apex:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir transformers \ spacy \ - nltk + nltk \ + torchtext diff --git a/docker/msdp/Dockerfile.msdp-apex-vision b/docker/msdp/Dockerfile.msdp-apex-vision index e98216669145..238e23333ed2 100644 --- a/docker/msdp/Dockerfile.msdp-apex-vision +++ b/docker/msdp/Dockerfile.msdp-apex-vision @@ -1,15 +1,6 @@ # Dockerfile.msdp-apex-vision FROM pytorchignite/msdp-apex:latest -# Install opencv dependencies -RUN apt-get update && \ - apt-get -y install --no-install-recommends libglib2.0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgl1-mesa-glx && \ - rm -rf /var/lib/apt/lists/* - # Ignite vision dependencies RUN pip install --upgrade --no-cache-dir albumentations \ image-dataset-viz \