Skip to content

Commit

Permalink
Fixed Horovod (HVD) build, vision image issue, and Apex build issue
Browse files: browse the repository at this point in the history
  • Loading branch information
vfdev-5 committed Oct 18, 2023
1 parent fec2a18 commit be66b2e
Show file tree
Hide file tree
Showing 16 changed files with 20 additions and 82 deletions.
21 changes: 0 additions & 21 deletions .github/workflows/docker-publish.yml

This file was deleted.

3 changes: 1 addition & 2 deletions docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,5 @@ echo "Show installed packages:"
docker run --rm -i pytorchignite/${image_name}:${image_tag} pip list

echo "Test pytorchignite/${image_name}:${image_tag}"
# python test_image.py pytorchignite/${image_name}:${image_tag}
docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} python test_image.py pytorchignite/${image_name}:${image_tag}
docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} /bin/bash -c "python test_image.py pytorchignite/${image_name}:${image_tag}"
echo "OK"
7 changes: 4 additions & 3 deletions docker/hvd/Dockerfile.hvd-apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-hvd-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .

ARG HVD_VERSION

Expand All @@ -30,6 +28,9 @@ RUN apt-get update && apt-get install -y git && \
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
conda install -y cmake nccl -c conda-forge && \
cd /horovod && \
# Temporary fix: patch Horovod's CMake files to use C++17 instead of C++14 (-std=c++17)
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
rm -rf /var/lib/apt/lists/*

Expand Down
3 changes: 2 additions & 1 deletion docker/hvd/Dockerfile.hvd-apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/hvd/Dockerfile.hvd-apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.hvd-apex-vision
FROM pytorchignite/hvd-apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
3 changes: 3 additions & 0 deletions docker/hvd/Dockerfile.hvd-base
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ RUN apt-get update && apt-get install -y git && \
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
conda install -y cmake nccl -c conda-forge && \
cd /horovod && \
# Temporary fix: patch Horovod's CMake files to use C++17 instead of C++14 (-std=c++17)
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
rm -rf /var/lib/apt/lists/*

Expand Down
3 changes: 2 additions & 1 deletion docker/hvd/Dockerfile.hvd-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-base:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/hvd/Dockerfile.hvd-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.hvd-vision
FROM pytorchignite/hvd-base:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
4 changes: 1 addition & 3 deletions docker/main/Dockerfile.apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .

# 2/ Build the runtime image
FROM pytorch/pytorch:${PTH_VERSION}-runtime
Expand Down
3 changes: 2 additions & 1 deletion docker/main/Dockerfile.apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/main/Dockerfile.apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.apex-vision
FROM pytorchignite/apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
3 changes: 2 additions & 1 deletion docker/main/Dockerfile.nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/base:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/main/Dockerfile.vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.vision
FROM pytorchignite/base:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
4 changes: 1 addition & 3 deletions docker/msdp/Dockerfile.msdp-apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-msdp-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .

# For pip --use-feature option
RUN python -m pip install --upgrade pip
Expand Down
3 changes: 2 additions & 1 deletion docker/msdp/Dockerfile.msdp-apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/msdp-apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/msdp/Dockerfile.msdp-apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.msdp-apex-vision
FROM pytorchignite/msdp-apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down

0 comments on commit be66b2e

Please sign in to comment.