Dockerize CI + Release builds
Integrates both CI and Release builds into one workflow.
Mounts ccache and the pip cache as required for fast iterative builds.
Release docker builds currently still run with root permissions; fix
this in the future to run as the same user.

There may be some corner cases left, especially when switching
build types.

Docker build TEST plan:

tl;dr:
Build everything: Releases (Python 3.8, 3.9, 3.10) and CI builds.
  TM_PACKAGES="torch-mlir out-of-tree in-tree"
  2.57s user 2.49s system 0% cpu 30:33.11 total

Out of Tree + PyTorch binaries:

  Fresh build (purged cache):
    TM_PACKAGES="out-of-tree"
    0.47s user 0.51s system 0% cpu 5:24.99 total

  Incremental with ccache:
    TM_PACKAGES="out-of-tree"
    0.09s user 0.08s system 0% cpu 34.817 total

Out of Tree + PyTorch from source:

  Incremental
    TM_PACKAGES="out-of-tree" TM_USE_PYTORCH_BINARY=OFF
    1.58s user 1.81s system 2% cpu 1:59.61 total

In-Tree + PyTorch binaries:

  Fresh build and tests (purged ccache):
  TM_PACKAGES="in-tree"
  0.53s user 0.49s system 0% cpu 6:23.35 total

  Fresh build, but with prior ccache
  TM_PACKAGES="in-tree"
  0.45s user 0.66s system 0% cpu 3:57.47 total

  Incremental in-tree with all tests and regression tests
  TM_PACKAGES="in-tree"
  0.16s user 0.09s system 0% cpu 2:18.52 total

In-Tree + PyTorch from source:

  Fresh build and tests (purged ccache):
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  2.03s user 2.28s system 0% cpu 11:11.86 total

  Fresh build, but with prior ccache
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  1.58s user 1.88s system 1% cpu 4:53.15 total

  Incremental in-tree with all tests and regression tests
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  1.09s user 1.10s system 1% cpu 3:29.84 total

  Incremental without tests
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF TM_SKIP_TESTS=ON
  1.52s user 1.42s system 3% cpu 1:15.82 total

In-Tree + Out-of-Tree + PyTorch binaries:
  TM_PACKAGES="out-of-tree in-tree"
  0.25s user 0.18s system 0% cpu 3:01.91 total

To clear all artifacts:
  rm -rf build build_oot llvm-build libtorch docker_venv \
    externals/pytorch/build
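
For reference, each timing above was produced by invoking the script
directly with the matching TM_* environment variables. A minimal sketch,
assuming the repo root as the working directory (per the script's header
comments):

  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF \
    ./build_tools/python_deploy/build_linux_packages.sh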
powderluv committed Aug 29, 2022
1 parent e16b43e commit 2055b25
Showing 4 changed files with 377 additions and 30 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,6 +1,7 @@
*.swp
.cache/
.vscode
.ccache
.env
*.code-workspace
.ipynb_checkpoints
@@ -26,3 +27,8 @@ bazel-*

# Autogenerated files
/python/torch_mlir/csrc/base_lazy_backend/generated

#Docker builds
build_oot/
docker_venv/
llvm-build/
54 changes: 54 additions & 0 deletions build_tools/docker/Dockerfile
@@ -0,0 +1,54 @@
ARG BASE_IMG=ubuntu:22.04
FROM ${BASE_IMG} as dev-base

# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
# thing is then feel free. The warning complains about parsing apt-key output,
# which we're not even doing.
ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1

ARG ARCH="x86_64"
ARG REPO_NAME="deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy main"
RUN apt-get update && \
apt-get install -y \
ca-certificates \
software-properties-common \
wget \
apt-transport-https \
ccache \
curl \
cmake \
ninja-build \
git \
gnupg \
lsb-release \
python3-pip \
python3.10 \
python3.10-dev \
python3.10-venv \
unzip && \
echo $REPO_NAME >> /etc/apt/sources.list.d/llvm.list && \
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
apt-get update && \
apt-get install -y \
clang \
lld

######## Bazel ########
WORKDIR /install-bazel
ARG BAZEL_VERSION=5.2.0

# https://bazel.build/install/ubuntu
RUN curl -fsSL https://bazel.build/bazel-release.pub.gpg \
| gpg --dearmor >bazel-archive-keyring.gpg \
&& mv bazel-archive-keyring.gpg /usr/share/keyrings \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/bazel-archive-keyring.gpg] https://storage.googleapis.com/bazel-apt stable jdk1.8" \
| tee /etc/apt/sources.list.d/bazel.list \
&& apt-get update \
&& apt-get install -y "bazel=${BAZEL_VERSION?}" \
&& rm -rf /install-bazel

### Clean up
RUN apt-get clean \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /main_checkout/torch-mlir
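
The CI image referenced by TM_CI_DOCKER_IMAGE in the build script can be
rebuilt locally from this Dockerfile. A minimal sketch, assuming the repo
root as the build context (the image tag is only illustrative):

  docker build -f build_tools/docker/Dockerfile -t my-torch-mlir-ci .
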
247 changes: 217 additions & 30 deletions build_tools/python_deploy/build_linux_packages.sh
@@ -16,77 +16,145 @@
# ./build_tools/python_deploy/build_linux_packages.sh
#
# Build specific Python versions and packages to custom directory:
# python_versions="cp38-cp38 cp39-cp39" \
# packages="torch-mlir" \
# output_dir="/tmp/wheelhouse" \
# TM_PYTHON_VERSIONS="cp38-cp38 cp39-cp39" \
# TM_PACKAGES="torch-mlir" \
# TM_OUTPUT_DIR="/tmp/wheelhouse" \
# ./build_tools/python_deploy/build_linux_packages.sh
#
# Valid Python versions match a subdirectory under /opt/python in the docker
# image. Typically:
# cp37-cp37m cp38-cp38 cp39-cp39 cp310-cp310
# cp38-cp38 cp39-cp39 cp310-cp310
#
# Valid packages:
# torch-mlir
# torch-mlir, in-tree, out-of-tree
#
# Note that this script is meant to be run on CI and it will pollute both the
# output directory and in-tree build/ directories (under runtime/ and
# iree/compiler/) with docker created, root owned builds. Sorry - there is
# no good way around it.
# output directory and in-tree build/ directories with docker created, root owned builds.
# Sorry - there is no good way around it but TODO: move to using user UID/GID.
#
# It can be run on a workstation but recommend using a git worktree dedicated
# to packaging to avoid stomping on development artifacts.
set -eu -o errtrace

this_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(cd "$this_dir"/../../ && pwd)"
manylinux_docker_image="${manylinux_docker_image:-stellaraccident/manylinux2014_x86_64-bazel-5.1.0:latest}"
python_versions="${TM_PYTHON_VERSIONS:-cp38-cp38 cp39-cp39 cp310-cp310}"
output_dir="${output_dir:-${this_dir}/wheelhouse}"
packages="${packages:-torch-mlir}"
# This needs to be a manylinux image so we can ship pip packages
TM_RELEASE_DOCKER_IMAGE="${TM_RELEASE_DOCKER_IMAGE:-stellaraccident/manylinux2014_x86_64-bazel-5.1.0:latest}"
# This assumes an Ubuntu LTS like image. You can build your own with
# ./build_tools/docker/Dockerfile
TM_CI_DOCKER_IMAGE="${TM_CI_DOCKER_IMAGE:-powderluv/torch-mlir-ci:latest}"
# Version of Python to use in Release builds. Ignored in CIs.
TM_PYTHON_VERSIONS="${TM_PYTHON_VERSIONS:-cp38-cp38 cp39-cp39 cp310-cp310}"
# Location to store Release wheels
TM_OUTPUT_DIR="${TM_OUTPUT_DIR:-${this_dir}/wheelhouse}"
# Which packages to build
TM_PACKAGES="${TM_PACKAGES:-torch-mlir out-of-tree in-tree}"
# Use pre-built Pytorch
TM_USE_PYTORCH_BINARY="${TM_USE_PYTORCH_BINARY:-ON}"
# Skip running tests if you want quick iteration
TM_SKIP_TESTS="${TM_SKIP_TESTS:-OFF}"

PKG_VER_FILE="${repo_root}"/torch_mlir_package_version ; [ -f "$PKG_VER_FILE" ] && . "$PKG_VER_FILE"
export TORCH_MLIR_PYTHON_PACKAGE_VERSION="${TORCH_MLIR_PYTHON_PACKAGE_VERSION:-0.0.1}"
echo "Setting torch-mlir Python Package version to: ${TORCH_MLIR_PYTHON_PACKAGE_VERSION}"

function run_on_host() {
echo "Running on host"
echo "Launching docker image ${manylinux_docker_image}"
echo "Outputting to ${output_dir}"
rm -rf "${output_dir}"
mkdir -p "${output_dir}"
echo "Running on host for $1:$@"
echo "Outputting to ${TM_OUTPUT_DIR}"
rm -rf "${TM_OUTPUT_DIR}"
mkdir -p "${TM_OUTPUT_DIR}"
case "$package" in
torch-mlir)
TM_CURRENT_DOCKER_IMAGE=${TM_RELEASE_DOCKER_IMAGE}
export USERID=0
export GROUPID=0
;;
out-of-tree)
TM_CURRENT_DOCKER_IMAGE=${TM_CI_DOCKER_IMAGE}
# CI uses only Python3.10
TM_PYTHON_VERSIONS="cp310-cp310"
export USERID=$(id -u)
export GROUPID=$(id -g)
;;
in-tree)
TM_CURRENT_DOCKER_IMAGE=${TM_CI_DOCKER_IMAGE}
# CI uses only Python3.10
TM_PYTHON_VERSIONS="cp310-cp310"
export USERID=$(id -u)
export GROUPID=$(id -g)
;;
*)
echo "Unrecognized package '$package'"
exit 1
;;
esac
echo "Launching docker image ${TM_CURRENT_DOCKER_IMAGE} with UID:${USERID} GID:${GROUPID}"
docker run --rm \
-v "${repo_root}:/main_checkout/torch-mlir" \
-v "${output_dir}:/wheelhouse" \
-v "${TM_OUTPUT_DIR}:/wheelhouse" \
-v "${HOME}:/home/${USER}" \
--user ${USERID}:${GROUPID} \
--workdir="/home/$USER" \
--volume="/etc/group:/etc/group:ro" \
--volume="/etc/passwd:/etc/passwd:ro" \
--volume="/etc/shadow:/etc/shadow:ro" \
--ipc=host \
--ulimit nofile=32768:32768 \
-e __MANYLINUX_BUILD_WHEELS_IN_DOCKER=1 \
-e "TORCH_MLIR_PYTHON_PACKAGE_VERSION=${TORCH_MLIR_PYTHON_PACKAGE_VERSION}" \
-e "python_versions=${python_versions}" \
-e "packages=${packages}" \
"${manylinux_docker_image}" \
-- bash /main_checkout/torch-mlir/build_tools/python_deploy/build_linux_packages.sh
-e "TM_PYTHON_VERSIONS=${TM_PYTHON_VERSIONS}" \
-e "TM_PACKAGES=${package}" \
-e "TM_SKIP_TESTS=${TM_SKIP_TESTS}" \
-e "TM_USE_PYTORCH_BINARY=${TM_USE_PYTORCH_BINARY}" \
-e "CCACHE_DIR=/main_checkout/torch-mlir/.ccache" \
"${TM_CURRENT_DOCKER_IMAGE}" \
/bin/bash /main_checkout/torch-mlir/build_tools/python_deploy/build_linux_packages.sh
}

function run_in_docker() {
echo "Running in docker"
echo "Using python versions: ${python_versions}"
echo "Using python versions: ${TM_PYTHON_VERSIONS}"

local orig_path="$PATH"

# Build phase.
for package in $packages; do
echo "******************** BUILDING PACKAGE ${package} ********************"
for python_version in $python_versions; do
for package in $TM_PACKAGES; do
echo "******************** BUILDING PACKAGE ${package} (docker) ************"
for python_version in $TM_PYTHON_VERSIONS; do
python_dir="/opt/python/$python_version"
if ! [ -x "$python_dir/bin/python" ]; then
echo "ERROR: Could not find python: $python_dir (skipping)"
continue
echo "Could not find python: $python_dir (using system default Python3)"
python_dir=`which python3`
echo "Defaulting to $python_dir (expected for CI builds)"
fi
export PATH=$python_dir/bin:$orig_path
echo ":::: Python version $(python --version)"
echo ":::: Python version $(python3 --version)"
case "$package" in
torch-mlir)
clean_wheels torch_mlir "$python_version"
build_torch_mlir
#run_audit_wheel torch_mlir "$python_version"
clean_build torch_mlir "$python_version"
;;
out-of-tree)
#clean_build "$package" "$python_version"
setup_venv "$python_version"
build_out_of_tree "$TM_USE_PYTORCH_BINARY" "$python_version"
if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
#Tests always run with the installed PyTorch to simulate end users
#setup_venv "$python_version"
test_out_of_tree
fi
;;
in-tree)
#clean_build "$package" "$python_version"
setup_venv "$python_version"
build_in_tree "$TM_USE_PYTORCH_BINARY" "$python_version"
if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
#Tests always run with the installed PyTorch to simulate end users
#setup_venv "$python_version"
test_in_tree;
fi
;;
*)
echo "Unrecognized package '$package'"
@@ -97,6 +165,122 @@ function run_in_docker() {
done
}


function build_in_tree() {
local torch_from_src="$1"
local python_version="$2"
echo ":::: Build in-tree Torch from source: $torch_from_src with Python: $python_version"
cmake -GNinja -B/main_checkout/torch-mlir/build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_LINKER=lld \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
-DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="/main_checkout/torch-mlir" \
-DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR="/main_checkout/torch-mlir/externals/llvm-external-projects/torch-mlir-dialects" \
-DLLVM_TARGETS_TO_BUILD=host \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DTORCH_MLIR_ENABLE_LTC=OFF \
-DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir/externals/llvm-project/llvm
cmake --build /main_checkout/torch-mlir/build
ccache -s
}

function test_in_tree() {
echo ":::: Test in-tree"
cmake --build /main_checkout/torch-mlir/build --target check-torch-mlir-all

cd /main_checkout/torch-mlir/
export PYTHONPATH="/main_checkout/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir"

echo ":::: Run refbackend e2e integration tests"
python -m e2e_testing.torchscript.main --config=refbackend -v

echo ":::: Run eager_mode e2e integration tests"
python -m e2e_testing.torchscript.main --config=eager_mode -v

echo ":::: Run TOSA e2e integration tests"
python -m e2e_testing.torchscript.main --config=tosa -v

echo ":::: Run Lazy Tensor Core e2e integration tests"
# Temporarily disabled in top of main (https://github.com/llvm/torch-mlir/pull/1292)
#python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v
}

function setup_venv() {
local python_version="$1"
echo ":::: Setting up VENV with Python: $python_version"
python3 -m venv /main_checkout/torch-mlir/docker_venv
source /main_checkout/torch-mlir/docker_venv/bin/activate

echo ":::: pip installing dependencies"
python3 -m pip install -r /main_checkout/torch-mlir/externals/llvm-project/mlir/python/requirements.txt
python3 -m pip install -r /main_checkout/torch-mlir/requirements.txt

}

function build_out_of_tree() {
local torch_from_src="$1"
local python_version="$2"
echo ":::: Build out-of-tree Torch from source: $torch_from_src with Python: $python_version"

if [ ! -d "/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" ]
then
echo ":::: LLVM / MLIR is not built so building it first.."
cmake -GNinja -B/main_checkout/torch-mlir/llvm-build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_LINKER=lld \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_TARGETS_TO_BUILD=host \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir/externals/llvm-project/llvm
cmake --build /main_checkout/torch-mlir/llvm-build
fi

# Incremental builds come here directly and can run cmake if required.
cmake -GNinja -B/main_checkout/torch-mlir/build_oot \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_LINKER=lld \
-DLLVM_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/llvm/" \
-DMLIR_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" \
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
-DTORCH_MLIR_ENABLE_LTC=OFF \
-DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir
cmake --build /main_checkout/torch-mlir/build_oot
ccache -s
}

function test_out_of_tree() {
echo ":::: Test out-of-tree"
cmake --build /main_checkout/torch-mlir/build_oot --target check-torch-mlir-all
#TODO: Add regression tests
}

function clean_build() {
# clean up for recursive runs
local package="$1"
local python_version="$2"
echo ":::: Clean build dir $package $python_version"
rm -rf /main_checkout/torch-mlir/build /main_checkout/torch-mlir/llvm-build /main_checkout/torch-mlir/docker_venv /main_checkout/torch-mlir/libtorch
}

function build_torch_mlir() {
python -m pip install -r /main_checkout/torch-mlir/requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu
CMAKE_GENERATOR=Ninja \
@@ -123,7 +307,10 @@ function clean_wheels() {

# Trampoline to the docker container if running on the host.
if [ -z "${__MANYLINUX_BUILD_WHEELS_IN_DOCKER-}" ]; then
run_on_host "$@"
for package in $TM_PACKAGES; do
echo "******************** BUILDING PACKAGE ${package} (host) *************"
run_on_host "${package} $@"
done
else
run_in_docker "$@"
fi
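
A usage sketch for pointing the CI path at a locally built image
(reusing the illustrative tag from the Dockerfile example above;
values are only examples, not what the workflow uses):

  TM_CI_DOCKER_IMAGE="my-torch-mlir-ci:latest" TM_PACKAGES="out-of-tree" \
    ./build_tools/python_deploy/build_linux_packages.sh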
