Dockerize CI + Release builds
Integrates both CI and Release builds into one workflow.
Mounts ccache and the pip cache as required for fast iterative builds.
Release docker builds currently still run with root permissions; fix
this in the future to run as the same user.

There may be some corner cases left, especially when switching
build types.

Docker build TEST plan:

tl;dr:
Build everything: Releases (Python 3.8, 3.9, 3.10) and CI builds.
  TM_PACKAGES="torch-mlir out-of-tree in-tree"
  2.57s user 2.49s system 0% cpu 30:33.11 total

Out of Tree + PyTorch binaries:

  Fresh build (purged cache):
    TM_PACKAGES="out-of-tree"
    0.47s user 0.51s system 0% cpu 5:24.99 total

  Incremental with ccache:
    TM_PACKAGES="out-of-tree"
    0.09s user 0.08s system 0% cpu 34.817 total

Out of Tree + PyTorch from source:

  Incremental
    TM_PACKAGES="out-of-tree" TM_USE_PYTORCH_BINARY=OFF
    1.58s user 1.81s system 2% cpu 1:59.61 total

In-Tree + PyTorch binaries:

  Fresh build and tests (purged ccache):
  TM_PACKAGES="in-tree"
  0.53s user 0.49s system 0% cpu 6:23.35 total

  Fresh build, but with prior ccache
  TM_PACKAGES="in-tree"
  0.45s user 0.66s system 0% cpu 3:57.47 total

  Incremental in-tree with all tests and regression tests
  TM_PACKAGES="in-tree"
  0.16s user 0.09s system 0% cpu 2:18.52 total

In-Tree + PyTorch from source:

  Fresh build and tests (purged ccache):
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  2.03s user 2.28s system 0% cpu 11:11.86 total

  Fresh build, but with prior ccache
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  1.58s user 1.88s system 1% cpu 4:53.15 total

  Incremental in-tree with all tests and regression tests
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF
  1.09s user 1.10s system 1% cpu 3:29.84 total

  Incremental without tests
  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF TM_SKIP_TESTS=ON
  1.52s user 1.42s system 3% cpu 1:15.82 total

In-Tree + Out-of-Tree + PyTorch binaries:
  TM_PACKAGES="out-of-tree in-tree"
  0.25s user 0.18s system 0% cpu 3:01.91 total

To clear all artifacts:
  rm -rf build build_oot llvm-build libtorch docker_venv \
    externals/pytorch/build
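
For reference, each timing above was produced by invoking the script
directly with the matching TM_* environment variables. A minimal sketch,
assuming the repo root as the working directory (per the script's header
comments):

  TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY=OFF \
    ./build_tools/python_deploy/build_linux_packages.sh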
powderluv committed Aug 29, 2022
1 parent e16b43e commit 2055b25
Showing 4 changed files with 377 additions and 30 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,6 +1,7 @@
*.swp
.cache/
.vscode
.ccache
.env
*.code-workspace
.ipynb_checkpoints
@@ -26,3 +27,8 @@ bazel-*

# Autogenerated files
/python/torch_mlir/csrc/base_lazy_backend/generated

#Docker builds
build_oot/
docker_venv/
llvm-build/
54 changes: 54 additions & 0 deletions build_tools/docker/Dockerfile
@@ -0,0 +1,54 @@
ARG BASE_IMG=ubuntu:22.04
FROM ${BASE_IMG} as dev-base

# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
# thing is then feel free. The warning complains about parsing apt-key output,
# which we're not even doing.
ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1

ARG ARCH="x86_64"
ARG REPO_NAME="deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy main"
RUN apt-get update && \
apt-get install -y \
ca-certificates \
software-properties-common \
wget \
apt-transport-https \
ccache \
curl \
cmake \
ninja-build \
git \
gnupg \
lsb-release \
python3-pip \
python3.10 \
python3.10-dev \
python3.10-venv \
unzip && \
echo $REPO_NAME >> /etc/apt/sources.list.d/llvm.list && \
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
apt-get update && \
apt-get install -y \
clang \
lld

######## Bazel ########
WORKDIR /install-bazel
ARG BAZEL_VERSION=5.2.0

# https://bazel.build/install/ubuntu
RUN curl -fsSL https://bazel.build/bazel-release.pub.gpg \
| gpg --dearmor >bazel-archive-keyring.gpg \
&& mv bazel-archive-keyring.gpg /usr/share/keyrings \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/bazel-archive-keyring.gpg] https://storage.googleapis.com/bazel-apt stable jdk1.8" \
| tee /etc/apt/sources.list.d/bazel.list \
&& apt-get update \
&& apt-get install -y "bazel=${BAZEL_VERSION?}" \
&& rm -rf /install-bazel

### Clean up
RUN apt-get clean \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /main_checkout/torch-mlir
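
The CI image referenced by TM_CI_DOCKER_IMAGE in the build script can be
rebuilt locally from this Dockerfile. A minimal sketch, assuming the repo
root as the build context (the image tag is only illustrative):

  docker build -f build_tools/docker/Dockerfile -t my-torch-mlir-ci .
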
247 changes: 217 additions & 30 deletions build_tools/python_deploy/build_linux_packages.sh
@@ -16,77 +16,145 @@
# ./build_tools/python_deploy/build_linux_packages.sh
#
# Build specific Python versions and packages to custom directory:
# python_versions="cp38-cp38 cp39-cp39" \
# packages="torch-mlir" \
# output_dir="/tmp/wheelhouse" \
# TM_PYTHON_VERSIONS="cp38-cp38 cp39-cp39" \
# TM_PACKAGES="torch-mlir" \
# TM_OUTPUT_DIR="/tmp/wheelhouse" \
# ./build_tools/python_deploy/build_linux_packages.sh
#
# Valid Python versions match a subdirectory under /opt/python in the docker
# image. Typically:
# cp37-cp37m cp38-cp38 cp39-cp39 cp310-cp310
# cp38-cp38 cp39-cp39 cp310-cp310
#
# Valid packages:
# torch-mlir
# torch-mlir, in-tree, out-of-tree
#
# Note that this script is meant to be run on CI and it will pollute both the
# output directory and in-tree build/ directories (under runtime/ and
# iree/compiler/) with docker created, root owned builds. Sorry - there is
# no good way around it.
# output directory and in-tree build/ directories with docker created, root owned builds.
# Sorry - there is no good way around it but TODO: move to using user UID/GID.
#
# It can be run on a workstation but recommend using a git worktree dedicated
# to packaging to avoid stomping on development artifacts.
set -eu -o errtrace

this_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(cd "$this_dir"/../../ && pwd)"
manylinux_docker_image="${manylinux_docker_image:-stellaraccident/manylinux2014_x86_64-bazel-5.1.0:latest}"
python_versions="${TM_PYTHON_VERSIONS:-cp38-cp38 cp39-cp39 cp310-cp310}"
output_dir="${output_dir:-${this_dir}/wheelhouse}"
packages="${packages:-torch-mlir}"
# This needs to be a manylinux image so we can ship pip packages
TM_RELEASE_DOCKER_IMAGE="${TM_RELEASE_DOCKER_IMAGE:-stellaraccident/manylinux2014_x86_64-bazel-5.1.0:latest}"
# This assumes an Ubuntu LTS like image. You can build your own with
# ./build_tools/docker/Dockerfile
TM_CI_DOCKER_IMAGE="${TM_CI_DOCKER_IMAGE:-powderluv/torch-mlir-ci:latest}"
# Version of Python to use in Release builds. Ignored in CIs.
TM_PYTHON_VERSIONS="${TM_PYTHON_VERSIONS:-cp38-cp38 cp39-cp39 cp310-cp310}"
# Location to store Release wheels
TM_OUTPUT_DIR="${TM_OUTPUT_DIR:-${this_dir}/wheelhouse}"
# Which packages to build
TM_PACKAGES="${TM_PACKAGES:-torch-mlir out-of-tree in-tree}"
# Use pre-built Pytorch
TM_USE_PYTORCH_BINARY="${TM_USE_PYTORCH_BINARY:-ON}"
# Skip running tests if you want quick iteration
TM_SKIP_TESTS="${TM_SKIP_TESTS:-OFF}"

PKG_VER_FILE="${repo_root}"/torch_mlir_package_version ; [ -f "$PKG_VER_FILE" ] && . "$PKG_VER_FILE"
export TORCH_MLIR_PYTHON_PACKAGE_VERSION="${TORCH_MLIR_PYTHON_PACKAGE_VERSION:-0.0.1}"
echo "Setting torch-mlir Python Package version to: ${TORCH_MLIR_PYTHON_PACKAGE_VERSION}"

function run_on_host() {
echo "Running on host"
echo "Launching docker image ${manylinux_docker_image}"
echo "Outputting to ${output_dir}"
rm -rf "${output_dir}"
mkdir -p "${output_dir}"
echo "Running on host for $1:$@"
echo "Outputting to ${TM_OUTPUT_DIR}"
rm -rf "${TM_OUTPUT_DIR}"
mkdir -p "${TM_OUTPUT_DIR}"
case "$package" in
torch-mlir)
TM_CURRENT_DOCKER_IMAGE=${TM_RELEASE_DOCKER_IMAGE}
export USERID=0
export GROUPID=0
;;
out-of-tree)
TM_CURRENT_DOCKER_IMAGE=${TM_CI_DOCKER_IMAGE}
# CI uses only Python3.10
TM_PYTHON_VERSIONS="cp310-cp310"
export USERID=$(id -u)
export GROUPID=$(id -g)
;;
in-tree)
TM_CURRENT_DOCKER_IMAGE=${TM_CI_DOCKER_IMAGE}
# CI uses only Python3.10
TM_PYTHON_VERSIONS="cp310-cp310"
export USERID=$(id -u)
export GROUPID=$(id -g)
;;
*)
echo "Unrecognized package '$package'"
exit 1
;;
esac
echo "Launching docker image ${TM_CURRENT_DOCKER_IMAGE} with UID:${USERID} GID:${GROUPID}"
docker run --rm \
-v "${repo_root}:/main_checkout/torch-mlir" \
-v "${output_dir}:/wheelhouse" \
-v "${TM_OUTPUT_DIR}:/wheelhouse" \
-v "${HOME}:/home/${USER}" \
--user ${USERID}:${GROUPID} \
--workdir="/home/$USER" \
--volume="/etc/group:/etc/group:ro" \
--volume="/etc/passwd:/etc/passwd:ro" \
--volume="/etc/shadow:/etc/shadow:ro" \
--ipc=host \
--ulimit nofile=32768:32768 \
-e __MANYLINUX_BUILD_WHEELS_IN_DOCKER=1 \
-e "TORCH_MLIR_PYTHON_PACKAGE_VERSION=${TORCH_MLIR_PYTHON_PACKAGE_VERSION}" \
-e "python_versions=${python_versions}" \
-e "packages=${packages}" \
"${manylinux_docker_image}" \
-- bash /main_checkout/torch-mlir/build_tools/python_deploy/build_linux_packages.sh
-e "TM_PYTHON_VERSIONS=${TM_PYTHON_VERSIONS}" \
-e "TM_PACKAGES=${package}" \
-e "TM_SKIP_TESTS=${TM_SKIP_TESTS}" \
-e "TM_USE_PYTORCH_BINARY=${TM_USE_PYTORCH_BINARY}" \
-e "CCACHE_DIR=/main_checkout/torch-mlir/.ccache" \
"${TM_CURRENT_DOCKER_IMAGE}" \
/bin/bash /main_checkout/torch-mlir/build_tools/python_deploy/build_linux_packages.sh
}

function run_in_docker() {
echo "Running in docker"
echo "Using python versions: ${python_versions}"
echo "Using python versions: ${TM_PYTHON_VERSIONS}"

local orig_path="$PATH"

# Build phase.
for package in $packages; do
echo "******************** BUILDING PACKAGE ${package} ********************"
for python_version in $python_versions; do
for package in $TM_PACKAGES; do
echo "******************** BUILDING PACKAGE ${package} (docker) ************"
for python_version in $TM_PYTHON_VERSIONS; do
python_dir="/opt/python/$python_version"
if ! [ -x "$python_dir/bin/python" ]; then
echo "ERROR: Could not find python: $python_dir (skipping)"
continue
echo "Could not find python: $python_dir (using system default Python3)"
python_dir=`which python3`
echo "Defaulting to $python_dir (expected for CI builds)"
fi
export PATH=$python_dir/bin:$orig_path
echo ":::: Python version $(python --version)"
echo ":::: Python version $(python3 --version)"
case "$package" in
torch-mlir)
clean_wheels torch_mlir "$python_version"
build_torch_mlir
#run_audit_wheel torch_mlir "$python_version"
clean_build torch_mlir "$python_version"
;;
out-of-tree)
#clean_build "$package" "$python_version"
setup_venv "$python_version"
build_out_of_tree "$TM_USE_PYTORCH_BINARY" "$python_version"
if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
#Tests always run with the installed PyTorch to simulate end users
#setup_venv "$python_version"
test_out_of_tree
fi
;;
in-tree)
#clean_build "$package" "$python_version"
setup_venv "$python_version"
build_in_tree "$TM_USE_PYTORCH_BINARY" "$python_version"
if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
#Tests always run with the installed PyTorch to simulate end users
#setup_venv "$python_version"
test_in_tree;
fi
;;
*)
echo "Unrecognized package '$package'"
@@ -97,6 +165,122 @@ function run_in_docker() {
done
}


function build_in_tree() {
local torch_from_src="$1"
local python_version="$2"
echo ":::: Build in-tree Torch from source: $torch_from_src with Python: $python_version"
cmake -GNinja -B/main_checkout/torch-mlir/build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_LINKER=lld \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
-DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="/main_checkout/torch-mlir" \
-DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR="/main_checkout/torch-mlir/externals/llvm-external-projects/torch-mlir-dialects" \
-DLLVM_TARGETS_TO_BUILD=host \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DTORCH_MLIR_ENABLE_LTC=OFF \
-DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir/externals/llvm-project/llvm
cmake --build /main_checkout/torch-mlir/build
ccache -s
}

function test_in_tree() {
echo ":::: Test in-tree"
cmake --build /main_checkout/torch-mlir/build --target check-torch-mlir-all

cd /main_checkout/torch-mlir/
export PYTHONPATH="/main_checkout/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir"

echo ":::: Run refbackend e2e integration tests"
python -m e2e_testing.torchscript.main --config=refbackend -v

echo ":::: Run eager_mode e2e integration tests"
python -m e2e_testing.torchscript.main --config=eager_mode -v

echo ":::: Run TOSA e2e integration tests"
python -m e2e_testing.torchscript.main --config=tosa -v

echo ":::: Run Lazy Tensor Core e2e integration tests"
# Temporarily disabled in top of main (https://github.com/llvm/torch-mlir/pull/1292)
#python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v
}

function setup_venv() {
local python_version="$1"
echo ":::: Setting up VENV with Python: $python_version"
python3 -m venv /main_checkout/torch-mlir/docker_venv
source /main_checkout/torch-mlir/docker_venv/bin/activate

echo ":::: pip installing dependencies"
python3 -m pip install -r /main_checkout/torch-mlir/externals/llvm-project/mlir/python/requirements.txt
python3 -m pip install -r /main_checkout/torch-mlir/requirements.txt

}

function build_out_of_tree() {
local torch_from_src="$1"
local python_version="$2"
echo ":::: Build out-of-tree Torch from source: $torch_from_src with Python: $python_version"

if [ ! -d "/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" ]
then
echo ":::: LLVM / MLIR is not built so building it first.."
cmake -GNinja -B/main_checkout/torch-mlir/llvm-build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_LINKER=lld \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_TARGETS_TO_BUILD=host \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir/externals/llvm-project/llvm
cmake --build /main_checkout/torch-mlir/llvm-build
fi

# Incremental builds come here directly and can run cmake if required.
cmake -GNinja -B/main_checkout/torch-mlir/build_oot \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_LINKER=lld \
-DLLVM_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/llvm/" \
-DMLIR_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" \
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
-DTORCH_MLIR_ENABLE_LTC=OFF \
-DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
-DPython3_EXECUTABLE="$(which python3)" \
/main_checkout/torch-mlir
cmake --build /main_checkout/torch-mlir/build_oot
ccache -s
}

function test_out_of_tree() {
echo ":::: Test out-of-tree"
cmake --build /main_checkout/torch-mlir/build_oot --target check-torch-mlir-all
#TODO: Add regression tests
}

function clean_build() {
# clean up for recursive runs
local package="$1"
local python_version="$2"
echo ":::: Clean build dir $package $python_version"
rm -rf /main_checkout/torch-mlir/build /main_checkout/torch-mlir/llvm-build /main_checkout/torch-mlir/docker_venv /main_checkout/torch-mlir/libtorch
}

function build_torch_mlir() {
python -m pip install -r /main_checkout/torch-mlir/requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu
CMAKE_GENERATOR=Ninja \
@@ -123,7 +307,10 @@ function clean_wheels() {

# Trampoline to the docker container if running on the host.
if [ -z "${__MANYLINUX_BUILD_WHEELS_IN_DOCKER-}" ]; then
run_on_host "$@"
for package in $TM_PACKAGES; do
echo "******************** BUILDING PACKAGE ${package} (host) *************"
run_on_host "${package} $@"
done
else
run_in_docker "$@"
fi
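
A usage sketch for pointing the CI path at a locally built image
(reusing the illustrative tag from the Dockerfile example above;
values are only examples, not what the workflow uses):

  TM_CI_DOCKER_IMAGE="my-torch-mlir-ci:latest" TM_PACKAGES="out-of-tree" \
    ./build_tools/python_deploy/build_linux_packages.sh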
