From 4cb5679ff4e2b87195724de44b132e55b38344ba Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Wed, 12 Apr 2023 07:31:06 -0700 Subject: [PATCH 1/3] [pytorch] Upgrade PyTorch to 2.0.0 --- .github/workflows/nightly_publish.yml | 10 ++- api/src/main/java/ai/djl/util/Platform.java | 6 +- .../test/java/ai/djl/util/PlatformTest.java | 4 +- engines/pytorch/pytorch-engine/README.md | 77 ++++++++++--------- engines/pytorch/pytorch-jni/build.gradle | 8 +- engines/pytorch/pytorch-native/build.gradle | 11 ++- engines/pytorch/pytorch-native/build.sh | 2 +- gradle.properties | 2 +- 8 files changed, 67 insertions(+), 53 deletions(-) diff --git a/.github/workflows/nightly_publish.yml b/.github/workflows/nightly_publish.yml index ce66e9b4999..cd6141a1227 100644 --- a/.github/workflows/nightly_publish.yml +++ b/.github/workflows/nightly_publish.yml @@ -162,11 +162,11 @@ jobs: LD_LIBRARY_PATH=/root/.djl.ai/paddle/${PADDLE_VERSION}-cu102-linux-x86_64:/usr/local/cuda/lib64 \ ./gradlew :engines:paddlepaddle:paddlepaddle-model-zoo:test - test-cuda-113: + test-cuda-118: if: github.repository == 'deepjavalibrary/djl' runs-on: [ self-hosted, gpu ] container: - image: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04 + image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04 options: --gpus all --runtime=nvidia timeout-minutes: 30 needs: create-runners @@ -203,7 +203,7 @@ jobs: publish: if: github.repository == 'deepjavalibrary/djl' runs-on: ubuntu-latest - needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-113 ] + needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-118 ] steps: - uses: actions/checkout@v3 - name: Set up JDK 11 @@ -223,6 +223,7 @@ jobs: run: | ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -Psnapshot ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -Psnapshot + ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -Psnapshot ./gradlew clean engines:ml:xgboost:publish -Pgpu -Psnapshot ./gradlew clean publish -Psnapshot cd bom @@ -237,6 +238,7 @@ jobs: run: | ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -P${{ github.event.inputs.mode }} ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -P${{ github.event.inputs.mode }} + ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -P${{ github.event.inputs.mode }} ./gradlew clean engines:ml:xgboost:publish -Pgpu -P${{ github.event.inputs.mode }} ./gradlew clean publish -P${{ github.event.inputs.mode }} cd bom @@ -282,7 +284,7 @@ jobs: stop-runners: if: ${{ github.repository == 'deepjavalibrary/djl' && always() }} runs-on: [ self-hosted, scheduler ] - needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-113 ] + needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-118 ] steps: - name: Stop all instances run: | diff --git a/api/src/main/java/ai/djl/util/Platform.java b/api/src/main/java/ai/djl/util/Platform.java index 66846125ece..cfa00ef5199 100644 --- a/api/src/main/java/ai/djl/util/Platform.java +++ b/api/src/main/java/ai/djl/util/Platform.java @@ -295,10 +295,10 @@ public boolean matches(Platform system) { return true; } - // native package can run on system which major version is greater or equal + // native package can run on system which CUDA version is greater or equal if (system.flavor.startsWith("cu") - && Integer.parseInt(flavor.substring(2, 4)) - <= Integer.parseInt(system.flavor.substring(2, 4))) { + && Integer.parseInt(flavor.substring(2, 5)) + <= Integer.parseInt(system.flavor.substring(2, 5))) { return true; } logger.warn("The bundled library: " + this + " doesn't match system: " + system); diff --git a/api/src/test/java/ai/djl/util/PlatformTest.java b/api/src/test/java/ai/djl/util/PlatformTest.java index b9b49215689..039cbbea875 100644 --- a/api/src/test/java/ai/djl/util/PlatformTest.java +++ b/api/src/test/java/ai/djl/util/PlatformTest.java @@ -61,8 +61,8 @@ public void testPlatform() throws IOException { platform = Platform.fromUrl(url); // cu111 can run on cu113 machine Assert.assertTrue(platform.matches(system)); - // cu113 can run on cu111 machine (the same major version) - Assert.assertTrue(system.matches(platform)); + // cu113 cannot run on cu111 machine (the same major version) + Assert.assertFalse(system.matches(platform)); url = createPropertyFile("version=1.8.0\nclassifier=cu102-linux-x86_64"); platform = Platform.fromUrl(url); diff --git a/engines/pytorch/pytorch-engine/README.md b/engines/pytorch/pytorch-engine/README.md index 50113f2747f..199028c3a6d 100644 --- a/engines/pytorch/pytorch-engine/README.md +++ b/engines/pytorch/pytorch-engine/README.md @@ -46,6 +46,7 @@ The following table illustrates which pytorch version that DJL supports: | PyTorch engine version | PyTorch native library version | |------------------------|-------------------------------------------| +| pytorch-engine:0.22.0 | 1.11.0, 1.12.1, 1.13.1, 2.0.0 | | pytorch-engine:0.21.0 | 1.11.0, 1.12.1, 1.13.1 | | pytorch-engine:0.20.0 | 1.11.0, 1.12.1, 1.13.0 | | pytorch-engine:0.19.0 | 1.10.0, 1.11.0, 1.12.1 | @@ -108,21 +109,21 @@ export PYTORCH_FLAVOR=cpu ### macOS For macOS, you can use the following library: -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-x86_64 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-x86_64 ```xml ai.djl.pytorch pytorch-native-cpu osx-x86_64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` @@ -132,21 +133,21 @@ For macOS, you can use the following library: ### macOS M1 For macOS M1, you can use the following library: -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-aarch64 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-aarch64 ```xml ai.djl.pytorch pytorch-native-cpu osx-aarch64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` @@ -157,29 +158,29 @@ installed on your GPU machine, you can use one of the following library: #### Linux GPU -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cu117:1.13.1:linux-x86_64 - CUDA 11.7 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cu118:2.0.0:linux-x86_64 - CUDA 11.8 ```xml ai.djl.pytorch - pytorch-native-cu117 + pytorch-native-cu118 linux-x86_64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` ### Linux CPU -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cpu:1.13.1:linux-x86_64 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cpu:2.0.0:linux-x86_64 ```xml @@ -187,20 +188,20 @@ installed on your GPU machine, you can use one of the following library: pytorch-native-cpu linux-x86_64 runtime - 1.13.1 + 2.0.0 ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` ### For aarch64 build -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-aarch64 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-aarch64 ```xml @@ -208,12 +209,12 @@ installed on your GPU machine, you can use one of the following library: pytorch-native-cpu-precxx11 linux-aarch64 runtime - 1.13.1 + 2.0.0 ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` @@ -223,22 +224,22 @@ installed on your GPU machine, you can use one of the following library: We also provide packages for the system like CentOS 7/Ubuntu 14.04 with GLIBC >= 2.17. All the package were built with GCC 7, we provided a newer `libstdc++.so.6.24` in the package that contains `CXXABI_1.3.9` to use the package successfully. -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cu117-precxx11:1.13.1:linux-x86_64 - CUDA 11.7 -- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-x86_64 - CPU +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cu118-precxx11:2.0.0:linux-x86_64 - CUDA 11.8 +- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-x86_64 - CPU ```xml ai.djl.pytorch - pytorch-native-cu117-precxx11 + pytorch-native-cu118-precxx11 linux-x86_64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` @@ -248,13 +249,13 @@ All the package were built with GCC 7, we provided a newer `libstdc++.so.6.24` i ai.djl.pytorch pytorch-native-cpu-precxx11 linux-x86_64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` @@ -269,29 +270,29 @@ For the Windows platform, you can choose between CPU and GPU. #### Windows GPU -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cu117:1.13.1:win-x86_64 - CUDA 11.7 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cu118:2.0.0:win-x86_64 - CUDA 11.8 ```xml ai.djl.pytorch - pytorch-native-cu117 + pytorch-native-cu118 win-x86_64 - 1.13.1 + 2.0.0 runtime ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` ### Windows CPU -- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0 -- ai.djl.pytorch:pytorch-native-cpu:1.13.1:win-x86_64 +- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0 +- ai.djl.pytorch:pytorch-native-cpu:2.0.0:win-x86_64 ```xml @@ -299,12 +300,12 @@ For the Windows platform, you can choose between CPU and GPU. pytorch-native-cpu win-x86_64 runtime - 1.13.1 + 2.0.0 ai.djl.pytorch pytorch-jni - 1.13.1-0.21.0 + 2.0.0-0.22.0 runtime ``` diff --git a/engines/pytorch/pytorch-jni/build.gradle b/engines/pytorch/pytorch-jni/build.gradle index 3e7371577aa..450c832e803 100644 --- a/engines/pytorch/pytorch-jni/build.gradle +++ b/engines/pytorch/pytorch-jni/build.gradle @@ -24,7 +24,13 @@ processResources { "osx-x86_64/cpu/libdjl_torch.dylib", "win-x86_64/cpu/djl_torch.dll" ] - if (ptVersion.startsWith("1.13.")) { + if (ptVersion.startsWith("2.0.")) { + files.add("linux-aarch64/cpu-precxx11/libdjl_torch.so") + files.add("linux-x86_64/cu118/libdjl_torch.so") + files.add("linux-x86_64/cu118-precxx11/libdjl_torch.so") + files.add("win-x86_64/cu118/djl_torch.dll") + files.add("osx-aarch64/cpu/libdjl_torch.dylib") + } else if (ptVersion.startsWith("1.13.")) { files.add("linux-aarch64/cpu-precxx11/libdjl_torch.so") files.add("linux-x86_64/cu117/libdjl_torch.so") files.add("linux-x86_64/cu117-precxx11/libdjl_torch.so") diff --git a/engines/pytorch/pytorch-native/build.gradle b/engines/pytorch/pytorch-native/build.gradle index 714d03d6cec..3ed5f18c087 100644 --- a/engines/pytorch/pytorch-native/build.gradle +++ b/engines/pytorch/pytorch-native/build.gradle @@ -22,6 +22,8 @@ if (project.hasProperty("cu11")) { FLAVOR = "cu116" } else if (VERSION.startsWith("1.13.")) { FLAVOR = "cu117" + } else if (VERSION.startsWith("2.0.")) { + FLAVOR = "cu118" } else { throw new GradleException("Unsupported PyTorch version: ${VERSION}") } @@ -93,6 +95,8 @@ def prepareNativeLib(String binaryRoot, String ver) { cu11 = "cu116" } else if (ver.startsWith("1.13.")) { cu11 = "cu117" + } else if (ver.startsWith("2.0.")) { + cu11 = "cu118" } else { throw new GradleException("Unsupported PyTorch version: ${ver}") } @@ -283,9 +287,9 @@ tasks.register('uploadS3') { "${BINARY_ROOT}/cpu/win-x86_64/native/lib/", "${BINARY_ROOT}/cpu-precxx11/linux-aarch64/native/lib/", "${BINARY_ROOT}/cpu-precxx11/linux-x86_64/native/lib/", - "${BINARY_ROOT}/cu117/linux-x86_64/native/lib/", - "${BINARY_ROOT}/cu117/win-x86_64/native/lib/", - "${BINARY_ROOT}/cu117-precxx11/linux-x86_64/native/lib/" + "${BINARY_ROOT}/cu118/linux-x86_64/native/lib/", + "${BINARY_ROOT}/cu118/win-x86_64/native/lib/", + "${BINARY_ROOT}/cu118-precxx11/linux-x86_64/native/lib/" ] uploadDirs.each { item -> fileTree(item).files.name.each { @@ -351,6 +355,7 @@ flavorNames.each { flavor -> } from "src/main/resources" archiveClassifier = "${osName}" + archiveBaseName = "pytorch-native-${flavor}" manifest { attributes("Automatic-Module-Name": "ai.djl.pytorch_native_${flavor}_${osName}") diff --git a/engines/pytorch/pytorch-native/build.sh b/engines/pytorch/pytorch-native/build.sh index dea17e6f3fe..78c59d6bf2a 100755 --- a/engines/pytorch/pytorch-native/build.sh +++ b/engines/pytorch/pytorch-native/build.sh @@ -23,7 +23,7 @@ ARCH=$4 if [[ ! -d "libtorch" ]]; then if [[ $PLATFORM == 'linux' ]]; then - if [[ ! "$FLAVOR" =~ ^(cpu|cu102|cu113|cu116|cu117)$ ]]; then + if [[ ! "$FLAVOR" =~ ^(cpu|cu102|cu113|cu116|cu117|cu118)$ ]]; then echo "$FLAVOR is not supported." exit 1 fi diff --git a/gradle.properties b/gradle.properties index 1ad59e370e7..4ae37952273 100644 --- a/gradle.properties +++ b/gradle.properties @@ -13,7 +13,7 @@ systemProp.org.gradle.internal.publish.checksums.insecure=true djl_version=0.22.0 mxnet_version=1.9.1 -pytorch_version=1.13.1 +pytorch_version=2.0.0 tensorflow_version=2.10.1 tflite_version=2.6.2 trt_version=8.4.1 From afb7d9dca1f3f057fe4958ec65eae16b3fe578e0 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Tue, 11 Apr 2023 17:59:27 -0700 Subject: [PATCH 2/3] [pytorch] Fixes binary location check for 2.0 --- .../src/main/java/ai/djl/pytorch/jni/LibUtils.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java index e211d003dbd..3ee18dbc1e2 100644 --- a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java +++ b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java @@ -313,15 +313,7 @@ private static LibTorch copyNativeLibraryFromClasspath(Platform platform) { if (!m.matches()) { throw new AssertionError("Unexpected version: " + version); } - String[] versions = m.group(1).split("\\."); - int minorVersion = Integer.parseInt(versions[1]); - int buildVersion = Integer.parseInt(versions[2]); - String pathPrefix; - if (minorVersion > 10 || (minorVersion == 10 && buildVersion == 2)) { - pathPrefix = "pytorch/" + flavor + '/' + classifier; - } else { - pathPrefix = "native/lib"; - } + String pathPrefix = "pytorch/" + flavor + '/' + classifier; Files.createDirectories(cacheDir); tmp = Files.createTempDirectory(cacheDir, "tmp"); From 8f33a36fdaa0f8935e123ac7bb922951c46848cb Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Wed, 12 Apr 2023 07:30:41 -0700 Subject: [PATCH 3/3] [pytorch] Downloads only matching cuda version of native library --- .../java/ai/djl/pytorch/jni/LibUtils.java | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java index 3ee18dbc1e2..526c6fdd0ab 100644 --- a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java +++ b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java @@ -31,6 +31,7 @@ import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Properties; @@ -289,6 +290,7 @@ private static LibTorch findNativeLibrary() { } private static LibTorch copyNativeLibraryFromClasspath(Platform platform) { + logger.debug("Found bundled PyTorch package: {}.", platform); String version = platform.getVersion(); String flavor = platform.getFlavor(); if (!flavor.endsWith("-precxx11") @@ -350,6 +352,7 @@ private static LibTorch downloadPyTorch(Platform platform) { String classifier = platform.getClassifier(); String precxx11; String flavor = Utils.getEnvOrSystemProperty("PYTORCH_FLAVOR"); + boolean override; if (flavor == null || flavor.isEmpty()) { flavor = platform.getFlavor(); if (System.getProperty("os.name").startsWith("Linux") @@ -360,9 +363,11 @@ private static LibTorch downloadPyTorch(Platform platform) { precxx11 = ""; } flavor += precxx11; + override = false; } else { logger.info("Uses override PYTORCH_FLAVOR: {}", flavor); precxx11 = flavor.endsWith("-precxx11") ? "-precxx11" : ""; + override = true; } Path cacheDir = Utils.getEngineCacheDir("pytorch"); @@ -399,23 +404,32 @@ private static LibTorch downloadPyTorch(Platform platform) { Files.createDirectories(cacheDir); List lines = Utils.readLines(is); if (flavor.startsWith("cu")) { - String cudaMajor = flavor.substring(0, 4); + int cudaVersion = Integer.parseInt(flavor.substring(2, 5)); Pattern pattern = Pattern.compile( - '(' - + cudaMajor - + "\\d" + "cu(\\d\\d\\d)" + precxx11 - + ")/" + + '/' + classifier + "/native/lib/" + NATIVE_LIB_NAME + ".gz"); + List cudaVersions = new ArrayList<>(); boolean match = false; for (String line : lines) { Matcher m = pattern.matcher(line); if (m.matches()) { - flavor = m.group(1); + cudaVersions.add(Integer.parseInt(m.group(1))); + } + } + // find highest matching CUDA version + cudaVersions.sort(Collections.reverseOrder()); + for (int cuda : cudaVersions) { + if (override && cuda == cudaVersion) { + match = true; + break; + } else if (cuda <= cudaVersion) { + flavor = "cu" + cuda + precxx11; match = true; break; }