[pytorch] Upgrade PyTorch to 2.0.0
frankfliu committed Apr 12, 2023
1 parent e6cbe1e commit b46a76f
Showing 9 changed files with 76 additions and 60 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/nightly_publish.yml
@@ -162,11 +162,11 @@ jobs:
LD_LIBRARY_PATH=/root/.djl.ai/paddle/${PADDLE_VERSION}-cu102-linux-x86_64:/usr/local/cuda/lib64 \
./gradlew :engines:paddlepaddle:paddlepaddle-model-zoo:test
test-cuda-113:
test-cuda-118:
if: github.repository == 'deepjavalibrary/djl'
runs-on: [ self-hosted, gpu ]
container:
image: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04
image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04
options: --gpus all --runtime=nvidia
timeout-minutes: 30
needs: create-runners
@@ -203,7 +203,7 @@ jobs:
publish:
if: github.repository == 'deepjavalibrary/djl'
runs-on: ubuntu-latest
needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-113 ]
needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-118 ]
steps:
- uses: actions/checkout@v3
- name: Set up JDK 11
@@ -223,6 +223,7 @@ jobs:
run: |
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -Psnapshot
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -Psnapshot
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -Psnapshot
./gradlew clean engines:ml:xgboost:publish -Pgpu -Psnapshot
./gradlew clean publish -Psnapshot
cd bom
@@ -237,6 +238,7 @@ jobs:
run: |
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -P${{ github.event.inputs.mode }}
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -P${{ github.event.inputs.mode }}
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -P${{ github.event.inputs.mode }}
./gradlew clean engines:ml:xgboost:publish -Pgpu -P${{ github.event.inputs.mode }}
./gradlew clean publish -P${{ github.event.inputs.mode }}
cd bom
@@ -282,7 +284,7 @@ jobs:
stop-runners:
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-113 ]
needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-118 ]
steps:
- name: Stop all instances
run: |
6 changes: 3 additions & 3 deletions api/src/main/java/ai/djl/util/Platform.java
@@ -295,10 +295,10 @@ public boolean matches(Platform system) {
return true;
}

// native package can run on system which major version is greater or equal
// native package can run on system which CUDA version is greater or equal
if (system.flavor.startsWith("cu")
&& Integer.parseInt(flavor.substring(2, 4))
<= Integer.parseInt(system.flavor.substring(2, 4))) {
&& Integer.parseInt(flavor.substring(2, 5))
<= Integer.parseInt(system.flavor.substring(2, 5))) {
return true;
}
logger.warn("The bundled library: " + this + " doesn't match system: " + system);
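To make the effect of this change concrete, here is a minimal, self-contained sketch (not DJL's actual `Platform` class; the helper name and `main` driver are hypothetical) of the three-digit CUDA flavor comparison the patch switches to:

```java
public final class FlavorMatchSketch {

    // Sketch of the check above: compare the full three-digit CUDA version
    // (e.g. 117 vs 118) instead of only the two-digit major version.
    static boolean matches(String bundled, String system) {
        if (bundled.equals(system)) {
            return true;
        }
        // A bundled CUDA build can run on a system whose CUDA version is greater or equal.
        return bundled.startsWith("cu")
                && system.startsWith("cu")
                && Integer.parseInt(bundled.substring(2, 5))
                        <= Integer.parseInt(system.substring(2, 5));
    }

    public static void main(String[] args) {
        System.out.println(matches("cu117", "cu118")); // true:  cu117 binaries run on a CUDA 11.8 machine
        System.out.println(matches("cu118", "cu117")); // false: cu118 binaries need CUDA >= 11.8
    }
}
```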
4 changes: 2 additions & 2 deletions api/src/test/java/ai/djl/util/PlatformTest.java
@@ -61,8 +61,8 @@ public void testPlatform() throws IOException {
platform = Platform.fromUrl(url);
// cu111 can run on cu113 machine
Assert.assertTrue(platform.matches(system));
// cu113 can run on cu111 machine (the same major version)
Assert.assertTrue(system.matches(platform));
// cu113 cannot run on cu111 machine (the same major version)
Assert.assertFalse(system.matches(platform));

url = createPropertyFile("version=1.8.0\nclassifier=cu102-linux-x86_64");
platform = Platform.fromUrl(url);
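The new expectation can also be reproduced outside the test harness. This sketch assumes only the `Platform.fromUrl` and `matches` calls that already appear in the test above; the temporary-file helper and class name are made up for illustration:

```java
import ai.djl.util.Platform;

import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public final class PlatformMatchDemo {

    // Writes a minimal native.properties-style file and loads it as a Platform.
    static Platform load(String classifier) throws Exception {
        Path file = Files.createTempFile("pytorch", ".properties");
        Files.write(file, ("version=1.11.0\nclassifier=" + classifier).getBytes(StandardCharsets.UTF_8));
        URL url = file.toUri().toURL();
        return Platform.fromUrl(url);
    }

    public static void main(String[] args) throws Exception {
        Platform cu111 = load("cu111-linux-x86_64");
        Platform cu113 = load("cu113-linux-x86_64");
        System.out.println(cu111.matches(cu113)); // true: a cu111 package runs on a cu113 machine
        System.out.println(cu113.matches(cu111)); // false after this commit
    }
}
```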
77 changes: 39 additions & 38 deletions engines/pytorch/pytorch-engine/README.md
@@ -46,6 +46,7 @@ The following table illustrates which pytorch version that DJL supports:

| PyTorch engine version | PyTorch native library version |
|------------------------|-------------------------------------------|
| pytorch-engine:0.22.0 | 1.11.0, 1.12.1, 1.13.1, 2.0.0 |
| pytorch-engine:0.21.0 | 1.11.0, 1.12.1, 1.13.1 |
| pytorch-engine:0.20.0 | 1.11.0, 1.12.1, 1.13.0 |
| pytorch-engine:0.19.0 | 1.10.0, 1.11.0, 1.12.1 |
@@ -108,21 +108,21 @@ export PYTORCH_FLAVOR=cpu
### macOS
For macOS, you can use the following library:

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>osx-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
@@ -132,21 +133,21 @@ For macOS, you can use the following library:
### macOS M1
For macOS M1, you can use the following library:

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-aarch64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-aarch64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>osx-aarch64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
@@ -157,63 +158,63 @@ installed on your GPU machine, you can use one of the following library:

#### Linux GPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117:1.13.1:linux-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118:2.0.0:linux-x86_64 - CUDA 11.8

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117</artifactId>
<artifactId>pytorch-native-cu118</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### Linux CPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:linux-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:linux-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>linux-x86_64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### For aarch64 build

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-aarch64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-aarch64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu-precxx11</artifactId>
<classifier>linux-aarch64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
@@ -223,22 +224,22 @@ installed on your GPU machine, you can use one of the following library:
We also provide packages for the system like CentOS 7/Ubuntu 14.04 with GLIBC >= 2.17.
All the package were built with GCC 7, we provided a newer `libstdc++.so.6.24` in the package that contains `CXXABI_1.3.9` to use the package successfully.

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117-precxx11:1.13.1:linux-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-x86_64 - CPU
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118-precxx11:2.0.0:linux-x86_64 - CUDA 11.8
- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-x86_64 - CPU

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117-precxx11</artifactId>
<artifactId>pytorch-native-cu118-precxx11</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
@@ -248,13 +249,13 @@ All the package were built with GCC 7, we provided a newer `libstdc++.so.6.24` i
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu-precxx11</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
@@ -269,42 +270,42 @@ For the Windows platform, you can choose between CPU and GPU.

#### Windows GPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117:1.13.1:win-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118:2.0.0:win-x86_64 - CUDA 11.8

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117</artifactId>
<artifactId>pytorch-native-cu118</artifactId>
<classifier>win-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### Windows CPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:win-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:win-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>win-x86_64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
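Whichever of the Maven combinations above is chosen, a quick way to confirm that the 2.0.0 native library was actually picked up is to query the engine at runtime. This is a small hedged example using DJL's public `Engine` API; the class name and expected output are illustrative:

```java
import ai.djl.engine.Engine;

public final class CheckPyTorchVersion {

    public static void main(String[] args) {
        // Resolving the engine triggers loading of the pytorch-jni / pytorch-native pair on the classpath.
        Engine engine = Engine.getEngine("PyTorch");
        System.out.println(engine.getEngineName() + " " + engine.getVersion()); // expected: PyTorch 2.0.0
    }
}
```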
engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java
@@ -407,12 +407,10 @@ private static LibTorch downloadPyTorch(Platform platform) {
Files.createDirectories(cacheDir);
List<String> lines = Utils.readLines(is);
if (flavor.startsWith("cu")) {
String cudaMajor = flavor.substring(0, 4);
int cudaVersion = Integer.parseInt(flavor.substring(2, 5));
Pattern pattern =
Pattern.compile(
'('
+ cudaMajor
+ "\\d"
"(cu\\d\\d\\d"
+ precxx11
+ ")/"
+ classifier
@@ -423,9 +421,13 @@ private static LibTorch downloadPyTorch(Platform platform) {
for (String line : lines) {
Matcher m = pattern.matcher(line);
if (m.matches()) {
flavor = m.group(1);
match = true;
break;
String f = m.group(1);
int v = Integer.parseInt(f.substring(2, 5));
if (cudaVersion >= v) {
flavor = f;
match = true;
break;
}
}
}
if (!match) {
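The new selection logic above no longer pins the regex to the system's CUDA major version; it matches any published `cuXXX` flavor and keeps the first one that is not newer than the system's CUDA. Below is a standalone, hypothetical sketch of that fallback; the index lines and class name are made up, whereas the real code reads the lines from DJL's download index:

```java
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class CudaFallbackSketch {

    // Hypothetical stand-in for the download-index lines read in LibUtils above.
    static final List<String> LINES =
            List.of("cu121/linux-x86_64", "cu118/linux-x86_64", "cu117/linux-x86_64");

    public static void main(String[] args) {
        String flavor = "cu118";            // flavor detected on the system
        String classifier = "linux-x86_64"; // platform classifier
        String precxx11 = "";               // or "-precxx11"

        int cudaVersion = Integer.parseInt(flavor.substring(2, 5));
        Pattern pattern = Pattern.compile("(cu\\d\\d\\d" + precxx11 + ")/" + classifier);

        for (String line : LINES) {
            Matcher m = pattern.matcher(line);
            // Keep the first published flavor whose CUDA version does not exceed the system's.
            if (m.matches() && cudaVersion >= Integer.parseInt(m.group(1).substring(2, 5))) {
                flavor = m.group(1);
                break;
            }
        }
        System.out.println("selected flavor: " + flavor); // cu118 (cu121 is skipped as too new)
    }
}
```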
8 changes: 7 additions & 1 deletion engines/pytorch/pytorch-jni/build.gradle
@@ -24,7 +24,13 @@ processResources {
"osx-x86_64/cpu/libdjl_torch.dylib",
"win-x86_64/cpu/djl_torch.dll"
]
if (ptVersion.startsWith("1.13.")) {
if (ptVersion.startsWith("2.0.")) {
files.add("linux-aarch64/cpu-precxx11/libdjl_torch.so")
files.add("linux-x86_64/cu118/libdjl_torch.so")
files.add("linux-x86_64/cu118-precxx11/libdjl_torch.so")
files.add("win-x86_64/cu118/djl_torch.dll")
files.add("osx-aarch64/cpu/libdjl_torch.dylib")
} else if (ptVersion.startsWith("1.13.")) {
files.add("linux-aarch64/cpu-precxx11/libdjl_torch.so")
files.add("linux-x86_64/cu117/libdjl_torch.so")
files.add("linux-x86_64/cu117-precxx11/libdjl_torch.so")
11 changes: 8 additions & 3 deletions engines/pytorch/pytorch-native/build.gradle
@@ -22,6 +22,8 @@ if (project.hasProperty("cu11")) {
FLAVOR = "cu116"
} else if (VERSION.startsWith("1.13.")) {
FLAVOR = "cu117"
} else if (VERSION.startsWith("2.0.")) {
FLAVOR = "cu118"
} else {
throw new GradleException("Unsupported PyTorch version: ${VERSION}")
}
@@ -93,6 +95,8 @@ def prepareNativeLib(String binaryRoot, String ver) {
cu11 = "cu116"
} else if (ver.startsWith("1.13.")) {
cu11 = "cu117"
} else if (ver.startsWith("2.0.")) {
cu11 = "cu118"
} else {
throw new GradleException("Unsupported PyTorch version: ${ver}")
}
@@ -283,9 +287,9 @@ tasks.register('uploadS3') {
"${BINARY_ROOT}/cpu/win-x86_64/native/lib/",
"${BINARY_ROOT}/cpu-precxx11/linux-aarch64/native/lib/",
"${BINARY_ROOT}/cpu-precxx11/linux-x86_64/native/lib/",
"${BINARY_ROOT}/cu117/linux-x86_64/native/lib/",
"${BINARY_ROOT}/cu117/win-x86_64/native/lib/",
"${BINARY_ROOT}/cu117-precxx11/linux-x86_64/native/lib/"
"${BINARY_ROOT}/cu118/linux-x86_64/native/lib/",
"${BINARY_ROOT}/cu118/win-x86_64/native/lib/",
"${BINARY_ROOT}/cu118-precxx11/linux-x86_64/native/lib/"
]
uploadDirs.each { item ->
fileTree(item).files.name.each {
@@ -351,6 +355,7 @@ flavorNames.each { flavor ->
}
from "src/main/resources"
archiveClassifier = "${osName}"
archiveBaseName = "pytorch-native-${flavor}"

manifest {
attributes("Automatic-Module-Name": "ai.djl.pytorch_native_${flavor}_${osName}")
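For readers wiring this up outside Gradle, the default PyTorch-version-to-CUDA-flavor mapping applied by the build script can be summarized in a few lines. This is only a sketch of the mapping visible in the snippet above; the class and method names are illustrative, and versions not shown in the diff are omitted:

```java
public final class DefaultCudaFlavor {

    // Mirrors the mapping in pytorch-native/build.gradle shown above (sketch only).
    static String flavorFor(String ptVersion) {
        if (ptVersion.startsWith("1.13.")) {
            return "cu117";
        } else if (ptVersion.startsWith("2.0.")) {
            return "cu118";
        }
        throw new IllegalArgumentException("Unsupported PyTorch version: " + ptVersion);
    }

    public static void main(String[] args) {
        System.out.println(flavorFor("2.0.0"));  // cu118
        System.out.println(flavorFor("1.13.1")); // cu117
    }
}
```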