Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pytorch] Upgrade PyTorch to 2.0.0 #2525

Merged
merged 3 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .github/workflows/nightly_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ jobs:
LD_LIBRARY_PATH=/root/.djl.ai/paddle/${PADDLE_VERSION}-cu102-linux-x86_64:/usr/local/cuda/lib64 \
./gradlew :engines:paddlepaddle:paddlepaddle-model-zoo:test

test-cuda-113:
test-cuda-118:
if: github.repository == 'deepjavalibrary/djl'
runs-on: [ self-hosted, gpu ]
container:
image: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04
image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04
options: --gpus all --runtime=nvidia
timeout-minutes: 30
needs: create-runners
Expand Down Expand Up @@ -203,7 +203,7 @@ jobs:
publish:
if: github.repository == 'deepjavalibrary/djl'
runs-on: ubuntu-latest
needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-113 ]
needs: [ build, test-pytorch, test-tensorflow, test-aarch64, test-cuda-102, test-cuda-118 ]
steps:
- uses: actions/checkout@v3
- name: Set up JDK 11
Expand All @@ -223,6 +223,7 @@ jobs:
run: |
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -Psnapshot
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -Psnapshot
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -Psnapshot
./gradlew clean engines:ml:xgboost:publish -Pgpu -Psnapshot
./gradlew clean publish -Psnapshot
cd bom
Expand All @@ -237,6 +238,7 @@ jobs:
run: |
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.11.0 -P${{ github.event.inputs.mode }}
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.12.1 -P${{ github.event.inputs.mode }}
./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.13.1 -P${{ github.event.inputs.mode }}
./gradlew clean engines:ml:xgboost:publish -Pgpu -P${{ github.event.inputs.mode }}
./gradlew clean publish -P${{ github.event.inputs.mode }}
cd bom
Expand Down Expand Up @@ -282,7 +284,7 @@ jobs:
stop-runners:
if: ${{ github.repository == 'deepjavalibrary/djl' && always() }}
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-113 ]
needs: [ create-runners, test-aarch64, test-cuda-102, test-cuda-118 ]
steps:
- name: Stop all instances
run: |
Expand Down
6 changes: 3 additions & 3 deletions api/src/main/java/ai/djl/util/Platform.java
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,10 @@ public boolean matches(Platform system) {
return true;
}

// native package can run on system which major version is greater or equal
// native package can run on system which CUDA version is greater or equal
if (system.flavor.startsWith("cu")
&& Integer.parseInt(flavor.substring(2, 4))
<= Integer.parseInt(system.flavor.substring(2, 4))) {
&& Integer.parseInt(flavor.substring(2, 5))
<= Integer.parseInt(system.flavor.substring(2, 5))) {
return true;
}
logger.warn("The bundled library: " + this + " doesn't match system: " + system);
Expand Down
4 changes: 2 additions & 2 deletions api/src/test/java/ai/djl/util/PlatformTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ public void testPlatform() throws IOException {
platform = Platform.fromUrl(url);
// cu111 can run on cu113 machine
Assert.assertTrue(platform.matches(system));
// cu113 can run on cu111 machine (the same major version)
Assert.assertTrue(system.matches(platform));
// cu113 cannot run on cu111 machine (the same major version)
Assert.assertFalse(system.matches(platform));

url = createPropertyFile("version=1.8.0\nclassifier=cu102-linux-x86_64");
platform = Platform.fromUrl(url);
Expand Down
77 changes: 39 additions & 38 deletions engines/pytorch/pytorch-engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ The following table illustrates which pytorch version that DJL supports:

| PyTorch engine version | PyTorch native library version |
|------------------------|-------------------------------------------|
| pytorch-engine:0.22.0 | 1.11.0, 1.12.1, 1.13.1, 2.0.0 |
| pytorch-engine:0.21.0 | 1.11.0, 1.12.1, 1.13.1 |
| pytorch-engine:0.20.0 | 1.11.0, 1.12.1, 1.13.0 |
| pytorch-engine:0.19.0 | 1.10.0, 1.11.0, 1.12.1 |
Expand Down Expand Up @@ -108,21 +109,21 @@ export PYTORCH_FLAVOR=cpu
### macOS
For macOS, you can use the following libraries:

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>osx-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Expand All @@ -132,21 +133,21 @@ For macOS, you can use the following library:
### macOS M1
For macOS M1, you can use the following libraries:

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:osx-aarch64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:osx-aarch64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>osx-aarch64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Expand All @@ -157,63 +158,63 @@ installed on your GPU machine, you can use one of the following library:

#### Linux GPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117:1.13.1:linux-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118:2.0.0:linux-x86_64 - CUDA 11.8

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117</artifactId>
<artifactId>pytorch-native-cu118</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### Linux CPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:linux-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:linux-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>linux-x86_64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### For aarch64 build

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-aarch64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-aarch64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu-precxx11</artifactId>
<classifier>linux-aarch64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Expand All @@ -223,22 +224,22 @@ installed on your GPU machine, you can use one of the following library:
We also provide packages for systems like CentOS 7/Ubuntu 14.04 with GLIBC >= 2.17.
All the packages were built with GCC 7; we provide a newer `libstdc++.so.6.24` in the package that contains `CXXABI_1.3.9` so the package can be used successfully.

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117-precxx11:1.13.1:linux-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-native-cpu-precxx11:1.13.1:linux-x86_64 - CPU
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118-precxx11:2.0.0:linux-x86_64 - CUDA 11.8
- ai.djl.pytorch:pytorch-native-cpu-precxx11:2.0.0:linux-x86_64 - CPU

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117-precxx11</artifactId>
<artifactId>pytorch-native-cu118-precxx11</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Expand All @@ -248,13 +249,13 @@ All the package were built with GCC 7, we provided a newer `libstdc++.so.6.24` i
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu-precxx11</artifactId>
<classifier>linux-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Expand All @@ -269,42 +270,42 @@ For the Windows platform, you can choose between CPU and GPU.

#### Windows GPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cu117:1.13.1:win-x86_64 - CUDA 11.7
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cu118:2.0.0:win-x86_64 - CUDA 11.8

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu117</artifactId>
<artifactId>pytorch-native-cu118</artifactId>
<classifier>win-x86_64</classifier>
<version>1.13.1</version>
<version>2.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```

### Windows CPU

- ai.djl.pytorch:pytorch-jni:1.13.1-0.21.0
- ai.djl.pytorch:pytorch-native-cpu:1.13.1:win-x86_64
- ai.djl.pytorch:pytorch-jni:2.0.0-0.22.0
- ai.djl.pytorch:pytorch-native-cpu:2.0.0:win-x86_64

```xml
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>win-x86_64</classifier>
<scope>runtime</scope>
<version>1.13.1</version>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>1.13.1-0.21.0</version>
<version>2.0.0-0.22.0</version>
<scope>runtime</scope>
</dependency>
```
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
Expand Down Expand Up @@ -289,6 +290,7 @@ private static LibTorch findNativeLibrary() {
}

private static LibTorch copyNativeLibraryFromClasspath(Platform platform) {
logger.debug("Found bundled PyTorch package: {}.", platform);
String version = platform.getVersion();
String flavor = platform.getFlavor();
if (!flavor.endsWith("-precxx11")
Expand All @@ -313,15 +315,7 @@ private static LibTorch copyNativeLibraryFromClasspath(Platform platform) {
if (!m.matches()) {
throw new AssertionError("Unexpected version: " + version);
}
String[] versions = m.group(1).split("\\.");
int minorVersion = Integer.parseInt(versions[1]);
int buildVersion = Integer.parseInt(versions[2]);
String pathPrefix;
if (minorVersion > 10 || (minorVersion == 10 && buildVersion == 2)) {
pathPrefix = "pytorch/" + flavor + '/' + classifier;
} else {
pathPrefix = "native/lib";
}
String pathPrefix = "pytorch/" + flavor + '/' + classifier;

Files.createDirectories(cacheDir);
tmp = Files.createTempDirectory(cacheDir, "tmp");
Expand Down Expand Up @@ -358,6 +352,7 @@ private static LibTorch downloadPyTorch(Platform platform) {
String classifier = platform.getClassifier();
String precxx11;
String flavor = Utils.getEnvOrSystemProperty("PYTORCH_FLAVOR");
boolean override;
if (flavor == null || flavor.isEmpty()) {
flavor = platform.getFlavor();
if (System.getProperty("os.name").startsWith("Linux")
Expand All @@ -368,9 +363,11 @@ private static LibTorch downloadPyTorch(Platform platform) {
precxx11 = "";
}
flavor += precxx11;
override = false;
} else {
logger.info("Uses override PYTORCH_FLAVOR: {}", flavor);
precxx11 = flavor.endsWith("-precxx11") ? "-precxx11" : "";
override = true;
}

Path cacheDir = Utils.getEngineCacheDir("pytorch");
Expand Down Expand Up @@ -407,23 +404,32 @@ private static LibTorch downloadPyTorch(Platform platform) {
Files.createDirectories(cacheDir);
List<String> lines = Utils.readLines(is);
if (flavor.startsWith("cu")) {
String cudaMajor = flavor.substring(0, 4);
int cudaVersion = Integer.parseInt(flavor.substring(2, 5));
Pattern pattern =
Pattern.compile(
'('
+ cudaMajor
+ "\\d"
"cu(\\d\\d\\d)"
+ precxx11
+ ")/"
+ '/'
+ classifier
+ "/native/lib/"
+ NATIVE_LIB_NAME
+ ".gz");
List<Integer> cudaVersions = new ArrayList<>();
boolean match = false;
for (String line : lines) {
Matcher m = pattern.matcher(line);
if (m.matches()) {
flavor = m.group(1);
cudaVersions.add(Integer.parseInt(m.group(1)));
}
}
// find highest matching CUDA version
cudaVersions.sort(Collections.reverseOrder());
for (int cuda : cudaVersions) {
if (override && cuda == cudaVersion) {
match = true;
break;
} else if (cuda <= cudaVersion) {
flavor = "cu" + cuda + precxx11;
match = true;
break;
}
Expand Down
Loading