[pytorch] Add system property to config GraphExecutorOptimize (#2156)

Co-authored-by: Frank Liu <frankfliu2000@gmail.com>
KexinFeng and frankfliu authored Nov 18, 2022
1 parent ed507a8 commit 28209d6
Showing 5 changed files with 33 additions and 19 deletions.
18 changes: 11 additions & 7 deletions docs/development/inference_performance_optimization.md
@@ -38,18 +38,22 @@ Note that MxNet uses thread_local storage: Every thread that performs inference

### PyTorch

#### Multithreading Inference
#### Graph Optimizer

If you use the multithreading inference feature with DJL 0.8.0 or an earlier version, you have to disable closing NDArrays on GC by setting:
The PyTorch graph executor optimizer (JIT tensorexpr fuser) is enabled by default. It adds
warm-up latency to the first few inference calls. You can disable the graph executor optimizer
globally by setting the following system property:

```
# If you are using DJL 0.5.0
-Dai.djl.pytorch.disable_close_resource_on_finalize=true
# If you are using DJL 0.6.0
-Dai.djl.disable_close_resource_on_finalize=true
-Dai.djl.pytorch.graph_optimizer=false
```
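The property can also be set programmatically, as long as it is set before the first forward call; a minimal sketch (the placement is the only assumption, the property name comes from this commit):

```java
// The engine reads this property at forward time, so set it before the first inference
System.setProperty("ai.djl.pytorch.graph_optimizer", "false");
```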

Please make sure all the NDArrays are attached to the NDManager.
The graph executor optimizer is a per-thread configuration. If you want to disable it on a
per-model basis, you have to call the following method in each inference thread:

```java
JniUtils.setGraphExecutorOptimize(false);
```
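For example, a minimal sketch of a multi-threaded setup, assuming an already loaded `ZooModel<NDList, NDList> model` and an `NDList input` (the pool size and I/O types are illustrative):

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDList;
import ai.djl.pytorch.jni.JniUtils;
import ai.djl.translate.TranslateException;

ExecutorService pool = Executors.newFixedThreadPool(4);
for (int i = 0; i < 4; i++) {
    pool.submit(() -> {
        // The setting is thread local, so it must be applied in every inference thread
        JniUtils.setGraphExecutorOptimize(false);
        try (Predictor<NDList, NDList> predictor = model.newPredictor()) {
            NDList output = predictor.predict(input);
        } catch (TranslateException e) {
            throw new IllegalStateException(e);
        }
    });
}
pool.shutdown();
```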

#### oneDNN(MKLDNN) acceleration
Unlike TensorFlow and Apache MXNet, PyTorch doesn't enable MKLDNN by default; it is treated as a device type in the same way as CPU and GPU.
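For reference, oneDNN can be switched on with a system property; a hedged sketch (quoted from DJL's published documentation rather than from the visible hunk):

```
-Dai.djl.pytorch.use_mkldnn=true
```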
@@ -99,11 +99,13 @@ public void load(Path modelPath, String prefix, Map<String, ?> options)
for (int i = 0; i < extraFileKeys.length; i++) {
properties.put(extraFileKeys[i], extraFileValues[i]);
}
// By default, the parameters are frozen, since the previous version before adding this
// `trainParam`, they were frozen due to the setting `JITCallGuard guard`, which
// disables
// autograd. Also, the pretrained parameters usually should not be updated too much. It
// is safe to freeze it. Users may unfreeze it and set their learning rate small.

/*
* By default, the parameters are frozen, since the previous version before adding this
* trainParam, they were frozen due to the setting JITCallGuard guard, which disables
* autograd. Also, the pretrained parameters usually should not be updated too much. It
* is safe to freeze it. Users may unfreeze it and set their learning rate small.
*/
block.freezeParameters(!trainParam);
} else {
boolean hasParameter = true;
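The freeze-by-default behavior pairs with the `trainParam` model option used later in this commit; a sketch of loading a model with its parameters unfrozen for fine-tuning (the criteria mirrors the example below, only the hard-coded "true" is an assumption):

```java
import ai.djl.ndarray.NDList;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ZooModel;

Criteria<NDList, NDList> criteria =
        Criteria.builder()
                .setTypes(NDList.class, NDList.class)
                .optModelUrls("djl://ai.djl.pytorch/resnet18_embedding")
                .optEngine("PyTorch")
                // "trainParam=true" makes load() call block.freezeParameters(false)
                .optOption("trainParam", "true")
                .build();
ZooModel<NDList, NDList> embedding = criteria.loadModel();
```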
@@ -122,6 +122,17 @@ protected NDList forwardInternal(
JniUtils.enableInferenceMode(this);
}
}

if (System.getProperty("ai.djl.pytorch.graph_optimizer") != null) {
/*
* By default, graph_optimizer is enabled, but it requires warm-up time over the first
* few inference calls. This optimizer setting is thread local, so it has to be
* disabled per thread. Users must programmatically call
* JniUtils.setGraphExecutorOptimize(false) if they want to disable the graph
* optimizer per model.
*/
boolean setOptimizer = Boolean.getBoolean("ai.djl.pytorch.graph_optimizer");
JniUtils.setGraphExecutorOptimize(setOptimizer);
}
if (first) {
synchronized (PtSymbolBlock.class) {
if (first) {
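One subtlety in the hunk above: `Boolean.getBoolean` returns true only when the property exists and equals "true" (case-insensitive), so any other value disables the optimizer:

```java
System.setProperty("ai.djl.pytorch.graph_optimizer", "0");
// Boolean.getBoolean matches only the literal "true", so "0" yields false here,
// and the engine then calls JniUtils.setGraphExecutorOptimize(false)
boolean setOptimizer = Boolean.getBoolean("ai.djl.pytorch.graph_optimizer");
```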
@@ -25,9 +25,6 @@ struct JITCallGuard {
torch::NoGradGuard no_grad;
#else
c10::InferenceMode guard;
#ifdef __ANDROID__
torch::jit::GraphOptimizerEnabledGuard no_optimizer_guard{false};
#endif
#endif
};

@@ -65,7 +65,7 @@ public static void main(String[] args)

public static TrainingResult runExample(String[] args)
throws IOException, TranslateException, ModelException, URISyntaxException {
boolean retrain = args.length == 1 && "-p".equals(args[0]);
boolean trainParam = args.length == 1 && "-p".equals(args[0]);
// The modelUrl can be replaced by local model path:
// String modelUrl = "/YOUR_PATH/resnet18_embedding.pt";
String modelUrl = "djl://ai.djl.pytorch/resnet18_embedding";
@@ -75,7 +75,7 @@ public static TrainingResult runExample(String[] args)
.optModelUrls(modelUrl)
.optEngine("PyTorch")
.optProgress(new ProgressBar())
.optOption("trainParam", String.valueOf(retrain))
.optOption("trainParam", String.valueOf(trainParam))
.build();

ZooModel<NDList, NDList> embedding = criteria.loadModel();
@@ -85,7 +85,7 @@ public static TrainingResult runExample(String[] args)
new SequentialBlock()
.add(baseBlock)
.addSingleton(nd -> nd.squeeze(new int[] {2, 3}))
.add(Linear.builder().setUnits(2).build()) // linear on which dim?
.add(Linear.builder().setUnits(2).build())
.addSingleton(nd -> nd.softmax(1));

Model model = Model.newInstance("TransferFreshFruit");
@@ -120,7 +120,7 @@ public static TrainingResult runExample(String[] args)
EasyTrain.fit(trainer, 10, datasetTrain, datasetTest);

// Save model
// model.save("your-model-path");
// model.save(Paths.get("SAVE_PATH"), "transferFreshFruit");

model.close();
embedding.close();
