diff --git a/llm/predictor.py b/llm/predictor.py
index 6c8f84c84cdb..6d04d9c203ef 100644
--- a/llm/predictor.py
+++ b/llm/predictor.py
@@ -27,7 +27,6 @@
 import paddle.distributed.fleet.base.topology as tp
 import paddle.incubate.multiprocessing as mp
 from paddle.distributed import fleet
-from paddlenlp_ops import reset_stop_value
 from utils import (
     dybatch_preprocess,
     get_alibi_slopes,
@@ -57,6 +56,16 @@
 from paddlenlp.utils.import_utils import import_module, is_paddlenlp_ops_available
 from paddlenlp.utils.log import logger
 
+try:
+    from paddlenlp_ops import reset_stop_value
+except (ImportError, ModuleNotFoundError):
+    logger.warning(
+        "if you run predictor.py with --inference_model argument, please ensure you install "
+        "the paddlenlp_ops by following the instructions "
+        "provided at https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
+    )
+
+
 # Note(@RochardWooSJTU): MAX_BSZ must be the same as definition in get_output / save_output
 MAX_BSZ = 512
 
diff --git a/llm/utils.py b/llm/utils.py
index bd9f366577ba..bee1529f8ecc 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -25,7 +25,6 @@
 import paddle.incubate.multiprocessing as mp
 from paddle.distributed import fleet
 from paddle.io import BatchSampler, DataLoader, DistributedBatchSampler
-from paddlenlp_ops import get_output
 from sklearn.metrics import accuracy_score
 
 from paddlenlp.datasets import InTokensIterableDataset
@@ -704,6 +703,9 @@ def read_res(model_name_or_path: str, tensor_queue: mp.Queue, result_queue: mp.Q
     logger.info("Start read result message")
     logger.info(f"Current path is {os.getcwd()}")
+
+    from paddlenlp_ops import get_output
+
     while True:
         get_output(output_tensor, 0, True)
         if output_tensor[0, 0] == -2:  # read none