fix llama3 static run
yuanlehome committed Aug 28, 2024
1 parent 34a71c8 commit 10d3e95
Showing 1 changed file with 4 additions and 0 deletions.
llm/predict/predictor.py: 4 additions & 0 deletions
@@ -179,6 +179,7 @@ def _preprocess(self, source):
             source,
             max_length=self.config.src_length,
             truncation=True,
+            return_position_ids=True if not isinstance(self.tokenizer, ChatGLMTokenizer) else False,
             truncation_side="left",
             return_tensors=self.return_tensors,
             padding=True,
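For context, here is a minimal sketch of what the added argument does, outside the commit itself. The model id and prompt are placeholders, and the claim that the static-graph LLaMA 3 export consumes explicit position ids is inferred from the commit message, not stated in the diff:

# Sketch only: ask a PaddleNLP tokenizer to return explicit position ids.
from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")  # placeholder model id
features = tokenizer(
    ["Hello, how are you?"],          # placeholder prompt
    max_length=1024,
    truncation=True,
    truncation_side="left",
    padding=True,
    return_position_ids=True,         # the kwarg this commit enables for non-ChatGLM tokenizers
    return_tensors="pd",
)
# The batch now carries "position_ids" alongside "input_ids", which the
# static-graph LLaMA 3 predictor presumably expects as an explicit input.
print(list(features.keys()))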
@@ -305,6 +306,9 @@ def __init__(self, config: PredictorArgument, tokenizer: PretrainedTokenizer = N
         inference_config.disable_gpu()
         inference_config.disable_glog_info()
         inference_config.enable_new_executor()
+        # remove `gpu_cpu_map_matmul_v2_to_matmul_pass` to avoid mapping matmul_v2 -> matmul op
+        if config.dtype == "bfloat16":
+            inference_config.delete_pass("gpu_cpu_map_matmul_v2_to_matmul_pass")
         if in_pir_executor_mode():
             inference_config.enable_new_ir()
         if in_cinn_mode():
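And a minimal sketch of the second change, showing how a named IR pass is dropped from a Paddle inference Config before the predictor is created. The model and params paths, and the standalone `dtype` variable, are placeholders for what the predictor reads from `PredictorArgument`:

import paddle.inference as paddle_infer

# Placeholder paths to an exported static-graph model.
config = paddle_infer.Config("llama3_static/model.pdmodel", "llama3_static/model.pdiparams")
config.disable_glog_info()

dtype = "bfloat16"  # stand-in for PredictorArgument.dtype
if dtype == "bfloat16":
    # Keep matmul_v2 ops as-is instead of letting this pass rewrite them to matmul.
    config.delete_pass("gpu_cpu_map_matmul_v2_to_matmul_pass")

predictor = paddle_infer.create_predictor(config)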
