modify 3.x ipex example structure (#1858)
* modify 3.x ipex example structure
* add json path
* fix for sq
* minor fix
* Update run_clm_no_trainer.py
* Update run_clm_no_trainer.py
* Update run_clm_no_trainer.py
* minor fix
* remove old files
* fix act_algo

Signed-off-by: Cheng, Zixuan <zixuan.cheng@intel.com>
Co-authored-by: xinhe <xin3.he@intel.com>
1 parent 922b247 · commit b99a79d
Showing 20 changed files with 1,162 additions and 16 deletions.
@@ -0,0 +1,46 @@
{
    "pytorch": {
        "gpt_j_ipex": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 1
        },
        "gpt_j_ipex_sq": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 1
        },
        "llama2_7b_ipex": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 1
        },
        "llama2_7b_ipex_sq": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 1
        },
        "opt_125m_ipex": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 8
        },
        "opt_125m_ipex_sq": {
            "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant",
            "dataset_location": "",
            "input_model": "",
            "main_script": "run_clm_no_trainer.py",
            "batch_size": 8
        }
    }
}
...ch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md
64 changes: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
Step-by-Step
============
This document provides step-by-step instructions for running large language models (LLMs) with Smooth Quantization on the 4th Gen Intel® Xeon® Scalable Processor (codenamed Sapphire Rapids), using PyTorch and Intel® Extension for PyTorch.
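The quantization commands below pass an `--alpha` value to the smooth quant algorithm. As a rough, illustrative sketch (not the Neural Compressor implementation), SmoothQuant rescales each input channel so that quantization difficulty migrates from activations to weights, and `alpha` balances the two sides:

```python
# Illustrative only: the per-channel scale rule behind the "--alpha" knob.
# Function name and shapes are assumptions for this sketch, not the library's API.
import torch

def smooth_scales(act_absmax: torch.Tensor, w_absmax: torch.Tensor, alpha: float) -> torch.Tensor:
    """act_absmax / w_absmax: per-input-channel |max| statistics collected during calibration."""
    return (act_absmax.pow(alpha) / w_absmax.pow(1.0 - alpha)).clamp(min=1e-5)

# Activations are divided by s and weights multiplied by s per channel,
# so X @ W is mathematically unchanged but activations become easier to quantize.
act_absmax = torch.tensor([8.0, 0.5, 3.0])
w_absmax = torch.tensor([0.2, 0.4, 0.1])
print(smooth_scales(act_absmax, w_absmax, alpha=0.5))
```

A larger `alpha` shifts more of the quantization difficulty onto the weights, which is why different models use different values below (1.0 for GPT-J, 0.5 for OPT-125m, 0.8 for LLaMA2-7b).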
The script `run_clm_no_trainer.py` supports quantization of `GPTJ`, `OPT`, `LLaMA2`, `BLOOM` and `Falcon`, and validates last-word prediction accuracy with [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness.git); more models are being added.
# Prerequisite
## 1. Create Environment
```
# Installation
pip install -r requirements.txt
```

# Run

Here is how to run the scripts:

**Causal Language Modeling (CLM)**

`run_clm_no_trainer.py` quantizes large language models using the [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) dataset for calibration and validates accuracy on `lambada_openai`, `piqa`, `winogrande`, `hellaswag` and other datasets provided by lm_eval. Example commands are given below.
### GPT-J-6b

#### Quantization
```bash
# "--sq" is used to enable smooth quant
python run_clm_no_trainer.py \
  --model EleutherAI/gpt-j-6B \
  --quantize \
  --sq \
  --alpha 1.0 \
  --ipex \
  --output_dir "saved_results"
```
**Notes**: Smooth quantization here is based on torch.jit. Without past key values in `example_inputs`, the quantized model cannot be used for text generation.
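As a purely hypothetical illustration of that note, the `example_inputs` used for tracing would need to carry a (possibly empty) key/value cache so that the traced graph accepts `past_key_values` during generation. The names and shapes below are assumptions for a GPT-J-style model, not code taken from `run_clm_no_trainer.py`:

```python
# Hypothetical sketch: example_inputs that include past key values for torch.jit tracing.
# Dimensions are GPT-J-6B-like assumptions; adjust to the actual model config.
import torch

batch_size, seq_len, past_len = 1, 32, 0
num_layers, num_heads, head_dim = 28, 16, 256

input_ids = torch.randint(0, 50400, (batch_size, seq_len), dtype=torch.long)
attention_mask = torch.ones(batch_size, past_len + seq_len, dtype=torch.long)
# One (key, value) pair per layer; an empty past for the first forward pass.
past_key_values = tuple(
    (torch.zeros(batch_size, num_heads, past_len, head_dim),
     torch.zeros(batch_size, num_heads, past_len, head_dim))
    for _ in range(num_layers)
)

example_inputs = {
    "input_ids": input_ids,
    "attention_mask": attention_mask,
    "past_key_values": past_key_values,
}
```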
### OPT-125m

#### Quantization

```bash
# "--sq" is used to enable smooth quant
python run_clm_no_trainer.py \
  --model facebook/opt-125m \
  --quantize \
  --sq \
  --alpha 0.5 \
  --ipex \
  --output_dir "saved_results"
```

### LLAMA2-7b/13b/70b
> Note: LLaMA2 requires IPEX >= 2.1 for better accuracy.
#### Quantization

```bash
# "--sq" is used to enable smooth quant
python run_clm_no_trainer.py \
  --model meta-llama/Llama-2-7b-hf \
  --quantize \
  --sq \
  --alpha 0.8 \
  --ipex \
  --output_dir "saved_results"
```
...torch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
accelerate
protobuf
sentencepiece != 0.1.92
datasets >= 1.1.3
torch >= 1.10
transformers
pytest
wandb
einops
neural-compressor
intel-extension-for-transformers
lm_eval==0.4.2
peft
...torch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh
96 changes: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
#!/bin/bash
set -x

function main {

    init_params "$@"
    run_benchmark

}

# init params
function init_params {
    iters=100
    batch_size=16
    approach=static
    tuned_checkpoint=saved_results
    task=lambada_openai
    echo ${max_eval_samples}
    for var in "$@"
    do
        case $var in
            --topology=*)
                topology=$(echo $var |cut -f2 -d=)
            ;;
            --dataset_location=*)
                dataset_location=$(echo $var |cut -f2 -d=)
            ;;
            --input_model=*)
                input_model=$(echo $var |cut -f2 -d=)
            ;;
            --mode=*)
                mode=$(echo $var |cut -f2 -d=)
            ;;
            --batch_size=*)
                batch_size=$(echo $var |cut -f2 -d=)
            ;;
            --iters=*)
                iters=$(echo ${var} |cut -f2 -d=)
            ;;
            --int8=*)
                int8=$(echo ${var} |cut -f2 -d=)
            ;;
            --config=*)
                tuned_checkpoint=$(echo $var |cut -f2 -d=)
            ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
            ;;
        esac
    done

}


# run_benchmark
function run_benchmark {
    extra_cmd=''

    if [[ ${mode} == "accuracy" ]]; then
        mode_cmd=" --accuracy "
        extra_cmd=$extra_cmd" --load"
    elif [[ ${mode} == "performance" ]]; then
        mode_cmd=" --performance --iters "${iters}
        extra_cmd=$extra_cmd" --load"
    else
        echo "Error: No such mode: ${mode}"
        exit 1
    fi

    if [[ ${int8} == "true" ]]; then
        extra_cmd=$extra_cmd" --int8"
    fi
    echo $extra_cmd

    if [ "${topology}" = "opt_125m_ipex_sq" ]; then
        model_name_or_path="facebook/opt-125m"
        extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
    elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
        model_name_or_path="meta-llama/Llama-2-7b-hf"
        extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
    elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
        model_name_or_path="EleutherAI/gpt-j-6b"
        extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
    fi

    python -u run_clm_no_trainer.py \
        --model ${model_name_or_path} \
        --approach ${approach} \
        --output_dir ${tuned_checkpoint} \
        --task ${task} \
        --batch_size ${batch_size} \
        ${extra_cmd} ${mode_cmd}
}

main "$@"