Add save/load for pt2e example #1927

Merged 18 commits on Jul 29, 2024
Changes from 1 commit
8 changes: 7 additions & 1 deletion examples/3.x_api/pytorch/cv/static_quant/main.py
@@ -81,6 +81,8 @@
                    help='quantize model')
parser.add_argument("--calib_iters", default=2, type=int,
                    help="For calibration only.")
parser.add_argument('-o', '--output_dir', default='', type=str, metavar='PATH',
                    help='path to quantized result.')

best_acc1 = 0

@@ -297,9 +299,13 @@ def main_worker(gpu, ngpus_per_node, args):
        config.freezing = True
        opt_model = torch.compile(q_model)
        model = opt_model

        if args.output_dir:
            model.save(example_inputs=example_inputs, output_dir=args.output_dir)

    if args.evaluate:
        if args.output_dir:
            from neural_compressor.torch.quantization import load
            model = load(args.output_dir)
        validate(val_loader, model, criterion, args)
        return

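For context, the `save`/`load` pair this commit wires into main.py sits at the end of the PT2E static quantization flow. Below is a minimal, self-contained sketch of that flow. It assumes the neural_compressor 3.x PT2E API (`export`, `prepare`, `convert`, `get_default_static_config`); only the `save` and `load` calls are taken directly from this diff, and the exact capture entry point may differ between versions.

```python
import torch
import torchvision.models as models
from neural_compressor.torch.export import export
from neural_compressor.torch.quantization import (
    convert,
    get_default_static_config,
    load,
    prepare,
)

# Stand-in model and input; the example script feeds real ImageNet batches.
model = models.resnet18(weights=None).eval()
example_inputs = (torch.randn(1, 3, 224, 224),)

# PT2E flow: capture the graph, insert observers, calibrate, convert.
exported_model = export(model, example_inputs=example_inputs)
prepared_model = prepare(exported_model, quant_config=get_default_static_config())
prepared_model(*example_inputs)  # calibration pass
q_model = convert(prepared_model)

# main.py then compiles the quantized model with inductor freezing enabled.
torch._inductor.config.freezing = True
opt_model = torch.compile(q_model)

# New in this commit: persist the quantized model when --output_dir is set
# (torch.compile forwards attribute access, so .save reaches q_model)...
opt_model.save(example_inputs=example_inputs, output_dir="saved_results")

# ...and restore it at evaluation time instead of re-quantizing from scratch.
model = load("saved_results")
```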
3 changes: 2 additions & 1 deletion examples/3.x_api/pytorch/cv/static_quant/run_quant.sh
@@ -38,8 +38,9 @@ function init_params {
function run_tuning {
if [ "${topology}" = "resnet18_pt2e_static" ]; then
model_name_or_path="resnet18"
output_dir="saved_results"
fi
python main.py -a ${model_name_or_path} ${dataset_location} -q -e
python main.py -a ${model_name_or_path} ${dataset_location} -q -o ${output_dir}
}

main "$@"
run_clm_no_trainer.py
@@ -14,7 +14,7 @@
"--revision", default=None,
help="Transformers parameter: set the model hub commit number")
parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
parser.add_argument("--output_dir", nargs="?", default="./saved_results")
parser.add_argument("--output_dir", nargs="?", default="")
parser.add_argument("--quantize", action="store_true")
parser.add_argument("--approach", type=str, default='static',
help="Select from ['dynamic', 'static', 'weight-only']")
@@ -98,9 +98,15 @@ def get_example_inputs(tokenizer):

    opt_model.config = user_model.config  # for lm eval
    user_model = opt_model
    if args.output_dir:
        user_model.save(example_inputs=example_inputs, output_dir=args.output_dir)


if args.accuracy:
    if args.output_dir:
        from neural_compressor.torch.quantization import load
        model = load(args.output_dir)
        model.config = user_model.config
    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
    eval_args = LMEvalParser(
        model="hf",
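The load path added above has one subtlety: the restored object is a plain quantized module without Hugging Face metadata, which is why the diff copies `user_model.config` onto it before lm-eval runs. A minimal sketch of that reload path, assuming the same neural_compressor 3.x API; the checkpoint name and prompt are placeholders:

```python
import torch
from transformers import AutoConfig, AutoTokenizer
from neural_compressor.torch.quantization import load

output_dir = "saved_results"      # matches --output_dir
model_name = "facebook/opt-125m"  # placeholder; the script takes --model

# Restore the quantized model saved by user_model.save(...).
model = load(output_dir)

# Reattach the HF config that lm-eval expects to find on model.config.
model.config = AutoConfig.from_pretrained(model_name)

# Smoke test; the call signature follows the example_inputs used at save time.
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_ids = tokenizer("Hello, my dog is", return_tensors="pt")["input_ids"]
with torch.no_grad():
    logits = model(input_ids)
```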
run_quant.sh
@@ -39,8 +39,9 @@ function run_tuning {

if [ "${topology}" = "opt_125m_pt2e_static" ]; then
model_name_or_path="facebook/opt-125m"
output_dir="saved_results"
fi
python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --accuracy --tasks "lambada_openai"
python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --output_dir ${output_dir} --tasks "lambada_openai"
}

main "$@"
@@ -4,17 +4,12 @@ set -x
function main {

init_params "$@"
run_benchmark
run_tuning

}

# init params
function init_params {
iters=100
batch_size=16
tuned_checkpoint=saved_results
task=lambada_openai
echo ${max_eval_samples}
for var in "$@"
do
case $var in
@@ -27,21 +22,9 @@ function init_params {
--input_model=*)
input_model=$(echo $var |cut -f2 -d=)
;;
--mode=*)
mode=$(echo $var |cut -f2 -d=)
;;
--batch_size=*)
batch_size=$(echo $var |cut -f2 -d=)
;;
--iters=*)
iters=$(echo ${var} |cut -f2 -d=)
;;
--int8=*)
int8=$(echo ${var} |cut -f2 -d=)
;;
--config=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
--output_model=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
*)
echo "Error: No such parameter: ${var}"
exit 1
@@ -51,26 +34,14 @@ function init_params {

}


# run_benchmark
function run_benchmark {
# run_tuning
function run_tuning {
extra_cmd=''
batch_size=8
DATASET_NAME="NeelNanda/pile-10k"
tuned_checkpoint="saved_results"

if [[ ${mode} == "accuracy" ]]; then
mode_cmd=" --accuracy "
elif [[ ${mode} == "performance" ]]; then
mode_cmd=" --performance --iters "${iters}
else
echo "Error: No such mode: ${mode}"
exit 1
fi

if [[ ${int8} == "true" ]]; then
extra_cmd=$extra_cmd" --int8"
fi
echo $extra_cmd

if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
model_name_or_path="facebook/opt-125m"
extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
@@ -96,11 +67,11 @@ function run_benchmark {
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
model_name_or_path="EleutherAI/gpt-j-6b"\
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
model_name_or_path="EleutherAI/gpt-j-6b"\
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
@@ -118,10 +89,12 @@ function run_benchmark {

python -u run_clm_no_trainer.py \
--model ${model_name_or_path} \
--dataset ${DATASET_NAME} \
--accuracy \
--output_dir ${tuned_checkpoint} \
--task ${task} \
--tasks "lambada_openai" \
--batch_size ${batch_size} \
${extra_cmd} ${mode_cmd}
${extra_cmd}
}

main "$@"