Add save/load for pt2e example (#1927)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
Kaihui-intel authored Jul 29, 2024
1 parent 50eb6fb commit 0e724a4
Showing 12 changed files with 502 additions and 307 deletions.
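At a high level, the commit threads a save/load round trip through the PT2E static-quantization examples: the quantized model is saved right after convert(), then restored with load() and compiled for benchmarking. Below is a minimal sketch of that flow, pieced together from the diffs that follow — the export import path and the placeholder model/inputs are assumptions, not code from this commit:

import torch
from neural_compressor.torch.export import export  # assumed import path
from neural_compressor.torch.quantization import (
    convert,
    get_default_static_config,
    load,
    prepare,
)

user_model = ...                                 # any eager FP32 model
example_inputs = (torch.randn(1, 3, 224, 224),)  # illustrative shapes

# Export -> prepare -> calibrate -> convert, as in the examples below.
exported_model = export(user_model, example_inputs=example_inputs)
prepare_model = prepare(exported_model, get_default_static_config())
prepare_model(*example_inputs)                   # calibration pass
converted_model = convert(prepare_model)

# New in these examples: persist the quantized model to disk ...
converted_model.save(example_inputs=example_inputs, output_dir="saved_results")

# ... and load it back later for accuracy or performance runs.
loaded_model = load("saved_results")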
506 changes: 236 additions & 270 deletions examples/3.x_api/pytorch/cv/static_quant/main.py

Large diffs are not rendered by default.

103 changes: 103 additions & 0 deletions examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh
@@ -0,0 +1,103 @@
#!/bin/bash
set -x

function main {

init_params "$@"
run_benchmark

}

# init params
function init_params {
iters=100
batch_size=16
tuned_checkpoint=saved_results
echo ${max_eval_samples}
for var in "$@"
do
case $var in
--topology=*)
topology=$(echo $var |cut -f2 -d=)
;;
--dataset_location=*)
dataset_location=$(echo $var |cut -f2 -d=)
;;
--input_model=*)
input_model=$(echo $var |cut -f2 -d=)
;;
--mode=*)
mode=$(echo $var |cut -f2 -d=)
;;
--batch_size=*)
batch_size=$(echo $var |cut -f2 -d=)
;;
--iters=*)
iters=$(echo ${var} |cut -f2 -d=)
;;
--int8=*)
int8=$(echo ${var} |cut -f2 -d=)
;;
--config=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
*)
echo "Error: No such parameter: ${var}"
exit 1
;;
esac
done

}


# run_benchmark
function run_benchmark {
extra_cmd=''

if [[ ${mode} == "accuracy" ]]; then
mode_cmd=" --accuracy "
elif [[ ${mode} == "performance" ]]; then
mode_cmd=" --performance --iters "${iters}
else
echo "Error: No such mode: ${mode}"
exit 1
fi
if [[ ${int8} == "true" ]]; then
extra_cmd=$extra_cmd" --int8"
fi
echo $extra_cmd


echo $extra_cmd

if [ "${topology}" = "resnet18_pt2e_static" ]; then
model_name_or_path="resnet18"
fi

if [[ ${mode} == "accuracy" ]]; then
python main.py \
--pretrained \
-a resnet18 \
-b 30 \
--tuned_checkpoint ${tuned_checkpoint} \
${dataset_location} \
${extra_cmd} \
${mode_cmd}
elif [[ ${mode} == "performance" ]]; then
incbench --num_cores_per_instance 4 \
main.py \
--pretrained \
-a resnet18 \
-b 30 \
--tuned_checkpoint ${tuned_checkpoint} \
${dataset_location} \
${extra_cmd} \
${mode_cmd}
else
echo "Error: No such mode: ${mode}"
exit 1
fi
}

main "$@"
9 changes: 8 additions & 1 deletion examples/3.x_api/pytorch/cv/static_quant/run_quant.sh
@@ -10,6 +10,7 @@ function main {

# init params
function init_params {
+tuned_checkpoint="saved_results"
for var in "$@"
do
case $var in
@@ -39,7 +40,13 @@ function run_tuning {
if [ "${topology}" = "resnet18_pt2e_static" ]; then
model_name_or_path="resnet18"
fi
-python main.py -a ${model_name_or_path} ${dataset_location} -q -e
+python main.py \
+    --pretrained \
+    -t \
+    -a resnet18 \
+    -b 30 \
+    --tuned_checkpoint ${tuned_checkpoint} \
+    ${dataset_location}
}

main "$@"
99 changes: 99 additions & 0 deletions run_benchmark.sh (opt_125m_pt2e_static LLM example; full path not rendered)
@@ -0,0 +1,99 @@
#!/bin/bash
set -x

function main {

init_params "$@"
run_benchmark

}

# init params
function init_params {
iters=100
batch_size=16
tuned_checkpoint=saved_results
task=lambada_openai
echo ${max_eval_samples}
for var in "$@"
do
case $var in
--topology=*)
topology=$(echo $var |cut -f2 -d=)
;;
--dataset_location=*)
dataset_location=$(echo $var |cut -f2 -d=)
;;
--input_model=*)
input_model=$(echo $var |cut -f2 -d=)
;;
--mode=*)
mode=$(echo $var |cut -f2 -d=)
;;
--batch_size=*)
batch_size=$(echo $var |cut -f2 -d=)
;;
--iters=*)
iters=$(echo ${var} |cut -f2 -d=)
;;
--int8=*)
int8=$(echo ${var} |cut -f2 -d=)
;;
--config=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
*)
echo "Error: No such parameter: ${var}"
exit 1
;;
esac
done

}


# run_benchmark
function run_benchmark {
extra_cmd=''

if [[ ${mode} == "accuracy" ]]; then
mode_cmd=" --accuracy "
extra_cmd=$extra_cmd
elif [[ ${mode} == "performance" ]]; then
mode_cmd=" --performance --iters "${iters}
extra_cmd=$extra_cmd
else
echo "Error: No such mode: ${mode}"
exit 1
fi

if [[ ${int8} == "true" ]]; then
extra_cmd=$extra_cmd" --int8"
fi
echo $extra_cmd

echo $extra_cmd

if [ "${topology}" = "opt_125m_pt2e_static" ]; then
model_name_or_path="facebook/opt-125m"
fi
if [[ ${mode} == "accuracy" ]]; then
python -u run_clm_no_trainer.py \
--model ${model_name_or_path} \
--output_dir ${tuned_checkpoint} \
--task ${task} \
--batch_size ${batch_size} \
${extra_cmd} ${mode_cmd}
elif [[ ${mode} == "performance" ]]; then
incbench --num_cores_per_instance 4 run_clm_no_trainer.py \
--model ${model_name_or_path} \
--batch_size ${batch_size} \
--output_dir ${tuned_checkpoint} \
${extra_cmd} ${mode_cmd}
else
echo "Error: No such mode: ${mode}"
exit 1
fi
}

main "$@"
run_clm_no_trainer.py (same LLM pt2e example; full path and change counts not rendered)
@@ -14,7 +14,7 @@
"--revision", default=None,
help="Transformers parameter: set the model hub commit number")
parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
parser.add_argument("--output_dir", nargs="?", default="./saved_results")
parser.add_argument("--output_dir", nargs="?", default="")
parser.add_argument("--quantize", action="store_true")
parser.add_argument("--approach", type=str, default='static',
help="Select from ['dynamic', 'static', 'weight-only']")
@@ -80,7 +80,7 @@ def get_example_inputs(tokenizer):
dynamic_shapes = {"input_ids": (batch, seq_len)}
example_inputs = get_example_inputs(tokenizer)
exported_model = export(user_model, example_inputs=example_inputs, dynamic_shapes=dynamic_shapes)

quant_config = get_default_static_config()
# prepare
prepare_model = prepare(exported_model, quant_config)
@@ -90,17 +90,32 @@ def get_example_inputs(tokenizer):
prepare_model(*example_inputs)
# convert
converted_model = convert(prepare_model)
-# inference
-from torch._inductor import config
-
-config.freezing = True
-opt_model = torch.compile(converted_model)
-
-opt_model.config = user_model.config # for lm eval
-user_model = opt_model
+# save
+if args.output_dir:
+    converted_model.save(example_inputs=example_inputs, output_dir=args.output_dir)
+
+if args.int8:
+    if args.output_dir:
+        print("Load int8 model.")
+        from neural_compressor.torch.quantization import load
+        model = load(args.output_dir)
+
+    model.config = user_model.config # for lm eval
+
+    # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+    from torch._inductor import config
+
+    config.freezing = True
+    opt_model = torch.compile(model)
+
+    opt_model.config = user_model.config # for lm eval
+    user_model = opt_model

if args.accuracy:

from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
@@ -120,29 +120,21 @@ def get_example_inputs(tokenizer):
print('Batch size = %d' % args.batch_size)

if args.performance:
-# user_model.eval()
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
-import time
-
-samples = args.iters * args.batch_size
-eval_args = LMEvalParser(
-    model="hf",
-    user_model=user_model,
-    tokenizer=tokenizer,
-    batch_size=args.batch_size,
-    tasks=args.tasks,
-    limit=samples,
-    device="cpu",
-)
-start = time.time()
-results = evaluate(eval_args)
-end = time.time()
-for task_name in args.tasks.split(","):
-    if task_name == "wikitext":
-        acc = results["results"][task_name]["word_perplexity,none"]
-    else:
-        acc = results["results"][task_name]["acc,none"]
-print("Accuracy: %.5f" % acc)
-print('Throughput: %.3f samples/sec' % (samples / (end - start)))
-print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
-print('Batch size = %d' % args.batch_size)
+batch_size, input_leng = args.batch_size, 512
+example_inputs = torch.ones((batch_size, input_leng), dtype=torch.long)
+print("Batch size = {:d}".format(batch_size))
+print("The length of input tokens = {:d}".format(input_leng))
+import time
+
+total_iters = args.iters
+warmup_iters = 5
+with torch.no_grad():
+    for i in range(total_iters):
+        if i == warmup_iters:
+            start = time.time()
+        user_model(example_inputs)
+end = time.time()
+latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
+throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+print("Latency: {:.3f} ms".format(latency * 10**3))
+print("Throughput: {:.3f} samples/sec".format(throughput))
run_quant.sh (same LLM pt2e example; full path and change counts not rendered)
@@ -39,8 +39,9 @@ function run_tuning {

if [ "${topology}" = "opt_125m_pt2e_static" ]; then
model_name_or_path="facebook/opt-125m"
+output_dir="saved_results"
fi
-python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --accuracy --tasks "lambada_openai"
+python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --output_dir ${output_dir} --tasks "lambada_openai"
}

main "$@"
2 changes: 2 additions & 0 deletions neural_compressor/common/__init__.py
@@ -15,6 +15,7 @@

from neural_compressor.common.utils import (
level,
+level_name,
logger,
Logger,
TuningLogger,
@@ -31,6 +32,7 @@
__all__ = [
"options",
"level",
"level_name",
"logger",
"Logger",
"TuningLogger",
2 changes: 2 additions & 0 deletions neural_compressor/common/utils/logger.py
@@ -24,6 +24,7 @@

__all__ = [
"level",
"level_name",
"Logger", # TODO: not expose it
"logger",
"TuningLogger",
@@ -138,6 +139,7 @@ def warning(msg, *args, **kwargs):


level = Logger().get_logger().level
+level_name = logging.getLevelName(level)

logger = Logger

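The new level_name export gives callers the string form of the effective log level, which the half-precision rewriter below uses to gate an expensive graph dump. A minimal usage sketch (the print body is illustrative):

from neural_compressor.common import level_name

# level_name is logging.getLevelName(level), e.g. "DEBUG" or "INFO",
# so debug-only output can be gated without importing logging:
if level_name == "DEBUG":
    print("expensive debug-only diagnostics go here")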
half_precision_rewriter.py under neural_compressor/torch/algorithms/pt2e_quant (change counts not rendered)
@@ -185,7 +185,8 @@ def transformation(gm: torch.fx.GraphModule, node_candidate_list: List[str], tar
for pattern_pair in HALF_PRECISION_PATTERN_REGISTRY[target_dtype].values():
apply_single_pattern_pair(gm, pattern_pair, node_candidate_list)
utils.logger.info("Half precision conversion is done:")
-gm.print_readable(True)
+if utils.level_name == "DEBUG":  # pragma: no cover
+    gm.print_readable(True)


# =============================================================================
3 changes: 2 additions & 1 deletion neural_compressor/torch/algorithms/pt2e_quant/save_load.py
@@ -34,7 +34,8 @@ def save(model, example_inputs, output_dir="./saved_results"):
os.makedirs(output_dir, exist_ok=True)
qmodel_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME)
qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME)
-quantized_ep = torch.export.export(model, example_inputs)
+dynamic_shapes = model.dynamic_shapes
+quantized_ep = torch.export.export(model, example_inputs, dynamic_shapes=dynamic_shapes)
torch.export.save(quantized_ep, qmodel_file_path)
for key, op_config in model.qconfig.items():
model.qconfig[key] = op_config.to_dict()
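The save_load.py change matters because torch.export.export specializes every dimension it is not told is dynamic: without forwarding the model's recorded dynamic_shapes, re-exporting at save time would burn the example inputs' concrete batch and sequence sizes into the saved program. A minimal sketch of the mechanism using stock torch.export APIs — the toy module, shapes, and file name are illustrative, not this commit's code:

import torch
from torch.export import Dim, export, load, save

class Toy(torch.nn.Module):
    def forward(self, input_ids):
        return input_ids * 2

example_inputs = (torch.ones(2, 8, dtype=torch.long),)
dynamic_shapes = {"input_ids": (Dim("batch"), Dim("seq_len"))}

# Exporting with dynamic_shapes keeps batch/seq_len symbolic in the program ...
ep = export(Toy(), example_inputs, dynamic_shapes=dynamic_shapes)
save(ep, "toy.pt2")

# ... so the reloaded module still accepts other sizes, e.g. (4, 16).
restored = load("toy.pt2").module()
out = restored(torch.ones(4, 16, dtype=torch.long))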
(The remaining two changed files are not rendered above.)