Add save/load for pt2e example #1927

Merged 18 commits on Jul 29, 2024
Changes from 1 commit
8 changes: 7 additions & 1 deletion examples/3.x_api/pytorch/cv/static_quant/main.py
@@ -81,6 +81,8 @@
                    help='quantize model')
parser.add_argument("--calib_iters", default=2, type=int,
                    help="For calibration only.")
parser.add_argument('-o', '--output_dir', default='', type=str, metavar='PATH',
                    help='path to quantized result.')

best_acc1 = 0

@@ -297,9 +299,13 @@ def main_worker(gpu, ngpus_per_node, args):
        config.freezing = True
        opt_model = torch.compile(q_model)
        model = opt_model

        if args.output_dir:
            model.save(example_inputs=example_inputs, output_dir=args.output_dir)

    if args.evaluate:
        if args.output_dir:
            from neural_compressor.torch.quantization import load
            model = load(args.output_dir)
        validate(val_loader, model, criterion, args)
        return

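For context, the `save`/`load` pair this commit wires into main.py sits at the end of the PT2E static quantization flow. Below is a minimal, self-contained sketch of that flow. It assumes the neural_compressor 3.x PT2E API (`export`, `prepare`, `convert`, `get_default_static_config`); only the `save` and `load` calls are taken directly from this diff, and the exact capture entry point may differ between versions.

```python
import torch
import torchvision.models as models
from neural_compressor.torch.export import export
from neural_compressor.torch.quantization import (
    convert,
    get_default_static_config,
    load,
    prepare,
)

# Stand-in model and input; the example script feeds real ImageNet batches.
model = models.resnet18(weights=None).eval()
example_inputs = (torch.randn(1, 3, 224, 224),)

# PT2E flow: capture the graph, insert observers, calibrate, convert.
exported_model = export(model, example_inputs=example_inputs)
prepared_model = prepare(exported_model, quant_config=get_default_static_config())
prepared_model(*example_inputs)  # calibration pass
q_model = convert(prepared_model)

# main.py then compiles the quantized model with inductor freezing enabled.
torch._inductor.config.freezing = True
opt_model = torch.compile(q_model)

# New in this commit: persist the quantized model when --output_dir is set
# (torch.compile forwards attribute access, so .save reaches q_model)...
opt_model.save(example_inputs=example_inputs, output_dir="saved_results")

# ...and restore it at evaluation time instead of re-quantizing from scratch.
model = load("saved_results")
```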
3 changes: 2 additions & 1 deletion examples/3.x_api/pytorch/cv/static_quant/run_quant.sh
@@ -38,8 +38,9 @@ function init_params {
function run_tuning {
if [ "${topology}" = "resnet18_pt2e_static" ]; then
model_name_or_path="resnet18"
output_dir="saved_results"
fi
python main.py -a ${model_name_or_path} ${dataset_location} -q -e
python main.py -a ${model_name_or_path} ${dataset_location} -q -o ${output_dir}
}

main "$@"
run_clm_no_trainer.py
@@ -14,7 +14,7 @@
"--revision", default=None,
help="Transformers parameter: set the model hub commit number")
parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
parser.add_argument("--output_dir", nargs="?", default="./saved_results")
parser.add_argument("--output_dir", nargs="?", default="")
parser.add_argument("--quantize", action="store_true")
parser.add_argument("--approach", type=str, default='static',
help="Select from ['dynamic', 'static', 'weight-only']")
@@ -98,9 +98,15 @@ def get_example_inputs(tokenizer):

    opt_model.config = user_model.config  # for lm eval
    user_model = opt_model
    if args.output_dir:
        user_model.save(example_inputs=example_inputs, output_dir=args.output_dir)


if args.accuracy:
    if args.output_dir:
        from neural_compressor.torch.quantization import load
        model = load(args.output_dir)
        model.config = user_model.config
    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
    eval_args = LMEvalParser(
        model="hf",
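The load path added above has one subtlety: the restored object is a plain quantized module without Hugging Face metadata, which is why the diff copies `user_model.config` onto it before lm-eval runs. A minimal sketch of that reload path, assuming the same neural_compressor 3.x API; the checkpoint name and prompt are placeholders:

```python
import torch
from transformers import AutoConfig, AutoTokenizer
from neural_compressor.torch.quantization import load

output_dir = "saved_results"      # matches --output_dir
model_name = "facebook/opt-125m"  # placeholder; the script takes --model

# Restore the quantized model saved by user_model.save(...).
model = load(output_dir)

# Reattach the HF config that lm-eval expects to find on model.config.
model.config = AutoConfig.from_pretrained(model_name)

# Smoke test; the call signature follows the example_inputs used at save time.
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_ids = tokenizer("Hello, my dog is", return_tensors="pt")["input_ids"]
with torch.no_grad():
    logits = model(input_ids)
```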
run_quant.sh
@@ -39,8 +39,9 @@ function run_tuning {

if [ "${topology}" = "opt_125m_pt2e_static" ]; then
model_name_or_path="facebook/opt-125m"
output_dir="saved_results"
fi
python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --accuracy --tasks "lambada_openai"
python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --output_dir ${output_dir} --tasks "lambada_openai"
}

main "$@"
@@ -4,17 +4,12 @@ set -x
function main {

init_params "$@"
run_benchmark
run_tuning

}

# init params
function init_params {
iters=100
batch_size=16
tuned_checkpoint=saved_results
task=lambada_openai
echo ${max_eval_samples}
for var in "$@"
do
case $var in
@@ -27,21 +22,9 @@ function init_params {
--input_model=*)
input_model=$(echo $var |cut -f2 -d=)
;;
--mode=*)
mode=$(echo $var |cut -f2 -d=)
;;
--batch_size=*)
batch_size=$(echo $var |cut -f2 -d=)
;;
--iters=*)
iters=$(echo ${var} |cut -f2 -d=)
;;
--int8=*)
int8=$(echo ${var} |cut -f2 -d=)
;;
--config=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
--output_model=*)
tuned_checkpoint=$(echo $var |cut -f2 -d=)
;;
*)
echo "Error: No such parameter: ${var}"
exit 1
@@ -51,26 +34,14 @@ function init_params {

}


# run_benchmark
function run_benchmark {
# run_tuning
function run_tuning {
extra_cmd=''
batch_size=8
DATASET_NAME="NeelNanda/pile-10k"
tuned_checkpoint="saved_results"

if [[ ${mode} == "accuracy" ]]; then
mode_cmd=" --accuracy "
elif [[ ${mode} == "performance" ]]; then
mode_cmd=" --performance --iters "${iters}
else
echo "Error: No such mode: ${mode}"
exit 1
fi

if [[ ${int8} == "true" ]]; then
extra_cmd=$extra_cmd" --int8"
fi
echo $extra_cmd

if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
model_name_or_path="facebook/opt-125m"
extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
@@ -96,11 +67,11 @@ function run_benchmark {
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then
model_name_or_path="EleutherAI/gpt-j-6b"\
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then
model_name_or_path="EleutherAI/gpt-j-6b"\
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search"
extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
@@ -118,10 +89,12 @@ function run_benchmark {

python -u run_clm_no_trainer.py \
--model ${model_name_or_path} \
--dataset ${DATASET_NAME} \
--accuracy \
--output_dir ${tuned_checkpoint} \
--task ${task} \
--tasks "lambada_openai" \
--batch_size ${batch_size} \
${extra_cmd} ${mode_cmd}
${extra_cmd}
}

main "$@"