From f7f4338cd6ed7359aa6927fdef2285836042609e Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 21 May 2024 20:02:51 +0000 Subject: [PATCH 1/2] Add default scheme --- .../quantization/quant_scheme.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index ed0f8245..5a4ad36c 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -37,3 +37,32 @@ class QuantizationScheme(BaseModel): weights: Optional[QuantizationArgs] = None input_activations: Optional[QuantizationArgs] = None output_activations: Optional[QuantizationArgs] = None + + @classmethod + def default_scheme( + cls, + targets: Optional[List[str]] = None, + ): + + if targets is None: + # default to quantizing all Linear layers + targets = ["Linear"] + + # default to 8 bit integer symmetric quantization + # for weights + weights = QuantizationArgs(num_bits=8, symmetric=True) + + # default to 8 bit integer asymmetric quantization + input_activations = QuantizationArgs(num_bits=8, symmetric=False) + + # Do not quantize the output activations + # by default + output_activations = None + + return cls( + targets=targets, + weights=weights, + input_activations=input_activations, + output_activations=output_activations,) + + From ecb27b8763fdb8ff2734414ac512ffe2a3c93547 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Wed, 22 May 2024 11:35:39 -0400 Subject: [PATCH 2/2] Apply suggestions from code review --- src/compressed_tensors/quantization/quant_scheme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index 5a4ad36c..d372dd21 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -53,7 +53,7 @@ def default_scheme( 
         weights = QuantizationArgs(num_bits=8, symmetric=True)
 
-        # default to 8 bit integer asymmetric quantization
-        input_activations = QuantizationArgs(num_bits=8, symmetric=False)
+        # default to 8 bit integer symmetric quantization
+        input_activations = QuantizationArgs(num_bits=8, symmetric=True)
 
         # Do not quantize the output activations
         # by default