From 7ed115c0cbe1840c4597bdcd821bb7d145e3a12e Mon Sep 17 00:00:00 2001 From: "ZHENG, Zhen" Date: Thu, 18 Jan 2024 05:35:21 +0000 Subject: [PATCH 1/6] Support user to specify the quantization mode. --- mii/api.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mii/api.py b/mii/api.py index d46ded1d..547b42c3 100644 --- a/mii/api.py +++ b/mii/api.py @@ -50,6 +50,13 @@ def _parse_kwargs_to_model_config( # Create the ModelConfig object and return it with remaining kwargs model_config = ModelConfig(**model_config) + + # TODO: to find a way to make this more elegant. + if "quantization_mode" in remaining_kwargs.keys(): + model_config.inference_engine_config.quantization.quantization_mode = remaining_kwargs[ + "quantization_mode"] + remaining_kwargs.pop("quantization_mode") + return model_config, remaining_kwargs @@ -151,7 +158,8 @@ def serve( create_score_file(mii_config) if mii_config.deployment_type == DeploymentType.LOCAL: - import_score_file(mii_config.deployment_name, DeploymentType.LOCAL).init() + import_score_file(mii_config.deployment_name, + DeploymentType.LOCAL).init() return MIIClient(mii_config=mii_config) if mii_config.deployment_type == DeploymentType.AML: acr_name = mii.aml_related.utils.get_acr_name() From eddcd70771d375aaa8f2f861542f167e31a9c3dc Mon Sep 17 00:00:00 2001 From: "ZHENG, Zhen" Date: Mon, 4 Mar 2024 12:05:50 +0000 Subject: [PATCH 2/6] Fix format --- mii/api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mii/api.py b/mii/api.py index 547b42c3..44f20176 100644 --- a/mii/api.py +++ b/mii/api.py @@ -158,8 +158,7 @@ def serve( create_score_file(mii_config) if mii_config.deployment_type == DeploymentType.LOCAL: - import_score_file(mii_config.deployment_name, - DeploymentType.LOCAL).init() + import_score_file(mii_config.deployment_name, DeploymentType.LOCAL).init() return MIIClient(mii_config=mii_config) if mii_config.deployment_type == DeploymentType.AML: acr_name = mii.aml_related.utils.get_acr_name() From cc2900fa958610ccf7a387722dd3ba6d0a005bdb Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Tue, 5 Mar 2024 12:37:54 -0800 Subject: [PATCH 3/6] move quantization_mode to model config --- mii/api.py | 6 ------ mii/config.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/mii/api.py b/mii/api.py index 44f20176..d909c837 100644 --- a/mii/api.py +++ b/mii/api.py @@ -51,12 +51,6 @@ def _parse_kwargs_to_model_config( # Create the ModelConfig object and return it with remaining kwargs model_config = ModelConfig(**model_config) - # TODO: to find a way to make this more elegant. - if "quantization_mode" in remaining_kwargs.keys(): - model_config.inference_engine_config.quantization.quantization_mode = remaining_kwargs[ - "quantization_mode"] - remaining_kwargs.pop("quantization_mode") - return model_config, remaining_kwargs diff --git a/mii/config.py b/mii/config.py index 490947f1..d082dbf0 100644 --- a/mii/config.py +++ b/mii/config.py @@ -131,6 +131,12 @@ class ModelConfig(MIIConfigModel): `inference_engine_config`. """ + quantization_mode: Optional[str] = None + """ + The quantization mode in string format. The supported modes are as follows: + - 'wf6af16', weight-only quantization with FP6 weight and FP16 activation. + """ + inference_engine_config: RaggedInferenceEngineConfig = {} """ DeepSpeed inference engine config. This is automatically generated, but you @@ -210,6 +216,12 @@ def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]: values.get("inference_engine_config").tensor_parallel.tp_size = tensor_parallel return values + @root_validator + def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]: + quantization_mode = values.get("quantization_mode") + values.get("inference_engine_config").quantization_mode = quantization_mode + return values + @root_validator def check_replica_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: num_replica_config = len(values.get("replica_configs")) From 886e3e6494eac3fe4c7a3ff9616ad7475875f9a0 Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Wed, 6 Mar 2024 16:52:12 -0800 Subject: [PATCH 4/6] Update config.py --- mii/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mii/config.py b/mii/config.py index d082dbf0..bb8edc45 100644 --- a/mii/config.py +++ b/mii/config.py @@ -219,7 +219,7 @@ def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]: @root_validator def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]: quantization_mode = values.get("quantization_mode") - values.get("inference_engine_config").quantization_mode = quantization_mode + values.get("inference_engine_config").quantization.quantization_mode = quantization_mode return values @root_validator From 6628d0c0a36a2e8b6f48cbb4f76fd4fe31bba9d3 Mon Sep 17 00:00:00 2001 From: "ZHENG, Zhen" Date: Thu, 7 Mar 2024 01:21:23 +0000 Subject: [PATCH 5/6] Fix format problem. --- mii/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mii/config.py b/mii/config.py index bb8edc45..8e9c5cd7 100644 --- a/mii/config.py +++ b/mii/config.py @@ -219,7 +219,8 @@ def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]: @root_validator def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]: quantization_mode = values.get("quantization_mode") - values.get("inference_engine_config").quantization.quantization_mode = quantization_mode + values.get( + "inference_engine_config").quantization.quantization_mode = quantization_mode return values @root_validator From c379cdb191f9b098d5393d5dd9ff9649b09999bd Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Thu, 7 Mar 2024 17:14:39 -0800 Subject: [PATCH 6/6] Update DeepSpeed in the requirements --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index f067ee09..019fc261 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ asyncio -deepspeed>=0.13.0 +deepspeed>=0.14.0 deepspeed-kernels Flask-RESTful grpcio