From d6123cc591876d322016b0753989a585b9a48dd7 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 15 Aug 2024 10:56:22 +0100 Subject: [PATCH 1/5] Upgrade PyTorch to 2.3.1 --- eland/cli/eland_import_hub_model.py | 7 ++++--- noxfile.py | 2 +- setup.py | 6 +++--- tests/ml/pytorch/test_pytorch_model_config_pytest.py | 4 ++-- tests/ml/pytorch/test_pytorch_model_upload_pytest.py | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/eland/cli/eland_import_hub_model.py b/eland/cli/eland_import_hub_model.py index 4ca85447..79fbf74c 100755 --- a/eland/cli/eland_import_hub_model.py +++ b/eland/cli/eland_import_hub_model.py @@ -230,6 +230,7 @@ def check_cluster_version(es_client, logger): sem_ver = parse_es_version(es_info["version"]["number"]) major_version = sem_ver[0] minor_version = sem_ver[1] + patch_version = sem_ver[2] # NLP models added in 8 if major_version < 8: @@ -238,13 +239,13 @@ def check_cluster_version(es_client, logger): ) exit(1) - # PyTorch was upgraded to version 2.1.2 in 8.13 + # PyTorch was upgraded to version 2.3.1 in 8.15.1 # and is incompatible with earlier versions - if major_version == 8 and minor_version < 13: + if major_version == 8 and minor_version < 15 and patch_version < 1: import torch logger.error( - f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.13. Please upgrade Elasticsearch to at least version 8.13" + f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.15.1. 
Please upgrade Elasticsearch to at least version 8.15.1" ) exit(1) diff --git a/noxfile.py b/noxfile.py index a60950ec..e8a57191 100644 --- a/noxfile.py +++ b/noxfile.py @@ -121,7 +121,7 @@ def test(session, pandas_version: str): "--nbval", ) - # PyTorch 2.1.2 doesn't support Python 3.12 + # PyTorch 2.3.1 doesn't support Python 3.12 if session.python == "3.12": pytest_args += ("--ignore=eland/ml/pytorch",) session.run( diff --git a/setup.py b/setup.py index 1767ea32..01405b7b 100644 --- a/setup.py +++ b/setup.py @@ -60,10 +60,10 @@ "lightgbm": ["lightgbm>=2,<4"], "pytorch": [ "requests<3", - "torch==2.1.2", + "torch==2.3.1", "tqdm", - "sentence-transformers>=2.1.0,<=2.3.1", - "transformers[torch]>=4.31.0,<4.36.0", + "sentence-transformers>=2.1.0,<=2.7.0", + "transformers[torch]>=4.31.0,<4.44.0", ], } extras["all"] = list({dep for deps in extras.values() for dep in deps}) diff --git a/tests/ml/pytorch/test_pytorch_model_config_pytest.py b/tests/ml/pytorch/test_pytorch_model_config_pytest.py index 50ea4aa9..7a297368 100644 --- a/tests/ml/pytorch/test_pytorch_model_config_pytest.py +++ b/tests/ml/pytorch/test_pytorch_model_config_pytest.py @@ -58,8 +58,8 @@ pytestmark = [ pytest.mark.skipif( - ES_VERSION < (8, 13, 0), - reason="Eland uses Pytorch 2.1.2, versions of Elasticsearch prior to 8.13.0 are incompatible with PyTorch 2.1.2", + ES_VERSION < (8, 15, 1), + reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.1 are incompatible with PyTorch 2.3.1", ), pytest.mark.skipif( not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run" diff --git a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py index 7eac6a8d..a86fafae 100644 --- a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py +++ b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py @@ -39,8 +39,8 @@ pytestmark = [ pytest.mark.skipif( - ES_VERSION < (8, 13, 0), - reason="Eland uses Pytorch 2.1.2, versions of 
Elasticsearch prior to 8.13.0 are incompatible with PyTorch 2.1.2", + ES_VERSION < (8, 15, 1), + reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.1 are incompatible with PyTorch 2.3.1", ), pytest.mark.skipif( not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run" From 91192a131609e96d9d6266404d97e439d47be0b3 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 15 Aug 2024 10:56:32 +0100 Subject: [PATCH 2/5] unmute test --- tests/ml/pytorch/test_pytorch_model_config_pytest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ml/pytorch/test_pytorch_model_config_pytest.py b/tests/ml/pytorch/test_pytorch_model_config_pytest.py index 7a297368..28c869be 100644 --- a/tests/ml/pytorch/test_pytorch_model_config_pytest.py +++ b/tests/ml/pytorch/test_pytorch_model_config_pytest.py @@ -163,7 +163,6 @@ class TestModelConfguration: - @pytest.mark.skip(reason="https://github.com/elastic/eland/issues/633") @pytest.mark.parametrize( "model_id,task_type,config_type,tokenizer_type,max_sequence_len,embedding_size", MODEL_CONFIGURATIONS, From f73fe37e245b46614d8aeaa5f0c0f4f5bf0fd656 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 4 Sep 2024 09:24:50 +0100 Subject: [PATCH 3/5] Fix sem ver check, update to 8.15.2 --- eland/cli/eland_import_hub_model.py | 8 +++----- tests/ml/pytorch/test_pytorch_model_upload_pytest.py | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/eland/cli/eland_import_hub_model.py b/eland/cli/eland_import_hub_model.py index 79fbf74c..9496e91e 100755 --- a/eland/cli/eland_import_hub_model.py +++ b/eland/cli/eland_import_hub_model.py @@ -229,8 +229,6 @@ def check_cluster_version(es_client, logger): sem_ver = parse_es_version(es_info["version"]["number"]) major_version = sem_ver[0] - minor_version = sem_ver[1] - patch_version = sem_ver[2] # NLP models added in 8 if major_version < 8: @@ -239,13 +237,13 @@ def check_cluster_version(es_client, logger): ) exit(1) - # PyTorch was upgraded to 
version 2.3.1 in 8.15.1 + # PyTorch was upgraded to version 2.3.1 in 8.15.2 # and is incompatible with earlier versions - if major_version == 8 and minor_version < 15 and patch_version < 1: + if sem_ver < (8, 15, 2): import torch logger.error( - f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.15.1. Please upgrade Elasticsearch to at least version 8.15.1" + f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.15.2. Please upgrade Elasticsearch to at least version 8.15.2" ) exit(1) diff --git a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py index a86fafae..c84a77e0 100644 --- a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py +++ b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py @@ -39,8 +39,8 @@ pytestmark = [ pytest.mark.skipif( - ES_VERSION < (8, 15, 1), - reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.1 are incompatible with PyTorch 2.3.1", + ES_VERSION < (8, 15, 2), + reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.2 are incompatible with PyTorch 2.3.1", ), pytest.mark.skipif( not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run" From c1c46f3f42fcec40e713ce7a984b47eb4aeb0e29 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Sun, 29 Sep 2024 10:47:29 +0100 Subject: [PATCH 4/5] Add sentencepiece to the install requirements For the slow tokenizer --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 01405b7b..1befe7d0 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,9 @@ "torch==2.3.1", "tqdm", "sentence-transformers>=2.1.0,<=2.7.0", - "transformers[torch]>=4.31.0,<4.44.0", + # sentencepiece is a required dependency for the slow tokenizers + # https://huggingface.co/transformers/v4.4.2/migration.html#sentencepiece-is-removed-from-the-required-dependencies 
+ "transformers[sentencepiece]>=4.31.0,<4.44.0", ], } extras["all"] = list({dep for deps in extras.values() for dep in deps}) From 28a6464bb838401583d9802174787de7751b1307 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Sun, 29 Sep 2024 10:49:59 +0100 Subject: [PATCH 5/5] fix one test skip another --- eland/ml/pytorch/transformers.py | 4 ++++ tests/ml/pytorch/test_pytorch_model_config_pytest.py | 8 -------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/eland/ml/pytorch/transformers.py b/eland/ml/pytorch/transformers.py index ab89e55b..271a2431 100644 --- a/eland/ml/pytorch/transformers.py +++ b/eland/ml/pytorch/transformers.py @@ -36,6 +36,7 @@ AutoConfig, AutoModel, AutoModelForQuestionAnswering, + BertTokenizer, PretrainedConfig, PreTrainedModel, PreTrainedTokenizer, @@ -757,6 +758,9 @@ def _find_max_sequence_length(self) -> int: if max_len is not None and max_len < REASONABLE_MAX_LENGTH: return int(max_len) + if isinstance(self._tokenizer, BertTokenizer): + return 512 + raise UnknownModelInputSizeError("Cannot determine model max input length") def _create_config( diff --git a/tests/ml/pytorch/test_pytorch_model_config_pytest.py b/tests/ml/pytorch/test_pytorch_model_config_pytest.py index 28c869be..c12be3a8 100644 --- a/tests/ml/pytorch/test_pytorch_model_config_pytest.py +++ b/tests/ml/pytorch/test_pytorch_model_config_pytest.py @@ -149,14 +149,6 @@ 1024, None, ), - ( - "cardiffnlp/twitter-roberta-base-sentiment", - "text_classification", - TextClassificationInferenceOptions, - NlpRobertaTokenizationConfig, - 512, - None, - ), ] else: MODEL_CONFIGURATIONS = []