From 2cfda6d67bde15aae5df72f58c7063b69bf4e2ab Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 26 Apr 2024 22:20:46 +0100 Subject: [PATCH] Fix import for HuggingFace Dataset Provider (#2085) Signed-off-by: Andrey Velichkevich --- .../language-modeling/train_api_hf_dataset.ipynb | 4 ++-- .../language-modeling/train_api_s3_dataset.ipynb | 16 +++++++--------- .../text-classification/Fine-Tune-BERT-LLM.ipynb | 4 ++-- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb b/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb index 2b97218718..c5869196ca 100644 --- a/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb +++ b/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb @@ -22,7 +22,7 @@ "from kubeflow.storage_initializer.hugging_face import (\n", " HuggingFaceModelParams,\n", " HuggingFaceTrainerParams,\n", - " HfDatasetParams,\n", + " HuggingFaceDatasetParams,\n", ")\n", "from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n", "from peft import LoraConfig\n", @@ -70,7 +70,7 @@ " ),\n", " # it is assumed for text related tasks, you have 'text' column in the dataset.\n", " # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n", - " dataset_provider_parameters=HfDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n", + " dataset_provider_parameters=HuggingFaceDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n", " trainer_parameters=HuggingFaceTrainerParams(\n", " lora_config=LoraConfig(\n", " r=8,\n", diff --git a/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb b/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb index d9d8d2a842..332ba38c3e 100644 --- a/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb +++ b/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb @@ -21,8 +21,8 @@ "from kubeflow.storage_initializer.hugging_face import (\n", " HuggingFaceModelParams,\n", " HuggingFaceTrainerParams,\n", - " HfDatasetParams,\n", ")\n", + "from kubeflow.storage_initializer.s3 import S3DatasetParams\n", "from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n", "from peft import LoraConfig\n", "import transformers\n", @@ -81,14 +81,12 @@ " # it is assumed for text related tasks, you have 'text' column in the dataset.\n", " # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n", " dataset_provider_parameters=S3DatasetParams(\n", - " {\n", - " \"endpoint_url\": \"http://10.117.63.3\",\n", - " \"bucket_name\": \"test\",\n", - " \"file_key\": \"imdatta0___ultrachat_1k\",\n", - " \"region_name\": \"us-east-1\",\n", - " \"access_key\": s3_access_key,\n", - " \"secret_key\": s3_secret_key,\n", - " }\n", + " endpoint_url=\"http://10.117.63.3\",\n", + " bucket_name=\"test\",\n", + " file_key=\"imdatta0___ultrachat_1k\",\n", + " region_name=\"us-east-1\",\n", + " access_key=s3_access_key,\n", + " secret_key=s3_secret_key,\n", " ),\n", " trainer_parameters=HuggingFaceTrainerParams(\n", " lora_config=LoraConfig(\n", diff --git a/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb b/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb index e28975a6f1..d5afd1910c 100644 --- a/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb +++ b/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb @@ -614,7 +614,7 @@ "from kubeflow.storage_initializer.hugging_face import (\n", " HuggingFaceModelParams,\n", " HuggingFaceTrainerParams,\n", - " HfDatasetParams,\n", + " HuggingFaceDatasetParams,\n", ")\n", "\n", "import transformers\n", @@ -646,7 +646,7 @@ " \"access_modes\": [\"ReadWriteOnce\"] # Since we use 1 Worker, PVC access mode is ReadWriteOnce.\n", " },\n", " # Use 3000 samples from Yelp dataset.\n", - " dataset_provider_parameters=HfDatasetParams(\n", + " dataset_provider_parameters=HuggingFaceDatasetParams(\n", " repo_id=\"yelp_review_full\",\n", " split=\"train[:3000]\",\n", " ),\n",