From 2d9ce3020edc8aab7972860176154536a048e2ed Mon Sep 17 00:00:00 2001 From: Noah Negrey Date: Tue, 14 Jan 2020 10:37:07 -0700 Subject: [PATCH] automl: add natural language entity extraction ga samples [(#2676)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2676) * automl: add natural language entity extraction ga samples * Update language_entity_extraction_predict_test.py * Update language_entity_extraction_predict_test.py * use centralized automl testing project and add comments that link to docs --- ...nguage_entity_extraction_create_dataset.py | 42 ++++++++++++++ ...e_entity_extraction_create_dataset_test.py | 42 ++++++++++++++ ...language_entity_extraction_create_model.py | 43 +++++++++++++++ ...age_entity_extraction_create_model_test.py | 34 ++++++++++++ .../language_entity_extraction_predict.py | 55 +++++++++++++++++++ ...language_entity_extraction_predict_test.py | 46 ++++++++++++++++ 6 files changed, 262 insertions(+) create mode 100644 samples/snippets/language_entity_extraction_create_dataset.py create mode 100644 samples/snippets/language_entity_extraction_create_dataset_test.py create mode 100644 samples/snippets/language_entity_extraction_create_model.py create mode 100644 samples/snippets/language_entity_extraction_create_model_test.py create mode 100644 samples/snippets/language_entity_extraction_predict.py create mode 100644 samples/snippets/language_entity_extraction_predict_test.py diff --git a/samples/snippets/language_entity_extraction_create_dataset.py b/samples/snippets/language_entity_extraction_create_dataset.py new file mode 100644 index 00000000..056ff22c --- /dev/null +++ b/samples/snippets/language_entity_extraction_create_dataset.py @@ -0,0 +1,42 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_dataset(project_id, display_name):
+    """Create a text extraction dataset and print its name and id."""
+    # [START automl_language_entity_extraction_create_dataset]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # display_name = "YOUR_DATASET_NAME"
+
+    automl_client = automl.AutoMlClient()
+
+    # Parent location resource; this sample always uses "us-central1".
+    parent = automl_client.location_path(project_id, "us-central1")
+    extraction_metadata = automl.types.TextExtractionDatasetMetadata()
+    dataset_spec = automl.types.Dataset(
+        display_name=display_name,
+        text_extraction_dataset_metadata=extraction_metadata,
+    )
+
+    # Create the dataset in the region; result() yields the created dataset.
+    new_dataset = automl_client.create_dataset(parent, dataset_spec).result()
+
+    # Display the dataset information; the id is the last path segment.
+    full_name = new_dataset.name
+    print("Dataset name: {}".format(full_name))
+    print("Dataset id: {}".format(full_name.split("/")[-1]))
+    # [END automl_language_entity_extraction_create_dataset]
diff --git a/samples/snippets/language_entity_extraction_create_dataset_test.py b/samples/snippets/language_entity_extraction_create_dataset_test.py
new file mode 100644
index 00000000..044a0d50
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_dataset_test.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import os
+
+from google.cloud import automl
+
+import language_entity_extraction_create_dataset
+
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
+
+
+def test_entity_extraction_create_dataset(capsys):
+    """Create a timestamp-named dataset via the sample, then delete it."""
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    language_entity_extraction_create_dataset.create_dataset(
+        PROJECT_ID, "test_" + timestamp
+    )
+    captured_out, _ = capsys.readouterr()
+    assert "Dataset id: " in captured_out
+
+    # Clean up: the id is read from the third token of output line two.
+    id_line = captured_out.splitlines()[1]
+    dataset_id = id_line.split()[2]
+    automl_client = automl.AutoMlClient()
+    delete_operation = automl_client.delete_dataset(
+        automl_client.dataset_path(PROJECT_ID, "us-central1", dataset_id)
+    )
+    delete_operation.result()
diff --git a/samples/snippets/language_entity_extraction_create_model.py b/samples/snippets/language_entity_extraction_create_model.py
new file mode 100644
index 00000000..5e0748dd
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_model.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_model(project_id, dataset_id, display_name):
+    """Request training of an entity extraction model on a dataset."""
+    # [START automl_language_entity_extraction_create_model]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # dataset_id = "YOUR_DATASET_ID"
+    # display_name = "YOUR_MODEL_NAME"
+
+    automl_client = automl.AutoMlClient()
+
+    # Parent location resource; this sample always uses "us-central1".
+    parent = automl_client.location_path(project_id, "us-central1")
+    # Leave model unset to use the default base model provided by Google
+    model_spec = automl.types.Model(
+        display_name=display_name,
+        dataset_id=dataset_id,
+        text_extraction_model_metadata=(
+            automl.types.TextExtractionModelMetadata()
+        ),
+    )
+
+    # Create a model with the model metadata in the region.
+    training = automl_client.create_model(parent, model_spec)
+    print("Training operation name: {}".format(training.operation.name))
+    print("Training started...")
+    # [END automl_language_entity_extraction_create_model]
diff --git a/samples/snippets/language_entity_extraction_create_model_test.py b/samples/snippets/language_entity_extraction_create_model_test.py
new file mode 100644
index 00000000..0ff74c89
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_model_test.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_create_model
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
+DATASET_ID = "TEN0000000000000000000"
+
+
+def test_entity_extraction_create_model(capsys):
+    # Model creation cannot be cancelled, so use a nonexistent dataset id.
+    # The output check sits in `else`; str(e) backs up a missing `.message`.
+    try:
+        language_entity_extraction_create_model.create_model(
+            PROJECT_ID, DATASET_ID, "classification_test_create_model"
+        )
+    except Exception as e:
+        assert "Dataset does not exist." in getattr(e, "message", str(e))
+    else:
+        out, _ = capsys.readouterr()
+        assert "Dataset does not exist." in out
diff --git a/samples/snippets/language_entity_extraction_predict.py b/samples/snippets/language_entity_extraction_predict.py
new file mode 100644
index 00000000..40d7e89b
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_predict.py
@@ -0,0 +1,55 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def predict(project_id, model_id, content):
+    """Run entity extraction on ``content`` and print each annotation.
+
+    The text is sent as "text/plain" to model ``model_id`` of
+    ``project_id`` in "us-central1".
+    """
+    # [START automl_language_entity_extraction_predict]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # model_id = "YOUR_MODEL_ID"
+    # content = "text to predict"
+
+    client = automl.PredictionServiceClient()
+
+    # Get the full path of the model.
+    model_name = client.model_path(project_id, "us-central1", model_id)
+
+    # Supported mime_types: 'text/plain', 'text/html'
+    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet
+    request_payload = automl.types.ExamplePayload(
+        text_snippet=automl.types.TextSnippet(
+            content=content, mime_type="text/plain"
+        )
+    )
+
+    prediction = client.predict(model_name, request_payload)
+
+    # Print the entity type, score, text and offsets of each annotation.
+    for annotation in prediction.payload:
+        extraction = annotation.text_extraction
+        segment = extraction.text_segment
+
+        print("Text Extract Entity Types: {}".format(annotation.display_name))
+        print("Text Score: {}".format(extraction.score))
+        print("Text Extract Entity Content: {}".format(segment.content))
+        print("Text Start Offset: {}".format(segment.start_offset))
+        print("Text End Offset: {}".format(segment.end_offset))
+    # [END automl_language_entity_extraction_predict]
diff --git a/samples/snippets/language_entity_extraction_predict_test.py b/samples/snippets/language_entity_extraction_predict_test.py
new file mode 100644
index 00000000..35dfddef
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_predict_test.py
@@ -0,0 +1,46 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from google.cloud import automl
+import pytest
+
+import language_entity_extraction_predict
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
+MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"]
+
+
+@pytest.fixture(scope="function")
+def verify_model_state():
+    """Deploy the test model first when it is reported as UNDEPLOYED."""
+    client = automl.AutoMlClient()
+    model_full_id = client.model_path(PROJECT_ID, "us-central1", MODEL_ID)
+
+    model = client.get_model(model_full_id)
+    if model.deployment_state == automl.enums.Model.DeploymentState.UNDEPLOYED:
+        # Deploy model if it is not deployed
+        client.deploy_model(model_full_id).result()
+
+
+def test_predict(capsys, verify_model_state):
+    # Requesting the fixture as a parameter already runs it; no call needed.
+    text = (
+        "Constitutional mutations in the WT1 gene in patients with "
+        "Denys-Drash syndrome."
+    )
+    language_entity_extraction_predict.predict(PROJECT_ID, MODEL_ID, text)
+    out, _ = capsys.readouterr()
+    assert "Text Extract Entity Types: " in out