From 2d9ce3020edc8aab7972860176154536a048e2ed Mon Sep 17 00:00:00 2001
From: Noah Negrey <nnegrey@users.noreply.github.com>
Date: Tue, 14 Jan 2020 10:37:07 -0700
Subject: [PATCH] automl: add natural language entity extraction ga samples
 [(#2676)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2676)

* automl: add natural language entity extraction ga samples

* Update language_entity_extraction_predict_test.py

* Update language_entity_extraction_predict_test.py

* use centralized automl testing project and add comments that link to docs
---
 ...nguage_entity_extraction_create_dataset.py | 42 ++++++++++++++
 ...e_entity_extraction_create_dataset_test.py | 42 ++++++++++++++
 ...language_entity_extraction_create_model.py | 43 +++++++++++++++
 ...age_entity_extraction_create_model_test.py | 34 ++++++++++++
 .../language_entity_extraction_predict.py     | 55 +++++++++++++++++++
 ...language_entity_extraction_predict_test.py | 46 ++++++++++++++++
 6 files changed, 262 insertions(+)
 create mode 100644 samples/snippets/language_entity_extraction_create_dataset.py
 create mode 100644 samples/snippets/language_entity_extraction_create_dataset_test.py
 create mode 100644 samples/snippets/language_entity_extraction_create_model.py
 create mode 100644 samples/snippets/language_entity_extraction_create_model_test.py
 create mode 100644 samples/snippets/language_entity_extraction_predict.py
 create mode 100644 samples/snippets/language_entity_extraction_predict_test.py

diff --git a/samples/snippets/language_entity_extraction_create_dataset.py b/samples/snippets/language_entity_extraction_create_dataset.py
new file mode 100644
index 00000000..056ff22c
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_dataset.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_dataset(project_id, display_name):
+    """Create an AutoML text-extraction dataset and print its name and id."""
+    # [START automl_language_entity_extraction_create_dataset]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # display_name = "YOUR_DATASET_NAME"
+
+    automl_client = automl.AutoMlClient()
+
+    # The dataset is created under this project in the us-central1 location.
+    parent = automl_client.location_path(project_id, "us-central1")
+    dataset_spec = automl.types.Dataset(
+        display_name=display_name,
+        text_extraction_dataset_metadata=(
+            automl.types.TextExtractionDatasetMetadata()
+        ),
+    )
+
+    # Issue the create request; result() yields the created dataset.
+    new_dataset = automl_client.create_dataset(parent, dataset_spec).result()
+
+    # Print the full resource name and its last path segment (the id).
+    print("Dataset name: {}".format(new_dataset.name))
+    print("Dataset id: {}".format(new_dataset.name.split("/")[-1]))
+    # [END automl_language_entity_extraction_create_dataset]
diff --git a/samples/snippets/language_entity_extraction_create_dataset_test.py b/samples/snippets/language_entity_extraction_create_dataset_test.py
new file mode 100644
index 00000000..044a0d50
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_dataset_test.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import os
+
+from google.cloud import automl
+
+import language_entity_extraction_create_dataset
+
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]  # KeyError on import if unset
+
+
+def test_entity_extraction_create_dataset(capsys):
+    # Timestamped display name for the dataset created by this run.
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    language_entity_extraction_create_dataset.create_dataset(
+        PROJECT_ID, "test_" + timestamp
+    )
+    captured, _ = capsys.readouterr()
+    assert "Dataset id: " in captured
+
+    # Clean up: the id is the third token on the second printed line.
+    id_line = captured.splitlines()[1]
+    created_id = id_line.split()[2]
+    automl_client = automl.AutoMlClient()
+    deletion = automl_client.delete_dataset(
+        automl_client.dataset_path(PROJECT_ID, "us-central1", created_id)
+    )
+    deletion.result()
diff --git a/samples/snippets/language_entity_extraction_create_model.py b/samples/snippets/language_entity_extraction_create_model.py
new file mode 100644
index 00000000..5e0748dd
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_model.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_model(project_id, dataset_id, display_name):
+    """Create a text-extraction model and print its operation name."""
+    # [START automl_language_entity_extraction_create_model]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # dataset_id = "YOUR_DATASET_ID"
+    # display_name = "YOUR_MODEL_NAME"
+
+    automl_client = automl.AutoMlClient()
+
+    # The model is created under this project in the us-central1 location.
+    parent = automl_client.location_path(project_id, "us-central1")
+    # Leave model unset to use the default base model provided by Google
+    extraction_metadata = automl.types.TextExtractionModelMetadata()
+    model_spec = automl.types.Model(
+        text_extraction_model_metadata=extraction_metadata,
+        dataset_id=dataset_id,
+        display_name=display_name,
+    )
+
+    # Submit the request; the response exposes the operation that was started.
+    training = automl_client.create_model(parent, model_spec)
+    operation_name = training.operation.name
+    print("Training operation name: {}".format(operation_name))
+    print("Training started...")
+    # [END automl_language_entity_extraction_create_model]
diff --git a/samples/snippets/language_entity_extraction_create_model_test.py b/samples/snippets/language_entity_extraction_create_model_test.py
new file mode 100644
index 00000000..0ff74c89
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_create_model_test.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_create_model
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]  # KeyError on import if unset
+DATASET_ID = "TEN0000000000000000000"  # dataset id expected not to exist
+
+
+def test_entity_extraction_create_model(capsys):
+    # Entity extraction does not let you cancel model creation, so request a
+    # model for a nonexistent dataset; the rest of the request is valid.
+    try:
+        language_entity_extraction_create_model.create_model(
+            PROJECT_ID, DATASET_ID, "classification_test_create_model"
+        )
+    except Exception as e:
+        assert "Dataset does not exist." in getattr(e, "message", str(e))
+    else:
+        # Outside the try so a failed assert is not swallowed by the except.
+        assert "Dataset does not exist." in capsys.readouterr()[0]
diff --git a/samples/snippets/language_entity_extraction_predict.py b/samples/snippets/language_entity_extraction_predict.py
new file mode 100644
index 00000000..40d7e89b
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_predict.py
@@ -0,0 +1,55 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def predict(project_id, model_id, content):
+    """Run entity extraction on text and print each returned annotation.
+
+    Args:
+        project_id: Project id used to build the model path.
+        model_id: Model id used to build the model path.
+        content: Text sent to the model as a "text/plain" snippet.
+    """
+    # [START automl_language_entity_extraction_predict]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # model_id = "YOUR_MODEL_ID"
+    # content = "text to predict"
+
+    predictor = automl.PredictionServiceClient()
+
+    # Resource path of the model in the us-central1 location.
+    model_path = predictor.model_path(project_id, "us-central1", model_id)
+
+    # Supported mime_types: 'text/plain', 'text/html'
+    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet
+    snippet = automl.types.TextSnippet(content=content, mime_type="text/plain")
+    request_payload = automl.types.ExamplePayload(text_snippet=snippet)
+
+    # Send the snippet to the model.
+    prediction = predictor.predict(model_path, request_payload)
+
+    # Print one five-line report per annotation in the response.
+    for annotation in prediction.payload:
+        extraction = annotation.text_extraction
+        # The entity text and its offsets are read from the text segment.
+        segment = extraction.text_segment
+        print("Text Extract Entity Types: {}".format(annotation.display_name))
+        print("Text Score: {}".format(extraction.score))
+        print("Text Extract Entity Content: {}".format(segment.content))
+        print("Text Start Offset: {}".format(segment.start_offset))
+        print("Text End Offset: {}".format(segment.end_offset))
+    # [END automl_language_entity_extraction_predict]
diff --git a/samples/snippets/language_entity_extraction_predict_test.py b/samples/snippets/language_entity_extraction_predict_test.py
new file mode 100644
index 00000000..35dfddef
--- /dev/null
+++ b/samples/snippets/language_entity_extraction_predict_test.py
@@ -0,0 +1,46 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from google.cloud import automl
+import pytest
+
+import language_entity_extraction_predict
+
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]  # KeyError on import if unset
+MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"]  # likewise required
+
+
+@pytest.fixture(scope="function")
+def verify_model_state():
+    """Deploy the model under test first if it is currently undeployed."""
+    automl_client = automl.AutoMlClient()
+    model_path = automl_client.model_path(PROJECT_ID, "us-central1", MODEL_ID)
+    current_state = automl_client.get_model(model_path).deployment_state
+
+    if current_state == automl.enums.Model.DeploymentState.UNDEPLOYED:
+        # Deploy, then call result() — presumably blocks until deployed.
+        automl_client.deploy_model(model_path).result()
+
+
+def test_predict(capsys, verify_model_state):
+    # Requesting verify_model_state as an argument is what runs the fixture.
+    text = (
+        "Constitutional mutations in the WT1 gene in patients with "
+        "Denys-Drash syndrome."
+    )
+    language_entity_extraction_predict.predict(PROJECT_ID, MODEL_ID, text)
+    out, _ = capsys.readouterr()
+    assert "Text Extract Entity Types: " in out