From 28f93713a7ff731c51385af2047f743c43163757 Mon Sep 17 00:00:00 2001 From: mesmacosta Date: Fri, 13 Dec 2019 16:55:51 -0300 Subject: [PATCH] feat(datacatalog): add sample for create a fileset entry quickstart --- datacatalog/samples/quickstart/__init__.py | 0 .../create_fileset_entry_quickstart.py | 115 ++++++++++++++++++ datacatalog/samples/tests/conftest.py | 13 ++ .../samples/tests/quickstart/__init__.py | 0 .../test_create_fileset_entry_quickstart.py | 40 ++++++ 5 files changed, 168 insertions(+) create mode 100644 datacatalog/samples/quickstart/__init__.py create mode 100644 datacatalog/samples/quickstart/create_fileset_entry_quickstart.py create mode 100644 datacatalog/samples/tests/quickstart/__init__.py create mode 100644 datacatalog/samples/tests/quickstart/test_create_fileset_entry_quickstart.py diff --git a/datacatalog/samples/quickstart/__init__.py b/datacatalog/samples/quickstart/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/datacatalog/samples/quickstart/create_fileset_entry_quickstart.py b/datacatalog/samples/quickstart/create_fileset_entry_quickstart.py new file mode 100644 index 000000000000..55b0af59e689 --- /dev/null +++ b/datacatalog/samples/quickstart/create_fileset_entry_quickstart.py @@ -0,0 +1,115 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def create_fileset_entry_quickstart(client, project_id, entry_group_id, entry_id): + + # [START datacatalog_create_fileset_quickstart_tag] + # Import required modules. + from google.cloud import datacatalog_v1beta1 + + # TODO(developer): Construct a Data Catalog client object. + # client = datacatalog_v1beta1.DataCatalogClient() + + # TODO(developer): Set project_id to your + # Google Cloud Platform project ID the entry will belong to. + # project_id = "your-project-id" + + # TODO(developer): Specify the geographic location where the + # entry should reside. + # Currently, Data Catalog stores metadata in the us-central1 region. + location_id = "us-central1" + + # TODO(developer): Set entry_group_id to the ID of the entry group + # the entry will belong to. + # entry_group_id = "your_entry_group_id" + + # TODO(developer): Set entry_id to the ID of the entry to create. + # entry_id = "your_entry_id" + + # Create an Entry Group. + # Construct a full Entry Group object to send to the API. + entry_group_obj = datacatalog_v1beta1.types.EntryGroup() + entry_group_obj.display_name = "My Fileset Entry Group" + entry_group_obj.description = "This Entry Group consists of ...." + + # Send the Entry Group to the API for creation. + # Raises google.api_core.exceptions.AlreadyExists if the Entry Group + # already exists within the project. + entry_group = client.create_entry_group( + parent=datacatalog_v1beta1.DataCatalogClient.location_path( + project_id, location_id + ), + entry_group_id=entry_group_id, + entry_group=entry_group_obj, + ) + print("Created entry group {}".format(entry_group.name)) + + # Create a Fileset Entry. + # Construct a full Entry object to send to the API. + entry = datacatalog_v1beta1.types.Entry() + entry.display_name = "My Fileset" + entry.description = "This Fileset consists of ..." 
+ entry.gcs_fileset_spec.file_patterns.append("gs://cloud-samples-data/*") + entry.type = datacatalog_v1beta1.enums.EntryType.FILESET + + # Create the Schema, for example when you have a CSV file. + columns = [] + columns.append( + datacatalog_v1beta1.types.ColumnSchema( + column="first_name", + description="First name", + mode="REQUIRED", + type="STRING", + ) + ) + + columns.append( + datacatalog_v1beta1.types.ColumnSchema( + column="last_name", description="Last name", mode="REQUIRED", type="STRING" + ) + ) + + # Create subcolumns for the addresses parent column + subcolumns = [] + subcolumns.append( + datacatalog_v1beta1.types.ColumnSchema( + column="city", description="City", mode="NULLABLE", type="STRING" + ) + ) + + subcolumns.append( + datacatalog_v1beta1.types.ColumnSchema( + column="state", description="State", mode="NULLABLE", type="STRING" + ) + ) + + columns.append( + datacatalog_v1beta1.types.ColumnSchema( + column="addresses", + description="Addresses", + mode="REPEATED", + subcolumns=subcolumns, + type="RECORD", + ) + ) + + entry.schema.columns.extend(columns) + + # Send the entry to the API for creation. + # Raises google.api_core.exceptions.AlreadyExists if the Entry already + # exists within the project. 
+ entry = client.create_entry(entry_group.name, entry_id, entry) + print("Created entry {}".format(entry.name)) + # [END datacatalog_create_fileset_quickstart_tag] diff --git a/datacatalog/samples/tests/conftest.py b/datacatalog/samples/tests/conftest.py index b0669fa0df28..75e6753ff446 100644 --- a/datacatalog/samples/tests/conftest.py +++ b/datacatalog/samples/tests/conftest.py @@ -42,6 +42,19 @@ def project_id(default_credentials): return default_credentials[1] +@pytest.fixture +def random_entry_id(client, project_id, random_entry_group_id): + now = datetime.datetime.now() + random_entry_id = "example_entry_{}_{}".format( + now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] + ) + yield random_entry_id + entry_name = datacatalog_v1beta1.DataCatalogClient.entry_path( + project_id, "us-central1", random_entry_group_id, random_entry_id + ) + client.delete_entry(entry_name) + + @pytest.fixture def random_entry_group_id(client, project_id): now = datetime.datetime.now() diff --git a/datacatalog/samples/tests/quickstart/__init__.py b/datacatalog/samples/tests/quickstart/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/datacatalog/samples/tests/quickstart/test_create_fileset_entry_quickstart.py b/datacatalog/samples/tests/quickstart/test_create_fileset_entry_quickstart.py new file mode 100644 index 000000000000..769d034fac4a --- /dev/null +++ b/datacatalog/samples/tests/quickstart/test_create_fileset_entry_quickstart.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import datacatalog_v1beta1 + +from ...quickstart import create_fileset_entry_quickstart + + +def test_create_fileset_entry_quickstart( + capsys, client, project_id, random_entry_group_id, random_entry_id +): + + create_fileset_entry_quickstart.create_fileset_entry_quickstart( + client, project_id, random_entry_group_id, random_entry_id + ) + out, err = capsys.readouterr() + assert ( + "Created entry group" + " projects/{}/locations/{}/entryGroups/{}".format( + project_id, "us-central1", random_entry_group_id + ) + in out + ) + + expected_entry_name = datacatalog_v1beta1.DataCatalogClient.entry_path( + project_id, "us-central1", random_entry_group_id, random_entry_id + ) + + assert "Created entry {}".format(expected_entry_name) in out