From f72ab40d1539e38c169fe08b34129923d65398dd Mon Sep 17 00:00:00 2001
From: Ricardo Mendes
Date: Mon, 4 Nov 2019 21:35:51 -0300
Subject: [PATCH] feat(datacatalog): add sample to create a fileset entry (#9590)

Fixes #9589
---
 datacatalog/samples/tests/conftest.py       | 26 ++++++
 .../tests/test_create_fileset_entry.py      | 30 +++++++
 .../samples/v1beta1/create_fileset_entry.py | 86 +++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 datacatalog/samples/tests/test_create_fileset_entry.py
 create mode 100644 datacatalog/samples/v1beta1/create_fileset_entry.py

diff --git a/datacatalog/samples/tests/conftest.py b/datacatalog/samples/tests/conftest.py
index b147413db588..b0669fa0df28 100644
--- a/datacatalog/samples/tests/conftest.py
+++ b/datacatalog/samples/tests/conftest.py
@@ -53,3 +53,29 @@ def random_entry_group_id(client, project_id):
         project_id, "us-central1", random_entry_group_id
     )
     client.delete_entry_group(entry_group_name)
+
+
+@pytest.fixture
+def random_entry_name(client, entry_group_name):
+    now = datetime.datetime.now()
+    random_entry_id = "example_entry_{}_{}".format(
+        now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]
+    )
+    random_entry_name = "{}/entries/{}".format(entry_group_name, random_entry_id)
+    yield random_entry_name
+    client.delete_entry(random_entry_name)
+
+
+@pytest.fixture
+def entry_group_name(client, project_id):
+    now = datetime.datetime.now()
+    entry_group_id = "python_entry_group_sample_{}_{}".format(
+        now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]
+    )
+    entry_group = client.create_entry_group(
+        datacatalog_v1beta1.DataCatalogClient.location_path(project_id, "us-central1"),
+        entry_group_id,
+        {},
+    )
+    yield entry_group.name
+    client.delete_entry_group(entry_group.name)
diff --git a/datacatalog/samples/tests/test_create_fileset_entry.py b/datacatalog/samples/tests/test_create_fileset_entry.py
new file mode 100644
index 000000000000..8d0bc28fd07f
--- /dev/null
+++ b/datacatalog/samples/tests/test_create_fileset_entry.py
@@ -0,0 +1,30 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import re
+
+from ..v1beta1 import create_fileset_entry
+
+
+def test_create_fileset_entry(capsys, client, random_entry_name):
+    """Check that the sample prints the name of the entry it creates."""
+    entry_name_pattern = "(?P<entry_group_name>.+?)/entries/(?P<entry_id>.+?$)"
+    entry_name_matches = re.match(entry_name_pattern, random_entry_name)
+    entry_group_name = entry_name_matches.group("entry_group_name")
+    entry_id = entry_name_matches.group("entry_id")
+
+    create_fileset_entry.create_fileset_entry(client, entry_group_name, entry_id)
+    out, err = capsys.readouterr()
+    assert "Created entry {}".format(random_entry_name) in out
diff --git a/datacatalog/samples/v1beta1/create_fileset_entry.py b/datacatalog/samples/v1beta1/create_fileset_entry.py
new file mode 100644
index 000000000000..6cc275655988
--- /dev/null
+++ b/datacatalog/samples/v1beta1/create_fileset_entry.py
@@ -0,0 +1,86 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_fileset_entry(client, entry_group_name, entry_id):
+    """Create a FILESET entry with a three-column schema and print its name."""
+    # [START datacatalog_create_fileset_tag]
+    from google.cloud import datacatalog_v1beta1
+
+    # TODO(developer): Construct a Data Catalog client object.
+    # client = datacatalog_v1beta1.DataCatalogClient()
+
+    # TODO(developer): Set entry_group_name to the Name of the entry group
+    # the entry will belong to.
+    # entry_group_name = "your_entry_group_name"
+
+    # TODO(developer): Set entry_id to the ID of the entry to create.
+    # entry_id = "your_entry_id"
+
+    # Construct a full Entry object to send to the API.
+    entry = datacatalog_v1beta1.types.Entry()
+    entry.display_name = "My Fileset"
+    entry.description = "This Fileset consists of ..."
+    entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*")
+    entry.type = datacatalog_v1beta1.enums.EntryType.FILESET
+
+    # Create the Schema, for example when you have a csv file.
+    columns = []
+    columns.append(
+        datacatalog_v1beta1.types.ColumnSchema(
+            column="first_name",
+            description="First name",
+            mode="REQUIRED",
+            type="STRING",
+        )
+    )
+
+    columns.append(
+        datacatalog_v1beta1.types.ColumnSchema(
+            column="last_name", description="Last name", mode="REQUIRED", type="STRING"
+        )
+    )
+
+    # Create sub columns for the addresses parent column.
+    subcolumns = []
+    subcolumns.append(
+        datacatalog_v1beta1.types.ColumnSchema(
+            column="city", description="City", mode="NULLABLE", type="STRING"
+        )
+    )
+
+    subcolumns.append(
+        datacatalog_v1beta1.types.ColumnSchema(
+            column="state", description="State", mode="NULLABLE", type="STRING"
+        )
+    )
+
+    columns.append(
+        datacatalog_v1beta1.types.ColumnSchema(
+            column="addresses",
+            description="Addresses",
+            mode="REPEATED",
+            subcolumns=subcolumns,
+            type="RECORD",
+        )
+    )
+
+    entry.schema.columns.extend(columns)
+
+    # Send the entry to the API for creation.
+    # Raises google.api_core.exceptions.AlreadyExists if the Entry already
+    # exists within the project.
+    entry = client.create_entry(entry_group_name, entry_id, entry)
+    print("Created entry {}".format(entry.name))
+    # [END datacatalog_create_fileset_tag]