diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py
index bb3998732a5a..eb5161c9fe71 100644
--- a/bigquery/google/cloud/bigquery/_helpers.py
+++ b/bigquery/google/cloud/bigquery/_helpers.py
@@ -18,6 +18,7 @@
 import copy
 import datetime
 import decimal
+import re
 
 from google.cloud._helpers import UTC
 from google.cloud._helpers import _date_from_iso8601_date
@@ -29,6 +30,12 @@
 _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
 _TIMEONLY_WO_MICROS = "%H:%M:%S"
 _TIMEONLY_W_MICROS = "%H:%M:%S.%f"
+_PROJECT_PREFIX_PATTERN = re.compile(
+    r"""
+    (?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)(?:$|\.(?P<custom_id>[^.]+)$)
+""",
+    re.VERBOSE,
+)
 
 
 def _not_null(value, field):
@@ -586,24 +593,42 @@ def _str_or_none(value):
         return str(value)
 
 
+def _split_id(full_id):
+    """Helper: split full_id into composite parts.
+
+    Args:
+        full_id (str): Fully-qualified ID in standard SQL format.
+
+    Returns:
+        List[str]: ID's parts separated into components.
+    """
+    with_prefix = _PROJECT_PREFIX_PATTERN.match(full_id)
+    if with_prefix is None:
+        parts = full_id.split(".")
+    else:
+        parts = with_prefix.groups()
+        parts = [part for part in parts if part]
+    return parts
+
+
 def _parse_3_part_id(full_id, default_project=None, property_name="table_id"):
     output_project_id = default_project
     output_dataset_id = None
     output_resource_id = None
-    parts = full_id.split(".")
+    parts = _split_id(full_id)
 
     if len(parts) != 2 and len(parts) != 3:
         raise ValueError(
             "{property_name} must be a fully-qualified ID in "
-            'standard SQL format. e.g. "project.dataset.{property_name}", '
+            'standard SQL format, e.g., "project.dataset.{property_name}", '
             "got {}".format(full_id, property_name=property_name)
         )
 
     if len(parts) == 2 and not default_project:
         raise ValueError(
             "When default_project is not set, {property_name} must be a "
-            "fully-qualified ID in standard SQL format. "
-            'e.g. "project.dataset_id.{property_name}", got {}'.format(
+            "fully-qualified ID in standard SQL format, "
+            'e.g., "project.dataset_id.{property_name}", got {}'.format(
                 full_id, property_name=property_name
             )
         )
diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 494c219d4f67..67a7353f94e7 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -18,7 +18,6 @@
 
 import six
 import copy
-import re
 
 import google.cloud._helpers
 from google.cloud.bigquery import _helpers
@@ -27,14 +26,6 @@
 from google.cloud.bigquery.table import TableReference
 
 
-_PROJECT_PREFIX_PATTERN = re.compile(
-    r"""
-    (?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
-""",
-    re.VERBOSE,
-)
-
-
 def _get_table_reference(self, table_id):
     """Constructs a TableReference.
 
@@ -299,13 +290,7 @@ def from_string(cls, dataset_id, default_project=None):
         """
         output_dataset_id = dataset_id
         output_project_id = default_project
-        with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
-        if with_prefix is None:
-            parts = dataset_id.split(".")
-        else:
-            project_id = with_prefix.group("project_id")
-            dataset_id = with_prefix.group("dataset_id")
-            parts = [project_id, dataset_id]
+        parts = _helpers._split_id(dataset_id)
 
         if len(parts) == 1 and not default_project:
             raise ValueError(
diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py
index 8ba7fee892e5..2f72dd3b542e 100644
--- a/bigquery/tests/unit/test_table.py
+++ b/bigquery/tests/unit/test_table.py
@@ -215,11 +215,23 @@ def test_from_string(self):
         self.assertEqual(got.dataset_id, "string_dataset")
         self.assertEqual(got.table_id, "string_table")
 
+    def test_from_string_w_prefix(self):
+        cls = self._get_target_class()
+        got = cls.from_string("google.com:string-project.string_dataset.string_table")
+        self.assertEqual(got.project, "google.com:string-project")
+        self.assertEqual(got.dataset_id, "string_dataset")
+        self.assertEqual(got.table_id, "string_table")
+
     def test_from_string_legacy_string(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):
             cls.from_string("string-project:string_dataset.string_table")
 
+    def test_from_string_w_incorrect_prefix(self):
+        cls = self._get_target_class()
+        with self.assertRaises(ValueError):
+            cls.from_string("google.com.string-project.string_dataset.string_table")
+
     def test_from_string_not_fully_qualified(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):