Skip to content

Commit

Permalink
BigQuery: Add support of the project ID with org prefix to the Table.…
Browse files Browse the repository at this point in the history
…from_string() method (#9161)

* add prefix support

* Update _helpers.py

* consolidate the regex

* update split_id method

* '_parse_id' method renamed to '_split_id'
* switched to 're.groups' implementation instead of partly grouping

* Update dataset.py

flake8 fixed

* Update _helpers.py

* added the docstring for the '_split_id' method

* fix lint failure
  • Loading branch information
emar-kar authored and plamut committed Sep 6, 2019
1 parent 5c0ea2e commit 9f471fb
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 20 deletions.
33 changes: 29 additions & 4 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import copy
import datetime
import decimal
import re

from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
Expand All @@ -29,6 +30,12 @@
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"
# Matches a standard-SQL ID whose project part carries a colon-separated
# prefix, e.g. "google.com:string-project.string_dataset.string_table".
# Groups:
#   project_id - non-whitespace characters, a colon, then a run of non-dot
#                characters (so dots are permitted only before the colon).
#   dataset_id - the dot-free component that follows the project.
#   custom_id  - optional third dot-free component; None when the ID ends
#                right after the dataset.
# Compiled with re.VERBOSE, so the whitespace/newlines around the expression
# inside the raw string are not part of the pattern.
_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)(?:$|\.(?P<custom_id>[^.]+)$)
""",
re.VERBOSE,
)


def _not_null(value, field):
Expand Down Expand Up @@ -586,24 +593,42 @@ def _str_or_none(value):
return str(value)


def _split_id(full_id):
    """Break a fully-qualified ID into its component parts.

    IDs whose project part carries a colon-separated prefix are matched
    against ``_PROJECT_PREFIX_PATTERN`` so that the prefixed project is kept
    together as one component. Any other ID is simply split on ``"."``.

    Args:
        full_id (str): Fully-qualified ID in standard SQL format.

    Returns:
        List[str]: The ID's components, in the order they appear.
    """
    prefixed = _PROJECT_PREFIX_PATTERN.match(full_id)

    if prefixed is not None:
        # The trailing ``custom_id`` group is optional and comes back as
        # ``None`` when absent, so falsy groups are dropped.
        return [component for component in prefixed.groups() if component]

    return full_id.split(".")


def _parse_3_part_id(full_id, default_project=None, property_name="table_id"):
output_project_id = default_project
output_dataset_id = None
output_resource_id = None
parts = full_id.split(".")
parts = _split_id(full_id)

if len(parts) != 2 and len(parts) != 3:
raise ValueError(
"{property_name} must be a fully-qualified ID in "
'standard SQL format. e.g. "project.dataset.{property_name}", '
'standard SQL format, e.g., "project.dataset.{property_name}", '
"got {}".format(full_id, property_name=property_name)
)

if len(parts) == 2 and not default_project:
raise ValueError(
"When default_project is not set, {property_name} must be a "
"fully-qualified ID in standard SQL format. "
'e.g. "project.dataset_id.{property_name}", got {}'.format(
"fully-qualified ID in standard SQL format, "
'e.g., "project.dataset_id.{property_name}", got {}'.format(
full_id, property_name=property_name
)
)
Expand Down
17 changes: 1 addition & 16 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import six
import copy
import re

import google.cloud._helpers
from google.cloud.bigquery import _helpers
Expand All @@ -27,14 +26,6 @@
from google.cloud.bigquery.table import TableReference


_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
""",
re.VERBOSE,
)


def _get_table_reference(self, table_id):
"""Constructs a TableReference.
Expand Down Expand Up @@ -299,13 +290,7 @@ def from_string(cls, dataset_id, default_project=None):
"""
output_dataset_id = dataset_id
output_project_id = default_project
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
if with_prefix is None:
parts = dataset_id.split(".")
else:
project_id = with_prefix.group("project_id")
dataset_id = with_prefix.group("dataset_id")
parts = [project_id, dataset_id]
parts = _helpers._split_id(dataset_id)

if len(parts) == 1 and not default_project:
raise ValueError(
Expand Down
12 changes: 12 additions & 0 deletions bigquery/tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,23 @@ def test_from_string(self):
self.assertEqual(got.dataset_id, "string_dataset")
self.assertEqual(got.table_id, "string_table")

def test_from_string_w_prefix(self):
    """A colon-prefixed project ID is kept whole when parsing a table ID."""
    target_class = self._get_target_class()
    table_ref = target_class.from_string(
        "google.com:string-project.string_dataset.string_table"
    )
    # The "google.com:" prefix contains a dot but must stay in the project.
    self.assertEqual(table_ref.project, "google.com:string-project")
    self.assertEqual(table_ref.dataset_id, "string_dataset")
    self.assertEqual(table_ref.table_id, "string_table")

def test_from_string_legacy_string(self):
    """A "project:dataset.table" style string is rejected with ValueError."""
    target_class = self._get_target_class()
    self.assertRaises(
        ValueError,
        target_class.from_string,
        "string-project:string_dataset.string_table",
    )

def test_from_string_w_incorrect_prefix(self):
    """A dot-joined prefix (no colon) yields too many parts and is rejected."""
    target_class = self._get_target_class()
    self.assertRaises(
        ValueError,
        target_class.from_string,
        "google.com.string-project.string_dataset.string_table",
    )

def test_from_string_not_fully_qualified(self):
cls = self._get_target_class()
with self.assertRaises(ValueError):
Expand Down

0 comments on commit 9f471fb

Please sign in to comment.