refactor(bigquery): update code samples to use strings for table and dataset IDs (#9974)

* load_and_query_partitioned_table

* remove client_query_legacy_sql from snippets

* client_query_w_named_params

* client_query_w_positional_params

* client_query_w_timestamp_params

* client_query_w_array_params

* client_query_w_struct_params

* query_no_cache

* query_external_gcs_temporary_table

* unify test_update_table_require_partition_filter

* Update test_copy_table_multiple_source.py

* Update client_query_add_column.py

* Update client_query_relax_column.py

* flake8 correction

* fix queries.rst file

* import reformat + comma deletion
emar-kar authored and tswast committed Dec 26, 2019
1 parent fe9deb1 commit 4f7d4b1
Showing 24 changed files with 610 additions and 365 deletions.
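
The change this commit applies across the samples is mechanical: table and dataset references that were previously built with client.dataset(...) and dataset_ref.table(...) are replaced with fully qualified string IDs passed straight to the client methods. A minimal sketch of the new style follows; the project, dataset, and table names are illustrative and are not taken from the changed files.

from google.cloud import bigquery

client = bigquery.Client()

# Old style (removed in this commit): build a TableReference object.
# table_ref = client.dataset("my_dataset").table("us_states_by_date")

# New style: pass a "project.dataset.table" string directly.
table_id = "my-project.my_dataset.us_states_by_date"

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
job_config.skip_leading_rows = 1

uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()  # Wait for the load job to finish.

table = client.get_table(table_id)  # get_table() also accepts string IDs.
print("Loaded {} rows to {}".format(table.num_rows, table_id))
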
346 changes: 0 additions & 346 deletions bigquery/docs/snippets.py
@@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete):
assert table.time_partitioning.expiration_ms == 7776000000


def test_load_and_query_partitioned_table(client, to_delete):
dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
client.create_dataset(dataset)
to_delete.append(dataset)

# [START bigquery_load_table_partitioned]
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
bigquery.SchemaField("date", "DATE"),
]
job_config.skip_leading_rows = 1
job_config.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY,
field="date", # name of column to use for partitioning
expiration_ms=7776000000,
) # 90 days
uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

load_job = client.load_table_from_uri(
uri, dataset_ref.table(table_id), job_config=job_config
) # API request

assert load_job.job_type == "load"

load_job.result() # Waits for table load to complete.

table = client.get_table(dataset_ref.table(table_id))
print("Loaded {} rows to table {}".format(table.num_rows, table_id))
# [END bigquery_load_table_partitioned]
assert table.num_rows == 50

project_id = client.project

# [START bigquery_query_partitioned_table]
import datetime

# from google.cloud import bigquery
# client = bigquery.Client()
# project_id = 'my-project'
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

sql_template = """
SELECT *
FROM `{}.{}.{}`
WHERE date BETWEEN @start_date AND @end_date
"""
sql = sql_template.format(project_id, dataset_id, table_id)
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
]

# API request
query_job = client.query(sql, job_config=job_config)

rows = list(query_job)
print("{} states were admitted to the US in the 1800s".format(len(rows)))
# [END bigquery_query_partitioned_table]
assert len(rows) == 29


@pytest.mark.skip(
reason=(
"update_table() is flaky "
@@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete):
to_delete.insert(0, blob)


def test_client_query_legacy_sql(client):
"""Run a query with Legacy SQL explicitly set"""
# [START bigquery_query_legacy]
# from google.cloud import bigquery
# client = bigquery.Client()

query = (
"SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
'WHERE state = "TX" '
"LIMIT 100"
)

# Set use_legacy_sql to True to use legacy SQL syntax.
job_config = bigquery.QueryJobConfig()
job_config.use_legacy_sql = True

query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_legacy]


def test_client_query_total_rows(client, capsys):
"""Run a query and just check for how many rows."""
# [START bigquery_query_total_rows]
@@ -1420,251 +1319,6 @@ def test_manage_job(client):
# [END bigquery_get_job]


def test_client_query_w_named_params(client, capsys):
"""Run a query using named query parameters"""

# [START bigquery_query_params_named]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = @corpus
AND word_count >= @min_word_count
ORDER BY word_count DESC;
"""
query_params = [
bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_named]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_positional_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_positional]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = ?
AND word_count >= ?
ORDER BY word_count DESC;
"""
# Set the name to None to use positional parameters.
# Note that you cannot mix named and positional parameters.
query_params = [
bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter(None, "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_positional]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_timestamp_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_timestamps]
# from google.cloud import bigquery
# client = bigquery.Client()

import datetime
import pytz

query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);"
query_params = [
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row)

assert query_job.state == "DONE"
# [END bigquery_query_params_timestamps]

out, _ = capsys.readouterr()
assert "2016, 12, 7, 9, 0" in out


def test_client_query_w_array_params(client, capsys):
"""Run a query using array query parameters"""
# [START bigquery_query_params_arrays]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT name, sum(number) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE gender = @gender
AND state IN UNNEST(@states)
GROUP BY name
ORDER BY count DESC
LIMIT 10;
"""
query_params = [
bigquery.ScalarQueryParameter("gender", "STRING", "M"),
bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.name, row.count))

assert query_job.state == "DONE"
# [END bigquery_query_params_arrays]

out, _ = capsys.readouterr()
assert "James" in out


def test_client_query_w_struct_params(client, capsys):
"""Run a query using struct query parameters"""
# [START bigquery_query_params_structs]
# from google.cloud import bigquery
# client = bigquery.Client()

query = "SELECT @struct_value AS s;"
query_params = [
bigquery.StructQueryParameter(
"struct_value",
bigquery.ScalarQueryParameter("x", "INT64", 1),
bigquery.ScalarQueryParameter("y", "STRING", "foo"),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row.s)

assert query_job.state == "DONE"
# [END bigquery_query_params_structs]

out, _ = capsys.readouterr()
assert "1" in out
assert "foo" in out


def test_query_no_cache(client):
# [START bigquery_query_no_cache]
# from google.cloud import bigquery
# client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
sql = """
SELECT corpus
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
query_job = client.query(
sql,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_no_cache]


def test_query_external_gcs_temporary_table(client):
# [START bigquery_query_external_gcs_temp]
# from google.cloud import bigquery
# client = bigquery.Client()

# Configure the external data source and query job
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [
"gs://cloud-samples-data/bigquery/us-states/us-states.csv"
]
external_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
]
external_config.options.skip_leading_rows = 1 # optionally skip header row
table_id = "us_states"
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = {table_id: external_config}

# Example query to find states starting with 'W'
sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

query_job = client.query(sql, job_config=job_config) # API request

w_states = list(query_job) # Waits for query to finish
print("There are {} states with names starting with W.".format(len(w_states)))
# [END bigquery_query_external_gcs_temp]
assert len(w_states) == 4


def test_query_external_gcs_permanent_table(client, to_delete):
dataset_id = "query_external_gcs_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
2 changes: 1 addition & 1 deletion bigquery/docs/usage/queries.rst
@@ -43,7 +43,7 @@ Run a query using a named query parameter
See BigQuery documentation for more information on
`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/client_query_w_named_params.py
:language: python
:dedent: 4
:start-after: [START bigquery_query_params_named]
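
The queries.rst include above now points at a standalone sample module instead of snippets.py. A plausible shape for that module, reconstructed from the snippet deleted above, is sketched below; the function name and surrounding structure are assumptions, not the actual contents of samples/client_query_w_named_params.py.

# Hypothetical reconstruction of samples/client_query_w_named_params.py;
# the real file in the repository may differ.
def client_query_w_named_params():

    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named]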