refactor(bigquery): update code samples to use strings for table and dataset IDs (#9974)

* load_and_query_partitioned_table

* remove client_query_legacy_sql from snippets

* client_query_w_named_params

* client_query_w_positional_params

* client_query_w_timestamp_params

* client_query_w_array_params

* client_query_w_struct_params

* query_no_cache

* query_external_gcs_temporary_table

* unify test_update_table_require_partition_filter

* Update test_copy_table_multiple_source.py

* Update client_query_add_column.py

* Update client_query_relax_column.py

* flake8 correction

* fix queries.rst file

* import reformat + comma deletion
emar-kar authored and tswast committed Dec 26, 2019
1 parent fe9deb1 commit 4f7d4b1
Showing 24 changed files with 610 additions and 365 deletions.
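
The change this commit applies across the samples is mechanical: table and dataset references that were previously built with client.dataset(...) and dataset_ref.table(...) are replaced with fully qualified string IDs passed straight to the client methods. A minimal sketch of the new style follows; the project, dataset, and table names are illustrative and are not taken from the changed files.

from google.cloud import bigquery

client = bigquery.Client()

# Old style (removed in this commit): build a TableReference object.
# table_ref = client.dataset("my_dataset").table("us_states_by_date")

# New style: pass a "project.dataset.table" string directly.
table_id = "my-project.my_dataset.us_states_by_date"

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
job_config.skip_leading_rows = 1

uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()  # Wait for the load job to finish.

table = client.get_table(table_id)  # get_table() also accepts string IDs.
print("Loaded {} rows to {}".format(table.num_rows, table_id))
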
346 changes: 0 additions & 346 deletions bigquery/docs/snippets.py
@@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete):
assert table.time_partitioning.expiration_ms == 7776000000


def test_load_and_query_partitioned_table(client, to_delete):
dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
client.create_dataset(dataset)
to_delete.append(dataset)

# [START bigquery_load_table_partitioned]
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
bigquery.SchemaField("date", "DATE"),
]
job_config.skip_leading_rows = 1
job_config.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY,
field="date", # name of column to use for partitioning
expiration_ms=7776000000,
) # 90 days
uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

load_job = client.load_table_from_uri(
uri, dataset_ref.table(table_id), job_config=job_config
) # API request

assert load_job.job_type == "load"

load_job.result() # Waits for table load to complete.

table = client.get_table(dataset_ref.table(table_id))
print("Loaded {} rows to table {}".format(table.num_rows, table_id))
# [END bigquery_load_table_partitioned]
assert table.num_rows == 50

project_id = client.project

# [START bigquery_query_partitioned_table]
import datetime

# from google.cloud import bigquery
# client = bigquery.Client()
# project_id = 'my-project'
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

sql_template = """
SELECT *
FROM `{}.{}.{}`
WHERE date BETWEEN @start_date AND @end_date
"""
sql = sql_template.format(project_id, dataset_id, table_id)
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
]

# API request
query_job = client.query(sql, job_config=job_config)

rows = list(query_job)
print("{} states were admitted to the US in the 1800s".format(len(rows)))
# [END bigquery_query_partitioned_table]
assert len(rows) == 29


@pytest.mark.skip(
reason=(
"update_table() is flaky "
@@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete):
to_delete.insert(0, blob)


def test_client_query_legacy_sql(client):
"""Run a query with Legacy SQL explicitly set"""
# [START bigquery_query_legacy]
# from google.cloud import bigquery
# client = bigquery.Client()

query = (
"SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
'WHERE state = "TX" '
"LIMIT 100"
)

# Set use_legacy_sql to True to use legacy SQL syntax.
job_config = bigquery.QueryJobConfig()
job_config.use_legacy_sql = True

query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_legacy]


def test_client_query_total_rows(client, capsys):
"""Run a query and just check for how many rows."""
# [START bigquery_query_total_rows]
@@ -1420,251 +1319,6 @@ def test_manage_job(client):
# [END bigquery_get_job]


def test_client_query_w_named_params(client, capsys):
"""Run a query using named query parameters"""

# [START bigquery_query_params_named]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = @corpus
AND word_count >= @min_word_count
ORDER BY word_count DESC;
"""
query_params = [
bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_named]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_positional_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_positional]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = ?
AND word_count >= ?
ORDER BY word_count DESC;
"""
# Set the name to None to use positional parameters.
# Note that you cannot mix named and positional parameters.
query_params = [
bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter(None, "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
# [END bigquery_query_params_positional]

out, _ = capsys.readouterr()
assert "the" in out


def test_client_query_w_timestamp_params(client, capsys):
"""Run a query using query parameters"""

# [START bigquery_query_params_timestamps]
# from google.cloud import bigquery
# client = bigquery.Client()

import datetime
import pytz

query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);"
query_params = [
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row)

assert query_job.state == "DONE"
# [END bigquery_query_params_timestamps]

out, _ = capsys.readouterr()
assert "2016, 12, 7, 9, 0" in out


def test_client_query_w_array_params(client, capsys):
"""Run a query using array query parameters"""
# [START bigquery_query_params_arrays]
# from google.cloud import bigquery
# client = bigquery.Client()

query = """
SELECT name, sum(number) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE gender = @gender
AND state IN UNNEST(@states)
GROUP BY name
ORDER BY count DESC
LIMIT 10;
"""
query_params = [
bigquery.ScalarQueryParameter("gender", "STRING", "M"),
bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print("{}: \t{}".format(row.name, row.count))

assert query_job.state == "DONE"
# [END bigquery_query_params_arrays]

out, _ = capsys.readouterr()
assert "James" in out


def test_client_query_w_struct_params(client, capsys):
"""Run a query using struct query parameters"""
# [START bigquery_query_params_structs]
# from google.cloud import bigquery
# client = bigquery.Client()

query = "SELECT @struct_value AS s;"
query_params = [
bigquery.StructQueryParameter(
"struct_value",
bigquery.ScalarQueryParameter("x", "INT64", 1),
bigquery.ScalarQueryParameter("y", "STRING", "foo"),
)
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results
for row in query_job:
print(row.s)

assert query_job.state == "DONE"
# [END bigquery_query_params_structs]

out, _ = capsys.readouterr()
assert "1" in out
assert "foo" in out


def test_query_no_cache(client):
# [START bigquery_query_no_cache]
# from google.cloud import bigquery
# client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
sql = """
SELECT corpus
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
query_job = client.query(
sql,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_no_cache]


def test_query_external_gcs_temporary_table(client):
# [START bigquery_query_external_gcs_temp]
# from google.cloud import bigquery
# client = bigquery.Client()

# Configure the external data source and query job
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [
"gs://cloud-samples-data/bigquery/us-states/us-states.csv"
]
external_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
]
external_config.options.skip_leading_rows = 1 # optionally skip header row
table_id = "us_states"
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = {table_id: external_config}

# Example query to find states starting with 'W'
sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

query_job = client.query(sql, job_config=job_config) # API request

w_states = list(query_job) # Waits for query to finish
print("There are {} states with names starting with W.".format(len(w_states)))
# [END bigquery_query_external_gcs_temp]
assert len(w_states) == 4


def test_query_external_gcs_permanent_table(client, to_delete):
dataset_id = "query_external_gcs_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
2 changes: 1 addition & 1 deletion bigquery/docs/usage/queries.rst
@@ -43,7 +43,7 @@ Run a query using a named query parameter
See BigQuery documentation for more information on
`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/client_query_w_named_params.py
:language: python
:dedent: 4
:start-after: [START bigquery_query_params_named]
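
The queries.rst include above now points at a standalone sample module instead of snippets.py. A plausible shape for that module, reconstructed from the snippet deleted above, is sketched below; the function name and surrounding structure are assumptions, not the actual contents of samples/client_query_w_named_params.py.

# Hypothetical reconstruction of samples/client_query_w_named_params.py;
# the real file in the repository may differ.
def client_query_w_named_params():

    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named]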