Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: refactor list_rows tests and add test for scalars #829

Merged
merged 9 commits into from
Aug 13, 2021
4 changes: 2 additions & 2 deletions tests/data/scalars.jsonl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null}
10 changes: 5 additions & 5 deletions tests/data/scalars_extreme.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
Copy link
Contributor

@plamut plamut Aug 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch of a missing nine digit in timestamp's microseconds. 👍

{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null}
53 changes: 31 additions & 22 deletions tests/data/scalars_schema.json
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
[
{
"mode": "NULLABLE",
"name": "timestamp_col",
"type": "TIMESTAMP"
"name": "bool_col",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "time_col",
"type": "TIME"
"name": "bignumeric_col",
"type": "BIGNUMERIC"
},
{
"mode": "NULLABLE",
"name": "float64_col",
"type": "FLOAT"
"name": "bytes_col",
"type": "BYTES"
},
{
"mode": "NULLABLE",
"name": "datetime_col",
"type": "DATETIME"
"name": "date_col",
"type": "DATE"
},
{
"mode": "NULLABLE",
"name": "bignumeric_col",
"type": "BIGNUMERIC"
"name": "datetime_col", "type": "DATETIME"
plamut marked this conversation as resolved.
Show resolved Hide resolved
},
{
"mode": "NULLABLE",
"name": "numeric_col",
"type": "NUMERIC"
"name": "float64_col",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
Expand All @@ -36,27 +35,37 @@
},
{
"mode": "NULLABLE",
"name": "date_col",
"type": "DATE"
"name": "int64_col",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "string_col",
"type": "STRING"
"name": "interval_col",
"type": "INTERVAL"
},
{
"mode": "NULLABLE",
"name": "bool_col",
"type": "BOOLEAN"
"name": "numeric_col",
"type": "NUMERIC"
},
{
"mode": "REQUIRED",
"name": "rowindex",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "bytes_col",
"type": "BYTES"
"name": "string_col",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "int64_col",
"type": "INTEGER"
"name": "time_col",
"type": "TIME"
},
{
"mode": "NULLABLE",
"name": "timestamp_col",
"type": "TIMESTAMP"
}
]
36 changes: 30 additions & 6 deletions tests/system/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@

"""System tests for Arrow connector."""

from typing import Optional

import pytest

from google.cloud import bigquery
from google.cloud.bigquery import enums


pyarrow = pytest.importorskip(
"pyarrow", minversion="3.0.0"
) # Needs decimal256 for BIGNUMERIC columns.
Expand All @@ -31,17 +37,35 @@
),
)
def test_list_rows_nullable_scalars_dtypes(
bigquery_client,
scalars_table,
scalars_extreme_table,
max_results,
scalars_table_name,
bigquery_client: bigquery.Client,
scalars_table: str,
scalars_extreme_table: str,
max_results: Optional[int],
scalars_table_name: str,
):
table_id = scalars_table
if scalars_table_name == "scalars_extreme_table":
table_id = scalars_extreme_table

# Avoid INTERVAL columns until they are supported by the BigQuery Storage
# API and pyarrow.
plamut marked this conversation as resolved.
Show resolved Hide resolved
schema = [
bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
]

arrow_table = bigquery_client.list_rows(
table_id, max_results=max_results,
table_id, max_results=max_results, selected_fields=schema,
).to_arrow()

schema = arrow_table.schema
Expand Down
48 changes: 0 additions & 48 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self):
self.assertTrue(pyarrow.types.is_list(record_col[1].type))
self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type))

def test_list_rows_empty_table(self):
    """Listing rows of a freshly created, empty table yields an empty RowIterator."""
    from google.cloud.bigquery.table import RowIterator

    temp = self.temp_dataset(_make_dataset_id("empty_table"))
    empty_ref = temp.table("empty_table")
    created = Config.CLIENT.create_table(bigquery.Table(empty_ref))

    # Listing an empty table looks pointless, yet it is exactly what happens
    # after a DDL query is issued from an IPython magic command.
    row_iterator = Config.CLIENT.list_rows(created)
    self.assertIsInstance(row_iterator, RowIterator)
    self.assertEqual(tuple(row_iterator), ())

def test_list_rows_page_size(self):
    """Load seven rows, then verify list_rows pages them in chunks of three.

    The expected layout is ``divmod(7, 3)``: two full pages followed by a
    final page holding the single remaining row.
    """
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    total_rows = 7
    rows_per_page = 3
    full_pages, remainder = divmod(total_rows, rows_per_page)

    schema = [bigquery.SchemaField("string_col", "STRING", mode="NULLABLE")]

    # Build an in-memory newline-delimited JSON payload, one object per line,
    # terminated by a trailing newline.
    json_lines = [
        json.dumps({"string_col": "item%d" % index}) for index in range(total_rows)
    ]
    payload = io.BytesIO(("\n".join(json_lines) + "\n").encode("ascii"))

    temp = self.temp_dataset(_make_dataset_id("nested_df"))
    table = temp.table("test_table")
    # Register the table for cleanup ahead of anything already queued.
    self.to_delete.insert(0, table)

    load_config = bigquery.LoadJobConfig()
    load_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    load_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
    load_config.schema = schema
    # Load the table from the in-memory JSON "file" and wait for completion.
    load_job = Config.CLIENT.load_table_from_file(
        payload, table, job_config=load_config
    )
    load_job.result()

    row_iterator = Config.CLIENT.list_rows(
        table, selected_fields=schema, page_size=rows_per_page
    )
    page_iter = row_iterator.pages

    for _ in range(full_pages):
        self.assertEqual(next(page_iter).num_items, rows_per_page)
    self.assertEqual(next(page_iter).num_items, remainder)

def temp_dataset(self, dataset_id, location=None):
project = Config.CLIENT.project
dataset_ref = bigquery.DatasetReference(project, dataset_id)
Expand Down
120 changes: 120 additions & 0 deletions tests/system/test_list_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import decimal

from google.cloud import bigquery
from google.cloud.bigquery import enums


def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str):
    """Listing rows of a freshly created, empty table yields an empty RowIterator."""
    from google.cloud.bigquery.table import RowIterator

    created = bigquery_client.create_table(table_id)

    # Listing an empty table looks pointless, yet it is exactly what happens
    # after a DDL query is issued from an IPython magic command.
    row_iterator = bigquery_client.list_rows(created)
    assert isinstance(row_iterator, RowIterator)
    assert tuple(row_iterator) == ()


def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str):
    """Load seven rows, then verify list_rows pages them in chunks of three.

    The expected layout is ``divmod(7, 3)``: two full pages followed by a
    final page holding the single remaining row.
    """
    total_rows = 7
    rows_per_page = 3
    full_pages, remainder = divmod(total_rows, rows_per_page)

    payload = [
        {"string_col": "item%d" % index, "rowindex": index}
        for index in range(total_rows)
    ]
    load_job = bigquery_client.load_table_from_json(payload, table_id)
    load_job.result()

    # Only the string column is requested back from the table.
    string_field = bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)
    row_iterator = bigquery_client.list_rows(
        table_id, selected_fields=[string_field], page_size=rows_per_page
    )
    page_iter = row_iterator.pages

    for _ in range(full_pages):
        assert next(page_iter).num_items == rows_per_page
    assert next(page_iter).num_items == remainder


def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str):
    """Verify the Python values list_rows returns for the scalars table.

    Rows are ordered by their ``rowindex`` column before inspection. The row
    at index 0 is compared against concrete expected values; the row at
    index 1 is asserted to be None in every column except ``rowindex``.
    """
    fetched = bigquery_client.list_rows(scalars_table)
    ordered = sorted(fetched, key=lambda item: item["rowindex"])

    populated = ordered[0]
    assert populated["bool_col"]  # True

    expected_values = {
        "bytes_col": b"Hello, World!",
        "date_col": datetime.date(2021, 7, 21),
        "datetime_col": datetime.datetime(2021, 7, 21, 11, 39, 45),
        "geography_col": "POINT(-122.0838511 37.3860517)",
        "int64_col": 123456789,
        "numeric_col": decimal.Decimal("1.23456789"),
        "bignumeric_col": decimal.Decimal("10.111213141516171819"),
        "float64_col": 1.25,
        "string_col": "Hello, World!",
        "time_col": datetime.time(11, 41, 43, 76160),
        "timestamp_col": datetime.datetime(
            2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc
        ),
    }
    for column_name, expected in expected_values.items():
        assert populated[column_name] == expected

    all_null = ordered[1]
    for column_name, cell in all_null.items():
        if column_name != "rowindex":
            assert cell is None
            continue
        # The row index is the only populated column in the NULL row.
        assert cell == 1


def test_list_rows_scalars_extreme(
    bigquery_client: bigquery.Client, scalars_extreme_table: str
):
    """Verify the Python values list_rows returns for the extreme-scalars table.

    Rows are ordered by their ``rowindex`` column before inspection. The row
    at index 0 is compared against concrete expected values; the row at
    index 4 is asserted to be None in every column except ``rowindex``.
    """
    fetched = bigquery_client.list_rows(scalars_extreme_table)
    ordered = sorted(fetched, key=lambda item: item["rowindex"])

    populated = ordered[0]
    assert populated["bool_col"]  # True

    expected_values = {
        "bytes_col": b"\r\n",
        "date_col": datetime.date(9999, 12, 31),
        "datetime_col": datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
        "geography_col": "POINT(-135 90)",
        "int64_col": 9223372036854775807,
        "numeric_col": decimal.Decimal(
            "9.9999999999999999999999999999999999999E+28"
        ),
        "bignumeric_col": decimal.Decimal(
            "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37"
        ),
        "float64_col": float("Inf"),
        "string_col": "Hello, World",
        "time_col": datetime.time(23, 59, 59, 999999),
        "timestamp_col": datetime.datetime(
            9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
        ),
    }
    for column_name, expected in expected_values.items():
        assert populated[column_name] == expected

    all_null = ordered[4]
    for column_name, cell in all_null.items():
        if column_name != "rowindex":
            assert cell is None
            continue
        # The row index is the only populated column in the NULL row.
        assert cell == 4