diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index ae289d8a6..456ca2530 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for tqdm progress bar.""" import concurrent.futures +import sys import time import typing from typing import Optional @@ -22,6 +23,7 @@ try: import tqdm # type: ignore + import tqdm.notebook as notebook # type: ignore except ImportError: # pragma: NO COVER tqdm = None @@ -47,9 +49,22 @@ def get_progress_bar(progress_bar_type, description, total, unit): try: if progress_bar_type == "tqdm": - return tqdm.tqdm(desc=description, total=total, unit=unit) + return tqdm.tqdm( + bar_format="{l_bar}{bar}|", + colour="green", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_notebook": - return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) + return notebook.tqdm( + bar_format="{l_bar}{bar}|", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) except (KeyError, TypeError): @@ -80,7 +95,7 @@ def wait_for_query( """ default_total = 1 current_stage = None - start_time = time.time() + start_time = time.perf_counter() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" @@ -95,11 +110,7 @@ def wait_for_query( current_stage = query_job.query_plan[i] progress_bar.total = len(query_job.query_plan) progress_bar.set_description( - "Query executing stage {} and status {} : {:0.2f}s".format( - current_stage.name, - current_stage.status, - time.time() - start_time, - ), + f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s" ) try: query_result = query_job.result( @@ -107,7 +118,7 @@ def wait_for_query( ) progress_bar.update(default_total) 
progress_bar.set_description( - "Query complete after {:0.2f}s".format(time.time() - start_time), + f"Job ID {query_job.job_id} successfully executed", ) break except concurrent.futures.TimeoutError: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index c2d304e30..b0286deae 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1556,9 +1556,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 14819aa59..613cc1b58 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -125,7 +125,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() - self._progress_bar_type = "tqdm" + self._progress_bar_type = "tqdm_notebook" @property def credentials(self): @@ -269,7 +269,7 @@ def progress_bar_type(self): Manually setting the progress_bar_type: >>> from google.cloud.bigquery import magics - >>> magics.context.progress_bar_type = "tqdm" + >>> magics.context.progress_bar_type = "tqdm_notebook" """ return self._progress_bar_type @@ -286,7 +286,7 @@ def _handle_error(error, destination_var=None): Args: error (Exception): - An exception that ocurred during the query exectution. + An exception that occurred during the query execution. destination_var (Optional[str]): The name of the IPython session variable to store the query job. 
""" @@ -329,22 +329,25 @@ def _run_query(client, query, job_config=None): Query complete after 2.07s 'bf633912-af2c-4780-b568-5d868058632b' """ - start_time = time.time() + start_time = time.perf_counter() query_job = client.query(query, job_config=job_config) if job_config and job_config.dry_run: return query_job - print("Executing query with job ID: {}".format(query_job.job_id)) + print(f"Executing query with job ID: {query_job.job_id}") while True: - print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") + print( + f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(), + end="", + ) try: query_job.result(timeout=0.5) break except futures.TimeoutError: continue - print("\nQuery complete after {:0.2f}s".format(time.time() - start_time)) + print(f"\nJob ID {query_job.job_id} successfully executed") return query_job @@ -365,7 +368,7 @@ def _create_dataset_if_necessary(client, dataset_id): pass dataset = bigquery.Dataset(dataset_reference) dataset.location = client.location - print("Creating dataset: {}".format(dataset_id)) + print(f"Creating dataset: {dataset_id}") dataset = client.create_dataset(dataset) @@ -500,7 +503,7 @@ def _create_dataset_if_necessary(client, dataset_id): default=None, help=( "Sets progress bar type to display a progress bar while executing the query." - "Defaults to use tqdm. Install the ``tqdm`` package to use this feature." + "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) def _cell_magic(line, query): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8e9e248c4..2065c5fd2 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1728,9 +1728,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. 
``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -1921,9 +1921,9 @@ def to_dataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -2075,9 +2075,9 @@ def to_geodataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. 
``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/noxfile.py b/noxfile.py index 0b0800d35..a91e60a5f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -81,7 +81,7 @@ def default(session, install_extras=True): ) if install_extras and session.python == "3.10": - install_target = ".[bqstorage,pandas,tqdm,opentelemetry]" + install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" else: @@ -186,7 +186,7 @@ def system(session): session.install("google-cloud-datacatalog", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -235,7 +235,7 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -387,7 +387,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") + session.install("recommonmark", "sphinx==4.0.2", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -412,7 +412,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index f141b5420..da7131711 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -2,6 +2,7 @@ db-dtypes==1.0.4 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 
grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0affa1c19..4640dc42f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,6 +3,7 @@ google-cloud-bigquery==3.3.3 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/setup.py b/setup.py index 695ffd7d3..119ccb0af 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "ipywidgets": ["ipywidgets==7.7.1"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index c9e40d823..ecce2c7cd 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -12,6 +12,7 @@ google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 grpcio==1.47.0 +ipywidgets==7.7.1 ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 diff --git a/tests/system/test_magics.py b/tests/system/test_magics.py index 78c15cb50..3d761cd35 100644 --- a/tests/system/test_magics.py +++ b/tests/system/test_magics.py @@ -71,8 +71,7 @@ def test_bigquery_magic(ipython_interactive): # Removes blanks & terminal code (result of display clearing) updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) - assert re.match("Query 
complete after .*s", updates[-1]) + assert (re.match("Query executing: .*s", line) for line in updates[1:-1]) assert isinstance(result, pandas.DataFrame) assert len(result) == 10 # verify row count assert list(result) == ["url", "view_count"] # verify column names diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 84aab3aca..a45401664 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -37,7 +37,7 @@ except (ImportError, AttributeError): # pragma: NO COVER geopandas = None try: - from tqdm import tqdm + import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None @@ -301,7 +301,8 @@ def test_to_arrow_max_results_no_progress_bar(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -344,20 +345,20 @@ def test_to_arrow_w_tqdm_w_query_plan(): row_iterator, ], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_pending_status(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema 
import SchemaField @@ -396,20 +397,20 @@ def test_to_arrow_w_tqdm_w_pending_status(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_wo_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -439,14 +440,13 @@ def test_to_arrow_w_tqdm_wo_query_plan(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called() + tqdm_mock.assert_called() def _make_job(schema=(), rows=()): @@ -720,7 +720,7 @@ def test_to_dataframe_column_date_dtypes(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -@mock.patch("tqdm.tqdm") +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): from google.cloud.bigquery.job import QueryJob as target_class @@ -744,14 +744,15 @@ def 
test_to_dataframe_with_progress_bar(tqdm_mock): job = target_class.from_api_repr(begun_resource, client) job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() + tqdm_mock.tqdm.assert_not_called() job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() + tqdm_mock.tqdm.assert_called() @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_pending(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_pending(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -780,7 +781,7 @@ def test_to_dataframe_w_tqdm_pending(): job._properties["statistics"] = { "query": { "queryPlan": [ - {"name": "S00: Input", "id": "0", "status": "PRNDING"}, + {"name": "S00: Input", "id": "0", "status": "PENDING"}, {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, ] }, @@ -792,21 +793,21 @@ def test_to_dataframe_w_tqdm_pending(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm(tqdm_mock): from google.cloud.bigquery import table from 
google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -852,20 +853,21 @@ def test_to_dataframe_w_tqdm(): ], ) - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_max_results(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_max_results(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -901,16 +903,13 @@ def test_to_dataframe_w_tqdm_max_results(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: job.to_dataframe( progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 ) - assert result_patch_tqdm.call_count == 2 - result_patch_tqdm.assert_called_with( - timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 - ) + assert tqdm_mock.call_count == 2 + tqdm_mock.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index ea8fe568f..fdfb16d16 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -278,7 +278,6 @@ def test__run_query(): assert 
len(execution_updates) == 3 # one update per API response for line in execution_updates: assert re.match("Query executing: .*s", line) - assert re.match("Query complete after .*s", updates[-1]) def test__run_query_dry_run_without_errors_is_silent(): @@ -597,7 +596,7 @@ def warning_match(warning): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame) @@ -641,7 +640,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=None, create_bqstorage_client=False, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame)