From 7dd3258641ba2f72c4a61e26e8f04c39477b6dfc Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 12:13:17 -0500
Subject: [PATCH 1/6] make tests not rely so much on pyarrow

---
 tests/conftest.py     | 8 ++++----
 tests/test_core.py    | 6 ++++--
 tests/test_io_text.py | 3 ++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index c1c5fc38..b2ba4d0f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,19 +60,19 @@ def ndjson_points_file_str(ndjson_points1_str: str) -> str:
 
 
 @pytest.fixture(scope="session")
-def daa_old(ndjson_points1: str) -> dak.Array:
+def daa(ndjson_points1: str) -> dak.Array:
     return dak.from_json([ndjson_points1] * 3)
 
 
 @pytest.fixture(scope="session")
-def pq_points_dir(daa_old: dak.Array, tmp_path_factory: pytest.TempPathFactory) -> str:
+def pq_points_dir(daa: dak.Array, tmp_path_factory: pytest.TempPathFactory) -> str:
     pqdir = tmp_path_factory.mktemp("pqfiles")
-    dak.to_parquet(daa_old, str(pqdir))
+    dak.to_parquet(daa, str(pqdir))
     return str(pqdir)
 
 
 @pytest.fixture(scope="session")
-def daa(pq_points_dir: str) -> dak.Array:
+def daa_parquet(pq_points_dir: str) -> dak.Array:
     return dak.from_parquet(pq_points_dir)
 
 
diff --git a/tests/test_core.py b/tests/test_core.py
index e23766f1..b6030409 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -46,8 +46,9 @@ def test_clear_divisions(ndjson_points_file: str) -> None:
     assert not daa.known_divisions
 
 
-def test_dunder_str(daa: Array) -> None:
-    assert str(daa) == "dask.awkward"
+def test_dunder_str(caa: ak.Array) -> None:
+    daa = dak.from_awkward(caa, npartitions=2)
+    assert str(daa) == "dask.awkward"
 
 
 def test_calculate_known_divisions(ndjson_points_file: str) -> None:
@@ -820,6 +821,7 @@ def test_map_partitions_no_dask_collections_passed(caa):
 
 @pytest.mark.parametrize("fn", [dak.count, dak.zeros_like, dak.ones_like])
 def test_shape_only_ops(fn: Callable, tmp_path_factory: pytest.TempPathFactory) -> None:
+    pytest.importorskip("pyarrow")
     a = ak.Array([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
     p = tmp_path_factory.mktemp("zeros-like-flat")
     ak.to_parquet(a, str(p / "file.parquet"))
diff --git a/tests/test_io_text.py b/tests/test_io_text.py
index 096f0e7f..783cbe21 100644
--- a/tests/test_io_text.py
+++ b/tests/test_io_text.py
@@ -8,7 +8,8 @@
 from dask_awkward.lib.testutils import assert_eq
 
 
-def test_form_text() -> None:
+def test_from_text() -> None:
+    pytest.importorskip("pyarrow")
     f1 = "https://raw.githubusercontent.com/dask-contrib/dask-awkward/main/README.md"
     f2 = "https://raw.githubusercontent.com/dask-contrib/dask-awkward/main/LICENSE"
 

From 38a40e8dad51fe1182f979bd0980d91f778d3f22 Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 12:22:38 -0500
Subject: [PATCH 2/6] missing import

---
 tests/test_io_text.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_io_text.py b/tests/test_io_text.py
index 783cbe21..6bac855a 100644
--- a/tests/test_io_text.py
+++ b/tests/test_io_text.py
@@ -3,6 +3,7 @@
 import awkward as ak
 import awkward.operations.str as akstr
 import fsspec
+import pytest
 
 import dask_awkward as dak
 from dask_awkward.lib.testutils import assert_eq

From 41887429590ff43e2de5d5f7978827e1b880430a Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 13:15:27 -0500
Subject: [PATCH 3/6] more skips

---
 tests/test_optimize.py  | 2 ++
 tests/test_structure.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/test_optimize.py b/tests/test_optimize.py
index 3aa57531..a68f6771 100644
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
@@ -2,12 +2,14 @@
 
 import awkward as ak
 import dask
+import pytest
 
 import dask_awkward as dak
 from dask_awkward.lib.testutils import assert_eq
 
 
 def test_multiple_computes(pq_points_dir: str) -> None:
+    pytest.importorskip("pyarrow")
     ds1 = dak.from_parquet(pq_points_dir)
     # add a columns= argument to force a new tokenize result in
     # from_parquet so we get two unique collections.
diff --git a/tests/test_structure.py b/tests/test_structure.py
index afb359dc..23a190b4 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -332,6 +332,8 @@ def test_isclose(daa, caa):
 
 
 def test_singletons(daa, tmp_path):
+    pytest.importorskip("pyarrow")
+
     import warnings
 
     path = str(tmp_path)

From 05e31672e5d82e916a5434844db0af7f449a22bc Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 13:24:10 -0500
Subject: [PATCH 4/6] move test from pq to json

---
 tests/test_optimize.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/test_optimize.py b/tests/test_optimize.py
index a68f6771..0247d9d1 100644
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
@@ -8,12 +8,11 @@
 from dask_awkward.lib.testutils import assert_eq
 
 
-def test_multiple_computes(pq_points_dir: str) -> None:
-    pytest.importorskip("pyarrow")
-    ds1 = dak.from_parquet(pq_points_dir)
-    # add a columns= argument to force a new tokenize result in
+def test_multiple_computes(ndjson_points_file: str) -> None:
+    ds1 = dak.from_json([ndjson_points_file] * 2)
+    # add a kwarg argument to force a new tokenize result in
     # from_parquet so we get two unique collections.
-    ds2 = dak.from_parquet(pq_points_dir, columns=["points"])
+    ds2 = dak.from_json([ndjson_points_file] * 2, buffersize=65536 // 2)
 
     lists = [[[1, 2, 3], [4, 5]], [[], [0, 0, 0]]]
     ds3 = dak.from_lists(lists)

From 1b7df84d182a1432744f044e4c3f9d156cf452bd Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 13:25:33 -0500
Subject: [PATCH 5/6] import

---
 tests/test_optimize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_optimize.py b/tests/test_optimize.py
index 0247d9d1..50adf18e 100644
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
@@ -2,7 +2,6 @@
 
 import awkward as ak
 import dask
-import pytest
 
 import dask_awkward as dak
 from dask_awkward.lib.testutils import assert_eq

From 17026ca5d7d1cefc930bee5a1fcb6a64f6b1e34a Mon Sep 17 00:00:00 2001
From: Doug Davis
Date: Thu, 12 Oct 2023 13:43:21 -0500
Subject: [PATCH 6/6] comment

---
 tests/test_optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_optimize.py b/tests/test_optimize.py
index 50adf18e..c9cf8ec0 100644
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
@@ -10,7 +10,7 @@
 def test_multiple_computes(ndjson_points_file: str) -> None:
     ds1 = dak.from_json([ndjson_points_file] * 2)
     # add a kwarg argument to force a new tokenize result in
-    # from_parquet so we get two unique collections.
+    # from_json so we get two unique collections.
     ds2 = dak.from_json([ndjson_points_file] * 2, buffersize=65536 // 2)
 
     lists = [[[1, 2, 3], [4, 5]], [[], [0, 0, 0]]]