Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #3560 Update argsort_benchmark #3838

Merged
merged 2 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion benchmark.ini
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ testpaths =
benchmark_v2/no_op_benchmark.py
benchmark_v2/io_benchmark.py
benchmark_v2/sort_cases_benchmark.py
python_functions = bench_*
python_functions = bench_* check_correctness*
env =
D:ARKOUDA_SERVER_HOST=localhost
D:ARKOUDA_SERVER_PORT=5555
Expand All @@ -34,3 +34,6 @@ env =
D:ARKOUDA_VERBOSE=True
D:ARKOUDA_CLIENT_TIMEOUT=0
D:ARKOUDA_LOG_LEVEL=DEBUG
markers =
skip_correctness_only
skip_numpy
2 changes: 1 addition & 1 deletion benchmark_v2/aggregate_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def run_agg(g, vals, op):

return vals.size + vals.itemsize


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="GroupBy.aggregate")
@pytest.mark.parametrize("op", ak.GroupBy.Reductions)
def bench_aggs(benchmark, op):
Expand Down
54 changes: 52 additions & 2 deletions benchmark_v2/argsort_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import arkouda as ak
import numpy as np
import pytest

import arkouda as ak

TYPES = ("int64", "uint64", "float64", "str")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
def bench_argsort(benchmark, dtype):
"""
Expand Down Expand Up @@ -33,4 +37,50 @@ def bench_argsort(benchmark, dtype):
benchmark.extra_info["description"] = "Measures the performance of ak.argsort"
benchmark.extra_info["problem_size"] = pytest.prob_size
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_numpy(False)
@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
def bench_np_argsort(benchmark, dtype):
    """
    Measures the performance of np.argsort on randomly generated data.

    The array length is ``pytest.prob_size`` scaled by the number of locales
    reported by the Arkouda server, and the data is generated with NumPy's
    legacy global RNG seeded from ``pytest.seed``.

    Parameters
    ----------
    benchmark
        The pytest-benchmark fixture used to time ``np.argsort``.
    dtype : str
        One of the entries in ``TYPES``; nothing is measured when the dtype
        is not listed in ``pytest.dtype``.
    """
    cfg = ak.get_config()
    N = pytest.prob_size * cfg["numLocales"]

    # Nothing to measure when this dtype was not requested for the run.
    if dtype not in pytest.dtype:
        return

    np.random.seed(pytest.seed)
    if dtype == "int64":
        # Explicit 64-bit dtype: the platform default integer can be 32-bit,
        # which cannot represent the 2**32 upper bound.
        a = np.random.randint(0, 2**32, N, dtype=np.int64)
    elif dtype == "uint64":
        a = np.random.randint(0, 2**32, N, dtype=np.uint64)
    elif dtype == "float64":
        a = np.random.random(N)
    elif dtype == "str":
        # np.cast was removed in NumPy 2.0; np.asarray(..., dtype=...) is the
        # documented literal replacement for np.cast[dtype](arg).
        a = np.asarray(np.random.randint(0, 2**32, N, dtype=np.int64), dtype="str")
    else:
        raise ValueError("unsupported dtype for bench_np_argsort: {}".format(dtype))

    benchmark.pedantic(np.argsort, args=[a], rounds=pytest.trials)

    benchmark.extra_info["description"] = "Measures the performance of np.argsort"
    benchmark.extra_info["problem_size"] = pytest.prob_size
    benchmark.extra_info["average_rate"] = "{:.4f} GiB/sec".format(
        ((a.size * a.itemsize) / benchmark.stats["mean"]) / 2**30
    )


@pytest.mark.skip_correctness_only(False)
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("seed", [pytest.seed])
def check_correctness(dtype, seed):
    """
    Verify that ak.argsort returns a permutation that sorts its input.

    A fixed, small problem size is used so the check stays fast regardless
    of the benchmark problem size.

    Parameters
    ----------
    dtype : str
        One of the entries in ``TYPES``.
    seed
        Seed forwarded to the Arkouda random generators.
    """
    N = 10**4
    if dtype == "int64":
        a = ak.randint(0, 2**32, N, seed=seed)
    elif dtype == "uint64":
        a = ak.randint(0, 2**32, N, dtype=ak.uint64, seed=seed)
    elif dtype == "float64":
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
    elif dtype == "str":
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
    else:
        raise ValueError("unsupported dtype for check_correctness: {}".format(dtype))

    perm = ak.argsort(a)
    if dtype == "str":
        # The numeric branch relies on ak.is_sorted; for strings the permuted
        # values are pulled to the client (N is small) and compared against a
        # locally sorted copy so this case no longer passes without checking.
        # NOTE(review): assumes ak.argsort orders Strings lexicographically,
        # matching Python's str ordering - confirm against the Arkouda docs.
        values = a[perm].to_ndarray().tolist()
        assert values == sorted(values)
    else:
        assert ak.is_sorted(a[perm])
4 changes: 2 additions & 2 deletions benchmark_v2/array_create_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _create_np_array(size, op, dtype, seed):

return a


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK Array Create")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand All @@ -56,7 +56,7 @@ def bench_ak_array_create(benchmark, op, dtype):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NP Array Create")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down
3 changes: 2 additions & 1 deletion benchmark_v2/array_transfer_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

TYPES = ("int64", "float64", "bigint")

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="ArrayTransfer_tondarray")
@pytest.mark.parametrize("dtype", TYPES)
def bench_array_transfer_tondarray(benchmark, dtype):
Expand All @@ -27,7 +28,7 @@ def bench_array_transfer_tondarray(benchmark, dtype):
(nb / benchmark.stats["mean"]) / 2 ** 30)
benchmark.extra_info["max_bit"] = pytest.max_bits # useful when looking at bigint


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="ArrayTransfer_ak.array")
@pytest.mark.parametrize("dtype", TYPES)
def bench_array_transfer_akarray(benchmark, dtype):
Expand Down
2 changes: 1 addition & 1 deletion benchmark_v2/bigint_bitwise_binops_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def _perform_or_binop(a, b):
def _perform_shift_binop(a):
return a >> 10


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Bigint Bitwise Binops")
@pytest.mark.parametrize("op", OPS)
def bench_ak_bitwise_binops(benchmark, op):
Expand Down
2 changes: 2 additions & 0 deletions benchmark_v2/bigint_conversion_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import arkouda as ak
import pytest

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="BigInt_Conversion")
def bench_to_bigint(benchmark):
cfg = ak.get_config()
Expand All @@ -25,6 +26,7 @@ def bench_to_bigint(benchmark):
(tot_bytes / benchmark.stats["mean"]) / 2 ** 30)
benchmark.extra_info["max_bits"] = pytest.max_bits

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="BigInt_Conversion")
def bench_from_bigint(benchmark):
cfg = ak.get_config()
Expand Down
4 changes: 2 additions & 2 deletions benchmark_v2/coargsort_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
TYPES = ["int64", "uint64", "float64", "str"]
NUM_ARR = [1, 2, 8, 16]


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_CoArgSort")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down Expand Up @@ -38,7 +38,7 @@ def bench_coargsort(benchmark, dtype, numArrays):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NumPy_CoArgSort")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down
22 changes: 22 additions & 0 deletions benchmark_v2/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@ def pytest_addoption(parser):
default=os.path.join(os.getcwd(), "ak_io_benchmark"),
help="Benchmark only option. Target path for measuring read/write rates",
)
parser.addoption(
"--correctness_only",
default=False,
action="store_true",
help="Only check correctness, not performance.",
)


def pytest_configure(config):
Expand Down Expand Up @@ -157,6 +163,8 @@ def pytest_configure(config):
pytest.io_read = config.getoption("io_only_read")
pytest.io_write = config.getoption("io_only_write")

pytest.correctness_only = config.getoption("correctness_only")


@pytest.fixture(scope="module", autouse=True)
def startup_teardown():
Expand Down Expand Up @@ -208,3 +216,17 @@ def manage_connection():
ak.disconnect()
except Exception as e:
raise ConnectionError(e)


@pytest.fixture(autouse=True)
def skip_correctness_only(request):
    """
    Skip a test whose ``skip_correctness_only`` marker matches the run mode.

    The marker's first positional argument is compared against
    ``pytest.correctness_only``; when they are equal the test is skipped.
    Tests without the marker are left untouched.
    """
    marker = request.node.get_closest_marker("skip_correctness_only")
    if marker is None:
        return

    mode = pytest.correctness_only
    if marker.args[0] == mode:
        pytest.skip("this test requires --correctness_only != {}".format(mode))


@pytest.fixture(autouse=True)
def skip_numpy(request):
    """
    Skip a test whose ``skip_numpy`` marker matches the NumPy run setting.

    The marker's first positional argument is compared against
    ``pytest.numpy``; when they are equal the test is skipped. Tests without
    the marker are left untouched.
    """
    marker = request.node.get_closest_marker("skip_numpy")
    if marker is None:
        return

    numpy_enabled = pytest.numpy
    if marker.args[0] == numpy_enabled:
        pytest.skip("this test requires --numpy != {}".format(numpy_enabled))
2 changes: 1 addition & 1 deletion benchmark_v2/dataframe_indexing_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def generate_dataframe():
)
return ak.DataFrame(df_dict)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Dataframe_Indexing")
@pytest.mark.parametrize("op", OPS)
def bench_ak_dataframe(benchmark, op):
Expand Down
3 changes: 2 additions & 1 deletion benchmark_v2/encoding_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
ENCODINGS = ("idna", "ascii")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Strings_EncodeDecode")
@pytest.mark.parametrize("encoding", ENCODINGS)
def bench_encode(benchmark, encoding):
Expand All @@ -19,7 +20,7 @@ def bench_encode(benchmark, encoding):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Strings_EncodeDecode")
@pytest.mark.parametrize("encoding", ENCODINGS)
def bench_decode(benchmark, encoding):
Expand Down
4 changes: 2 additions & 2 deletions benchmark_v2/gather_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def _run_gather(a, i):
"""
return a[i]


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK_Gather")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_gather(benchmark, dtype):
Expand Down Expand Up @@ -56,7 +56,7 @@ def bench_ak_gather(benchmark, dtype):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(bytes_per_sec / 2 ** 30))


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NumPy_Gather")
@pytest.mark.parametrize("dtype", TYPES)
def bench_np_gather(benchmark, dtype):
Expand Down
2 changes: 1 addition & 1 deletion benchmark_v2/groupby_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def generate_arrays(dtype, numArrays):
arrays = arrays[0]
return arrays, totalbytes


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="GroupBy_Creation")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down
2 changes: 1 addition & 1 deletion benchmark_v2/in1d_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
SIZES = {"MEDIUM": THRESHOLD - 1, "LARGE": THRESHOLD + 1}
MAXSTRLEN = 5


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_in1d")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("size", SIZES)
Expand Down
16 changes: 8 additions & 8 deletions benchmark_v2/io_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _generate_df(N, dtype, returnDict=False):
}
return df_dict if returnDict else ak.DataFrame(df_dict)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_HDF5")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_write_hdf(benchmark, dtype):
Expand All @@ -90,7 +90,7 @@ def bench_ak_write_hdf(benchmark, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
Expand All @@ -117,7 +117,7 @@ def bench_ak_write_parquet(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
Expand Down Expand Up @@ -147,7 +147,7 @@ def bench_ak_write_parquet_multi(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
Expand Down Expand Up @@ -176,7 +176,7 @@ def bench_ak_write_parquet_append(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_HDF5")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_read_hdf(benchmark, dtype):
Expand All @@ -198,7 +198,7 @@ def bench_ak_read_hdf(benchmark, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
Expand All @@ -221,7 +221,7 @@ def bench_ak_read_parquet(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
Expand All @@ -248,7 +248,7 @@ def bench_ak_read_parquet_multi_column(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Delete")
def bench_ak_delete(benchmark):
if pytest.io_delete or (not pytest.io_write and not pytest.io_read):
Expand Down
4 changes: 2 additions & 2 deletions benchmark_v2/no_op_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

SECONDS = pytest.trials


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_No_Op",
max_time=SECONDS
)
Expand All @@ -17,7 +17,7 @@ def bench_ak_noop(benchmark):
benchmark.extra_info["transfer_rate"] = f"{benchmark.stats['rounds'] / benchmark.stats['total']:.4f} " \
f"operations per second"


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_No_Op",
max_time=SECONDS
)
Expand Down
4 changes: 2 additions & 2 deletions benchmark_v2/reduce_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
OPS = ("sum", "prod", "min", "max")
TYPES = ("int64", "float64")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_Reduce")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down Expand Up @@ -33,7 +33,7 @@ def bench_ak_reduce(benchmark, op, dtype):
(nbytes / benchmark.stats["mean"]) / 2 ** 30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Numpy_Reduce")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down
4 changes: 2 additions & 2 deletions benchmark_v2/scan_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
OPS = ("cumsum", "cumprod")
TYPES = ("int64", "float64")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK_Scan")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down Expand Up @@ -35,7 +35,7 @@ def bench_ak_scan(benchmark, op, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Numpy_Scan")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
Expand Down
Loading
Loading