diff --git a/ibis/backends/bigquery/tests/conftest.py b/ibis/backends/bigquery/tests/conftest.py index 368b504d30fd..e9fc09d54dbe 100644 --- a/ibis/backends/bigquery/tests/conftest.py +++ b/ibis/backends/bigquery/tests/conftest.py @@ -1,8 +1,15 @@ import os +from pathlib import Path import pytest import ibis +import ibis.expr.types as ir +from ibis.backends.tests.base import ( + BackendTest, + RoundAwayFromZero, + UnorderedComparator, +) PROJECT_ID = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID', 'ibis-gbq') DATASET_ID = 'testing' @@ -49,6 +56,34 @@ def connect(project_id, dataset_id, application_name=None): pytest.skip(skip_message) +class BigQueryTest(UnorderedComparator, BackendTest, RoundAwayFromZero): + supports_divide_by_zero = True + supports_floating_modulus = False + returned_timestamp_unit = 'us' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + project_id = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID') + if project_id is None: + pytest.skip( + 'Environment variable GOOGLE_BIGQUERY_PROJECT_ID ' + 'not defined' + ) + elif not project_id: + pytest.skip( + 'Environment variable GOOGLE_BIGQUERY_PROJECT_ID is empty' + ) + return connect(project_id, dataset_id='testing') + + @property + def batting(self) -> ir.TableExpr: + return None + + @property + def awards_players(self) -> ir.TableExpr: + return None + + @pytest.fixture(scope='session') def project_id(): return PROJECT_ID diff --git a/ibis/backends/clickhouse/tests/conftest.py b/ibis/backends/clickhouse/tests/conftest.py index 90cf860d5822..1524698637c0 100644 --- a/ibis/backends/clickhouse/tests/conftest.py +++ b/ibis/backends/clickhouse/tests/conftest.py @@ -1,8 +1,16 @@ import os +from pathlib import Path +from typing import Callable import pytest import ibis +import ibis.expr.types as ir +from ibis.backends.tests.base import ( + BackendTest, + RoundHalfToEven, + UnorderedComparator, +) CLICKHOUSE_HOST = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST', 'localhost') CLICKHOUSE_PORT = int(os.environ.get('IBIS_TEST_CLICKHOUSE_PORT', 9000)) @@ -11,6 +19,56 @@ IBIS_TEST_CLICKHOUSE_DB = os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing') +class ClickhouseTest(UnorderedComparator, BackendTest, RoundHalfToEven): + check_dtype = False + supports_window_operations = False + returned_timestamp_unit = 's' + supported_to_timestamp_units = {'s'} + supports_floating_modulus = False + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + host = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST', 'localhost') + port = int(os.environ.get('IBIS_TEST_CLICKHOUSE_PORT', 9000)) + user = os.environ.get('IBIS_TEST_CLICKHOUSE_USER', 'default') + password = os.environ.get('IBIS_TEST_CLICKHOUSE_PASSWORD', '') + database = os.environ.get( + 'IBIS_TEST_CLICKHOUSE_DATABASE', 'ibis_testing' + ) + return ibis.clickhouse.connect( + host=host, + port=port, + password=password, + database=database, + user=user, + ) + + @property + def functional_alltypes(self) -> ir.TableExpr: + t = super().functional_alltypes + return t.mutate(bool_col=t.bool_col == 1) + + @staticmethod + def greatest( + f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr + ) -> ir.ValueExpr: + if len(args) > 2: + raise NotImplementedError( + 'Clickhouse does not support more than 2 arguments to greatest' + ) + return f(*args) + + @staticmethod + def least( + f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr + ) -> ir.ValueExpr: + if len(args) > 2: + raise NotImplementedError( + 'Clickhouse does not support more than 2 arguments to least' + ) + return 
f(*args) + + @pytest.fixture(scope='module') def con(): return ibis.clickhouse.connect( diff --git a/ibis/backends/csv/tests/conftest.py b/ibis/backends/csv/tests/conftest.py index bc9cddd527a4..9d723c18344a 100644 --- a/ibis/backends/csv/tests/conftest.py +++ b/ibis/backends/csv/tests/conftest.py @@ -1,7 +1,65 @@ +from pathlib import Path + import pandas as pd import pytest +import ibis +import ibis.expr.datatypes as dt +import ibis.expr.types as ir from ibis.backends.csv import CSVClient +from ibis.backends.pandas.tests.conftest import PandasTest + + +class CsvTest(PandasTest): + check_names = False + supports_divide_by_zero = True + returned_timestamp_unit = 'ns' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + filename = data_directory / 'functional_alltypes.csv' + if not filename.exists(): + pytest.skip('test data set {} not found'.format(filename)) + return ibis.csv.connect(data_directory) + + @property + def functional_alltypes(self) -> ir.TableExpr: + schema = ibis.schema( + [ + ('bool_col', dt.boolean), + ('string_col', dt.string), + ('timestamp_col', dt.timestamp), + ] + ) + return self.connection.table('functional_alltypes', schema=schema) + + @property + def batting(self) -> ir.TableExpr: + schema = ibis.schema( + [ + ('lgID', dt.string), + ('G', dt.float64), + ('AB', dt.float64), + ('R', dt.float64), + ('H', dt.float64), + ('X2B', dt.float64), + ('X3B', dt.float64), + ('HR', dt.float64), + ('RBI', dt.float64), + ('SB', dt.float64), + ('CS', dt.float64), + ('BB', dt.float64), + ('SO', dt.float64), + ] + ) + return self.connection.table('batting', schema=schema) + + @property + def awards_players(self) -> ir.TableExpr: + schema = ibis.schema( + [('lgID', dt.string), ('tie', dt.string), ('notes', dt.string)] + ) + return self.connection.table('awards_players', schema=schema) @pytest.fixture diff --git a/ibis/backends/dask/tests/conftest.py b/ibis/backends/dask/tests/conftest.py index e3179dba744a..443131ac96f5 100644 --- a/ibis/backends/dask/tests/conftest.py +++ b/ibis/backends/dask/tests/conftest.py @@ -2,9 +2,16 @@ import pandas as pd import pytest +from ibis.backends.pandas.tests.conftest import PandasTest + from .. 
import connect +class DaskTest(PandasTest): + # clone pandas directly until the rest of the dask backend is defined + pass + + @pytest.fixture def dataframe(): return dd.from_pandas( diff --git a/ibis/backends/hdf5/tests/conftest.py b/ibis/backends/hdf5/tests/conftest.py index c3f2718bb5be..008871b41239 100644 --- a/ibis/backends/hdf5/tests/conftest.py +++ b/ibis/backends/hdf5/tests/conftest.py @@ -1,6 +1,23 @@ +from pathlib import Path + import pytest +import ibis from ibis.backends.hdf5 import HDFClient +from ibis.backends.pandas.tests.conftest import PandasTest + + +class HDF5Test(PandasTest): + check_names = False + supports_divide_by_zero = True + returned_timestamp_unit = 'ns' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + filename = data_directory / 'functional_alltypes.h5' + if not filename.exists(): + pytest.skip('test data set {} not found'.format(filename)) + return ibis.hdf5.connect(data_directory) @pytest.fixture diff --git a/ibis/backends/impala/tests/conftest.py b/ibis/backends/impala/tests/conftest.py index dfbad0d3a024..076c35c9319e 100644 --- a/ibis/backends/impala/tests/conftest.py +++ b/ibis/backends/impala/tests/conftest.py @@ -1,15 +1,61 @@ import inspect import os import warnings +from pathlib import Path import pytest import ibis +import ibis.expr.types as ir import ibis.util as util from ibis import options +from ibis.backends.tests.base import ( + BackendTest, + RoundAwayFromZero, + UnorderedComparator, +) from ibis.tests.expr.mocks import MockConnection +class ImpalaTest(UnorderedComparator, BackendTest, RoundAwayFromZero): + supports_arrays = True + supports_arrays_outside_of_select = False + check_dtype = False + supports_divide_by_zero = True + returned_timestamp_unit = 's' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + from ibis.backends.impala.tests.conftest import IbisTestEnv + + env = IbisTestEnv() + hdfs_client = ibis.impala.hdfs_connect( + host=env.nn_host, + port=env.webhdfs_port, + auth_mechanism=env.auth_mechanism, + verify=env.auth_mechanism not in ['GSSAPI', 'LDAP'], + user=env.webhdfs_user, + ) + auth_mechanism = env.auth_mechanism + if auth_mechanism == 'GSSAPI' or auth_mechanism == 'LDAP': + print("Warning: ignoring invalid Certificate Authority errors") + return ibis.impala.connect( + host=env.impala_host, + port=env.impala_port, + auth_mechanism=env.auth_mechanism, + hdfs_client=hdfs_client, + database='ibis_testing', + ) + + @property + def batting(self) -> ir.TableExpr: + return None + + @property + def awards_players(self) -> ir.TableExpr: + return None + + def isproperty(obj): return isinstance(obj, property) diff --git a/ibis/tests/all/__init__.py b/ibis/backends/mysql/tests/__init__.py similarity index 100% rename from ibis/tests/all/__init__.py rename to ibis/backends/mysql/tests/__init__.py diff --git a/ibis/backends/mysql/tests/conftest.py b/ibis/backends/mysql/tests/conftest.py new file mode 100644 index 000000000000..c22854c1f804 --- /dev/null +++ b/ibis/backends/mysql/tests/conftest.py @@ -0,0 +1,60 @@ +import os +from pathlib import Path + +from pkg_resources import parse_version + +import ibis +from ibis.backends.tests.base import BackendTest, RoundHalfToEven + + +class MySQLTest(BackendTest, RoundHalfToEven): + # mysql has the same rounding behavior as postgres + check_dtype = False + supports_window_operations = False + returned_timestamp_unit = 's' + + def __init__(self, data_directory: Path) -> None: + super().__init__(data_directory) + # mariadb supports window 
operations after version 10.2 + # but the sqlalchemy version string looks like: + # 5.5.5.10.2.12.MariaDB.10.2.12+maria~jessie + # or 10.4.12.MariaDB.1:10.4.12+maria~bionic + # example of possible results: + # https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_3/ + # test/dialect/mysql/test_dialect.py#L244-L268 + con = self.connection + if 'MariaDB' in str(con.version): + # we might move this parsing step to the mysql client + version_detail = con.con.dialect._parse_server_version( + str(con.version) + ) + version = ( + version_detail[:3] + if version_detail[3] == 'MariaDB' + else version_detail[3:6] + ) + self.__class__.supports_window_operations = version >= (10, 2) + elif con.version >= parse_version('8.0'): + # mysql supports window operations after version 8 + self.__class__.supports_window_operations = True + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + user = os.environ.get('IBIS_TEST_MYSQL_USER', 'ibis') + password = os.environ.get('IBIS_TEST_MYSQL_PASSWORD', 'ibis') + host = os.environ.get('IBIS_TEST_MYSQL_HOST', 'localhost') + port = os.environ.get('IBIS_TEST_MYSQL_PORT', 3306) + database = os.environ.get('IBIS_TEST_MYSQL_DATABASE', 'ibis_testing') + return ibis.mysql.connect( + host=host, + port=port, + user=user, + password=password, + database=database, + ) + + @property + def functional_alltypes(self): + # BOOLEAN <-> TINYINT(1) + t = super().functional_alltypes + return t.mutate(bool_col=t.bool_col == 1) diff --git a/ibis/backends/omniscidb/tests/conftest.py b/ibis/backends/omniscidb/tests/conftest.py index 88d9eb18b68e..61f70310b19b 100644 --- a/ibis/backends/omniscidb/tests/conftest.py +++ b/ibis/backends/omniscidb/tests/conftest.py @@ -1,12 +1,17 @@ """OmniSciDB test configuration module.""" import os import typing +from pathlib import Path +from typing import Optional import pandas import pytest import ibis +import ibis.expr.operations as ops +import ibis.expr.types as ir import ibis.util as util +from ibis.backends.tests.base import BackendTest, RoundAwayFromZero OMNISCIDB_HOST = os.environ.get('IBIS_TEST_OMNISCIDB_HOST', 'localhost') OMNISCIDB_PORT = int(os.environ.get('IBIS_TEST_OMNISCIDB_PORT', 6274)) @@ -18,6 +23,52 @@ OMNISCIDB_DB = os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing') +class OmniSciDBTest(BackendTest, RoundAwayFromZero): + check_dtype = False + check_names = False + supports_window_operations = True + supports_divide_by_zero = False + supports_floating_modulus = False + returned_timestamp_unit = 's' + # Exception: Non-empty LogicalValues not supported yet + additional_skipped_operations = frozenset( + { + ops.Abs, + ops.Ceil, + ops.Floor, + ops.Exp, + ops.Sign, + ops.Sqrt, + ops.Ln, + ops.Log10, + ops.Modulus, + } + ) + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + user = os.environ.get('IBIS_TEST_OMNISCIDB_USER', 'admin') + password = os.environ.get( + 'IBIS_TEST_OMNISCIDB_PASSWORD', 'HyperInteractive' + ) + host = os.environ.get('IBIS_TEST_OMNISCIDB_HOST', 'localhost') + port = os.environ.get('IBIS_TEST_OMNISCIDB_PORT', '6274') + database = os.environ.get( + 'IBIS_TEST_OMNISCIDB_DATABASE', 'ibis_testing' + ) + return ibis.omniscidb.connect( + host=host, + port=port, + user=user, + password=password, + database=database, + ) + + @property + def geo(self) -> Optional[ir.TableExpr]: + return self.db.geo + + @pytest.fixture(scope='module') def con(): """Define a connection fixture. 
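Each backend conftest in this patch follows the same pattern: subclass BackendTest (directly, or via PandasTest for the file-based backends) together with a rounding-convention mixin, override the capability flags the shared suite consults, and implement a static connect(). As a minimal sketch of that pattern for a hypothetical new backend (the 'mybackend' names below are illustrative only, not part of this patch):

import os
from pathlib import Path

import ibis
from ibis.backends.tests.base import BackendTest, RoundHalfToEven


class MybackendTest(BackendTest, RoundHalfToEven):
    # capability flags consumed by the shared suite; defaults live on BackendTest
    check_dtype = False
    returned_timestamp_unit = 's'

    @staticmethod
    def connect(data_directory: Path) -> ibis.client.Client:
        # hypothetical connection; real backends read IBIS_TEST_* variables
        host = os.environ.get('IBIS_TEST_MYBACKEND_HOST', 'localhost')
        return ibis.mybackend.connect(host=host, database='ibis_testing')

The class name is significant: the discovery logic added in ibis/backends/tests/conftest.py further down keeps a class only when obj.name() == backend, and BackendTest.name() lowercases the class name and strips the trailing 'Test', so MybackendTest must live in ibis/backends/mybackend/tests/conftest.py.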
diff --git a/ibis/backends/pandas/tests/conftest.py b/ibis/backends/pandas/tests/conftest.py new file mode 100644 index 000000000000..29f9f2db60a3 --- /dev/null +++ b/ibis/backends/pandas/tests/conftest.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import pandas as pd + +import ibis +import ibis.expr.operations as ops +from ibis.backends.tests.base import BackendTest, RoundHalfToEven + + +class PandasTest(BackendTest, RoundHalfToEven): + check_names = False + additional_skipped_operations = frozenset({ops.StringSQLLike}) + supported_to_timestamp_units = BackendTest.supported_to_timestamp_units | { + 'ns' + } + supports_divide_by_zero = True + returned_timestamp_unit = 'ns' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + return ibis.pandas.connect( + { + 'functional_alltypes': pd.read_csv( + str(data_directory / 'functional_alltypes.csv'), + index_col=None, + dtype={'bool_col': bool, 'string_col': str}, + parse_dates=['timestamp_col'], + encoding='utf-8', + ), + 'batting': pd.read_csv(str(data_directory / 'batting.csv')), + 'awards_players': pd.read_csv( + str(data_directory / 'awards_players.csv') + ), + } + ) diff --git a/ibis/backends/parquet/tests/conftest.py b/ibis/backends/parquet/tests/conftest.py index 12e63acb7bae..6adabc31160a 100644 --- a/ibis/backends/parquet/tests/conftest.py +++ b/ibis/backends/parquet/tests/conftest.py @@ -1,10 +1,27 @@ +from pathlib import Path + import pyarrow as pa import pyarrow.parquet as pq import pytest +import ibis +from ibis.backends.pandas.tests.conftest import PandasTest from ibis.backends.parquet import ParquetClient +class ParquetTest(PandasTest): + check_names = False + supports_divide_by_zero = True + returned_timestamp_unit = 'ns' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + filename = data_directory / 'functional_alltypes.parquet' + if not filename.exists(): + pytest.skip('test data set {} not found'.format(filename)) + return ibis.parquet.connect(data_directory) + + @pytest.fixture def parquet(tmpdir, file_backends_data): # create single files diff --git a/ibis/backends/postgres/tests/conftest.py b/ibis/backends/postgres/tests/conftest.py index 290bb34c56a6..286bb2716470 100644 --- a/ibis/backends/postgres/tests/conftest.py +++ b/ibis/backends/postgres/tests/conftest.py @@ -14,10 +14,14 @@ import os +from pathlib import Path +from typing import Optional import pytest import ibis +import ibis.expr.types as ir +from ibis.backends.tests.base import BackendTest, RoundHalfToEven PG_USER = os.environ.get( 'IBIS_TEST_POSTGRES_USER', os.environ.get('PGUSER', 'postgres') @@ -36,6 +40,48 @@ ) +class PostgresTest(BackendTest, RoundHalfToEven): + # postgres rounds half to even for double precision and half away from zero + # for numeric and decimal + + returned_timestamp_unit = 's' + + @property + def name(self) -> str: + return 'postgres' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + user = os.environ.get( + 'IBIS_TEST_POSTGRES_USER', os.environ.get('PGUSER', 'postgres') + ) + password = os.environ.get( + 'IBIS_TEST_POSTGRES_PASSWORD', os.environ.get('PGPASS', 'postgres') + ) + host = os.environ.get( + 'IBIS_TEST_POSTGRES_HOST', os.environ.get('PGHOST', 'localhost') + ) + port = os.environ.get( + 'IBIS_TEST_POSTGRES_PORT', os.environ.get('PGPORT', '5432') + ) + database = os.environ.get( + 'IBIS_TEST_POSTGRES_DATABASE', + os.environ.get('PGDATABASE', 'ibis_testing'), + ) + return ibis.postgres.connect( + host=host, + port=port, + user=user, + 
password=password, + database=database, + ) + + @property + def geo(self) -> Optional[ir.TableExpr]: + if 'geo' in self.db.list_tables(): + return self.db.geo + + def _random_identifier(suffix): return '__ibis_test_{}_{}'.format(suffix, ibis.util.guid()) diff --git a/ibis/backends/pyspark/tests/conftest.py b/ibis/backends/pyspark/tests/conftest.py index baab8dcb83fa..5d23a6d34fc6 100644 --- a/ibis/backends/pyspark/tests/conftest.py +++ b/ibis/backends/pyspark/tests/conftest.py @@ -4,6 +4,32 @@ import pytest import ibis +import ibis.expr.types as ir +from ibis.backends.tests.base import ( + BackendTest, + RoundAwayFromZero, + get_pyspark_testing_client, +) + + +class PySparkTest(BackendTest, RoundAwayFromZero): + supported_to_timestamp_units = {'s'} + + @staticmethod + def connect(data_directory): + return get_pyspark_testing_client(data_directory) + + @property + def functional_alltypes(self) -> ir.TableExpr: + return self.connection.table('functional_alltypes') + + @property + def batting(self) -> ir.TableExpr: + return self.connection.table('batting') + + @property + def awards_players(self) -> ir.TableExpr: + return self.connection.table('awards_players') @pytest.fixture(scope='session') diff --git a/ibis/backends/spark/tests/conftest.py b/ibis/backends/spark/tests/conftest.py index 9c7eee3fcaaf..230242440855 100644 --- a/ibis/backends/spark/tests/conftest.py +++ b/ibis/backends/spark/tests/conftest.py @@ -2,8 +2,31 @@ import pytest +import ibis.expr.types as ir import ibis.util as util -from ibis.tests.all.conftest import get_spark_testing_client +from ibis.backends.tests.base import ( + BackendTest, + RoundHalfToEven, + get_spark_testing_client, +) + + +class SparkTest(BackendTest, RoundHalfToEven): + @staticmethod + def connect(data_directory): + return get_spark_testing_client(data_directory) + + @property + def functional_alltypes(self) -> ir.TableExpr: + return self.connection.table('functional_alltypes') + + @property + def batting(self) -> ir.TableExpr: + return self.connection.table('batting') + + @property + def awards_players(self) -> ir.TableExpr: + return self.connection.table('awards_players') @pytest.fixture(scope='session', autouse=True) diff --git a/ibis/backends/spark/tests/test_udf.py b/ibis/backends/spark/tests/test_udf.py index d011e45d8066..229c52ab5a8a 100644 --- a/ibis/backends/spark/tests/test_udf.py +++ b/ibis/backends/spark/tests/test_udf.py @@ -7,7 +7,7 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.types as ir -from ibis.tests.backends import Spark +from ibis.backends.spark.tests.conftest import SparkTest pytestmark = [pytest.mark.spark, pytest.mark.udf] @@ -162,7 +162,7 @@ def test_udf(t, df, fn): result = expr.execute() expected = df.a.str.len().mul(2) - expected = Spark.default_series_rename(expected) + expected = SparkTest.default_series_rename(expected) tm.assert_series_equal(result, expected) @@ -197,7 +197,7 @@ def test_multiple_argument_udf(con, t, df, fn): result = expr.execute() expected = df.b + df.c - expected = Spark.default_series_rename(expected) + expected = SparkTest.default_series_rename(expected) tm.assert_series_equal(result, expected) @@ -226,7 +226,7 @@ def test_udaf(con, t, df): result = expr.execute() expected = t.a.execute().str.len().mul(2).sum() - expected = Spark.default_series_rename(expected) + expected = SparkTest.default_series_rename(expected) assert result == expected @@ -288,7 +288,7 @@ def test_compose_udfs(t_random, df_random, times_two_fn, add_one_fn): expr = 
times_two_fn(add_one_fn(t_random.a)) result = expr.execute() expected = df_random.a.add(1.0).mul(2.0) - expected = Spark.default_series_rename(expected) + expected = SparkTest.default_series_rename(expected) tm.assert_series_equal(expected, result) @@ -368,5 +368,5 @@ def test_array_return_type_reduction_window(con, t_random, df_random, qs): result = expr.execute() expected_raw = df_random.b.quantile(qs).tolist() expected = pd.Series([expected_raw] * len(df_random)) - expected = Spark.default_series_rename(expected) + expected = SparkTest.default_series_rename(expected) tm.assert_series_equal(result, expected) diff --git a/ibis/backends/sqlite/tests/conftest.py b/ibis/backends/sqlite/tests/conftest.py index fd6e5d39c64c..6d0417965faf 100644 --- a/ibis/backends/sqlite/tests/conftest.py +++ b/ibis/backends/sqlite/tests/conftest.py @@ -1,8 +1,35 @@ import os +from pathlib import Path import pytest import ibis +import ibis.expr.types as ir +from ibis.backends.tests.base import BackendTest, RoundAwayFromZero + + +class SQLiteTest(BackendTest, RoundAwayFromZero): + supports_arrays = False + supports_arrays_outside_of_select = supports_arrays + supports_window_operations = True + check_dtype = False + returned_timestamp_unit = 's' + + @staticmethod + def connect(data_directory: Path) -> ibis.client.Client: + path = Path( + os.environ.get( + 'IBIS_TEST_SQLITE_DATABASE', data_directory / 'ibis_testing.db' + ) + ) + if not path.exists(): + pytest.skip('SQLite testing db {} does not exist'.format(path)) + return ibis.sqlite.connect(str(path)) + + @property + def functional_alltypes(self) -> ir.TableExpr: + t = self.db.functional_alltypes + return t.mutate(timestamp_col=t.timestamp_col.cast('timestamp')) @pytest.fixture(scope='module') diff --git a/ibis/backends/tests/__init__.py b/ibis/backends/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ibis/backends/tests/base.py b/ibis/backends/tests/base.py new file mode 100644 index 000000000000..7aba2bf10725 --- /dev/null +++ b/ibis/backends/tests/base.py @@ -0,0 +1,327 @@ +import abc +from pathlib import Path +from typing import Any, Callable, Mapping, Optional + +import numpy as np +import pandas as pd +import pandas.testing as tm +import pytest + +import ibis +import ibis.backends.base_sqlalchemy.compiler as comp +import ibis.expr.types as ir + + +# TODO: Merge into BackendTest, #2564 +class RoundingConvention: + @staticmethod + @abc.abstractmethod + def round(series: pd.Series, decimals: int = 0) -> pd.Series: + """Round a series to `decimals` number of decimal values.""" + + +# TODO: Merge into BackendTest, #2564 +class RoundAwayFromZero(RoundingConvention): + @staticmethod + def round(series: pd.Series, decimals: int = 0) -> pd.Series: + if not decimals: + return ( + -(np.sign(series)) * np.ceil(-(series.abs()) - 0.5) + ).astype(np.int64) + return series.round(decimals=decimals) + + +# TODO: Merge into BackendTest, #2564 +class RoundHalfToEven(RoundingConvention): + @staticmethod + def round(series: pd.Series, decimals: int = 0) -> pd.Series: + result = series.round(decimals=decimals) + return result if decimals else result.astype(np.int64) + + +# TODO: Merge into BackendTest, #2564 +class UnorderedComparator: + @classmethod + def assert_series_equal( + cls, left: pd.Series, right: pd.Series, *args: Any, **kwargs: Any + ) -> None: + left = left.sort_values().reset_index(drop=True) + right = right.sort_values().reset_index(drop=True) + return super().assert_series_equal(left, right, *args, **kwargs) + + @classmethod 
+    def assert_frame_equal(
+        cls, left: pd.DataFrame, right: pd.DataFrame, *args: Any, **kwargs: Any
+    ) -> None:
+        columns = list(set(left.columns) & set(right.columns))
+        left = left.sort_values(by=columns)
+        right = right.sort_values(by=columns)
+        return super().assert_frame_equal(left, right, *args, **kwargs)
+
+
+class BackendTest(abc.ABC):
+    check_dtype = True
+    check_names = True
+    supports_arrays = True
+    supports_arrays_outside_of_select = supports_arrays
+    supports_window_operations = True
+    additional_skipped_operations = frozenset()
+    supports_divide_by_zero = False
+    returned_timestamp_unit = 'us'
+    supported_to_timestamp_units = {'s', 'ms', 'us'}
+    supports_floating_modulus = True
+
+    def __init__(self, data_directory: Path) -> None:
+        self.api  # skips if we can't access the backend
+        self.connection = self.connect(data_directory)
+
+    @classmethod
+    def name(cls) -> str:
+        return cls.__name__.lower()[: -len('Test')]
+
+    def __str__(self) -> str:
+        return self.__class__.__name__
+
+    @staticmethod
+    @abc.abstractmethod
+    def connect(data_directory: Path) -> ibis.client.Client:
+        """Return a connection with data loaded from `data_directory`."""
+
+    @classmethod
+    def assert_series_equal(
+        cls, left: pd.Series, right: pd.Series, *args: Any, **kwargs: Any
+    ) -> None:
+        kwargs.setdefault('check_dtype', cls.check_dtype)
+        kwargs.setdefault('check_names', cls.check_names)
+        tm.assert_series_equal(left, right, *args, **kwargs)
+
+    @classmethod
+    def assert_frame_equal(
+        cls, left: pd.DataFrame, right: pd.DataFrame, *args: Any, **kwargs: Any
+    ) -> None:
+        left = left.reset_index(drop=True)
+        right = right.reset_index(drop=True)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
+    @staticmethod
+    def default_series_rename(
+        series: pd.Series, name: str = 'tmp'
+    ) -> pd.Series:
+        return series.rename(name)
+
+    @staticmethod
+    def greatest(
+        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
+    ) -> ir.ValueExpr:
+        return f(*args)
+
+    @staticmethod
+    def least(
+        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
+    ) -> ir.ValueExpr:
+        return f(*args)
+
+    @property
+    def db(self) -> ibis.client.Database:
+        return self.connection.database()
+
+    @property
+    def functional_alltypes(self) -> ir.TableExpr:
+        return self.db.functional_alltypes
+
+    @property
+    def batting(self) -> ir.TableExpr:
+        return self.db.batting
+
+    @property
+    def awards_players(self) -> ir.TableExpr:
+        return self.db.awards_players
+
+    @property
+    def geo(self) -> Optional[ir.TableExpr]:
+        return None
+
+    @property
+    def api(self):
+        return getattr(ibis, self.name)
+
+    def make_context(
+        self, params: Optional[Mapping[ir.ValueExpr, Any]] = None
+    ) -> comp.QueryContext:
+        return self.api.dialect.make_context(params=params)
+
+
+# TODO move to the spark/pyspark backends, #2565
+_spark_testing_client = None
+_pyspark_testing_client = None
+
+
+# TODO move to the spark/pyspark backends, #2565
+def get_spark_testing_client(data_directory):
+    global _spark_testing_client
+    if _spark_testing_client is None:
+        _spark_testing_client = get_common_spark_testing_client(
+            data_directory, lambda session: ibis.spark.connect(session)
+        )
+    return _spark_testing_client
+
+
+# TODO move to the spark/pyspark backends, #2565
+def get_pyspark_testing_client(data_directory):
+    global _pyspark_testing_client
+    if _pyspark_testing_client is None:
+        _pyspark_testing_client = get_common_spark_testing_client(
+            data_directory, lambda session: ibis.pyspark.connect(session)
+        )
+    return _pyspark_testing_client
+
+
+# TODO move to the
spark/pyspark backends, #2565 +def get_common_spark_testing_client(data_directory, connect): + pytest.importorskip('pyspark') + import pyspark.sql.types as pt + from pyspark.sql import SparkSession + + spark = SparkSession.builder.getOrCreate() + _spark_testing_client = connect(spark) + s = _spark_testing_client._session + + df_functional_alltypes = s.read.csv( + path=str(data_directory / 'functional_alltypes.csv'), + schema=pt.StructType( + [ + pt.StructField('index', pt.IntegerType(), True), + pt.StructField('Unnamed: 0', pt.IntegerType(), True), + pt.StructField('id', pt.IntegerType(), True), + # cast below, Spark can't read 0/1 as bool + pt.StructField('bool_col', pt.ByteType(), True), + pt.StructField('tinyint_col', pt.ByteType(), True), + pt.StructField('smallint_col', pt.ShortType(), True), + pt.StructField('int_col', pt.IntegerType(), True), + pt.StructField('bigint_col', pt.LongType(), True), + pt.StructField('float_col', pt.FloatType(), True), + pt.StructField('double_col', pt.DoubleType(), True), + pt.StructField('date_string_col', pt.StringType(), True), + pt.StructField('string_col', pt.StringType(), True), + pt.StructField('timestamp_col', pt.TimestampType(), True), + pt.StructField('year', pt.IntegerType(), True), + pt.StructField('month', pt.IntegerType(), True), + ] + ), + mode='FAILFAST', + header=True, + ) + df_functional_alltypes = df_functional_alltypes.withColumn( + "bool_col", df_functional_alltypes["bool_col"].cast("boolean") + ) + df_functional_alltypes.createOrReplaceTempView('functional_alltypes') + + df_batting = s.read.csv( + path=str(data_directory / 'batting.csv'), + schema=pt.StructType( + [ + pt.StructField('playerID', pt.StringType(), True), + pt.StructField('yearID', pt.IntegerType(), True), + pt.StructField('stint', pt.IntegerType(), True), + pt.StructField('teamID', pt.StringType(), True), + pt.StructField('lgID', pt.StringType(), True), + pt.StructField('G', pt.IntegerType(), True), + pt.StructField('AB', pt.DoubleType(), True), + pt.StructField('R', pt.DoubleType(), True), + pt.StructField('H', pt.DoubleType(), True), + pt.StructField('X2B', pt.DoubleType(), True), + pt.StructField('X3B', pt.DoubleType(), True), + pt.StructField('HR', pt.DoubleType(), True), + pt.StructField('RBI', pt.DoubleType(), True), + pt.StructField('SB', pt.DoubleType(), True), + pt.StructField('CS', pt.DoubleType(), True), + pt.StructField('BB', pt.DoubleType(), True), + pt.StructField('SO', pt.DoubleType(), True), + pt.StructField('IBB', pt.DoubleType(), True), + pt.StructField('HBP', pt.DoubleType(), True), + pt.StructField('SH', pt.DoubleType(), True), + pt.StructField('SF', pt.DoubleType(), True), + pt.StructField('GIDP', pt.DoubleType(), True), + ] + ), + header=True, + ) + df_batting.createOrReplaceTempView('batting') + + df_awards_players = s.read.csv( + path=str(data_directory / 'awards_players.csv'), + schema=pt.StructType( + [ + pt.StructField('playerID', pt.StringType(), True), + pt.StructField('awardID', pt.StringType(), True), + pt.StructField('yearID', pt.IntegerType(), True), + pt.StructField('lgID', pt.StringType(), True), + pt.StructField('tie', pt.StringType(), True), + pt.StructField('notes', pt.StringType(), True), + ] + ), + header=True, + ) + df_awards_players.createOrReplaceTempView('awards_players') + + df_simple = s.createDataFrame([(1, 'a')], ['foo', 'bar']) + df_simple.createOrReplaceTempView('simple') + + df_struct = s.createDataFrame([((1, 2, 'a'),)], ['struct_col']) + df_struct.createOrReplaceTempView('struct') + + df_nested_types = 
s.createDataFrame(
+        [([1, 2], [[3, 4], [5, 6]], {'a': [[2, 4], [3, 5]]})],
+        [
+            'list_of_ints',
+            'list_of_list_of_ints',
+            'map_string_list_of_list_of_ints',
+        ],
+    )
+    df_nested_types.createOrReplaceTempView('nested_types')
+
+    df_complicated = s.createDataFrame(
+        [({(1, 3): [[2, 4], [3, 5]]},)], ['map_tuple_list_of_list_of_ints']
+    )
+    df_complicated.createOrReplaceTempView('complicated')
+
+    df_udf = s.createDataFrame(
+        [('a', 1, 4.0, 'a'), ('b', 2, 5.0, 'a'), ('c', 3, 6.0, 'b')],
+        ['a', 'b', 'c', 'key'],
+    )
+    df_udf.createOrReplaceTempView('udf')
+
+    df_udf_nan = s.createDataFrame(
+        pd.DataFrame(
+            {
+                'a': np.arange(10, dtype=float),
+                'b': [3.0, np.NaN] * 5,
+                'key': list('ddeefffggh'),
+            }
+        )
+    )
+    df_udf_nan.createOrReplaceTempView('udf_nan')
+
+    df_udf_null = s.createDataFrame(
+        [
+            (float(i), None if i % 2 else 3.0, 'ddeefffggh'[i])
+            for i in range(10)
+        ],
+        ['a', 'b', 'key'],
+    )
+    df_udf_null.createOrReplaceTempView('udf_null')
+
+    df_udf_random = s.createDataFrame(
+        pd.DataFrame(
+            {
+                'a': np.arange(4, dtype=float).tolist()
+                + np.random.rand(3).tolist(),
+                'b': np.arange(4, dtype=float).tolist()
+                + np.random.rand(3).tolist(),
+                'key': list('ddeefff'),
+            }
+        )
+    )
+    df_udf_random.createOrReplaceTempView('udf_random')
+
+    return _spark_testing_client
diff --git a/ibis/backends/tests/conftest.py b/ibis/backends/tests/conftest.py
new file mode 100644
index 000000000000..906af8a2212c
--- /dev/null
+++ b/ibis/backends/tests/conftest.py
@@ -0,0 +1,308 @@
+import importlib
+import inspect
+import operator
+import os
+from pathlib import Path
+
+import pytest
+
+import ibis
+import ibis.common.exceptions as com
+import ibis.util as util
+
+from .base import BackendTest
+
+
+def _random_identifier(suffix):
+    return '__ibis_test_{}_{}'.format(suffix, util.guid())
+
+
+def backend_test_classes():
+    """Return all backend test classes defined in the backends directories."""
+    for backend_dir in Path(__file__).parent.parent.iterdir():
+        backend = backend_dir.name
+        try:
+            conftest = importlib.import_module(
+                f'ibis.backends.{backend}.tests.conftest'
+            )
+        except ImportError:
+            pass
+        else:
+            for obj_name in dir(conftest):
+                obj = getattr(conftest, obj_name)
+                if (
+                    inspect.isclass(obj)
+                    and issubclass(obj, BackendTest)
+                    and obj.name() == backend
+                ):
+                    yield obj
+
+
+ALL_BACKENDS = sorted(
+    backend_test_classes(), key=operator.attrgetter("__name__")
+)
+
+
+def pytest_runtest_call(item):
+    """Dynamically add various custom markers."""
+    nodeid = item.nodeid
+    for marker in list(item.iter_markers(name="only_on_backends")):
+        (backend_types,) = map(tuple, marker.args)
+        backend = item.funcargs["backend"]
+        assert isinstance(
+            backend, BackendTest
+        ), "backend has type {!r}".format(type(backend).__name__)
+        if not isinstance(backend, backend_types):
+            pytest.skip(
+                f"only_on_backends: {backend} is not in {backend_types} "
+                f"{nodeid}"
+            )
+
+    for marker in list(item.iter_markers(name="skip_backends")):
+        (backend_types,) = map(tuple, marker.args)
+        backend = item.funcargs["backend"]
+        assert isinstance(
+            backend, BackendTest
+        ), "backend has type {!r}".format(type(backend).__name__)
+        if isinstance(backend, backend_types):
+            pytest.skip(f"skip_backends: {backend} {nodeid}")
+
+    for marker in list(item.iter_markers(name="skip_missing_feature")):
+        backend = item.funcargs["backend"]
+        (features,) = marker.args
+        missing_features = [
+            feature for feature in features if not getattr(backend, feature)
+        ]
+        if missing_features:
+            pytest.skip(
+                'Backend {} is missing features {} needed to run {}'.format(
+                    type(backend).__name__, ', '.join(missing_features), nodeid
+                )
+            )
+
+    for marker in list(item.iter_markers(name="xfail_backends")):
+        (backend_types,) = map(tuple, marker.args)
+        backend = item.funcargs["backend"]
+        assert isinstance(
+            backend, BackendTest
+        ), "backend has type {!r}".format(type(backend).__name__)
+        item.add_marker(
+            pytest.mark.xfail(
+                condition=isinstance(backend, backend_types),
+                reason='Backend {} does not pass this test'.format(
+                    type(backend).__name__
+                ),
+                **marker.kwargs,
+            )
+        )
+
+    for marker in list(item.iter_markers(name="xpass_backends")):
+        (backend_types,) = map(tuple, marker.args)
+        backend = item.funcargs["backend"]
+        assert isinstance(
+            backend, BackendTest
+        ), "backend has type {!r}".format(type(backend).__name__)
+        item.add_marker(
+            pytest.mark.xfail(
+                condition=not isinstance(backend, backend_types),
+                reason='{} does not pass this test'.format(
+                    type(backend).__name__
+                ),
+                **marker.kwargs,
+            )
+        )
+
+
+@pytest.hookimpl(hookwrapper=True)
+def pytest_pyfunc_call(pyfuncitem):
+    """Dynamically add an xfail marker for specific backends."""
+    outcome = yield
+    try:
+        outcome.get_result()
+    except (
+        com.OperationNotDefinedError,
+        com.UnsupportedOperationError,
+        com.UnsupportedBackendType,
+        NotImplementedError,
+    ) as e:
+        markers = list(pyfuncitem.iter_markers(name="xfail_unsupported"))
+        assert (
+            len(markers) == 1
+        ), "More than one xfail_unsupported marker found on test {}".format(
+            pyfuncitem
+        )
+        (marker,) = markers
+        backend = pyfuncitem.funcargs["backend"]
+        assert isinstance(
+            backend, BackendTest
+        ), "backend has type {!r}".format(type(backend).__name__)
+        pytest.xfail(reason='{}: {}'.format(type(backend).__name__, e))
+
+
+pytestmark = pytest.mark.backend
+
+pytest_backends = os.environ.get('PYTEST_BACKENDS', '').split()
+params_backend = [
+    pytest.param(backend, marks=getattr(pytest.mark, backend.name()))
+    for backend in ALL_BACKENDS
+    if backend.name() in pytest_backends or not pytest_backends
+]
+if pytest_backends and len(pytest_backends) != len(params_backend):
+    unknown_backends = set(pytest_backends) - {b.name() for b in ALL_BACKENDS}
+    raise ValueError(
+        'PYTEST_BACKENDS environment variable contains unknown '
+        f'backends {unknown_backends}; known backends are '
+        f'{[b.name() for b in ALL_BACKENDS]}'
+    )
+
+
+@pytest.fixture(params=params_backend, scope='session')
+def backend(request, data_directory):
+    return request.param(data_directory)
+
+
+@pytest.fixture(scope='session')
+def con(backend):
+    return backend.connection
+
+
+@pytest.fixture(scope='session')
+def alltypes(backend):
+    return backend.functional_alltypes
+
+
+@pytest.fixture(scope='session')
+def sorted_alltypes(alltypes):
+    return alltypes.sort_by('id')
+
+
+@pytest.fixture(scope='session')
+def batting(backend):
+    return backend.batting
+
+
+@pytest.fixture(scope='session')
+def awards_players(backend):
+    return backend.awards_players
+
+
+@pytest.fixture(scope='session')
+def geo(backend):
+    if backend.geo is None:
+        pytest.skip(
+            'Geo Spatial type not supported for {} backend.'.format(
+                backend.name
+            )
+        )
+    return backend.geo
+
+
+@pytest.fixture
+def analytic_alltypes(alltypes):
+    return alltypes
+
+
+@pytest.fixture(scope='session')
+def df(alltypes):
+    return alltypes.execute()
+
+
+@pytest.fixture(scope='session')
+def sorted_df(df):
+    return df.sort_values('id').reset_index(drop=True)
+
+
+@pytest.fixture(scope='session')
+def batting_df(batting):
+    return batting.execute(limit=None)
+
+
+@pytest.fixture(scope='session')
+def
awards_players_df(awards_players): + return awards_players.execute(limit=None) + + +@pytest.fixture(scope='session') +def geo_df(geo): + # Currently geo is implemented just for OmniSciDB + if geo is not None: + return geo.execute(limit=None) + return None + + +@pytest.fixture +def temp_table(con: ibis.client.Client) -> str: + """ + Return a temporary table name. + + Parameters + ---------- + con : ibis.client.Client + + Yields + ------ + name : string + Random table name for a temporary usage. + """ + name = _random_identifier('table') + try: + yield name + finally: + if hasattr(con, 'drop_table'): + con.drop_table(name, force=True) + + +@pytest.fixture +def temp_view(con) -> str: + """Return a temporary view name. + + Parameters + ---------- + con : ibis.omniscidb.OmniSciDBClient + + Yields + ------ + name : string + Random view name for a temporary usage. + """ + name = _random_identifier('view') + try: + yield name + finally: + if hasattr(con, 'drop_view'): + con.drop_view(name, force=True) + + +@pytest.fixture(scope='session') +def current_data_db(con, backend) -> str: + """Return current database name.""" + if not hasattr(con, 'current_database'): + pytest.skip( + f'{backend.name} backend doesn\'t have current_database method.' + ) + return con.current_database + + +@pytest.fixture +def alternate_current_database(con, backend, current_data_db: str) -> str: + """Create a temporary database and yield its name. + Drops the created database upon completion. + + Parameters + ---------- + con : ibis.client.Client + current_data_db : str + Yields + ------- + str + """ + name = _random_identifier('database') + if not hasattr(con, 'create_database'): + pytest.skip( + f'{backend.name} backend doesn\'t have create_database method.' + ) + con.create_database(name) + try: + yield name + finally: + con.set_database(current_data_db) + con.drop_database(name, force=True) diff --git a/ibis/tests/all/test_aggregation.py b/ibis/backends/tests/test_aggregation.py similarity index 92% rename from ibis/tests/all/test_aggregation.py rename to ibis/backends/tests/test_aggregation.py index f779a1fe2dac..706193b532dc 100644 --- a/ibis/tests/all/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -4,14 +4,12 @@ from pytest import param import ibis.expr.datatypes as dt -from ibis.tests.backends import ( - BigQuery, - Clickhouse, - MySQL, - Postgres, - PySpark, - SQLite, -) +from ibis.backends.bigquery.tests.conftest import BigQueryTest +from ibis.backends.clickhouse.tests.conftest import ClickhouseTest +from ibis.backends.mysql.tests.conftest import MySQLTest +from ibis.backends.postgres.tests.conftest import PostgresTest +from ibis.backends.pyspark.tests.conftest import PySparkTest +from ibis.backends.sqlite.tests.conftest import SQLiteTest from ibis.udf.vectorized import reduction @@ -174,7 +172,7 @@ def test_aggregate_grouped( lambda t, where: t.double_col.approx_median(), lambda t, where: t.double_col.median(), id='approx_median', - marks=pytest.mark.xpass_backends([Clickhouse]), + marks=pytest.mark.xpass_backends([ClickhouseTest]), ), param( lambda t, where: t.double_col.std(how='sample'), @@ -210,7 +208,7 @@ def test_aggregate_grouped( lambda t, where: t.string_col.approx_nunique(), lambda t, where: t.string_col.nunique(), id='approx_nunique', - marks=pytest.mark.xfail_backends([MySQL, SQLite]), + marks=pytest.mark.xfail_backends([MySQLTest, SQLiteTest]), ), param( lambda t, where: t.double_col.arbitrary(how='first'), @@ -284,7 +282,7 @@ def test_group_concat(backend, alltypes, df, 
result_fn, expected_fn): ], ) @pytest.mark.xfail_unsupported -@pytest.mark.xfail_backends([PySpark]) # Issue #2130 +@pytest.mark.xfail_backends([PySparkTest]) # Issue #2130 def test_topk_op(backend, alltypes, df, result_fn, expected_fn): # TopK expression will order rows by "count" but each backend # can have different result for that. @@ -313,8 +311,10 @@ def test_topk_op(backend, alltypes, df, result_fn, expected_fn): ) @pytest.mark.xfail_unsupported # Issues #2369 #2133 #2131 #2132 -@pytest.mark.xfail_backends([BigQuery, Clickhouse, MySQL, Postgres]) -@pytest.mark.skip_backends([SQLite], reason='Issue #2128') +@pytest.mark.xfail_backends( + [BigQueryTest, ClickhouseTest, MySQLTest, PostgresTest] +) +@pytest.mark.skip_backends([SQLiteTest], reason='Issue #2128') def test_topk_filter_op(backend, alltypes, df, result_fn, expected_fn): # TopK expression will order rows by "count" but each backend # can have different result for that. diff --git a/ibis/tests/all/test_array.py b/ibis/backends/tests/test_array.py similarity index 85% rename from ibis/tests/all/test_array.py rename to ibis/backends/tests/test_array.py index e820fa4255c1..f0ed69915935 100644 --- a/ibis/tests/all/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -1,7 +1,7 @@ import pytest import ibis -from ibis.tests.backends import BigQuery +from ibis.backends.bigquery.tests.conftest import BigQueryTest @pytest.mark.xfail_unsupported @@ -9,7 +9,7 @@ ['supports_arrays', 'supports_arrays_outside_of_select'] ) # Issues #2370 -@pytest.mark.xfail_backends([BigQuery]) +@pytest.mark.xfail_backends([BigQueryTest]) def test_array_concat(backend, con): left = ibis.literal([1, 2, 3]) right = ibis.literal([2, 1]) diff --git a/ibis/tests/all/test_client.py b/ibis/backends/tests/test_client.py similarity index 90% rename from ibis/tests/all/test_client.py rename to ibis/backends/tests/test_client.py index c030f92b2e15..632ae8078225 100644 --- a/ibis/tests/all/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -4,14 +4,12 @@ import ibis import ibis.expr.datatypes as dt -from ibis.tests.backends import ( - BigQuery, - Clickhouse, - Impala, - OmniSciDB, - PySpark, - Spark, -) +from ibis.backends.bigquery.tests.conftest import BigQueryTest +from ibis.backends.clickhouse.tests.conftest import ClickhouseTest +from ibis.backends.impala.tests.conftest import ImpalaTest +from ibis.backends.omniscidb.tests.conftest import OmniSciDBTest +from ibis.backends.pyspark.tests.conftest import PySparkTest +from ibis.backends.spark.tests.conftest import SparkTest @pytest.fixture @@ -102,7 +100,7 @@ def test_query_schema(backend, con, alltypes, expr_fn, expected): 'select * from functional_alltypes \nlimit 10\n', ], ) -@pytest.mark.xfail_backends((BigQuery,)) +@pytest.mark.xfail_backends((BigQueryTest,)) @pytest.mark.xfail_unsupported def test_sql(backend, con, sql): if not hasattr(con, 'sql') or not hasattr(con, '_get_schema_using_query'): @@ -146,7 +144,7 @@ def test_rename_table(con, backend, temp_table, new_schema): @pytest.mark.xfail_unsupported -@pytest.mark.xfail_backends([Impala, PySpark, Spark]) +@pytest.mark.xfail_backends([ImpalaTest, PySparkTest, SparkTest]) def test_nullable_input_output(con, backend, temp_table): # - Impala, PySpark and Spark non-nullable issues #2138 and #2137 if not hasattr(con, 'create_table') or not hasattr(con, 'drop_table'): @@ -175,7 +173,7 @@ def test_nullable_input_output(con, backend, temp_table): @pytest.mark.xfail_unsupported -@pytest.mark.xfail_backends([PySpark, Spark]) 
+@pytest.mark.xfail_backends([PySparkTest, SparkTest]) def test_create_drop_view(con, backend, temp_view): # pyspark and spark skipt because table actually is a temporary view if not hasattr(con, 'create_view') or not hasattr(con, 'drop_view'): @@ -199,7 +197,7 @@ def test_create_drop_view(con, backend, temp_view): @pytest.mark.only_on_backends( - [BigQuery, Clickhouse, Impala, OmniSciDB, Spark, BigQuery], + [BigQueryTest, ClickhouseTest, ImpalaTest, OmniSciDBTest, SparkTest], reason="run only if backend is sql-based", ) def test_separate_database(con, alternate_current_database, current_data_db): diff --git a/ibis/tests/all/test_column.py b/ibis/backends/tests/test_column.py similarity index 100% rename from ibis/tests/all/test_column.py rename to ibis/backends/tests/test_column.py diff --git a/ibis/tests/all/test_generic.py b/ibis/backends/tests/test_generic.py similarity index 100% rename from ibis/tests/all/test_generic.py rename to ibis/backends/tests/test_generic.py diff --git a/ibis/tests/all/test_geospatial.py b/ibis/backends/tests/test_geospatial.py similarity index 98% rename from ibis/tests/all/test_geospatial.py rename to ibis/backends/tests/test_geospatial.py index 7094f7bc8cfe..c307f597979d 100644 --- a/ibis/tests/all/test_geospatial.py +++ b/ibis/backends/tests/test_geospatial.py @@ -5,7 +5,8 @@ from pytest import param import ibis -from ibis.tests.backends import OmniSciDB, Postgres +from ibis.backends.omniscidb.tests.conftest import OmniSciDBTest +from ibis.backends.postgres.tests.conftest import PostgresTest geopandas = pytest.importorskip('geopandas') shapely = pytest.importorskip('shapely') @@ -32,7 +33,7 @@ ).name('p') # add here backends that support geo spatial types -all_db_geo_supported = [OmniSciDB, Postgres] +all_db_geo_supported = [OmniSciDBTest, PostgresTest] # test input data with shapely geometries shp_point_0 = shapely.geometry.Point(0, 0) @@ -257,7 +258,7 @@ def test_literal_multi_geospatial_inferred(backend, con, shp, expected): [96.34, 114.36, 10.24, 10.24, 10.24], id='perimeter', marks=pytest.mark.skip_backends( - [Postgres], reason='TODO: fix different results issue' + [PostgresTest], reason='TODO: fix different results issue' ), ), param( @@ -265,7 +266,7 @@ def test_literal_multi_geospatial_inferred(backend, con, shp, expected): [7, 11, 5, 5, 5], id='n_points', marks=pytest.mark.skip_backends( - [Postgres], reason='TODO: fix different results issue' + [PostgresTest], reason='TODO: fix different results issue' ), ), ], diff --git a/ibis/tests/all/test_join.py b/ibis/backends/tests/test_join.py similarity index 84% rename from ibis/tests/all/test_join.py rename to ibis/backends/tests/test_join.py index 819f6fc37694..a5771084d2aa 100644 --- a/ibis/tests/all/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -2,10 +2,12 @@ import pytest from pytest import param -from ibis.tests.backends import Csv, Pandas, PySpark +from ibis.backends.csv.tests.conftest import CsvTest +from ibis.backends.pandas.tests.conftest import PandasTest +from ibis.backends.pyspark.tests.conftest import PySparkTest # add here backends that passes join tests -all_db_join_supported = [Pandas, PySpark] +all_db_join_supported = [PandasTest, PySparkTest] @pytest.mark.parametrize( @@ -31,7 +33,7 @@ ) @pytest.mark.only_on_backends(all_db_join_supported) # Csv is a subclass of Pandas so need to skip it explicitly. 
-@pytest.mark.skip_backends([Csv])
+@pytest.mark.skip_backends([CsvTest])
 @pytest.mark.xfail_unsupported
 def test_join_project_left_table(backend, con, batting, awards_players, how):
diff --git a/ibis/tests/all/test_json.py b/ibis/backends/tests/test_json.py
similarity index 90%
rename from ibis/tests/all/test_json.py
rename to ibis/backends/tests/test_json.py
index 9ae3dfd9c4d7..33c79d05de65 100644
--- a/ibis/tests/all/test_json.py
+++ b/ibis/backends/tests/test_json.py
@@ -5,10 +5,10 @@
 from pytest import param
 
 import ibis
-from ibis.tests.backends import Postgres
+from ibis.backends.postgres.tests.conftest import PostgresTest
 
 # add here backends that support json types
-all_db_geo_supported = [Postgres]
+all_db_geo_supported = [PostgresTest]
 
 
 @pytest.mark.parametrize('data', [param({'status': True}, id='status')])
diff --git a/ibis/tests/all/test_nettypes.py b/ibis/backends/tests/test_nettypes.py
similarity index 90%
rename from ibis/tests/all/test_nettypes.py
rename to ibis/backends/tests/test_nettypes.py
index b030ac3d10df..86b069465fe1 100644
--- a/ibis/tests/all/test_nettypes.py
+++ b/ibis/backends/tests/test_nettypes.py
@@ -4,10 +4,10 @@
 from pytest import param
 
 import ibis
-from ibis.tests.backends import Postgres
+from ibis.backends.postgres.tests.conftest import PostgresTest
 
 # add here backends that support json types
-net_types_supported = [Postgres]
+net_types_supported = [PostgresTest]
 
 
 @pytest.mark.parametrize('data', [param({'status': True}, id='status')])
diff --git a/ibis/tests/all/test_numeric.py b/ibis/backends/tests/test_numeric.py
similarity index 96%
rename from ibis/tests/all/test_numeric.py
rename to ibis/backends/tests/test_numeric.py
index 02c06edffb14..ee8e2794d5d1 100644
--- a/ibis/tests/all/test_numeric.py
+++ b/ibis/backends/tests/test_numeric.py
@@ -9,8 +9,10 @@
 
 import ibis
 from ibis import literal as L
+from ibis.backends.mysql.tests.conftest import MySQLTest
+from ibis.backends.omniscidb.tests.conftest import OmniSciDBTest
+from ibis.backends.postgres.tests.conftest import PostgresTest
 from ibis.expr import datatypes as dt
-from ibis.tests.backends import MySQL, OmniSciDB, Postgres
 from ibis.tests.util import assert_equal
 
 try:
@@ -56,19 +58,19 @@ def sch_decimal():
             lambda t: ibis.literal(np.nan),
             lambda t: np.nan,
             id='nan-literal',
-            marks=pytest.mark.xfail_backends([OmniSciDB]),
+            marks=pytest.mark.xfail_backends([OmniSciDBTest]),
         ),
         param(
             lambda t: ibis.literal(np.inf),
             lambda t: np.inf,
             id='inf-literal',
-            marks=pytest.mark.xfail_backends([OmniSciDB]),
+            marks=pytest.mark.xfail_backends([OmniSciDBTest]),
         ),
         param(
             lambda t: ibis.literal(-np.inf),
            lambda t: -np.inf,
            id='-inf-literal',
-            marks=pytest.mark.xfail_backends([OmniSciDB]),
+            marks=pytest.mark.xfail_backends([OmniSciDBTest]),
        ),
    ],
)
@@ -376,7 +378,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator):
         )
     ],
 )
-@pytest.mark.only_on_backends([Postgres, MySQL])
+@pytest.mark.only_on_backends([PostgresTest, MySQLTest])
 def test_sa_default_numeric_precision_and_scale(
     con, backend, dialects, default_precisions, default_scales
 ):
@@ -415,7 +417,7 @@ def test_sa_default_numeric_precision_and_scale(
     con.drop_table(table_name, force=True)
 
 
-@pytest.mark.only_on_backends([Postgres, MySQL])
+@pytest.mark.only_on_backends([PostgresTest, MySQLTest])
 def test_random(con):
     expr = ibis.random()
     result = con.execute(expr)
diff --git a/ibis/tests/all/test_param.py b/ibis/backends/tests/test_param.py
similarity index 94%
rename from ibis/tests/all/test_param.py
rename to
ibis/backends/tests/test_param.py index 2ac6a6dfd5e4..968531109f0b 100644 --- a/ibis/tests/all/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -4,7 +4,7 @@ import ibis import ibis.expr.datatypes as dt -from ibis.tests.backends import PySpark +from ibis.backends.pyspark.tests.conftest import PySparkTest @pytest.mark.parametrize( @@ -46,7 +46,7 @@ def test_date_scalar_parameter( backend.assert_series_equal(result, expected) -@pytest.mark.xfail_backends([PySpark]) +@pytest.mark.xfail_backends([PySparkTest]) @pytest.mark.xfail_unsupported def test_timestamp_accepts_date_literals(backend, alltypes): date_string = '2009-03-01' @@ -56,7 +56,7 @@ def test_timestamp_accepts_date_literals(backend, alltypes): assert expr.compile(params=params) is not None -@pytest.mark.xfail_backends([PySpark]) +@pytest.mark.xfail_backends([PySparkTest]) @pytest.mark.xfail_unsupported def test_scalar_param_array(backend, con): value = [1, 2, 3] diff --git a/ibis/tests/all/test_string.py b/ibis/backends/tests/test_string.py similarity index 83% rename from ibis/tests/all/test_string.py rename to ibis/backends/tests/test_string.py index 03acfcbe4bfa..f8b06de0c9ec 100644 --- a/ibis/tests/all/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -3,15 +3,13 @@ import ibis import ibis.expr.datatypes as dt -from ibis.tests.backends import ( - BigQuery, - Clickhouse, - Impala, - OmniSciDB, - Postgres, - PySpark, - Spark, -) +from ibis.backends.bigquery.tests.conftest import BigQueryTest +from ibis.backends.clickhouse.tests.conftest import ClickhouseTest +from ibis.backends.impala.tests.conftest import ImpalaTest +from ibis.backends.omniscidb.tests.conftest import OmniSciDBTest +from ibis.backends.postgres.tests.conftest import PostgresTest +from ibis.backends.pyspark.tests.conftest import PySparkTest +from ibis.backends.spark.tests.conftest import SparkTest def test_string_col_is_unicode(backend, alltypes, df): @@ -55,55 +53,67 @@ def test_string_col_is_unicode(backend, alltypes, df): lambda t: t.string_col.re_search(r'[[:digit:]]+'), lambda t: t.string_col.str.contains(r'\d+'), id='re_search', - marks=pytest.mark.xfail_backends((Spark, PySpark)), + marks=pytest.mark.xfail_backends((SparkTest, PySparkTest)), ), param( lambda t: t.string_col.re_extract(r'([[:digit:]]+)', 0), lambda t: t.string_col.str.extract(r'(\d+)', expand=False), id='re_extract', - marks=pytest.mark.xfail_backends((Spark, PySpark)), + marks=pytest.mark.xfail_backends((SparkTest, PySparkTest)), ), param( lambda t: t.string_col.re_replace(r'[[:digit:]]+', 'a'), lambda t: t.string_col.str.replace(r'\d+', 'a'), id='re_replace', - marks=pytest.mark.xfail_backends((Spark, PySpark)), + marks=pytest.mark.xfail_backends((SparkTest, PySparkTest)), ), param( lambda t: t.string_col.re_search(r'\\d+'), lambda t: t.string_col.str.contains(r'\d+'), id='re_search_spark', - marks=pytest.mark.xpass_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xpass_backends( + (ClickhouseTest, ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.re_extract(r'(\\d+)', 0), lambda t: t.string_col.str.extract(r'(\d+)', expand=False), id='re_extract_spark', - marks=pytest.mark.xpass_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xpass_backends( + (ClickhouseTest, ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.re_replace(r'\\d+', 'a'), lambda t: t.string_col.str.replace(r'\d+', 'a'), id='re_replace_spark', - marks=pytest.mark.xpass_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xpass_backends( + (ClickhouseTest, 
ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.re_search(r'\d+'), lambda t: t.string_col.str.contains(r'\d+'), id='re_search_spark', - marks=pytest.mark.xfail_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xfail_backends( + (ClickhouseTest, ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.re_extract(r'(\d+)', 0), lambda t: t.string_col.str.extract(r'(\d+)', expand=False), id='re_extract_spark', - marks=pytest.mark.xfail_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xfail_backends( + (ClickhouseTest, ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.re_replace(r'\d+', 'a'), lambda t: t.string_col.str.replace(r'\d+', 'a'), id='re_replace_spark', - marks=pytest.mark.xfail_backends((Clickhouse, Impala, Spark)), + marks=pytest.mark.xfail_backends( + (ClickhouseTest, ImpalaTest, SparkTest) + ), ), param( lambda t: t.string_col.repeat(2), @@ -164,7 +174,7 @@ def test_string_col_is_unicode(backend, alltypes, df): lambda t: t.string_col.length(), lambda t: t.string_col.str.len().astype('int32'), id='length', - marks=pytest.mark.xfail_backends([OmniSciDB]), # #2338 + marks=pytest.mark.xfail_backends([OmniSciDBTest]), # #2338 ), param( lambda t: t.string_col.strip(), @@ -232,7 +242,7 @@ def test_string_col_is_unicode(backend, alltypes, df): lambda t: t.date_string_col.split('/'), lambda t: t.date_string_col.str.split('/'), id='split', - marks=pytest.mark.xfail_backends([BigQuery]), # Issue #2372 + marks=pytest.mark.xfail_backends([BigQueryTest]), # Issue #2372 ), param( lambda t: ibis.literal('-').join(['a', t.string_col, 'c']), @@ -254,7 +264,7 @@ def test_string(backend, alltypes, df, result_func, expected_func): 'data, data_type', [param('123e4567-e89b-12d3-a456-426655440000', 'uuid', id='uuid')], ) -@pytest.mark.only_on_backends([Postgres]) +@pytest.mark.only_on_backends([PostgresTest]) def test_special_strings(backend, con, alltypes, data, data_type): lit = ibis.literal(data, type=data_type).name('tmp') expr = alltypes[[alltypes.id, lit]].head(1) diff --git a/ibis/tests/all/test_temporal.py b/ibis/backends/tests/test_temporal.py similarity index 87% rename from ibis/tests/all/test_temporal.py rename to ibis/backends/tests/test_temporal.py index be6625190584..ef75653bfac4 100644 --- a/ibis/tests/all/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -8,19 +8,17 @@ import ibis import ibis.expr.datatypes as dt +from ibis.backends.bigquery.tests.conftest import BigQueryTest +from ibis.backends.clickhouse.tests.conftest import ClickhouseTest +from ibis.backends.csv.tests.conftest import CsvTest +from ibis.backends.impala.tests.conftest import ImpalaTest from ibis.backends.pandas.execution.temporal import day_name -from ibis.tests.backends import ( - BigQuery, - Clickhouse, - Csv, - Impala, - Pandas, - Parquet, - Postgres, - PySpark, - Spark, - SQLite, -) +from ibis.backends.pandas.tests.conftest import PandasTest +from ibis.backends.parquet.tests.conftest import ParquetTest +from ibis.backends.postgres.tests.conftest import PostgresTest +from ibis.backends.pyspark.tests.conftest import PySparkTest +from ibis.backends.spark.tests.conftest import SparkTest +from ibis.backends.sqlite.tests.conftest import SQLiteTest @pytest.mark.parametrize('attr', ['year', 'month', 'day']) @@ -88,7 +86,12 @@ def test_timestamp_extract(backend, alltypes, df, attr): 'D', # Spark truncation to week truncates to different days than Pandas # Pandas backend is probably doing this wrong - param('W', marks=pytest.mark.xpass_backends((Csv, Pandas, 
-        param('W', marks=pytest.mark.xpass_backends((Csv, Pandas, Parquet))),
+        param(
+            'W',
+            marks=pytest.mark.xpass_backends(
+                (CsvTest, PandasTest, ParquetTest)
+            ),
+        ),
         'h',
         'm',
         's',
@@ -116,7 +119,12 @@ def test_timestamp_truncate(backend, alltypes, df, unit):
         'Y',
         'M',
         'D',
-        param('W', marks=pytest.mark.xpass_backends((Csv, Pandas, Parquet))),
+        param(
+            'W',
+            marks=pytest.mark.xpass_backends(
+                (CsvTest, PandasTest, ParquetTest)
+            ),
+        ),
     ],
 )
 @pytest.mark.xfail_unsupported
@@ -147,18 +155,25 @@ def test_date_truncate(backend, alltypes, df, unit):
             'ms',
             pd.Timedelta,
             marks=pytest.mark.xpass_backends(
-                (Csv, Pandas, Parquet, BigQuery, Impala, Postgres)
+                (
+                    CsvTest,
+                    PandasTest,
+                    ParquetTest,
+                    BigQueryTest,
+                    ImpalaTest,
+                    PostgresTest,
+                )
             ),
         ),
         param(
             'us',
             pd.Timedelta,
-            marks=pytest.mark.xfail_backends((Clickhouse, SQLite)),
+            marks=pytest.mark.xfail_backends((ClickhouseTest, SQLiteTest)),
         ),
     ],
 )
 @pytest.mark.xfail_unsupported
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.skip_backends([SparkTest])
 def test_integer_to_interval_timestamp(
     backend, con, alltypes, df, unit, displacement_type
 ):
@@ -185,7 +200,7 @@ def convert_to_offset(offset, displacement_type=displacement_type):
     'unit', ['Y', param('Q', marks=pytest.mark.xfail), 'M', 'W', 'D']
 )
 @pytest.mark.xfail_unsupported
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.skip_backends([SparkTest])
 def test_integer_to_interval_date(backend, con, alltypes, df, unit):
     interval = alltypes.int_col.to_interval(unit=unit)
     array = alltypes.date_string_col.split('/')
@@ -263,7 +278,7 @@ def test_integer_to_interval_date_failure(backend, con, alltypes, df, unit):
                 )
             ),
             id='timestamp-subtract-timestamp',
-            marks=pytest.mark.xfail_backends([Spark]),
+            marks=pytest.mark.xfail_backends([SparkTest]),
         ),
         param(
             lambda t, be: t.timestamp_col.date() - ibis.date(date_value),
@@ -273,7 +288,7 @@
     ],
 )
 @pytest.mark.xfail_unsupported
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.skip_backends([SparkTest])
 def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn):
     expr = expr_fn(alltypes, backend)
     expected = expected_fn(df, backend)
@@ -285,7 +300,7 @@
 
 
 @pytest.mark.xfail_unsupported
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.skip_backends([SparkTest])
 def test_interval_add_cast_scalar(backend, alltypes):
     timestamp_date = alltypes.timestamp_col.date()
     delta = ibis.literal(10).cast("interval('D')")
@@ -297,8 +312,8 @@
 
 @pytest.mark.xfail_unsupported
 # PySpark does not support casting columns to intervals
-@pytest.mark.xfail_backends([PySpark])
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.xfail_backends([PySparkTest])
+@pytest.mark.skip_backends([SparkTest])
 def test_interval_add_cast_column(backend, alltypes, df):
     timestamp_date = alltypes.timestamp_col.date()
     delta = alltypes.bigint_col.cast("interval('D')")
@@ -319,7 +334,7 @@
 )
 @pytest.mark.xfail_unsupported
 # Spark takes Java SimpleDateFormat instead of strftime
-@pytest.mark.skip_backends([Spark])
+@pytest.mark.skip_backends([SparkTest])
 def test_strftime(backend, con, alltypes, df, ibis_pattern, pandas_pattern):
     expr = alltypes.timestamp_col.strftime(ibis_pattern)
     expected = df.timestamp_col.dt.strftime(pandas_pattern)
@@ -340,10 +355,22 @@ def test_strftime(backend, con, alltypes, df, ibis_pattern, pandas_pattern):
         param(
             'us',
             marks=pytest.mark.xpass_backends(
-                (BigQuery, Csv, Impala, Pandas, Parquet, Spark)
+                (
+                    BigQueryTest,
+                    CsvTest,
+                    ImpalaTest,
+                    PandasTest,
+                    ParquetTest,
+                    SparkTest,
+                )
+            ),
+        ),
+        param(
+            'ns',
+            marks=pytest.mark.xpass_backends(
+                (CsvTest, PandasTest, ParquetTest)
             ),
         ),
-        param('ns', marks=pytest.mark.xpass_backends((Csv, Pandas, Parquet))),
     ],
 )
 @pytest.mark.xfail_unsupported
diff --git a/ibis/tests/all/test_union.py b/ibis/backends/tests/test_union.py
similarity index 55%
rename from ibis/tests/all/test_union.py
rename to ibis/backends/tests/test_union.py
index 01c1ca9a7601..550a53f0eb2c 100644
--- a/ibis/tests/all/test_union.py
+++ b/ibis/backends/tests/test_union.py
@@ -1,11 +1,17 @@
 import pandas as pd
 import pytest
 
-from ibis.tests.backends import BigQuery, Impala, Pandas, Postgres, PySpark
+from ibis.backends.bigquery.tests.conftest import BigQueryTest
+from ibis.backends.impala.tests.conftest import ImpalaTest
+from ibis.backends.pandas.tests.conftest import PandasTest
+from ibis.backends.postgres.tests.conftest import PostgresTest
+from ibis.backends.pyspark.tests.conftest import PySparkTest
 
 
 @pytest.mark.parametrize('distinct', [False, True])
-@pytest.mark.only_on_backends([BigQuery, Impala, Pandas, Postgres, PySpark])
+@pytest.mark.only_on_backends(
+    [BigQueryTest, ImpalaTest, PandasTest, PostgresTest, PySparkTest]
+)
 @pytest.mark.xfail_unsupported
 def test_union(backend, alltypes, df, distinct):
     result = alltypes.union(alltypes, distinct=distinct).execute()
diff --git a/ibis/tests/all/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py
similarity index 90%
rename from ibis/tests/all/test_vectorized_udf.py
rename to ibis/backends/tests/test_vectorized_udf.py
index fe7f40f63f83..a8ef6a41ffbd 100644
--- a/ibis/tests/all/test_vectorized_udf.py
+++ b/ibis/backends/tests/test_vectorized_udf.py
@@ -3,8 +3,9 @@
 import ibis
 import ibis.common.exceptions as com
 import ibis.expr.datatypes as dt
+from ibis.backends.pandas.tests.conftest import PandasTest
+from ibis.backends.pyspark.tests.conftest import PySparkTest
 from ibis.expr.window import window
-from ibis.tests.backends import Pandas, PySpark
 from ibis.udf.vectorized import analytic, elementwise, reduction
 
 pytestmark = pytest.mark.udf
@@ -49,7 +50,7 @@ def mean_struct(v, w):
     return v.mean(), w.mean()
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_elementwise_udf(backend, alltypes, df):
     result = add_one(alltypes['double_col']).execute()
@@ -57,7 +58,7 @@ def test_elementwise_udf(backend, alltypes, df):
     backend.assert_series_equal(result, expected, check_names=False)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_elementwise_udf_mutate(backend, alltypes, df):
     expr = alltypes.mutate(incremented=add_one(alltypes['double_col']))
@@ -68,7 +69,7 @@ def test_elementwise_udf_mutate(backend, alltypes, df):
     backend.assert_series_equal(result['incremented'], expected['incremented'])
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_analytic_udf(backend, alltypes, df):
     result = calc_zscore(alltypes['double_col']).execute()
@@ -76,7 +77,7 @@ def test_analytic_udf(backend, alltypes, df):
     backend.assert_series_equal(result, expected, check_names=False)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_analytic_udf_mutate(backend, alltypes, df):
     expr = alltypes.mutate(zscore=calc_zscore(alltypes['double_col']))
@@ -87,7 +88,7 @@ def test_analytic_udf_mutate(backend, alltypes, df):
     backend.assert_series_equal(result['zscore'], expected['zscore'])
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_reduction_udf(backend, alltypes, df):
     result = calc_mean(alltypes['double_col']).execute()
@@ -95,7 +96,7 @@
     assert result == expected
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_output_type_in_list_invalid(backend, alltypes, df):
     # Test that an error is raised if UDF output type is wrapped in a list
@@ -110,7 +111,7 @@ def add_one(s):
         return s + 1
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_valid_kwargs(backend, alltypes, df):
     # Test different forms of UDF definition with keyword arguments
@@ -151,7 +152,7 @@ def foo3(v, **kwargs):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_valid_args(backend, alltypes, df):
     # Test different forms of UDF definition with *args
@@ -177,7 +178,7 @@ def foo2(v, *args):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_valid_args_and_kwargs(backend, alltypes, df):
     # Test UDFs with both *args and keyword arguments
@@ -220,7 +221,7 @@ def foo4(v, *args, **kwargs):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_invalid_kwargs(backend, alltypes):
     # Test that defining a UDF with a non-column argument that is not a
@@ -233,7 +234,7 @@ def foo1(v, amount):
         return v + 1
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_elementwise_udf_destruct(backend, alltypes):
     result = alltypes.mutate(
@@ -247,7 +248,7 @@ def test_elementwise_udf_destruct(backend, alltypes):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas, PySpark])
+@pytest.mark.only_on_backends([PandasTest, PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_elementwise_udf_named_destruct(backend, alltypes):
     """Test error when assigning name to a destruct column."""
@@ -260,7 +261,7 @@
     )
 
 
-@pytest.mark.only_on_backends([PySpark])
+@pytest.mark.only_on_backends([PySparkTest])
 @pytest.mark.xfail_unsupported
 def test_elementwise_udf_struct(backend, alltypes):
     result = alltypes.mutate(
@@ -278,7 +279,7 @@ def test_elementwise_udf_struct(backend, alltypes):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas])
+@pytest.mark.only_on_backends([PandasTest])
 def test_analytic_udf_destruct(backend, alltypes):
     w = window(preceding=None, following=None, group_by='year')
@@ -296,7 +297,7 @@ def test_analytic_udf_destruct(backend, alltypes):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas])
+@pytest.mark.only_on_backends([PandasTest])
 def test_reduction_udf_destruct_groupby(backend, alltypes):
     result = (
         alltypes.groupby('year')
@@ -320,7 +321,7 @@ def test_reduction_udf_destruct_groupby(backend, alltypes):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas])
+@pytest.mark.only_on_backends([PandasTest])
 def test_reduction_udf_destruct_no_groupby(backend, alltypes):
     result = alltypes.aggregate(
         mean_struct(alltypes['double_col'], alltypes['int_col']).destructure()
@@ -334,7 +335,7 @@ def test_reduction_udf_destruct_no_groupby(backend, alltypes):
     backend.assert_frame_equal(result, expected)
 
 
-@pytest.mark.only_on_backends([Pandas])
+@pytest.mark.only_on_backends([PandasTest])
 def test_reduction_udf_destruct_window(backend, alltypes):
     win = window(
         preceding=ibis.interval(hours=2),
diff --git a/ibis/tests/all/test_window.py b/ibis/backends/tests/test_window.py
similarity index 92%
rename from ibis/tests/all/test_window.py
rename to ibis/backends/tests/test_window.py
index c7dad01a740a..9de2943aa685 100644
--- a/ibis/tests/all/test_window.py
+++ b/ibis/backends/tests/test_window.py
@@ -5,18 +5,16 @@
 
 import ibis
 import ibis.common.exceptions as com
 import ibis.expr.datatypes as dt
-from ibis.tests.backends import (
-    Csv,
-    Impala,
-    MySQL,
-    OmniSciDB,
-    Pandas,
-    Parquet,
-    Postgres,
-    PySpark,
-    Spark,
-    SQLite,
-)
+from ibis.backends.csv.tests.conftest import CsvTest
+from ibis.backends.impala.tests.conftest import ImpalaTest
+from ibis.backends.mysql.tests.conftest import MySQLTest
+from ibis.backends.omniscidb.tests.conftest import OmniSciDBTest
+from ibis.backends.pandas.tests.conftest import PandasTest
+from ibis.backends.parquet.tests.conftest import ParquetTest
+from ibis.backends.postgres.tests.conftest import PostgresTest
+from ibis.backends.pyspark.tests.conftest import PySparkTest
+from ibis.backends.spark.tests.conftest import SparkTest
+from ibis.backends.sqlite.tests.conftest import SQLiteTest
 from ibis.udf.vectorized import analytic, reduction
@@ -63,7 +61,7 @@ def calc_zscore(s):
             lambda t: t.id.rank(pct=True),
             id='percent_rank',
             marks=pytest.mark.xpass_backends(
-                [Csv, Pandas, Parquet, PySpark, OmniSciDB],
+                [CsvTest, PandasTest, ParquetTest, PySparkTest, OmniSciDBTest],
                 raises=AssertionError,
             ),
         ),
@@ -88,7 +86,7 @@ def calc_zscore(s):
             lambda t: t.cumcount(),
             id='row_number',
             marks=pytest.mark.xfail_backends(
-                (Pandas, Csv, Parquet),
+                (PandasTest, CsvTest, ParquetTest),
                 raises=(IndexError, com.UnboundExpressionError),
             ),
         ),
@@ -136,7 +134,7 @@ def calc_zscore(s):
             ),
             id='cumnotany',
             marks=pytest.mark.xfail_backends(
-                (Impala, Postgres, Spark, MySQL, SQLite)
+                (ImpalaTest, PostgresTest, SparkTest, MySQLTest, SQLiteTest)
             ),
         ),
         param(
@@ -161,7 +159,7 @@ def calc_zscore(s):
             ),
             id='cumnotall',
             marks=pytest.mark.xfail_backends(
-                (Impala, Postgres, Spark, MySQL, SQLite)
+                (ImpalaTest, PostgresTest, SparkTest, MySQLTest, SQLiteTest)
             ),
         ),
         param(
@@ -242,13 +240,13 @@ def test_grouped_bounded_expanding_window(
             id='mean_udf',
             marks=[
                 pytest.mark.udf,
-                pytest.mark.skip_backends([PySpark, Spark]),
+                pytest.mark.skip_backends([PySparkTest, SparkTest]),
             ],
         ),
     ],
 )
 # Some backends do not support non-grouped window specs
-@pytest.mark.xfail_backends([OmniSciDB])
+@pytest.mark.xfail_backends([OmniSciDBTest])
 @pytest.mark.xfail_unsupported
 def test_ungrouped_bounded_expanding_window(
     backend, alltypes, df, con, result_fn, expected_fn
@@ -456,7 +454,7 @@ def test_grouped_unbounded_window(
             # is currently inconsistent with the other backends (see #2378).
             True,
             id='orderered',
-            marks=pytest.mark.skip_backends([Spark, Impala]),
+            marks=pytest.mark.skip_backends([SparkTest, ImpalaTest]),
         ),
         param(
             # Disabled on MySQL and PySpark because they require a defined
@@ -465,12 +463,14 @@ def test_grouped_unbounded_window(
             # backends (see #2381).
             False,
             id='unordered',
-            marks=pytest.mark.skip_backends([MySQL, PySpark, Spark]),
+            marks=pytest.mark.skip_backends(
+                [MySQLTest, PySparkTest, SparkTest]
+            ),
         ),
     ],
 )
 # Some backends do not support non-grouped window specs
-@pytest.mark.xfail_backends([OmniSciDB])
+@pytest.mark.xfail_backends([OmniSciDBTest])
 @pytest.mark.xfail_unsupported
 def test_ungrouped_unbounded_window(
     backend, alltypes, df, con, result_fn, expected_fn, ordered
diff --git a/ibis/tests/all/conftest.py b/ibis/tests/all/conftest.py
deleted file mode 100644
index 962d5f0b92aa..000000000000
--- a/ibis/tests/all/conftest.py
+++ /dev/null
@@ -1,466 +0,0 @@
-import operator
-import os
-
-import numpy as np
-import pandas as pd
-import pytest
-
-import ibis
-import ibis.common.exceptions as com
-import ibis.util as util
-from ibis.tests.backends import Backend
-
-
-def _random_identifier(suffix):
-    return '__ibis_test_{}_{}'.format(suffix, util.guid())
-
-
-def subclasses(cls):
-    """Get all child classes of `cls` not including `cls`, transitively."""
-    assert isinstance(cls, type), "cls is not a class, type: {}".format(
-        type(cls)
-    )
-    children = set(cls.__subclasses__())
-    return children.union(*map(subclasses, children))
-
-
-ALL_BACKENDS = sorted(subclasses(Backend), key=operator.attrgetter("__name__"))
-
-
-def pytest_runtest_call(item):
-    """Dynamically add various custom markers."""
-    nodeid = item.nodeid
-    for marker in list(item.iter_markers(name="only_on_backends")):
-        (backend_types,) = map(tuple, marker.args)
-        backend = item.funcargs["backend"]
-        assert isinstance(backend, Backend), "backend has type {!r}".format(
-            type(backend).__name__
-        )
-        if not isinstance(backend, backend_types):
-            pytest.skip(
-                f"only_on_backends: {backend} is not in {backend_types} "
-                f"{nodeid}"
-            )
-
-    for marker in list(item.iter_markers(name="skip_backends")):
-        (backend_types,) = map(tuple, marker.args)
-        backend = item.funcargs["backend"]
-        assert isinstance(backend, Backend), "backend has type {!r}".format(
-            type(backend).__name__
-        )
-        if isinstance(backend, backend_types):
-            pytest.skip(f"skip_backends: {backend} {nodeid}")
-
-    for marker in list(item.iter_markers(name="skip_missing_feature")):
-        backend = item.funcargs["backend"]
-        (features,) = marker.args
-        missing_features = [
-            feature for feature in features if not getattr(backend, feature)
-        ]
-        if missing_features:
-            pytest.mark.skip(
-                'Backend {} is missing features {} needed to run {}'.format(
-                    type(backend).__name__, ', '.join(missing_features), nodeid
-                )
-            )
-
-    for marker in list(item.iter_markers(name="xfail_backends")):
-        (backend_types,) = map(tuple, marker.args)
-        backend = item.funcargs["backend"]
-        assert isinstance(backend, Backend), "backend has type {!r}".format(
-            type(backend).__name__
-        )
-        item.add_marker(
-            pytest.mark.xfail(
-                condition=isinstance(backend, backend_types),
-                reason='Backend {} does not pass this test'.format(
-                    type(backend).__name__
-                ),
-                **marker.kwargs,
-            )
-        )
-
-    for marker in list(item.iter_markers(name="xpass_backends")):
-        (backend_types,) = map(tuple, marker.args)
-        backend = item.funcargs["backend"]
-        assert isinstance(backend, Backend), "backend has type {!r}".format(
-            type(backend).__name__
-        )
-        item.add_marker(
-            pytest.mark.xfail(
-                condition=not isinstance(backend, backend_types),
-                reason='{} does not pass this test'.format(
-                    type(backend).__name__
-                ),
-                **marker.kwargs,
-            )
-        )
-
-
-@pytest.hookimpl(hookwrapper=True)
-def pytest_pyfunc_call(pyfuncitem):
-    """Dynamically add an xfail marker for specific backends."""
-    outcome = yield
-    try:
-        outcome.get_result()
-    except (
-        com.OperationNotDefinedError,
-        com.UnsupportedOperationError,
-        com.UnsupportedBackendType,
-        NotImplementedError,
-    ) as e:
-        markers = list(pyfuncitem.iter_markers(name="xfail_unsupported"))
-        assert (
-            len(markers) == 1
-        ), "More than one xfail_unsupported marker found on test {}".format(
-            pyfuncitem
-        )
-        (marker,) = markers
-        backend = pyfuncitem.funcargs["backend"]
-        assert isinstance(backend, Backend), "backend has type {!r}".format(
-            type(backend).__name__
-        )
-        pytest.xfail(reason='{}: {}'.format(type(backend).__name__, e))
-
-
-pytestmark = pytest.mark.backend
-
-pytest_backends = os.environ.get('PYTEST_BACKENDS', '').split(' ')
-params_backend = [
-    pytest.param(backend, marks=getattr(pytest.mark, backend.__name__.lower()))
-    for backend in ALL_BACKENDS
-    if backend.__name__.lower() in pytest_backends or not pytest_backends
-]
-if len(pytest_backends) != len(params_backend):
-    unknown_backends = set(pytest_backends) - {
-        b.__name__.lower() for b in ALL_BACKENDS
-    }
-    raise ValueError(
-        'PYTEST_BACKENDS environment variable contain unknown '
-        f'backends {unknown_backends}'
-    )
-
-
-@pytest.fixture(params=params_backend, scope='session')
-def backend(request, data_directory):
-    return request.param(data_directory)
-
-
-@pytest.fixture(scope='session')
-def con(backend):
-    return backend.connection
-
-
-@pytest.fixture(scope='session')
-def alltypes(backend):
-    return backend.functional_alltypes
-
-
-@pytest.fixture(scope='session')
-def sorted_alltypes(alltypes):
-    return alltypes.sort_by('id')
-
-
-@pytest.fixture(scope='session')
-def batting(backend):
-    return backend.batting
-
-
-@pytest.fixture(scope='session')
-def awards_players(backend):
-    return backend.awards_players
-
-
-@pytest.fixture(scope='session')
-def geo(backend):
-    if backend.geo is None:
-        pytest.skip(
-            'Geo Spatial type not supported for {} backend.'.format(
-                backend.name
-            )
-        )
-    return backend.geo
-
-
-@pytest.fixture
-def analytic_alltypes(alltypes):
-    return alltypes
-
-
-@pytest.fixture(scope='session')
-def df(alltypes):
-    return alltypes.execute()
-
-
-@pytest.fixture(scope='session')
-def sorted_df(df):
-    return df.sort_values('id').reset_index(drop=True)
-
-
-@pytest.fixture(scope='session')
-def batting_df(batting):
-    return batting.execute(limit=None)
-
-
-@pytest.fixture(scope='session')
-def awards_players_df(awards_players):
-    return awards_players.execute(limit=None)
-
-
-@pytest.fixture(scope='session')
-def geo_df(geo):
-    # Currently geo is implemented just for OmniSciDB
-    if geo is not None:
-        return geo.execute(limit=None)
-    return None
-
-
-_spark_testing_client = None
-_pyspark_testing_client = None
-
-
-def get_spark_testing_client(data_directory):
-    global _spark_testing_client
-    if _spark_testing_client is None:
-        _spark_testing_client = get_common_spark_testing_client(
-            data_directory, lambda session: ibis.spark.connect(session)
-        )
-    return _spark_testing_client
-
-
-def get_pyspark_testing_client(data_directory):
-    global _pyspark_testing_client
-    if _pyspark_testing_client is None:
-        _pyspark_testing_client = get_common_spark_testing_client(
-            data_directory, lambda session: ibis.pyspark.connect(session)
-        )
-    return _pyspark_testing_client
-
-
-def get_common_spark_testing_client(data_directory, connect):
-    pytest.importorskip('pyspark')
-    import pyspark.sql.types as pt
-    from pyspark.sql import SparkSession
-
-    spark = SparkSession.builder.getOrCreate()
-    _spark_testing_client = connect(spark)
-    s = _spark_testing_client._session
-
-    df_functional_alltypes = s.read.csv(
-        path=str(data_directory / 'functional_alltypes.csv'),
-        schema=pt.StructType(
-            [
-                pt.StructField('index', pt.IntegerType(), True),
-                pt.StructField('Unnamed: 0', pt.IntegerType(), True),
-                pt.StructField('id', pt.IntegerType(), True),
-                # cast below, Spark can't read 0/1 as bool
-                pt.StructField('bool_col', pt.ByteType(), True),
-                pt.StructField('tinyint_col', pt.ByteType(), True),
-                pt.StructField('smallint_col', pt.ShortType(), True),
-                pt.StructField('int_col', pt.IntegerType(), True),
-                pt.StructField('bigint_col', pt.LongType(), True),
-                pt.StructField('float_col', pt.FloatType(), True),
-                pt.StructField('double_col', pt.DoubleType(), True),
-                pt.StructField('date_string_col', pt.StringType(), True),
-                pt.StructField('string_col', pt.StringType(), True),
-                pt.StructField('timestamp_col', pt.TimestampType(), True),
-                pt.StructField('year', pt.IntegerType(), True),
-                pt.StructField('month', pt.IntegerType(), True),
-            ]
-        ),
-        mode='FAILFAST',
-        header=True,
-    )
-    df_functional_alltypes = df_functional_alltypes.withColumn(
-        "bool_col", df_functional_alltypes["bool_col"].cast("boolean")
-    )
-    df_functional_alltypes.createOrReplaceTempView('functional_alltypes')
-
-    df_batting = s.read.csv(
-        path=str(data_directory / 'batting.csv'),
-        schema=pt.StructType(
-            [
-                pt.StructField('playerID', pt.StringType(), True),
-                pt.StructField('yearID', pt.IntegerType(), True),
-                pt.StructField('stint', pt.IntegerType(), True),
-                pt.StructField('teamID', pt.StringType(), True),
-                pt.StructField('lgID', pt.StringType(), True),
-                pt.StructField('G', pt.IntegerType(), True),
-                pt.StructField('AB', pt.DoubleType(), True),
-                pt.StructField('R', pt.DoubleType(), True),
-                pt.StructField('H', pt.DoubleType(), True),
-                pt.StructField('X2B', pt.DoubleType(), True),
-                pt.StructField('X3B', pt.DoubleType(), True),
-                pt.StructField('HR', pt.DoubleType(), True),
-                pt.StructField('RBI', pt.DoubleType(), True),
-                pt.StructField('SB', pt.DoubleType(), True),
-                pt.StructField('CS', pt.DoubleType(), True),
-                pt.StructField('BB', pt.DoubleType(), True),
-                pt.StructField('SO', pt.DoubleType(), True),
-                pt.StructField('IBB', pt.DoubleType(), True),
-                pt.StructField('HBP', pt.DoubleType(), True),
-                pt.StructField('SH', pt.DoubleType(), True),
-                pt.StructField('SF', pt.DoubleType(), True),
-                pt.StructField('GIDP', pt.DoubleType(), True),
-            ]
-        ),
-        header=True,
-    )
-    df_batting.createOrReplaceTempView('batting')
-
-    df_awards_players = s.read.csv(
-        path=str(data_directory / 'awards_players.csv'),
-        schema=pt.StructType(
-            [
-                pt.StructField('playerID', pt.StringType(), True),
-                pt.StructField('awardID', pt.StringType(), True),
-                pt.StructField('yearID', pt.IntegerType(), True),
-                pt.StructField('lgID', pt.StringType(), True),
-                pt.StructField('tie', pt.StringType(), True),
-                pt.StructField('notes', pt.StringType(), True),
-            ]
-        ),
-        header=True,
-    )
-    df_awards_players.createOrReplaceTempView('awards_players')
-
-    df_simple = s.createDataFrame([(1, 'a')], ['foo', 'bar'])
-    df_simple.createOrReplaceTempView('simple')
-
-    df_struct = s.createDataFrame([((1, 2, 'a'),)], ['struct_col'])
-    df_struct.createOrReplaceTempView('struct')
-
-    df_nested_types = s.createDataFrame(
-        [([1, 2], [[3, 4], [5, 6]], {'a': [[2, 4], [3, 5]]})],
-        [
-            'list_of_ints',
-            'list_of_list_of_ints',
-            'map_string_list_of_list_of_ints',
-        ],
-    )
-    df_nested_types.createOrReplaceTempView('nested_types')
-
-    df_complicated = s.createDataFrame(
-        [({(1, 3): [[2, 4], [3, 5]]},)], ['map_tuple_list_of_list_of_ints']
-    )
-    df_complicated.createOrReplaceTempView('complicated')
-
-    df_udf = s.createDataFrame(
-        [('a', 1, 4.0, 'a'), ('b', 2, 5.0, 'a'), ('c', 3, 6.0, 'b')],
-        ['a', 'b', 'c', 'key'],
-    )
-    df_udf.createOrReplaceTempView('udf')
-
-    df_udf_nan = s.createDataFrame(
-        pd.DataFrame(
-            {
-                'a': np.arange(10, dtype=float),
-                'b': [3.0, np.NaN] * 5,
-                'key': list('ddeefffggh'),
-            }
-        )
-    )
-    df_udf_nan.createOrReplaceTempView('udf_nan')
-
-    df_udf_null = s.createDataFrame(
-        [
-            (float(i), None if i % 2 else 3.0, 'ddeefffggh'[i])
-            for i in range(10)
-        ],
-        ['a', 'b', 'key'],
-    )
-    df_udf_null.createOrReplaceTempView('udf_null')
-
-    df_udf_random = s.createDataFrame(
-        pd.DataFrame(
-            {
-                'a': np.arange(4, dtype=float).tolist()
-                + np.random.rand(3).tolist(),
-                'b': np.arange(4, dtype=float).tolist()
-                + np.random.rand(3).tolist(),
-                'key': list('ddeefff'),
-            }
-        )
-    )
-    df_udf_random.createOrReplaceTempView('udf_random')
-
-    return _spark_testing_client
-
-
-@pytest.fixture
-def temp_table(con: ibis.client.Client) -> str:
-    """
-    Return a temporary table name.
-
-    Parameters
-    ----------
-    con : ibis.client.Client
-
-    Yields
-    ------
-    name : string
-        Random table name for a temporary usage.
-    """
-    name = _random_identifier('table')
-    try:
-        yield name
-    finally:
-        if hasattr(con, 'drop_table'):
-            con.drop_table(name, force=True)
-
-
-@pytest.fixture
-def temp_view(con) -> str:
-    """Return a temporary view name.
-
-    Parameters
-    ----------
-    con : ibis.omniscidb.OmniSciDBClient
-
-    Yields
-    ------
-    name : string
-        Random view name for a temporary usage.
-    """
-    name = _random_identifier('view')
-    try:
-        yield name
-    finally:
-        if hasattr(con, 'drop_view'):
-            con.drop_view(name, force=True)
-
-
-@pytest.fixture(scope='session')
-def current_data_db(con, backend) -> str:
-    """Return current database name."""
-    if not hasattr(con, 'current_database'):
-        pytest.skip(
-            f'{backend.name} backend doesn\'t have current_database method.'
-        )
-    return con.current_database
-
-
-@pytest.fixture
-def alternate_current_database(con, backend, current_data_db: str) -> str:
-    """Create a temporary database and yield its name.
-    Drops the created database upon completion.
-
-    Parameters
-    ----------
-    con : ibis.client.Client
-    current_data_db : str
-    Yields
-    -------
-    str
-    """
-    name = _random_identifier('database')
-    if not hasattr(con, 'create_database'):
-        pytest.skip(
-            f'{backend.name} backend doesn\'t have create_database method.'
-        )
-    con.create_database(name)
-    try:
-        yield name
-    finally:
-        con.set_database(current_data_db)
-        con.drop_database(name, force=True)
diff --git a/ibis/tests/backends.py b/ibis/tests/backends.py
deleted file mode 100644
index 640379038f64..000000000000
--- a/ibis/tests/backends.py
+++ /dev/null
@@ -1,588 +0,0 @@
-import abc
-import os
-from pathlib import Path
-from typing import Any, Callable, Mapping, Optional
-
-import numpy as np
-import pandas as pd
-import pandas.testing as tm
-import pytest
-from pkg_resources import parse_version
-
-import ibis
-import ibis.backends.base_sqlalchemy.compiler as comp
-import ibis.expr.datatypes as dt
-import ibis.expr.operations as ops
-import ibis.expr.types as ir
-
-
-class RoundingConvention:
-    @staticmethod
-    @abc.abstractmethod
-    def round(series: pd.Series, decimals: int = 0) -> pd.Series:
-        """Round a series to `decimals` number of decimal values."""
-
-
-class RoundAwayFromZero(RoundingConvention):
-    @staticmethod
-    def round(series: pd.Series, decimals: int = 0) -> pd.Series:
-        if not decimals:
-            return (
-                -(np.sign(series)) * np.ceil(-(series.abs()) - 0.5)
-            ).astype(np.int64)
-        return series.round(decimals=decimals)
-
-
-class RoundHalfToEven(RoundingConvention):
-    @staticmethod
-    def round(series: pd.Series, decimals: int = 0) -> pd.Series:
-        result = series.round(decimals=decimals)
-        return result if decimals else result.astype(np.int64)
-
-
-class Backend(abc.ABC):
-    check_dtype = True
-    check_names = True
-    supports_arrays = True
-    supports_arrays_outside_of_select = supports_arrays
-    supports_window_operations = True
-    additional_skipped_operations = frozenset()
-    supports_divide_by_zero = False
-    returned_timestamp_unit = 'us'
-    supported_to_timestamp_units = {'s', 'ms', 'us'}
-    supports_floating_modulus = True
-
-    def __init__(self, data_directory: Path) -> None:
-        self.api  # skips if we can't access the backend
-        self.connection = self.connect(data_directory)
-
-    @property
-    def name(self) -> str:
-        return str(self).lower()
-
-    def __str__(self) -> str:
-        return self.__class__.__name__
-
-    @staticmethod
-    @abc.abstractmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        """Return a connection with data loaded from `data_directory`."""
-
-    @classmethod
-    def assert_series_equal(
-        cls, left: pd.Series, right: pd.Series, *args: Any, **kwargs: Any
-    ) -> None:
-        kwargs.setdefault('check_dtype', cls.check_dtype)
-        kwargs.setdefault('check_names', cls.check_names)
-        tm.assert_series_equal(left, right, *args, **kwargs)
-
-    @classmethod
-    def assert_frame_equal(
-        cls, left: pd.DataFrame, right: pd.DataFrame, *args: Any, **kwargs: Any
-    ) -> None:
-        left = left.reset_index(drop=True)
-        right = right.reset_index(drop=True)
-        tm.assert_frame_equal(left, right, *args, **kwargs)
-
-    @staticmethod
-    def default_series_rename(
-        series: pd.Series, name: str = 'tmp'
-    ) -> pd.Series:
-        return series.rename(name)
-
-    @staticmethod
-    def greatest(
-        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
-    ) -> ir.ValueExpr:
-        return f(*args)
-
-    @staticmethod
-    def least(
-        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
-    ) -> ir.ValueExpr:
-        return f(*args)
-
-    @property
-    def db(self) -> ibis.client.Database:
-        return self.connection.database()
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        return self.db.functional_alltypes
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        return self.db.batting
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        return self.db.awards_players
-
-    @property
-    def geo(self) -> Optional[ir.TableExpr]:
-        return None
-
-    @property
-    def api(self):
-        return getattr(ibis, self.name)
-
-    def make_context(
-        self, params: Optional[Mapping[ir.ValueExpr, Any]] = None
-    ) -> comp.QueryContext:
-        return self.api.dialect.make_context(params=params)
-
-
-class UnorderedComparator:
-    @classmethod
-    def assert_series_equal(
-        cls, left: pd.Series, right: pd.Series, *args: Any, **kwargs: Any
-    ) -> None:
-        left = left.sort_values().reset_index(drop=True)
-        right = right.sort_values().reset_index(drop=True)
-        return super().assert_series_equal(left, right, *args, **kwargs)
-
-    @classmethod
-    def assert_frame_equal(
-        cls, left: pd.DataFrame, right: pd.DataFrame, *args: Any, **kwargs: Any
-    ) -> None:
-        columns = list(set(left.columns) & set(right.columns))
-        left = left.sort_values(by=columns)
-        right = right.sort_values(by=columns)
-        return super().assert_frame_equal(left, right, *args, **kwargs)
-
-
-class Pandas(Backend, RoundHalfToEven):
-    check_names = False
-    additional_skipped_operations = frozenset({ops.StringSQLLike})
-    supported_to_timestamp_units = Backend.supported_to_timestamp_units | {
-        'ns'
-    }
-    supports_divide_by_zero = True
-    returned_timestamp_unit = 'ns'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        return ibis.pandas.connect(
-            {
-                'functional_alltypes': pd.read_csv(
-                    str(data_directory / 'functional_alltypes.csv'),
-                    index_col=None,
-                    dtype={'bool_col': bool, 'string_col': str},
-                    parse_dates=['timestamp_col'],
-                    encoding='utf-8',
-                ),
-                'batting': pd.read_csv(str(data_directory / 'batting.csv')),
-                'awards_players': pd.read_csv(
-                    str(data_directory / 'awards_players.csv')
-                ),
-            }
-        )
-
-
-class Dask(Pandas):
-    # clone pandas directly until the rest of the dask backend is defined
-    pass
-
-
-class Csv(Pandas):
-    check_names = False
-    supports_divide_by_zero = True
-    returned_timestamp_unit = 'ns'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        filename = data_directory / 'functional_alltypes.csv'
-        if not filename.exists():
-            pytest.skip('test data set {} not found'.format(filename))
-        return ibis.csv.connect(data_directory)
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        schema = ibis.schema(
-            [
-                ('bool_col', dt.boolean),
-                ('string_col', dt.string),
-                ('timestamp_col', dt.timestamp),
-            ]
-        )
-        return self.connection.table('functional_alltypes', schema=schema)
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        schema = ibis.schema(
-            [
-                ('lgID', dt.string),
-                ('G', dt.float64),
-                ('AB', dt.float64),
-                ('R', dt.float64),
-                ('H', dt.float64),
-                ('X2B', dt.float64),
-                ('X3B', dt.float64),
-                ('HR', dt.float64),
-                ('RBI', dt.float64),
-                ('SB', dt.float64),
-                ('CS', dt.float64),
-                ('BB', dt.float64),
-                ('SO', dt.float64),
-            ]
-        )
-        return self.connection.table('batting', schema=schema)
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        schema = ibis.schema(
-            [('lgID', dt.string), ('tie', dt.string), ('notes', dt.string)]
-        )
-        return self.connection.table('awards_players', schema=schema)
-
-
-class Parquet(Pandas):
-    check_names = False
-    supports_divide_by_zero = True
-    returned_timestamp_unit = 'ns'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        filename = data_directory / 'functional_alltypes.parquet'
-        if not filename.exists():
-            pytest.skip('test data set {} not found'.format(filename))
-        return ibis.parquet.connect(data_directory)
-
-
-class HDF5(Pandas):
-    check_names = False
-    supports_divide_by_zero = True
-    returned_timestamp_unit = 'ns'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        filename = data_directory / 'functional_alltypes.h5'
-        if not filename.exists():
-            pytest.skip('test data set {} not found'.format(filename))
-        return ibis.hdf5.connect(data_directory)
-
-
-class SQLite(Backend, RoundAwayFromZero):
-    supports_arrays = False
-    supports_arrays_outside_of_select = supports_arrays
-    supports_window_operations = True
-    check_dtype = False
-    returned_timestamp_unit = 's'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        path = Path(
-            os.environ.get(
-                'IBIS_TEST_SQLITE_DATABASE', data_directory / 'ibis_testing.db'
-            )
-        )
-        if not path.exists():
-            pytest.skip('SQLite testing db {} does not exist'.format(path))
-        return ibis.sqlite.connect(str(path))
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        t = self.db.functional_alltypes
-        return t.mutate(timestamp_col=t.timestamp_col.cast('timestamp'))
-
-
-class Postgres(Backend, RoundHalfToEven):
-    # postgres rounds half to even for double precision and half away from zero
-    # for numeric and decimal
-
-    returned_timestamp_unit = 's'
-
-    @property
-    def name(self) -> str:
-        return 'postgres'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        user = os.environ.get(
-            'IBIS_TEST_POSTGRES_USER', os.environ.get('PGUSER', 'postgres')
-        )
-        password = os.environ.get(
-            'IBIS_TEST_POSTGRES_PASSWORD', os.environ.get('PGPASS', 'postgres')
-        )
-        host = os.environ.get(
-            'IBIS_TEST_POSTGRES_HOST', os.environ.get('PGHOST', 'localhost')
-        )
-        port = os.environ.get(
-            'IBIS_TEST_POSTGRES_PORT', os.environ.get('PGPORT', '5432')
-        )
-        database = os.environ.get(
-            'IBIS_TEST_POSTGRES_DATABASE',
-            os.environ.get('PGDATABASE', 'ibis_testing'),
-        )
-        return ibis.postgres.connect(
-            host=host,
-            port=port,
-            user=user,
-            password=password,
-            database=database,
-        )
-
-    @property
-    def geo(self) -> Optional[ir.TableExpr]:
-        if 'geo' in self.db.list_tables():
-            return self.db.geo
-
-
-class OmniSciDB(Backend, RoundAwayFromZero):
-    check_dtype = False
-    check_names = False
-    supports_window_operations = True
-    supports_divide_by_zero = False
-    supports_floating_modulus = False
-    returned_timestamp_unit = 's'
-    # Exception: Non-empty LogicalValues not supported yet
-    additional_skipped_operations = frozenset(
-        {
-            ops.Abs,
-            ops.Ceil,
-            ops.Floor,
-            ops.Exp,
-            ops.Sign,
-            ops.Sqrt,
-            ops.Ln,
-            ops.Log10,
-            ops.Modulus,
-        }
-    )
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        user = os.environ.get('IBIS_TEST_OMNISCIDB_USER', 'admin')
-        password = os.environ.get(
-            'IBIS_TEST_OMNISCIDB_PASSWORD', 'HyperInteractive'
-        )
-        host = os.environ.get('IBIS_TEST_OMNISCIDB_HOST', 'localhost')
-        port = os.environ.get('IBIS_TEST_OMNISCIDB_PORT', '6274')
-        database = os.environ.get(
-            'IBIS_TEST_OMNISCIDB_DATABASE', 'ibis_testing'
-        )
-        return ibis.omniscidb.connect(
-            host=host,
-            port=port,
-            user=user,
-            password=password,
-            database=database,
-        )
-
-    @property
-    def geo(self) -> Optional[ir.TableExpr]:
-        return self.db.geo
-
-
-class MySQL(Backend, RoundHalfToEven):
-    # mysql has the same rounding behavior as postgres
-    check_dtype = False
-    supports_window_operations = False
-    returned_timestamp_unit = 's'
-
-    def __init__(self, data_directory: Path) -> None:
-        super().__init__(data_directory)
-        # mariadb supports window operations after version 10.2
-        # but the sqlalchemy version string looks like:
-        # 5.5.5.10.2.12.MariaDB.10.2.12+maria~jessie
-        # or 10.4.12.MariaDB.1:10.4.12+maria~bionic
-        # example of possible results:
-        # https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_3/
-        # test/dialect/mysql/test_dialect.py#L244-L268
-        con = self.connection
-        if 'MariaDB' in str(con.version):
-            # we might move this parsing step to the mysql client
-            version_detail = con.con.dialect._parse_server_version(
-                str(con.version)
-            )
-            version = (
-                version_detail[:3]
-                if version_detail[3] == 'MariaDB'
-                else version_detail[3:6]
-            )
-            self.__class__.supports_window_operations = version >= (10, 2)
-        elif con.version >= parse_version('8.0'):
-            # mysql supports window operations after version 8
-            self.__class__.supports_window_operations = True
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        user = os.environ.get('IBIS_TEST_MYSQL_USER', 'ibis')
-        password = os.environ.get('IBIS_TEST_MYSQL_PASSWORD', 'ibis')
-        host = os.environ.get('IBIS_TEST_MYSQL_HOST', 'localhost')
-        port = os.environ.get('IBIS_TEST_MYSQL_PORT', 3306)
-        database = os.environ.get('IBIS_TEST_MYSQL_DATABASE', 'ibis_testing')
-        return ibis.mysql.connect(
-            host=host,
-            port=port,
-            user=user,
-            password=password,
-            database=database,
-        )
-
-    @property
-    def functional_alltypes(self):
-        # BOOLEAN <-> TINYINT(1)
-        t = super().functional_alltypes
-        return t.mutate(bool_col=t.bool_col == 1)
-
-
-class Clickhouse(UnorderedComparator, Backend, RoundHalfToEven):
-    check_dtype = False
-    supports_window_operations = False
-    returned_timestamp_unit = 's'
-    supported_to_timestamp_units = {'s'}
-    supports_floating_modulus = False
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        host = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST', 'localhost')
-        port = int(os.environ.get('IBIS_TEST_CLICKHOUSE_PORT', 9000))
-        user = os.environ.get('IBIS_TEST_CLICKHOUSE_USER', 'default')
-        password = os.environ.get('IBIS_TEST_CLICKHOUSE_PASSWORD', '')
-        database = os.environ.get(
-            'IBIS_TEST_CLICKHOUSE_DATABASE', 'ibis_testing'
-        )
-        return ibis.clickhouse.connect(
-            host=host,
-            port=port,
-            password=password,
-            database=database,
-            user=user,
-        )
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        t = super().functional_alltypes
-        return t.mutate(bool_col=t.bool_col == 1)
-
-    @staticmethod
-    def greatest(
-        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
-    ) -> ir.ValueExpr:
-        if len(args) > 2:
-            raise NotImplementedError(
-                'Clickhouse does not support more than 2 arguments to greatest'
-            )
-        return f(*args)
-
-    @staticmethod
-    def least(
-        f: Callable[..., ir.ValueExpr], *args: ir.ValueExpr
-    ) -> ir.ValueExpr:
-        if len(args) > 2:
-            raise NotImplementedError(
-                'Clickhouse does not support more than 2 arguments to least'
-            )
-        return f(*args)
-
-
-class BigQuery(UnorderedComparator, Backend, RoundAwayFromZero):
-    supports_divide_by_zero = True
-    supports_floating_modulus = False
-    returned_timestamp_unit = 'us'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        from ibis.bigquery.tests.conftest import connect
-
-        project_id = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID')
-        if project_id is None:
-            pytest.skip(
-                'Environment variable GOOGLE_BIGQUERY_PROJECT_ID '
-                'not defined'
-            )
-        elif not project_id:
-            pytest.skip(
-                'Environment variable GOOGLE_BIGQUERY_PROJECT_ID is empty'
-            )
-        return connect(project_id, dataset_id='testing')
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        return None
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        return None
-
-
-class Impala(UnorderedComparator, Backend, RoundAwayFromZero):
-    supports_arrays = True
-    supports_arrays_outside_of_select = False
-    check_dtype = False
-    supports_divide_by_zero = True
-    returned_timestamp_unit = 's'
-
-    @staticmethod
-    def connect(data_directory: Path) -> ibis.client.Client:
-        from ibis.backends.impala.tests.conftest import IbisTestEnv
-
-        env = IbisTestEnv()
-        hdfs_client = ibis.impala.hdfs_connect(
-            host=env.nn_host,
-            port=env.webhdfs_port,
-            auth_mechanism=env.auth_mechanism,
-            verify=env.auth_mechanism not in ['GSSAPI', 'LDAP'],
-            user=env.webhdfs_user,
-        )
-        auth_mechanism = env.auth_mechanism
-        if auth_mechanism == 'GSSAPI' or auth_mechanism == 'LDAP':
-            print("Warning: ignoring invalid Certificate Authority errors")
-        return ibis.impala.connect(
-            host=env.impala_host,
-            port=env.impala_port,
-            auth_mechanism=env.auth_mechanism,
-            hdfs_client=hdfs_client,
-            database='ibis_testing',
-        )
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        return None
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        return None
-
-
-class Spark(Backend, RoundHalfToEven):
-    @staticmethod
-    def connect(data_directory):
-        from ibis.tests.all.conftest import get_spark_testing_client
-
-        return get_spark_testing_client(data_directory)
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        return self.connection.table('functional_alltypes')
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        return self.connection.table('batting')
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        return self.connection.table('awards_players')
-
-
-class PySpark(Backend, RoundAwayFromZero):
-    supported_to_timestamp_units = {'s'}
-
-    @staticmethod
-    def connect(data_directory):
-        from ibis.tests.all.conftest import get_pyspark_testing_client
-
-        return get_pyspark_testing_client(data_directory)
-
-    @property
-    def functional_alltypes(self) -> ir.TableExpr:
-        return self.connection.table('functional_alltypes')
-
-    @property
-    def batting(self) -> ir.TableExpr:
-        return self.connection.table('batting')
-
-    @property
-    def awards_players(self) -> ir.TableExpr:
-        return self.connection.table('awards_players')
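
The sketch below is not part of the patch; it is a minimal, hypothetical illustration of how a test module consumes the relocated classes after this change. Each backend's *Test class now lives in its own ibis/backends/<name>/tests/conftest.py, and the custom markers shown in the deleted conftest.py (only_on_backends, xfail_unsupported) receive those classes directly. The test name and the expression it builds are assumptions for illustration, not code from this PR.

# Hypothetical usage sketch, assuming the `backend`, `alltypes`, and `df`
# fixtures and the marker hooks from the deleted conftest.py continue to
# exist in their new home under ibis/backends/tests/.
import pytest

from ibis.backends.pandas.tests.conftest import PandasTest
from ibis.backends.pyspark.tests.conftest import PySparkTest


@pytest.mark.only_on_backends([PandasTest, PySparkTest])
@pytest.mark.xfail_unsupported
def test_add_one(backend, alltypes, df):
    # `backend` is an instance of one of the *Test classes; the marker
    # machinery compares it against the listed classes with isinstance().
    result = (alltypes.double_col + 1).execute()
    backend.assert_series_equal(result, df.double_col + 1, check_names=False)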