
Commit

Merge pull request #28 from pydiverse/update-pixi
Update pixi lockfile
finn-rudolph authored Oct 2, 2024
2 parents 46ed4fe + 0172207 commit 6d1968a
Showing 37 changed files with 8,931 additions and 8,470 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/tests.yml
@@ -6,7 +6,7 @@ on:
- main
pull_request:
paths-ignore:
- 'docs/**'
- "docs/**"

jobs:
lint:
@@ -17,10 +17,10 @@ jobs:
- name: Checkout branch
uses: actions/checkout@v4

- name: Install Python 3.9
- name: Install Python 3.10
uses: actions/setup-python@v5
with:
python-version: 3.9
python-version: "3.10"

- name: Linting - Run pre-commit checks
uses: pre-commit/action@v3.0.1
@@ -33,9 +33,9 @@ jobs:
os:
- ubuntu-latest
environment:
- py39
- py310
- py311
- py312
timeout-minutes: 20
steps:
- uses: actions/checkout@v4
@@ -63,7 +63,6 @@ jobs:
env:
SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
run: |
pixi run -e ${{ matrix.environment }} pytest tests -ra ${RUNNER_DEBUG:+-v} --color=yes --postgres --mssql
2 changes: 1 addition & 1 deletion docker-compose.yaml
@@ -8,7 +8,7 @@ services:
ports:
- "6543:5432"
mssql:
image: mcr.microsoft.com/azure-sql-edge
image: mcr.microsoft.com/mssql/server:2022-latest
environment:
ACCEPT_EULA: Y
SA_PASSWORD: PydiQuant27
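
The mssql service now uses the full SQL Server 2022 image instead of Azure SQL Edge. As a local-usage sketch (assuming the Docker Compose v2 CLI and the service name defined in this file), the container can be started with:

    docker compose up -d mssql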
15,805 changes: 7,973 additions & 7,832 deletions pixi.lock

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions pixi.toml
@@ -4,7 +4,7 @@ name = "pydiverse.transform"
platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"]

[dependencies]
python = ">=3.9"
python = ">=3.10"
numpy = ">=1.23.1"
pandas = ">=1.4.3"
SQLAlchemy = ">=1.4.27"
@@ -56,8 +56,6 @@ myst-parser = ">=2.0.0"
docs = "cd docs && make html "
readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r docs/build/html $READTHEDOCS_OUTPUT/html"

[feature.py39.dependencies]
python = "3.9.*"
[feature.py310.dependencies]
python = "3.10.*"
[feature.py311.dependencies]
@@ -67,11 +65,9 @@ python = "3.12.*"

[environments]
default = ["py312", "dev", "duckdb", "postgres", "mssql"]
py39 = ["py39", "dev", "duckdb", "postgres", "mssql"]
py310 = ["py310", "dev", "duckdb", "postgres", "mssql"]
py311 = ["py311", "dev", "duckdb", "postgres", "mssql"]
py312 = ["py312", "dev", "duckdb", "postgres", "mssql"]
py39ibm = ["py39", "dev", "duckdb", "postgres", "mssql", "ibm-db"]
py312ibm = ["py312", "dev", "duckdb", "postgres", "mssql", "ibm-db"]
docs = ["docs"]
release = { features = ["release"], no-default-feature = true }
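
With the py39 environments removed, the test environments mirror the CI matrix (py310, py311, py312). As a local-usage sketch, assuming the database containers from docker-compose.yaml are already running, one environment can be exercised with the same command the workflow invokes:

    pixi run -e py310 pytest tests -ra --color=yes --postgres --mssql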
37 changes: 11 additions & 26 deletions pyproject.toml
@@ -3,9 +3,9 @@ name = "pydiverse-transform"
version = "0.2.0"
description = "Pipe based dataframe manipulation library that can also transform data on SQL databases"
authors = [
{name = "QuantCo, Inc."},
{name = "Nicolas Camenisch", email = "garnele007@gmail.com"},
{name = "Martin Trautmann", email = "windiana@users.sf.net"},
{ name = "QuantCo, Inc." },
{ name = "Nicolas Camenisch", email = "garnele007@gmail.com" },
{ name = "Martin Trautmann", email = "windiana@users.sf.net" },
]
license = "BSD-3-Clause"
readme = "docs/package/README.md"
@@ -16,16 +16,10 @@ classifiers = [
"Programming Language :: SQL",
"Topic :: Database",
]
requires-python = ">=3.9"
dependencies = [
"numpy>=1.23.1",
"pandas>=1.4.3",
"SQLAlchemy>=1.4.27",
]
requires-python = ">=3.10"
dependencies = ["numpy>=1.23.1", "pandas>=1.4.3", "SQLAlchemy>=1.4.27"]
[project.optional-dependencies]
pyarrow = [
"pyarrow>=11.0.0",
]
pyarrow = ["pyarrow>=11.0.0"]

[tool.hatch.build.targets.wheel]
packages = ["src/pydiverse"]
@@ -36,21 +30,12 @@ extend-exclude = ["docs/*"]
fix = true

[tool.ruff.lint]
select = [
"F",
"E",
"UP",
"W",
"I001",
"I002",
"B",
"A",
]
select = ["F", "E", "UP", "W", "I001", "I002", "B", "A"]
ignore = [
"B017",
"B028",
"A001", # Keep while filter verb still is called filter
"A003", # Keep while filter verb still is called filter
"B017",
"B028",
"A001", # Keep while filter verb still is called filter
"A003", # Keep while filter verb still is called filter
]
ignore-init-module-imports = true

16 changes: 16 additions & 0 deletions src/pydiverse/transform/__init__.py
@@ -13,6 +13,15 @@
)
from pydiverse.transform.pipe.pipeable import verb
from pydiverse.transform.pipe.table import Table
from pydiverse.transform.tree.dtypes import (
Bool,
Date,
DateTime,
Duration,
Float64,
Int64,
String,
)

__all__ = [
"Polars",
@@ -22,4 +31,11 @@
"aligned",
"verb",
"C",
"Float64",
"Int64",
"String",
"Bool",
"DateTime",
"Date",
"Duration",
]
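
Because the dtype classes are now re-exported from the package root (see the extended __all__ above), they can be imported directly; a minimal usage sketch:

    from pydiverse.transform import Bool, Date, DateTime, Duration, Float64, Int64, String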
65 changes: 63 additions & 2 deletions src/pydiverse/transform/backend/duckdb.py
@@ -2,20 +2,33 @@

import polars as pl
import sqlalchemy as sqa
from sqlalchemy.sql.type_api import TypeEngine as TypeEngine

from pydiverse.transform import ops
from pydiverse.transform.backend import sql
from pydiverse.transform.backend.sql import SqlImpl
from pydiverse.transform.backend.targets import Polars, Target
from pydiverse.transform.tree import dtypes
from pydiverse.transform.tree import dtypes, verbs
from pydiverse.transform.tree.ast import AstNode
from pydiverse.transform.tree.col_expr import Cast, Col
from pydiverse.transform.tree.col_expr import Cast, Col, ColFn, LiteralCol


class DuckDbImpl(SqlImpl):
dialect_name = "duckdb"

@classmethod
def export(cls, nd: AstNode, target: Target, final_select: list[Col]):
# insert casts after sum() over integer columns (duckdb converts them to floats)
for desc in nd.iter_subtree():
if isinstance(desc, verbs.Verb):
desc.map_col_nodes(
lambda u: Cast(u, dtypes.Int64())
if isinstance(u, ColFn)
and u.name == "sum"
and u.dtype() == dtypes.Int64
else u
)

if isinstance(target, Polars):
engine = sql.get_engine(nd)
with engine.connect() as conn:
@@ -31,3 +44,51 @@ def compile_cast(cls, cast: Cast, sqa_col: dict[str, sqa.Label]) -> Cast:
sqa.BigInteger()
)
return super().compile_cast(cast, sqa_col)

@classmethod
def compile_lit(cls, lit: LiteralCol) -> sqa.ColumnElement:
if lit.dtype() == dtypes.Int64:
return sqa.cast(lit.val, sqa.BigInteger)
return super().compile_lit(lit)


with DuckDbImpl.op(ops.FloorDiv()) as op:

@op.auto
def _floordiv(lhs, rhs):
return sqa.func.divide(lhs, rhs)


with DuckDbImpl.op(ops.RFloorDiv()) as op:

@op.auto
def _floordiv(rhs, lhs):
return sqa.func.divide(lhs, rhs)


with DuckDbImpl.op(ops.IsInf()) as op:

@op.auto
def _is_inf(x):
return sqa.func.isinf(x)


with DuckDbImpl.op(ops.IsNotInf()) as op:

@op.auto
def _is_not_inf(x):
return sqa.func.isfinite(x)


with DuckDbImpl.op(ops.IsNan()) as op:

@op.auto
def _is_nan(x):
return sqa.func.isnan(x)


with DuckDbImpl.op(ops.IsNotNan()) as op:

@op.auto
def _is_not_nan(x):
return ~sqa.func.isnan(x)
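
The new export hook wraps integer sum() expressions in a Cast back to Int64 before compilation, since (per the comment above) DuckDB otherwise widens such aggregates. A minimal sketch of what that cast compiles to, using plain SQLAlchemy and a hypothetical column name:

    import sqlalchemy as sqa

    # Roughly what Cast(sum_expr, dtypes.Int64()) becomes on the DuckDB backend
    expr = sqa.cast(sqa.func.sum(sqa.column("x")), sqa.BigInteger())
    print(expr)  # CAST(sum(x) AS BIGINT)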
72 changes: 36 additions & 36 deletions src/pydiverse/transform/backend/mssql.py
@@ -10,6 +10,7 @@
from pydiverse.transform import ops
from pydiverse.transform.backend import sql
from pydiverse.transform.backend.sql import SqlImpl
from pydiverse.transform.errors import NotSupportedError
from pydiverse.transform.tree import dtypes, verbs
from pydiverse.transform.tree.ast import AstNode
from pydiverse.transform.tree.col_expr import (
@@ -27,36 +28,13 @@
class MsSqlImpl(SqlImpl):
dialect_name = "mssql"

INF = sqa.cast(sqa.literal("1.0"), type_=sqa.Float()) / sqa.literal(
"0.0", type_=sqa.Float()
)
NEG_INF = -INF
NAN = INF + NEG_INF

@classmethod
def compile_cast(cls, cast: Cast, sqa_col: dict[str, sqa.Label]) -> sqa.Cast:
compiled_val = cls.compile_col_expr(cast.val, sqa_col)
if cast.val.dtype() == dtypes.String and cast.target_type == dtypes.Float64:
return sqa.case(
(compiled_val == "inf", cls.INF),
(compiled_val == "-inf", -cls.INF),
(compiled_val.in_(("nan", "-nan")), cls.NAN),
else_=sqa.cast(
compiled_val,
cls.sqa_type(cast.target_type),
),
)

if cast.val.dtype() == dtypes.Float64 and cast.target_type == dtypes.String:
compiled = sqa.cast(cls.compile_col_expr(cast.val, sqa_col), sqa.String)
return sqa.case(
(compiled == "1.#QNAN", "nan"),
(compiled == "1.#INF", "inf"),
(compiled == "-1.#INF", "-inf"),
else_=compiled,
)
def inf():
raise NotSupportedError("SQL Server does not support `inf`")

return sqa.cast(compiled_val, cls.sqa_type(cast.target_type))
@classmethod
def nan():
raise NotSupportedError("SQL Server does not support `nan`")

@classmethod
def build_select(cls, nd: AstNode, final_select: list[Col]) -> Any:
@@ -90,7 +68,7 @@ def build_select(cls, nd: AstNode, final_select: list[Col]) -> Any:
@classmethod
def sqa_type(cls, t: dtypes.Dtype):
if isinstance(t, dtypes.DateTime):
return DATETIME2()
return DATETIME2

return super().sqa_type(t)

@@ -343,13 +321,7 @@ def _mean(x):

@op.auto
def _log(x):
# TODO: we still need to handle inf / -inf / nan
return sqa.case(
(x > 0, sqa.func.log(x)),
(x < 0, MsSqlImpl.NAN),
(x.is_(sqa.null()), None),
else_=-MsSqlImpl.INF,
)
return sqa.func.log(x)


with MsSqlImpl.op(ops.Ceil()) as op:
@@ -364,3 +336,31 @@ def _ceil(x):
@op.auto
def _str_to_datetime(x):
return sqa.cast(x, DATETIME2)


with MsSqlImpl.op(ops.IsInf()) as op:

@op.auto
def _is_inf(x):
return False


with MsSqlImpl.op(ops.IsNotInf()) as op:

@op.auto
def _is_not_inf(x):
return True


with MsSqlImpl.op(ops.IsNan()) as op:

@op.auto
def _is_nan(x):
return False


with MsSqlImpl.op(ops.IsNotNan()) as op:

@op.auto
def _is_not_nan(x):
return True