Skip to content

Commit

Permalink
Pandas 2.1 and py 3.12 compat (#123)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw authored Aug 19, 2024
1 parent 62f7b21 commit 796c43c
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 37 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test-conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-12, windows-latest]
python: ["3.9", "3.10"]
python: ["3.9", "3.12"]

steps:
- uses: actions/checkout@v2
Expand All @@ -29,7 +29,7 @@ jobs:
- name: Setup Environment
shell: bash
run: |
conda create --name test python=${{ matrix.python }} pytest numpy=1.* gdal=3.* scipy pytz dask-core toolz "pandas<2.2" geopandas=0.* "pyproj>=2"
conda create --name test python=${{ matrix.python }} pytest numpy=1.* gdal=3.* scipy pytz dask-core toolz "pandas<2.2" geopandas "pyproj>=2" fiona
source activate test
python -V
conda info
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ jobs:
fail-fast: false
matrix:
include:
- os: ubuntu-20.04
python: 3.7
numpy: "==1.16.*"
pins: "pygdal==3.0.4.* scipy==1.3.* dask[delayed]==1.* pandas==0.25.* geopandas==0.7.*"
- os: ubuntu-20.04
python: 3.8
numpy: "==1.18.*"
Expand All @@ -38,7 +34,11 @@ jobs:
- os: ubuntu-22.04
python: "3.11"
numpy: "==1.*"
pins: "pygdal==3.4.1.* geopandas==0.* pandas==2.1.*"
pins: "pygdal==3.4.1.* scipy==1.11.* dask[delayed]==2023.* pandas==2.0.* geopandas==0.*"
- os: ubuntu-22.04
python: "3.12"
numpy: "==1.*"
pins: "pygdal==3.4.1.*"

steps:
- uses: actions/checkout@v2
Expand Down
8 changes: 5 additions & 3 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ Changelog of dask-geomodeling

- Fixed warnings when reprojecting geometries with geopandas >= 0.9.

- Added mandatory `temporal` attribute for RasterBlock.
- Fixed compatibility and solved deprecation warnings with pandas 2.1.
Still incompatible with pandas >=2.2.

- Added version constraint showing incompatibility with numpy 2.

- Added version constraints showing incompatibility with geopandas 1.*,
pandas 2.2, and numpy 2.
- Added mandatory `temporal` attribute for RasterBlock.


2.4.4 (2024-01-17)
Expand Down
21 changes: 16 additions & 5 deletions dask_geomodeling/geometry/field_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@
]


def _none_to_nan(series: pd.Series) -> pd.Series:
"""Put NaN in place of None in a Series
If the series has only nones, downcasts type to float."""
if series.dtype == object:
nones = series.isna()
if nones.all(): # downcasts type to float:
return pd.Series(index=series.index, name=series.name, dtype=float)
else:
series=series.copy()
series[nones] = np.nan
return series

class Classify(BaseSingleSeries):
"""
Classify a value column into different bins
Expand Down Expand Up @@ -106,12 +119,11 @@ def right(self):
return self.args[3]

@staticmethod
def process(series, bins, labels, right):
def process(series: pd.Series, bins, labels, right):
open_bounds = len(labels) == len(bins) + 1
if open_bounds:
bins = np.concatenate([[-np.inf], bins, [np.inf]])
if series.dtype == object:
series = series.fillna(value=np.nan)
series = _none_to_nan(series)
result = pd.cut(series, bins, right, labels)

# Transform from categorical to whatever suits the "labels". The
Expand Down Expand Up @@ -207,8 +219,7 @@ def process(data, value_column, bin_columns, labels, right):
return pd.Series([], dtype=float)
features = data["features"]
series = features[value_column]
if series.dtype == object:
series = series.fillna(value=np.nan)
series = _none_to_nan(series)
values = series.values
bins = features[bin_columns].values
n_bins = len(bin_columns)
Expand Down
18 changes: 11 additions & 7 deletions dask_geomodeling/raster/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from osgeo import gdal, gdal_array

from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone

from dask_geomodeling import utils

Expand All @@ -14,6 +14,10 @@
__all__ = ["MemorySource", "RasterFileSource"]


def utc_from_ms_timestamp(timestamp):
    """Return the naive UTC datetime for a timestamp in milliseconds.

    :param timestamp: milliseconds since 1970-01-01T00:00:00 UTC (int or
        float); negative values denote moments before the epoch.
    :returns: ``datetime.datetime`` without tzinfo, expressed in UTC.
    """
    # Epoch + timedelta is the documented equivalent of
    # datetime.fromtimestamp(..., tz=timezone.utc), but it does not depend on
    # the platform's C time functions, so negative (pre-1970) timestamps also
    # work where fromtimestamp() refuses them (e.g. on Windows).
    return datetime(1970, 1, 1) + timedelta(seconds=timestamp / 1000)

class MemorySource(RasterBlock):
"""A raster source that interfaces data from memory.
Expand Down Expand Up @@ -174,9 +178,9 @@ def period(self):
if len(self) == 0:
return
elif len(self) == 1:
return (datetime.utcfromtimestamp(self.time_first / 1000),) * 2
return (utc_from_ms_timestamp(self.time_first),) * 2
else:
first = datetime.utcfromtimestamp(self.time_first / 1000)
first = utc_from_ms_timestamp(self.time_first)
last = first + (len(self) - 1) * self.timedelta
return first, last

Expand Down Expand Up @@ -206,7 +210,7 @@ def get_sources_and_requests(self, **request):
start, stop, first_i, last_i = utils.snap_start_stop(
request.get("start"),
request.get("stop"),
datetime.utcfromtimestamp(self.time_first / 1000),
utc_from_ms_timestamp(self.time_first),
self.timedelta,
len(self),
)
Expand Down Expand Up @@ -398,9 +402,9 @@ def period(self):
if len(self) == 0:
return
elif len(self) == 1:
return (datetime.utcfromtimestamp(self.time_first / 1000),) * 2
return (utc_from_ms_timestamp(self.time_first)) * 2
else:
first = datetime.utcfromtimestamp(self.time_first / 1000)
first = utc_from_ms_timestamp(self.time_first)
last = first + (len(self) - 1) * self.timedelta
return first, last

Expand All @@ -427,7 +431,7 @@ def get_sources_and_requests(self, **request):
start, stop, first_i, last_i = utils.snap_start_stop(
request.get("start"),
request.get("stop"),
datetime.utcfromtimestamp(self.time_first / 1000),
utc_from_ms_timestamp(self.time_first),
self.timedelta,
len(self),
)
Expand Down
19 changes: 10 additions & 9 deletions dask_geomodeling/raster/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,20 +271,20 @@ def _get_bin_label(dt, frequency, closed, label, timezone):
# while there is only 1 sample here, there might be multiple (empty) bins
# in some cases (see test_issue_5917)
series = pd.Series([0], index=[_dt_to_ts(dt, timezone)])
for label, bin in series.resample(frequency, closed=closed, label=label, kind="timestamp"):
for label, bin in series.resample(frequency, closed=closed, label=label):
if len(bin) != 0:
break
return _ts_to_dt(label, timezone)


def _get_bin_period(dt, frequency, closed, label, timezone):
"""Returns the label of the bin the input dt belongs to.
def _get_bin_start(dt, frequency, closed, label, timezone):
"""Returns the start (left side) of the bin the input dt belongs to.
:type dt: datetime.datetime without timezone.
"""
# go through resample, this is the only function that supports 'closed'
series = pd.Series([0], index=[_dt_to_ts(dt, timezone)])
resampled = series.resample(frequency, closed=closed, label=label, kind="period")
resampled = series.resample(frequency, closed=closed, label="left")
return resampled.first().index[0]


Expand Down Expand Up @@ -488,6 +488,7 @@ def __init__(
if not isinstance(frequency, str):
raise TypeError("'{}' object is not allowed.".format(type(frequency)))
frequency = to_offset(frequency).freqstr

if closed not in {None, "left", "right"}:
raise ValueError("closed must be None, 'left', or 'right'.")
if label not in {None, "left", "right"}:
Expand Down Expand Up @@ -552,9 +553,11 @@ def period(self):

@property
def timedelta(self):
if self.frequency is None:
return None
try:
return to_offset(self.frequency).delta
except AttributeError:
return pd.Timedelta(to_offset(self.frequency)).to_pytimedelta()
except ValueError:
return # e.g. Month is non-equidistant

@property
Expand Down Expand Up @@ -830,10 +833,8 @@ def get_sources_and_requests(self, **request):
request["start"] = self.period[0]
request["stop"] = stop
else:
start_period = _get_bin_period(start, **kwargs)

# snap request 'start' to the start of the first period
request["start"] = _ts_to_dt(start_period.start_time, self.timezone)
request["start"] = _ts_to_dt(_get_bin_start(start, **kwargs), self.timezone)
# snap request 'stop' to the last requested time
request["stop"] = stop
if kwargs["closed"] != "left":
Expand Down
1 change: 1 addition & 0 deletions dask_geomodeling/tests/test_geometry_sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def assert_frame_equal_ignore_index(actual, expected, sort_col):
actual.set_index(sort_col).sort_index(),
expected.set_index(sort_col).sort_index(),
check_like=True,
check_index_type=False
)


Expand Down
12 changes: 6 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

install_requires = (
[
"dask[delayed]>=0.20",
"pandas>=0.23,<=2.2",
"geopandas>=0.7,<1",
"dask[delayed]>=2.9",
"pandas>=1.0,<2.2",
"geopandas>=0.8",
"pytz",
"numpy>=1.14,<2",
"scipy>=1.1",
"numpy>=1.18,<2",
"scipy>=1.4",
"fiona"
],
)
Expand Down Expand Up @@ -53,7 +53,7 @@
zip_safe=False,
install_requires=install_requires,
tests_require=tests_require,
python_requires='>=3.7',
python_requires='>=3.8',
extras_require={"test": tests_require, "cityhash": ["cityhash"]},
entry_points={"console_scripts": []},
)

0 comments on commit 796c43c

Please sign in to comment.