Skip to content

Commit

Permalink
add pycnophylactic interpolation
Browse files Browse the repository at this point in the history
  • Loading branch information
knaaptime committed Jun 25, 2021
1 parent 53eec88 commit 0aecfa9
Show file tree
Hide file tree
Showing 8 changed files with 362 additions and 0 deletions.
1 change: 1 addition & 0 deletions .ci/37.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ dependencies:
- pygeos
- h3-py
- joblib
- astropy
1 change: 1 addition & 0 deletions .ci/38.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ dependencies:
- pygeos
- h3-py
- joblib
- astropy
1 change: 1 addition & 0 deletions .ci/39.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ dependencies:
- numpydoc
- nbsphinx
- joblib
- astropy
18 changes: 18 additions & 0 deletions docs/_static/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -482,3 +482,21 @@ @article{mennis06_intel_dasym_mappin_its_applic
volume = {33},
year = {2006}
}
@article{tobler_smooth_1979,
abstract = {Census enumerations are usually packaged in irregularly shaped geographical regions. Interior values can be interpolated for such regions, without specification of "control points," by using an analogy to elliptical partial differential equations. A solution procedure is suggested, using finite difference methods with classical boundary conditions. In order to estimate densities, an additional nonnegativity condition is required. Smooth contour maps, which satisfy the volume preserving and nonnegativity constraints, illustrate the method using actual geographical data. It is suggested that the procedure may be used to convert observations from one bureaucratic partitioning of a geographical area to another.},
author = {Tobler, Waldo R},
doi = {10.1080/01621459.1979.10481647},
file = {:Users/knaaptime/Library/Application Support/Mendeley Desktop/Downloaded/Tobler - 1979 - Smooth Pycnophylactic Interpolation for Geographical Regions.pdf:pdf},
isbn = {978-0-511-47935-9},
issn = {0162-1459},
journal = {Journal of the American Statistical Association},
keywords = {bivariate interpolation,density estimation,dirichlet integral,interpolation,methods,numerical,population density},
mendeley-groups = {boundary-harmonization},
month = {sep},
number = {367},
pages = {519--529},
title = {{Smooth pycnophylactic interpolation for geographical regions}},
url = {http://www.tandfonline.com/doi/abs/10.1080/01621459.1979.10481647},
volume = {74},
year = {1979}
}
1 change: 1 addition & 0 deletions tobler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from . import dasymetric
from . import model
from . import util
from . import pycno
1 change: 1 addition & 0 deletions tobler/pycno/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pycno import pycno_interpolate
308 changes: 308 additions & 0 deletions tobler/pycno/pycno.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
"""Pycnophylactic Interpolation (contributed by @danlewis85)."""
# https://github.com/danlewis85/pycno/

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from numpy import (
absolute,
apply_along_axis,
asarray,
convolve,
copy,
nan,
nanmax,
nanmean,
nansum,
pad,
power,
round,
unique,
)
from numpy.ma import masked_invalid, masked_where
from pandas import DataFrame
from rasterio.features import rasterize


def pycno(
    gdf, value_field, cellsize, r=0.2, handle_null=True, converge=3, verbose=True
):
    """Return a smooth pycnophylactic interpolation raster for a given geodataframe.

    Args:
        gdf (geopandas.geodataframe.GeoDataFrame): Input GeoDataFrame.
        value_field (str): Field name of values to be used to produce pycnophylactic surface
        cellsize (int): Pixel size of raster in planar units (i.e. metres, feet)
        r (float, optional): Relaxation parameter, default of 0.2 is generally fine.
        handle_null (boolean, optional): Changes how nodata values are smoothed. Default True.
        converge (int, optional): Index for stopping value, default 3 is generally fine.
        verbose (boolean, optional): Print out progress at each iteration.

    Returns:
        Numpy Array: Smooth pycnophylactic interpolation.
        Rasterio geotransform
        GeoPandas crs
    """
    # NB the docstring above was originally placed *after* the try/except, where
    # it is a no-op expression rather than a docstring; it now comes first.
    try:
        from astropy.convolution import convolve as astro_convolve
    except ImportError as err:
        # BUG FIX: the original `raise ("...")` raised a bare string, which is a
        # TypeError at runtime; raise a proper ImportError chained to the cause.
        raise ImportError(
            "Pycnophylactic interpolation requires the astropy package"
        ) from err

    # set nodata value
    nodata = -9999

    # work out raster rows and columns based on gdf extent and cellsize
    xmin, ymin, xmax, ymax = gdf.total_bounds
    xres = int((xmax - xmin) / cellsize)
    yres = int((ymax - ymin) / cellsize)

    # Work out transform so that we rasterize the area where the data are!
    trans = rasterio.Affine.from_gdal(xmin, cellsize, 0, ymax, 0, -cellsize)

    # First make a zone array
    # NB using index values as ids can often be too large/alphanumeric. Limit is int32 polygon features.
    # create a generator of geom, index pairs to use in rasterizing
    shapes = ((geom, value) for geom, value in zip(gdf.geometry, gdf.index))
    # burn the features into a raster array
    feature_array = rasterize(
        shapes=shapes, fill=nodata, out_shape=(yres, xres), transform=trans
    )

    # Get cell counts per index value (feature).
    # NB renamed from `unique`/`count` so the locals no longer shadow the
    # module-level `from numpy import unique`.
    ids, counts = np.unique(feature_array, return_counts=True)
    cellcounts = asarray((ids, counts)).T
    # Lose the nodata counts
    cellcounts = cellcounts[cellcounts[:, 0] != nodata, :]
    # Adjust value totals by cells
    # Make cell counts dataframe
    celldf = DataFrame(cellcounts[:, 1], index=cellcounts[:, 0], columns=["cellcount"])
    # Merge cell counts
    gdf = gdf.merge(celldf, how="left", left_index=True, right_index=True)
    # Calculate cell values
    gdf["cellvalues"] = gdf[value_field] / gdf["cellcount"]

    # create a generator of geom, cellvalue pairs to use in rasterizing
    shapes = ((geom, value) for geom, value in zip(gdf.geometry, gdf.cellvalues))
    # Now burn the initial value raster
    value_array = rasterize(
        shapes=shapes, fill=nodata, out_shape=(yres, xres), transform=trans
    )

    # Ensure a float array so nodata cells can be set to NaN below (rasterize
    # may return an integer array depending on the burned values).
    value_array = value_array.astype("float64")
    # Set nodata in value array to np.nan (use the nodata variable rather than
    # repeating the -9999 literal)
    value_array[value_array == nodata] = nan

    # Set stopper value based on converge parameter
    stopper = nanmax(value_array) * power(10.0, -converge)

    # The basic numpy convolve function doesn't handle nulls.
    def smooth2D(data):
        """Smooth data with a separable [0.5, 0, 0.5] kernel, masking NaN cells."""
        # Create function that calls a 1 dimensional smoother.
        s1d = lambda s: convolve(s, [0.5, 0.0, 0.5], mode="same")
        # pad the data array with the mean value
        padarray = pad(data, 1, "constant", constant_values=nanmean(data))
        # make nodata mask
        mask = masked_invalid(padarray).mask
        # set nodata as zero to avoid eroding the raster
        padarray[mask] = 0.0
        # Apply the convolution along each axis of the data and average
        padarray = (
            apply_along_axis(s1d, 1, padarray) + apply_along_axis(s1d, 0, padarray)
        ) / 2
        # Reinstate nodata
        padarray[mask] = nan
        return padarray[1:-1, 1:-1]

    # The convolution function from astropy handles nulls.
    def astroSmooth2d(data):
        """Smooth data with astropy's NaN-aware convolution along each axis."""
        s1d = lambda s: astro_convolve(s, [0.5, 0, 0.5])
        # pad the data array with the mean value
        padarray = pad(data, 1, "constant", constant_values=nanmean(data))
        # Apply the convolution along each axis of the data and average
        padarray = (
            apply_along_axis(s1d, 1, padarray) + apply_along_axis(s1d, 0, padarray)
        ) / 2
        return padarray[1:-1, 1:-1]

    def correct2Da(data):
        """Additively restore each zone's total onto the smoothed surface."""
        # NB Series.items() replaces iteritems(), removed in pandas 2.0
        for idx, val in gdf[value_field].items():
            # Create zone mask from feature_array
            mask = masked_where(feature_array == idx, feature_array).mask
            # Work out the correction factor
            correct = (val - nansum(data[mask])) / mask.sum()
            # Apply correction
            data[mask] += correct

        return data

    def correct2Dm(data):
        """Multiplicatively restore each zone's total (keeps zeros at zero)."""
        for idx, val in gdf[value_field].items():
            # Create zone mask from feature_array
            mask = masked_where(feature_array == idx, feature_array).mask
            # Work out the correction factor
            correct = val / nansum(data[mask])
            if correct != 0.0:
                # Apply correction
                data[mask] *= correct

        return data

    while True:
        # Store the current iteration
        old = copy(value_array)

        # Smooth the value_array
        if handle_null:
            sm = astroSmooth2d(value_array)
        else:
            # BUG FIX: original called `smooth2d`, which is undefined (the
            # nested function is `smooth2D`), so handle_null=False always
            # raised NameError.
            sm = smooth2D(value_array)

        # Relaxation to prevent overcompensation in the smoothing step
        value_array = value_array * r + (1.0 - r) * sm

        # Perform correction
        value_array = correct2Da(value_array)

        # Reset any negative values to zero.
        value_array[value_array < 0] = 0.0

        # Perform correction
        value_array = correct2Dm(value_array)

        if verbose:
            print(
                "Maximum Change: "
                + str(round(nanmax(absolute(old - value_array)), 4))
                + " - will stop at "
                + str(round(stopper, 4))
            )

        if nanmax(absolute(old - value_array)) < stopper:
            break

    return (value_array, trans, gdf.crs)


def save_pycno(pycno_array, transform, crs, filestring, driver="GTiff"):
    """Save a numpy array as a raster, largely a helper function for pycno.

    Args:
        pycno_array (numpy array): 2D numpy array of pycnophylactic surface
        transform (rasterio geotransform): Relevant transform from pycno()
        crs (GeoPandas crs): Coordinate reference system of GeoDataFrame used in pycno()
        filestring (str): File path to save raster
        driver (str, optional): Format for output raster, default: geoTiff.

    Returns:
        None
    """
    import rasterio

    # Use a context manager so the dataset is always closed (and the file
    # handle released) even if the write raises; the original leaked the
    # open handle on error.
    with rasterio.open(
        filestring,
        "w",
        driver=driver,
        height=pycno_array.shape[0],
        width=pycno_array.shape[1],
        count=1,
        dtype="float64",
        crs=crs,
        transform=transform,
    ) as new_dataset:
        new_dataset.write(pycno_array.astype("float64"), 1)

    return None


def extract_values(pycno_array, gdf, transform, fieldname="Estimate"):
    """Extract raster value sums according to a provided polygon geodataframe.

    Args:
        pycno_array (numpy array): 2D numpy array of pycnophylactic surface.
        gdf (geopandas.geodataframe.GeoDataFrame): Target GeoDataFrame.
        transform (rasterio geotransform): Relevant transform from pycno()
        fieldname (str, optional): Unused; retained for backward compatibility.

    Returns:
        pandas.Series: Per-polygon sums of raster values, indexed like ``gdf``.
            (NB the original docstring claimed a GeoDataFrame was returned;
            the function has always returned a Series.)
    """
    from numpy import nansum
    from rasterio.features import geometry_mask

    estimates = []
    # Iterate through geodataframe and extract values.
    # NB Series.items() replaces iteritems(), removed in pandas 2.0
    for idx, geom in gdf["geometry"].items():
        mask = geometry_mask(
            [geom], pycno_array.shape, transform=transform, invert=True
        )
        estimates.append(nansum(pycno_array[mask]))
    # BUG FIX: carry gdf's index so assigning the result back onto a frame
    # with a non-default index aligns correctly (a bare RangeIndex previously
    # produced NaNs on misaligned targets).
    out = pd.Series(estimates, index=gdf.index)
    return out


def pycno_interpolate(
    source_df,
    target_df,
    variables,
    cellsize,
    r=0.2,
    handle_null=True,
    converge=3,
    verbose=False,
):
    """Pycnophylactic Interpolation.

    Parameters
    ----------
    source_df : geopandas.GeoDataFrame (required)
        geodataframe with polygon geometries and data to transfer
    target_df : geopandas.GeoDataFrame (required)
        geodataframe with polygon geometries to receive new data
    variables : list
        columns on the source_df containing data to transfer
    cellsize : int
        Pixel size of intermediate raster in planar units (i.e. metres, feet)
    r : float, optional
        Relaxation parameter, default of 0.2 is generally fine
    handle_null : bool, optional
        Changes how nodata values are smoothed. Default True.
    converge : int, optional
        Index for stopping value, default 3 is generally fine.
    verbose : bool, optional
        Print out progress at each iteration.

    Returns
    -------
    geopandas.GeoDataFrame
        new geodataframe with interpolated variables as columns and target_df geometry
        as output geometry

    Notes
    -----
    The formula is based on Tobler, W. R. (1979). Smooth pycnophylactic interpolation for geographical regions. Journal of the American Statistical Association, 74(367), 519–529. https://doi.org/10.1080/01621459.1979.10481647
    Original implementation written by @danlewis85 at <https://github.com/danlewis85/pycno/>
    and based in part on the R pycno package by Chris Brunsdon (<https://cran.r-project.org/web/packages/pycno/index.html>)
    References: :cite:`tobler_smooth_1979`
    """
    # Both frames must share a CRS: the intermediate raster is built in the
    # source CRS, then sampled with the target geometries.
    assert source_df.crs.equals(
        target_df.crs
    ), "source_df CRS and target_df CRS are not the same. Reproject into consistent systems before proceeding"
    # Start from the target geometry only; interpolated columns are appended.
    output_vars = target_df.copy()[[target_df.geometry.name]]
    for variable in variables:
        # Build the smooth pycnophylactic surface for this variable.
        pyc, trans, _ = pycno(
            source_df,
            variable,
            cellsize=cellsize,
            r=r,
            handle_null=handle_null,
            converge=converge,
            verbose=verbose,
        )
        # Sum the surface within each target polygon.
        vals = extract_values(pyc, target_df, transform=trans)
        output_vars[variable] = vals

    return output_vars
31 changes: 31 additions & 0 deletions tobler/tests/test_pycno.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""test interpolation functions."""
import geopandas

from libpysal.examples import load_example
from numpy.testing import assert_almost_equal
from tobler.pycno import pycno_interpolate


def datasets():
    """Load the two Sacramento example layers, project them to a shared UTM CRS,
    and add a poverty-rate column to the source frame."""
    source_ex = load_example("Sacramento1")
    target_ex = load_example("Sacramento2")
    source = geopandas.read_file(source_ex.get_path("sacramentot2.shp"))
    target = geopandas.read_file(target_ex.get_path("SacramentoMSA2.shp"))
    # reproject both frames into the source layer's estimated UTM zone
    source = source.to_crs(source.estimate_utm_crs())
    target = target.to_crs(source.crs)
    source["pct_poverty"] = source.POV_POP / source.POV_TOT
    return source, target


def test_pycno_interpolate():
    """Interpolated total population over the target zones matches the known sum."""
    source, target = datasets()
    estimates = pycno_interpolate(
        source_df=source,
        target_df=target,
        variables=["TOT_POP"],
        cellsize=500,
    )
    assert_almost_equal(estimates.TOT_POP.sum(), 1794618.503, decimal=3)


0 comments on commit 0aecfa9

Please sign in to comment.