Skip to content

Commit

Permalink
Merge pull request #135 from martinfleis/categorical
Browse files Browse the repository at this point in the history
ENH: support categorical variables in area_interpolate
  • Loading branch information
sjsrey authored Feb 27, 2021
2 parents ad51c9a + 72d1445 commit 32c8525
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
33 changes: 24 additions & 9 deletions tobler/area_weighted/area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,7 @@ def _area_tables_binning_parallel(source_df, target_df, n_jobs=-1):

# Build DOK table
table = coo_matrix(
(
areas,
(ids_src, ids_tgt),
),
(areas, (ids_src, ids_tgt),),
shape=(df1.shape[0], df2.shape[0]),
dtype=np.float32,
)
Expand Down Expand Up @@ -193,10 +190,7 @@ def _area_tables_binning(source_df, target_df, spatial_index):
areas = df1.geometry.values[ids_src].intersection(df2.geometry.values[ids_tgt]).area

table = coo_matrix(
(
areas,
(ids_src, ids_tgt),
),
(areas, (ids_src, ids_tgt),),
shape=(df1.shape[0], df2.shape[0]),
dtype=np.float32,
)
Expand Down Expand Up @@ -273,9 +267,10 @@ def _area_interpolate_binning(
allocate_total=True,
spatial_index="auto",
n_jobs=1,
categorical_variables=None,
):
"""
Area interpolation for extensive and intensive variables.
Area interpolation for extensive, intensive and categorical variables.
Parameters
----------
Expand Down Expand Up @@ -310,6 +305,8 @@ def _area_interpolate_binning(
available. If `table` is passed, this is ignored.
NOTE: as of Jan'21 multi-core functionality requires master versions
of `pygeos` and `geopandas`.
categorical_variables : list
[Optional. Default=None] Columns in `source_df` holding categorical
variables. Each unique value of a column yields one output column
named `<variable>_<value>`.
Returns
-------
Expand Down Expand Up @@ -344,6 +341,9 @@ def _area_interpolate_binning(
v_j = \\sum_i v_i w_{i,j}
w_{i,j} = a_{i,j} / \\sum_k a_{k,j}
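For categorical variables, the estimate returns the ratio of presence of
each unique category, i.e. the share of each target geometry's area that is
covered by source geometries of that category.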
"""
source_df = source_df.copy()
target_df = target_df.copy()
Expand Down Expand Up @@ -404,10 +404,25 @@ def _area_interpolate_binning(
intensive = np.asarray(intensive)
intensive = pd.DataFrame(intensive.T, columns=intensive_variables)

if categorical_variables:
categorical = {}
for variable in categorical_variables:
unique = source_df[variable].unique()
for value in unique:
mask = source_df[variable] == value
categorical[f"{variable}_{value}"] = np.asarray(
table[mask].sum(axis=0)
)[0]

categorical = pd.DataFrame(categorical)
categorical = categorical.div(target_df.area, axis="rows")

if extensive_variables:
dfs.append(extensive)
if intensive_variables:
dfs.append(intensive)
if categorical_variables:
dfs.append(categorical)

df = pd.concat(dfs, axis=1)
df["geometry"] = target_df[target_df.geometry.name].reset_index(drop=True)
Expand Down
10 changes: 10 additions & 0 deletions tobler/tests/test_dasymetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def datasets():
sac1 = geopandas.read_file(sac1.get_path("sacramentot2.shp"))
sac2 = geopandas.read_file(sac2.get_path("SacramentoMSA2.shp"))
sac1["pct_poverty"] = sac1.POV_POP / sac1.POV_TOT
categories = ["cat", "dog", "donkey", "wombat", "capybara"]
sac1["animal"] = (categories * ((len(sac1) // len(categories)) + 1))[
: len(sac1)
]

return sac1, sac2
else:
Expand All @@ -44,9 +48,15 @@ def test_area_interpolate():
target_df=sac2,
extensive_variables=["TOT_POP"],
intensive_variables=["pct_poverty"],
categorical_variables=["animal"],
)
assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)
assert_almost_equal(area.animal_cat.sum(), 32, decimal=0)
assert_almost_equal(area.animal_dog.sum(), 19, decimal=0)
assert_almost_equal(area.animal_donkey.sum(), 22, decimal=0)
assert_almost_equal(area.animal_wombat.sum(), 23, decimal=0)
assert_almost_equal(area.animal_capybara.sum(), 20, decimal=0)


@pytest.mark.skipif(QUILTMISSING, reason="quilt3 not available.")
Expand Down

0 comments on commit 32c8525

Please sign in to comment.