Skip to content

Commit

Permalink
Merge pull request #160 from martinfleis/csr
Browse files: browse the repository at this point in the history
  • Loading branch information
knaaptime authored Mar 28, 2022
2 parents 3f6496d + 199a8e7 commit 66f3825
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 23 deletions.
48 changes: 25 additions & 23 deletions tobler/area_weighted/area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _area_tables_binning_parallel(source_df, target_df, n_jobs=-1):
Returns
-------
tables : scipy.sparse.dok_matrix
tables : scipy.sparse.csr_matrix
"""
from joblib import Parallel, delayed, parallel_backend
Expand Down Expand Up @@ -125,7 +125,7 @@ def _area_tables_binning_parallel(source_df, target_df, n_jobs=-1):
)
areas = np.concatenate(worker_out)

# Build DOK table
# Build CSR table
table = coo_matrix(
(
areas,
Expand All @@ -134,7 +134,7 @@ def _area_tables_binning_parallel(source_df, target_df, n_jobs=-1):
shape=(df1.shape[0], df2.shape[0]),
dtype=np.float32,
)
table = table.todok()
table = table.tocsr()
return table


Expand All @@ -161,7 +161,7 @@ def _area_tables_binning(source_df, target_df, spatial_index):
Returns
-------
tables : scipy.sparse.dok_matrix
tables : scipy.sparse.csr_matrix
"""
if _check_crs(source_df, target_df):
Expand Down Expand Up @@ -199,7 +199,7 @@ def _area_tables_binning(source_df, target_df, spatial_index):
dtype=np.float32,
)

table = table.todok()
table = table.tocsr()

return table

Expand All @@ -226,7 +226,7 @@ def _area_interpolate_binning(
[Optional. Default=None] Columns in dataframes for extensive variables
intensive_variables : list
[Optional. Default=None] Columns in dataframes for intensive variables
table : scipy.sparse.dok_matrix
table : scipy.sparse.csr_matrix
[Optional. Default=None] Area allocation source-target correspondence
table. If not provided, it will be built from `source_df` and
`target_df` using `tobler.area_interpolate._area_tables_binning`
Expand Down Expand Up @@ -305,19 +305,20 @@ def _area_interpolate_binning(
else:
table = _area_tables_binning_parallel(source_df, target_df, n_jobs=n_jobs)

den = source_df.area.values
if allocate_total:
den = np.asarray(table.sum(axis=1))
den = den + (den == 0)
den = 1.0 / den
n = den.shape[0]
den = den.reshape((n,))
den = diags([den], [0])
weights = den.dot(table) # row standardize table

dfs = []
extensive = []
if extensive_variables:

den = source_df.area.values
if allocate_total:
den = np.asarray(table.sum(axis=1))
den = den + (den == 0)
den = 1.0 / den
n = den.shape[0]
den = den.reshape((n,))
den = diags([den], [0])
weights = den.dot(table) # row standardize table

for variable in extensive_variables:
vals = _nan_check(source_df, variable)
vals = _inf_check(source_df, variable)
Expand All @@ -329,15 +330,16 @@ def _area_interpolate_binning(
extensive = np.array(extensive)
extensive = pd.DataFrame(extensive.T, columns=extensive_variables)

area = np.asarray(table.sum(axis=0))
den = 1.0 / (area + (area == 0))
n, k = den.shape
den = den.reshape((k,))
den = diags([den], [0])
weights = table.dot(den)

intensive = []
if intensive_variables:

area = np.asarray(table.sum(axis=0))
den = 1.0 / (area + (area == 0))
n, k = den.shape
den = den.reshape((k,))
den = diags([den], [0])
weights = table.dot(den)

for variable in intensive_variables:
vals = _nan_check(source_df, variable)
vals = _inf_check(source_df, variable)
Expand Down
66 changes: 66 additions & 0 deletions tobler/tests/test_area_interpolators.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,45 @@ def test_area_interpolate_singlecore():
assert_almost_equal(area.animal_capybara.sum(), 20, decimal=0)


def test_area_interpolate_extensive():
    """Interpolate only the extensive variable ``TOT_POP`` with ``n_jobs=1``.

    No intensive or categorical variables are passed. The sum of the
    interpolated ``TOT_POP`` column is compared with 1796856 at ``decimal=0``.
    """
    source, target = datasets()
    interpolated = area_interpolate(
        source_df=source,
        target_df=target,
        extensive_variables=["TOT_POP"],
        n_jobs=1,
    )
    total_population = interpolated["TOT_POP"].sum()
    assert_almost_equal(total_population, 1796856, decimal=0)


def test_area_interpolate_intensive():
    """Interpolate only the intensive variable ``pct_poverty`` with ``n_jobs=1``.

    No extensive or categorical variables are passed. The sum of the
    interpolated ``pct_poverty`` column is compared with 2140 at ``decimal=0``.
    """
    source, target = datasets()
    interpolated = area_interpolate(
        source_df=source,
        target_df=target,
        intensive_variables=["pct_poverty"],
        n_jobs=1,
    )
    poverty_sum = interpolated["pct_poverty"].sum()
    assert_almost_equal(poverty_sum, 2140, decimal=0)


def test_area_interpolate_categorical():
    """Interpolate extensive, intensive and categorical variables together.

    Runs with ``n_jobs=1`` and checks the column sums of the ``animal_*``
    output columns against their expected values at ``decimal=0``.
    """
    source, target = datasets()
    interpolated = area_interpolate(
        source_df=source,
        target_df=target,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        categorical_variables=["animal"],
        n_jobs=1,
    )

    # Checked in this fixed order so the first failing column is reported
    # exactly as it would be with one assertion per line.
    expected_sums = (
        ("animal_cat", 32),
        ("animal_dog", 19),
        ("animal_donkey", 22),
        ("animal_wombat", 23),
        ("animal_capybara", 20),
    )
    for column, expected in expected_sums:
        assert_almost_equal(interpolated[column].sum(), expected, decimal=0)


def test_area_interpolate_custom_index():
sac1, sac2 = datasets()
sac1.index = sac1.index * 2
Expand Down Expand Up @@ -128,3 +167,30 @@ def test_area_tables_binning():
assert auto.mean() == pytest.approx(2.7552649e-05)

assert (auto[5][0].toarray() > 0).sum() == 7


def test_passed_table():
    """Check ``area_interpolate`` with a precomputed ``table`` argument.

    The table is built once with ``_area_tables_binning`` and passed first
    as returned, then after conversion with ``.todok()``. Both runs are
    checked against the same expected ``TOT_POP`` and ``pct_poverty`` sums.
    """
    source, target = datasets()
    csr_table = _area_tables_binning(
        source_df=source, target_df=target, spatial_index="auto"
    )

    def check_with(table):
        # Interpolate using the supplied table and verify both column sums.
        interpolated = area_interpolate(
            source_df=source,
            target_df=target,
            extensive_variables=["TOT_POP"],
            intensive_variables=["pct_poverty"],
            table=table,
        )
        assert_almost_equal(interpolated["TOT_POP"].sum(), 1796856, decimal=0)
        assert_almost_equal(interpolated["pct_poverty"].sum(), 2140, decimal=0)

    check_with(csr_table)

    # Convert only after the first run has finished, keeping the same
    # sequence of operations on the table.
    check_with(csr_table.todok())

0 comments on commit 66f3825

Please sign in to comment.