Improved efficiency wrf_remove_urban #71

Merged
merged 29 commits into from
Jul 14, 2022
Changes from 17 commits
Commits
29 commits
4306a8c
Improved efficiency wrf_remove_urban
dargueso Jul 6, 2022
e612a25
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 6, 2022
16fef56
Update w2w.py
dargueso Jul 8, 2022
46c9a29
Merge branch 'optmize_remove_urb' of https://github.com/matthiasdemuz…
dargueso Jul 8, 2022
9c83799
Remove time prints
dargueso Jul 8, 2022
3209545
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 8, 2022
9d8f1f8
fix some typos causing errors
matthiasdemuzere Jul 8, 2022
19ecb79
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 8, 2022
aa92e73
add tqdm for progress bar monitoring?
matthiasdemuzere Jul 8, 2022
2a64791
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 8, 2022
ae16501
add tqdm description
matthiasdemuzere Jul 8, 2022
65fe470
Optimize wrf remove urban
dargueso Jul 11, 2022
9c7c4ee
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 11, 2022
b53aa6a
Removed unnecessary function and added annotation
dargueso Jul 12, 2022
058f5e0
Merge branch 'optmize_remove_urb' of https://github.com/matthiasdemuz…
dargueso Jul 12, 2022
673e0e4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 12, 2022
5a01850
add type annotation
jkittner Jul 12, 2022
2faec70
Update w2w.py
dargueso Jul 12, 2022
2350b70
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 12, 2022
f869207
Update README.md
dargueso Jul 12, 2022
41df46c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 12, 2022
5a19596
Update README.md
Jul 13, 2022
a3b4c56
fix typo, simplify kd tree variable unpacking
jkittner Jul 13, 2022
7dbb6a0
update JOSS with readme info
matthiasdemuzere Jul 13, 2022
5e2af74
fix typo
matthiasdemuzere Jul 13, 2022
29ec09d
update JOSS pdf
matthiasdemuzere Jul 13, 2022
ebe9188
Update JOSS/paper.md
matthiasdemuzere Jul 14, 2022
34a60d5
Update README.md
matthiasdemuzere Jul 14, 2022
561841a
update JOSS pdf
matthiasdemuzere Jul 14, 2022
1 change: 1 addition & 0 deletions setup.cfg
@@ -28,6 +28,7 @@ install_requires =
rasterio>=1.1.6
rioxarray
scipy
tqdm
xarray[io]
importlib-metadata;python_version<"3.9"
importlib-resources;python_version<"3.9"
19 changes: 2 additions & 17 deletions tests/w2w_test.py
@@ -15,7 +15,6 @@

import w2w.w2w
from w2w.w2w import _add_frc_lu_index_2_wrf
from w2w.w2w import _calc_distance_coord
from w2w.w2w import _check_hi_values
from w2w.w2w import _check_lcz_wrf_extent
from w2w.w2w import _compute_hi_distribution
@@ -289,20 +288,6 @@ def test_check_lcz_integrity_clean_file_written(tmpdir, info_mock):
assert os.listdir(tmpdir) == ['Shanghai_clean.tif']


@pytest.mark.parametrize(
('coords', 'expected'),
(
pytest.param((89, 0, 90, 1), 111194, id='near northern pole'),
pytest.param((-89, 0, -90, 1), 111194, id='near southern pole'),
pytest.param((-60, -10, -70, -12), 1115764, id='southern eastern hemisphere'),
pytest.param((-1, 0, 1, 2), 314498, id='across equator'),
pytest.param((45, 179, 46, -179), 191461, id='across dateline'),
),
)
def test_calc_distance_coord(coords, expected):
assert _calc_distance_coord(*coords) == pytest.approx(expected, abs=1)


@pytest.mark.parametrize(
('dst_file', 'dst_nu_file'),
(
@@ -315,7 +300,7 @@ def test_wrf_remove_urban(tmpdir, dst_file, dst_nu_file, info_mock):
{'dst_file': dst_file, 'dst_nu_file': os.path.join(tmpdir, dst_nu_file)}
)
old_ds = xr.open_dataset(info.dst_file)
wrf_remove_urban(info=info, NPIX_NLC=9)
wrf_remove_urban(info=info, NPIX_NLC=9, NPIX_AREA=25)
ds = xr.open_dataset(info.dst_nu_file)
# check lused 13 was reclassified to 12
assert ds.LU_INDEX.values[0][2][2] == 12
@@ -350,7 +335,7 @@ def test_wrf_remove_urban_output_already_exists_is_overwritten(tmpdir, info_mock
)
assert os.listdir(tmpdir) == ['5by5_new.nc']
m_time_old = os.path.getmtime(info.dst_nu_file)
wrf_remove_urban(info=info, NPIX_NLC=9)
wrf_remove_urban(info=info, NPIX_NLC=9, NPIX_AREA=25)
assert m_time_old != os.path.getmtime(info.dst_nu_file)


205 changes: 113 additions & 92 deletions w2w/w2w.py
@@ -8,6 +8,7 @@
from rasterio.warp import reproject, Resampling
from rasterio.transform import Affine
from scipy.stats import mode, truncnorm
import scipy.spatial as spatial
import os, sys
import argparse
from argparse import RawTextHelpFormatter
@@ -24,6 +25,7 @@
from scipy import stats
import pyproj
from pyproj import CRS, Transformer
from tqdm.auto import tqdm

if sys.version_info >= (3, 9): # pragma: >=3.9 cover
import importlib.metadata as importlib_metadata
@@ -112,11 +114,23 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
'natural land cover (DEFAULT: 45)',
default=45,
)

parser.add_argument(
'-a',
'--npix-area',
metavar='',
type=int,
dest='NPIX_AREA',
help='Area (in number of pixels) in which to look for the nearest '
'natural land cover pixels to sample (DEFAULT: NPIX_NLC**2)',
default=None,
)
parser.add_argument(
'--lcz-ucp',
type=str,
help='File with custom LCZ-based urban canopy parameters',
)

args = parser.parse_args(argv)

# check if a custom LCZ UCP file was set and read it
@@ -149,9 +163,11 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
print(
f'{FBOLD}--> Replace WRF MODIS urban LC with ' f'surrounding natural LC{FEND}'
)

wrf_remove_urban(
info=info,
NPIX_NLC=args.NPIX_NLC,
NPIX_AREA=args.NPIX_AREA,
)

print(f'{FBOLD}--> Create LCZ-based geo_em file{FEND}')
@@ -365,34 +381,39 @@ def check_lcz_integrity(info: Info, LCZ_BAND: int) -> None:
lcz.rio.to_raster(info.src_file_clean, dtype=np.int32)


def _calc_distance_coord(
lat1: float, lon1: float, lat2: float, lon2: float
) -> xr.DataArray:
def using_kdtree(
data: pd.DataFrame,
kpoints: int,
) -> Tuple[NDArray[np.float_], NDArray[np.int_]]:

'''Calculate distance using coordinates
This uses the spherical law of cosines
'''
Find the kpoints nearest points to each point in the input data.
The input DataFrame must contain at least latitude ('lat') and longitude ('lon') columns.
Points are embedded on a sphere in 3-D Cartesian coordinates and queried with a
KD-tree; ranking by the resulting chord length preserves the great-circle ordering.
Based on https://stackoverflow.com/q/43020919/190597
https://en.wikipedia.org/wiki/Great-circle_distance

earth_radius = 6371000 # Earth radius in m
lat1r = lat1 * np.pi / 180.0
lat2r = lat2 * np.pi / 180.0
lon1r = lon1 * np.pi / 180.0
lon2r = lon2 * np.pi / 180.0
Output:
distance: chord-length distances from each point to its kpoints nearest points (ordering is equivalent to great-circle arc length).
index: indices of the kpoints nearest points for each point.
'''

d = (
np.arccos(
np.sin(lat1r) * np.sin(lat2r)
+ np.cos(lat1r) * np.cos(lat2r) * np.cos(lon2r - lon1r)
)
* earth_radius
)
R = 6371  # Earth radius in km
phi = np.deg2rad(data['lat'])
theta = np.deg2rad(data['lon'])
data['x'] = R * np.cos(phi) * np.cos(theta)
data['y'] = R * np.cos(phi) * np.sin(theta)
data['z'] = R * np.sin(phi)
tree = spatial.KDTree(data[['x', 'y', 'z']])
distance, index = tree.query(data[['x', 'y', 'z']], k=kpoints)

return d
return distance, index
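
For reference, here is a minimal standalone sketch of the same technique (not part of the diff): the sample coordinates are made up, and only numpy, pandas and scipy are assumed.

import numpy as np
import pandas as pd
import scipy.spatial as spatial

# Made-up sample points: three along the equator plus one further north.
pts = pd.DataFrame({'lat': [0.0, 0.0, 0.0, 10.0], 'lon': [0.0, 1.0, 2.0, 0.0]})
R = 6371  # Earth radius in km
phi = np.deg2rad(pts['lat'])
theta = np.deg2rad(pts['lon'])
# Embedding the points on a sphere lets a KD-tree rank neighbours by chord
# length, which gives the same ordering as great-circle distance.
xyz = np.column_stack(
    (R * np.cos(phi) * np.cos(theta), R * np.cos(phi) * np.sin(theta), R * np.sin(phi))
)
tree = spatial.KDTree(xyz)
distance, index = tree.query(xyz, k=2)  # k=2: each point plus its nearest neighbour
# index[:, 1] holds, for every point, the row of its closest other point.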


def wrf_remove_urban(
info: Info,
NPIX_NLC: int,
NPIX_AREA: Optional[int],
) -> None:

'''Remove MODIS urban extent from geo_em*.nc file'''
@@ -411,84 +432,84 @@ def wrf_remove_urban(
newgreenf = greenf.values.copy()
orig_num_land_cat = dst_data.NUM_LAND_CAT

# Convert urban to surrounding natural characteristics
for i in dst_data.south_north:
for j in dst_data.west_east:
if luse.isel(south_north=i, west_east=j) == 13:
data_coord = pd.DataFrame(
{
'lat': lat.values.ravel(),
'lon': lon.values.ravel(),
'luse': luse.values.ravel(),
}
)

dis = _calc_distance_coord(
lat.where((luse != 13) & (luse != 17) & (luse != 21)),
lon.where((luse != 13) & (luse != 17) & (luse != 21)),
lat.isel(south_north=i, west_east=j),
lon.isel(south_north=i, west_east=j),
)
ERROR = '\033[0;31m'
ENDC = '\033[0m'

disflat = (
dis.stack(gridpoints=('south_north', 'west_east'))
.reset_index('gridpoints')
.drop_vars(['south_north', 'west_east'])
)
aux = luse.where(
dis <= disflat.sortby(disflat).isel(gridpoints=NPIX_NLC), drop=True
)
m = stats.mode(aux.values.flatten(), nan_policy='omit')[0]
newluse[i, j] = int(m)
luf_2D = luf.values.reshape(luf.shape[0], -1)
if orig_num_land_cat > 20: # USING MODIS_LAKE
data_coord['luf_natland'] = list(
(luf_2D[12, :] == 0) & (luf_2D[16, :] == 0) & (luf_2D[20, :] == 0)
)
data_coord['luf_urb'] = list((luf_2D[12, :] != 0))
else: # USING MODIS (NO LAKES)
data_coord['luf_natland'] = list((luf_2D[12, :] == 0) & (luf_2D[16, :] == 0))
data_coord['luf_urb'] = list((luf_2D[12, :] != 0))
if NPIX_AREA is None:
NPIX_AREA = NPIX_NLC**2
if NPIX_AREA > luse.size:
raise ValueError(
f'{ERROR}ERROR: The area you selected is larger than the domain size\n'
f'You chose an area of {NPIX_AREA} pixels and the domain is {luse.size} pixels\n'
f'Reduce NPIX_AREA. \n\n'
f'Exiting...{ENDC}'
)
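
To make the defaults concrete, a small sketch with assumed values (the 150 x 150 domain is illustrative only, not from the PR):

NPIX_NLC = 45                 # natural land-cover pixels to sample (CLI default)
NPIX_AREA = None              # -a / --npix-area not given on the command line
if NPIX_AREA is None:
    NPIX_AREA = NPIX_NLC**2   # default search area: 2025 candidate pixels
domain_size = 150 * 150       # assumed domain size, i.e. luse.size
if NPIX_AREA > domain_size:   # mirrors the guard above
    raise ValueError('Reduce NPIX_AREA: the search area exceeds the domain size')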

auxg = (
greenf.where(
dis <= disflat.sortby(disflat).isel(gridpoints=NPIX_NLC),
drop=True,
)
.where(aux == newluse[i, j])
.mean(dim=['south_north', 'west_east'])
)
newgreenf[:, i, j] = auxg

if luf.isel(south_north=i, west_east=j, land_cat=12) > 0.0:
if orig_num_land_cat > 20: # USING MODIS_LAKE
dis = _calc_distance_coord(
lat.where(
(luf.isel(land_cat=12) == 0.0)
& (luf.isel(land_cat=16) == 0.0)
& (luf.isel(land_cat=20) == 0.0)
),
lon.where(
(luf.isel(land_cat=12) == 0.0)
& (luf.isel(land_cat=16) == 0.0)
& (luf.isel(land_cat=20) == 0.0)
),
lat.isel(south_north=i, west_east=j),
lon.isel(south_north=i, west_east=j),
)
else: # USING MODIS (NO LAKES)
dis = _calc_distance_coord(
lat.where(
(luf.isel(land_cat=12) == 0.0)
& (luf.isel(land_cat=16) == 0.0)
),
lon.where(
(luf.isel(land_cat=12) == 0.0)
& (luf.isel(land_cat=16) == 0.0)
),
lat.isel(south_north=i, west_east=j),
lon.isel(south_north=i, west_east=j),
)
dkd, ikd = using_kdtree(data_coord, min(luse.size, NPIX_AREA))

disflat = (
dis.stack(gridpoints=('south_north', 'west_east'))
.reset_index('gridpoints')
.drop_vars(['south_north', 'west_east'])
)
aux = luse.where(
dis <= disflat.sortby(disflat).isel(gridpoints=NPIX_NLC), drop=True
)
m = stats.mode(aux.values.flatten(), nan_policy='omit')[0]
newlu = int(m) - 1
# newlu = int(mode(aux.values.flatten())[0])-1
newluf[newlu, i, j] += luf.isel(
south_north=i, west_east=j, land_cat=12
).values
newluf[12, i, j] = 0.0
data_coord['luse_urb'] = data_coord.luse == 13
data_coord['luse_natland'] = (
(data_coord.luse != 13) & (data_coord.luse != 17) & (data_coord.luse != 21)
)

data_urb = data_coord.where(data_coord.luse_urb).dropna()

for iurb in tqdm(data_urb.index, desc='Looping through urban grid pixels'):

i, j = np.unravel_index(iurb, luse.shape)
data_luse_natland = (
data_coord.loc[ikd[iurb, :]].where(data_coord.luse_natland).dropna()
)

try:
aux_kd = data_luse_natland.iloc[:NPIX_NLC]
mkd = data_luse_natland.iloc[:NPIX_NLC].luse.mode()[0]
except (IndexError, KeyError):
err = traceback.format_exc()
print(
f'{ERROR}ERROR: Not enough natural land points in selected area\n'
f'You chose to sample {NPIX_NLC} pixels over an area of {NPIX_AREA} pixels.\n'
f'Increase NPIX_AREA. \n\n'
f'{err}\n'
f'Exiting ...{ENDC}'
)
sys.exit(1)

gkf = np.take(
greenf.values.reshape(greenf.shape[0], -1),
aux_kd[aux_kd.luse == mkd.item()].index,
axis=1,
).mean(axis=1)
newluse[i, j] = int(mkd)
newgreenf[:, i, j] = gkf
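
As a reading aid, a toy version of the selection rule applied inside this loop, with made-up neighbour data (none of these values come from the PR):

import numpy as np
import pandas as pd

# Flattened-grid indices of the k nearest natural-land pixels and their classes.
neighbours = pd.DataFrame({'luse': [7, 10, 10, 16, 10]}, index=[3, 8, 9, 12, 20])
greenf_flat = np.random.rand(12, 25)   # (12 months, 25 flattened grid cells)

modal_class = neighbours['luse'].mode()[0]                 # most common class: 10
sel = neighbours[neighbours['luse'] == modal_class].index  # pixels of that class
monthly_greenf = np.take(greenf_flat, sel, axis=1).mean(axis=1)  # replacement profile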

## SET URBAN LANDUSEF to 0 and MOVE THAT FRACTION TO DOMINANT RURAL CATEGORY
data_coord['newluse'] = newluse.ravel()
data_luf = data_coord.where(data_coord.luf_urb).dropna()

for iurb_luf in data_luf.index:
i, j = np.unravel_index(iurb_luf, luse.shape)
newluf[int(data_luf.loc[iurb_luf]['newluse']) - 1, i, j] += luf.isel(
south_north=i, west_east=j, land_cat=12
).values
newluf[12, i, j] = 0.0

dst_data.LU_INDEX.values[0, :] = newluse[:]
dst_data.LANDUSEF.values[0, :] = newluf[:]