Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle comparison to undefined nodata values #269

Merged
merged 32 commits into from
Sep 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3adb758
add function to make raster with nodata value undefined
emlys Aug 14, 2020
8a6df92
allow undefined nodata value in seasonal water yield
emlys Aug 17, 2020
ec32ebd
replaced all usages of isclose to nodata with utils.is_valid
emlys Aug 17, 2020
d06db6b
fixed split line syntax error
emlys Aug 17, 2020
fc82e7b
added tests for utils.is_valid
emlys Aug 18, 2020
22d7dfa
cleanup
emlys Aug 18, 2020
abef4d0
checkout SWY test from release/3.9
emlys Aug 18, 2020
4400b4a
added detailed test for SWY with undefined nodata value
emlys Aug 18, 2020
9ef2fc9
added HISTORY note
emlys Aug 18, 2020
3011a63
cleanup
emlys Aug 18, 2020
7e947fe
removing utils.is_valid from globio
emlys Aug 20, 2020
7d553e4
globio tests passing
emlys Aug 20, 2020
819cd46
removed utils.is_valid from coastal blue carbon
emlys Aug 20, 2020
f6f0edc
removed utils.is_valid from coastal_vulnerability
emlys Aug 20, 2020
476f4e7
removed utils.is_valid from crop production percentile
emlys Aug 20, 2020
c25484a
removed utils.is_valid from crop production
emlys Aug 20, 2020
f64e695
removed utils.is_valid froom delineateit, habitat quality
emlys Aug 20, 2020
4d258ec
fix seasonal_water_yield_core
emlys Aug 20, 2020
c9cc727
remove utils.is_valid from hydropower, ndr
emlys Aug 20, 2020
dc81973
removed utils.is_valid from routedem, scenic quality, sdr
emlys Aug 20, 2020
07c96a9
added SWY test, removed utils.is_valid
emlys Aug 20, 2020
339a2ad
remove utils.is_valid from rest of files
emlys Aug 20, 2020
be52fe9
fixed bug in coastal blue carbon
emlys Aug 21, 2020
22903e6
cleanup
emlys Aug 21, 2020
744a478
more small cleanup
emlys Aug 21, 2020
c029365
delete cython html file
emlys Aug 21, 2020
dee60d2
use slice(None) rather than True boolean array
emlys Aug 25, 2020
36b920f
fixed typos
emlys Aug 25, 2020
b783c5c
requested changes for PR
emlys Aug 27, 2020
d26b15a
Merge branch 'release/3.9' into bug/228
emlys Aug 28, 2020
547e90f
requested changes for PR
emlys Aug 31, 2020
4026e6e
move nodata check outside of loop
emlys Aug 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ Unreleased Changes (3.9)
* ``convolve_2d`` keyword ``ignore_nodata`` to ``ignore_nodata_and_edges``.
* ``get_raster_info`` / ``get_vector_info`` keyword ``projection`` to
``projection_wkt``.
* Fixed bug that was causing a TypeError when certain input rasters had an
undefined nodata value. Undefined nodata values should now work everywhere.
* Habitat Quality:
* Refactor of Habitat Quality that implements TaskGraph
* Threat files are now indicated in the Threat Table csv input under
Expand Down
3 changes: 2 additions & 1 deletion src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,8 @@ def reclass(array, d, out_dtype=None, nodata_mask=None):
raise
reclass_array = v[index].reshape(array.shape)

if nodata_mask and numpy.issubdtype(reclass_array.dtype, numpy.floating):
if nodata_mask is not None and numpy.issubdtype(reclass_array.dtype,
numpy.floating):
reclass_array[numpy.isclose(array, nodata_mask)] = numpy.nan
reclass_array[numpy.isclose(array, ndata)] = numpy.nan

Expand Down
15 changes: 9 additions & 6 deletions src/natcap/invest/coastal_vulnerability.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ def calculate_wind_exposure(
bathy_band = bathy_raster.GetRasterBand(1)
bathy_raster_info = pygeoprocessing.get_raster_info(bathymetry_raster_path)
bathy_gt = bathy_raster_info['geotransform']
bathy_nodata = bathy_raster_info['nodata']
bathy_nodata = bathy_raster_info['nodata'][0]

target_shore_point_layer.StartTransaction()
temp_fetch_rays_layer.StartTransaction()
Expand Down Expand Up @@ -1282,7 +1282,7 @@ def extract_bathymetry_along_ray(
'bathymetry input fully cover the fetch ray area?'
% (value, {'xoff': ix, 'yoff': iy,
'win_xsize': win_size, 'win_ysize': win_size}))
if ~numpy.isclose(value, bathy_nodata):
if bathy_nodata is None or not math.isclose(value[0][0], bathy_nodata):
bathy_values.append(value)

# Gaps between shoreline and bathymetry input datasets could result in no
Expand Down Expand Up @@ -1315,12 +1315,15 @@ def extract_bathymetry_along_ray(
values = bathy_band.ReadAsArray(
xoff=ix, yoff=iy,
win_xsize=win_xsize, win_ysize=win_ysize)
if numpy.any(~numpy.isclose(values, bathy_nodata)):

# if nodata value is undefined, all pixels are valid
valid_mask = slice(None)
if bathy_nodata is not None:
valid_mask = ~numpy.isclose(values, bathy_nodata)
if numpy.any(valid_mask):
# take mean of valids and move on
value = numpy.mean(
values[~numpy.isclose(values, bathy_nodata)])
value = numpy.mean(values[valid_mask])
bathy_values.append(value)

return bathy_values


Expand Down
20 changes: 13 additions & 7 deletions src/natcap/invest/crop_production_percentile.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,9 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):
result = numpy.empty(
observed_yield_array.shape, dtype=numpy.float32)
result[:] = 0.0
valid_mask = ~numpy.isclose(observed_yield_array, observed_yield_nodata)
valid_mask = slice(None)
if observed_yield_nodata is not None:
valid_mask = ~numpy.isclose(observed_yield_array, observed_yield_nodata)
result[valid_mask] = observed_yield_array[valid_mask]
return result

Expand Down Expand Up @@ -623,12 +625,15 @@ def tabulate_results(
observed_production_raster_path)['nodata'][0]
for _, yield_block in pygeoprocessing.iterblocks(
(observed_production_raster_path, 1)):
production_pixel_count += numpy.count_nonzero(
~numpy.isclose(yield_block, observed_yield_nodata) &
(yield_block > 0.0))
yield_sum += numpy.sum(
yield_block[
~numpy.isclose(observed_yield_nodata, yield_block)])

# make a valid mask showing which pixels are not nodata
# if nodata value undefined, assume all pixels are valid
valid_mask = slice(None)
if observed_yield_nodata is not None:
valid_mask = ~numpy.isclose(yield_block, observed_yield_nodata)
production_pixel_count += numpy.count_nonzero(valid_mask & (
yield_block > 0.0))
yield_sum += numpy.sum(yield_block[valid_mask])
production_area = production_pixel_count * pixel_area_ha
production_lookup['observed'] = yield_sum
result_table.write(',%f' % production_area)
Expand All @@ -642,6 +647,7 @@ def tabulate_results(
yield_sum = 0.0
for _, yield_block in pygeoprocessing.iterblocks(
(yield_percentile_raster_path, 1)):
# _NODATA_YIELD will always have a value (defined above)
yield_sum += numpy.sum(
yield_block[~numpy.isclose(yield_block, _NODATA_YIELD)])
production_lookup[yield_percentile_id] = yield_sum
Expand Down
18 changes: 12 additions & 6 deletions src/natcap/invest/crop_production_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,9 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):
result = numpy.empty(
observed_yield_array.shape, dtype=numpy.float32)
result[:] = 0.0
valid_mask = ~numpy.isclose(observed_yield_array, observed_yield_nodata)
valid_mask = slice(None)
if observed_yield_nodata is not None:
valid_mask = ~numpy.isclose(observed_yield_array, observed_yield_nodata)
result[valid_mask] = observed_yield_array[valid_mask]
return result

Expand Down Expand Up @@ -737,12 +739,15 @@ def tabulate_regression_results(
observed_production_raster_path)['nodata'][0]
for _, yield_block in pygeoprocessing.iterblocks(
(observed_production_raster_path, 1)):

# make a valid mask showing which pixels are not nodata
# if nodata value undefined, assume all pixels are valid
valid_mask = slice(None)
if observed_yield_nodata is not None:
valid_mask = ~numpy.isclose(yield_block, observed_yield_nodata)
production_pixel_count += numpy.count_nonzero(
~numpy.isclose(yield_block, observed_yield_nodata) &
(yield_block > 0.0))
yield_sum += numpy.sum(
yield_block[
~numpy.isclose(observed_yield_nodata, yield_block)])
valid_mask & (yield_block > 0.0))
yield_sum += numpy.sum(yield_block[valid_mask])
Comment on lines +746 to +750
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice! this looks like it'll be a bit more efficient, as well as supporting the possibility of an undefined nodata value.

production_area = production_pixel_count * pixel_area_ha
production_lookup['observed'] = yield_sum
result_table.write(',%f' % production_area)
Expand All @@ -755,6 +760,7 @@ def tabulate_regression_results(
for _, yield_block in pygeoprocessing.iterblocks(
(crop_production_raster_path, 1)):
yield_sum += numpy.sum(
# _NODATA_YIELD will always have a value (defined above)
yield_block[~numpy.isclose(yield_block, _NODATA_YIELD)])
production_lookup['modeled'] = yield_sum
result_table.write(",%f" % yield_sum)
Expand Down
6 changes: 5 additions & 1 deletion src/natcap/invest/delineateit.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,11 @@ def _threshold_streams(flow_accum, src_nodata, out_nodata, threshold):
"""
out_matrix = numpy.empty(flow_accum.shape, dtype=numpy.uint8)
out_matrix[:] = out_nodata
valid_pixels = ~numpy.isclose(src_nodata, flow_accum)

valid_pixels = slice(None)
if src_nodata is not None:
valid_pixels = ~numpy.isclose(flow_accum, src_nodata)

over_threshold = flow_accum > threshold
out_matrix[valid_pixels & over_threshold] = 1
out_matrix[valid_pixels & ~over_threshold] = 0
Expand Down
48 changes: 27 additions & 21 deletions src/natcap/invest/globio.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,10 +456,12 @@ def _primary_veg_mask_op(lulc_array, globio_nodata, primary_veg_mask_nodata):
"""Masking out natural areas."""
# lulc_array and nodata could conceivably be a float here,
# if it's the user-provided globio dataset
valid_mask = ~numpy.isclose(lulc_array, globio_nodata)
# landcover type 1 in the GLOBIO schema represents primary vegetation
result = numpy.empty_like(lulc_array, dtype=numpy.int16)
result[:] = primary_veg_mask_nodata
valid_mask = slice(None)
if globio_nodata is not None:
valid_mask = ~numpy.isclose(lulc_array, globio_nodata)
result[valid_mask] = lulc_array[valid_mask] == 1
return result

Expand Down Expand Up @@ -493,7 +495,6 @@ def _msa_f_op(
Array with float values. One component of final MSA score.

"""
nodata_mask = numpy.isclose(primary_veg_mask_nodata, primary_veg_smooth)
msa_f = numpy.empty(primary_veg_smooth.shape)

less_than = msa_f_table.pop('<', None)
Expand All @@ -507,7 +508,10 @@ def _msa_f_op(
msa_f[primary_veg_smooth < less_than[0]] = (
less_than[1])

msa_f[nodata_mask] = msa_nodata
if msa_nodata is not None:
nodata_mask = numpy.isclose(primary_veg_smooth,
primary_veg_mask_nodata)
msa_f[nodata_mask] = msa_nodata

return msa_f

Expand Down Expand Up @@ -574,7 +578,9 @@ def _msa_op(msa_f, msa_lu, msa_i, globio_nodata):
"""Calculate the MSA which is the product of the sub MSAs."""
result = numpy.empty_like(msa_f, dtype=numpy.float32)
result[:] = globio_nodata
valid_mask = ~numpy.isclose(msa_f, globio_nodata)
valid_mask = slice(None)
if globio_nodata is not None:
valid_mask = ~numpy.isclose(msa_f, globio_nodata)
result[valid_mask] = msa_f[valid_mask] * msa_lu[valid_mask] * msa_i[valid_mask]
return result

Expand Down Expand Up @@ -809,7 +815,8 @@ def _calculate_globio_lulc_map(

def _forest_area_mask_op(lulc_array, globio_nodata, forest_areas_nodata):
"""Masking out forest areas."""
valid_mask = ~numpy.isclose(lulc_array, globio_nodata)
# comparing integers, numpy.isclose not needed
valid_mask = lulc_array != globio_nodata
# landcover code 130 represents all MODIS forest codes which originate
# as 1-5
result = numpy.empty_like(lulc_array, dtype=numpy.int16)
Expand Down Expand Up @@ -943,22 +950,21 @@ def _collapse_infrastructure_op(*infrastructure_array_list):
# necessarily nodata
infrastructure_result = numpy.zeros(
infrastructure_array_list[0].shape, dtype=numpy.uint8)

nodata_mask = numpy.isclose(
infrastructure_array_list[0], infrastructure_nodata_list[0])

infrastructure_mask = (infrastructure_array_list[0] > 0) & ~nodata_mask

for index in range(1, len(infrastructure_array_list)):
current_nodata = numpy.isclose(
infrastructure_array_list[index],
infrastructure_nodata_list[index])

infrastructure_mask = (
infrastructure_mask |
((infrastructure_array_list[index] > 0) & ~current_nodata))

nodata_mask = (nodata_mask & current_nodata)

nodata_mask = numpy.full(infrastructure_array_list[0].shape, True)
infrastructure_mask = numpy.full(infrastructure_array_list[0].shape, False)

for index in range(len(infrastructure_array_list)):
# mark all pixels True that have infrastructure in this layer
infrastructure_mask |= infrastructure_array_list[index] > 0
if infrastructure_nodata_list[index] is not None:
# update nodata mask: intersection with this layer
nodata_mask &= numpy.isclose(infrastructure_array_list[index],
infrastructure_nodata_list[index])
# if nodata is None, every pixel in this layer has data,
# so the nodata_mask should be all False
else:
nodata_mask &= False

infrastructure_result[infrastructure_mask] = 1
infrastructure_result[nodata_mask] = infrastructure_nodata
Expand Down
4 changes: 2 additions & 2 deletions src/natcap/invest/habitat_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,8 +700,8 @@ def quality_op(degradation, habitat):
# Both these rasters are Float32, so the actual pixel values written
# might be *slightly* off of _OUT_NODATA but should still be
# interpreted as nodata.
valid_pixels = ~(
numpy.isclose(degradation, _OUT_NODATA) |
# _OUT_NODATA (defined above) should never be None, so this is okay
valid_pixels = ~(numpy.isclose(degradation, _OUT_NODATA) |
numpy.isclose(habitat, _OUT_NODATA))

out_array[valid_pixels] = (
Expand Down
31 changes: 21 additions & 10 deletions src/natcap/invest/hydropower/hydropower_water_yield.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,9 @@ def aet_op(fractp, precip, precip_nodata, output_nodata):
result[:] = output_nodata
# checking if fractp >= 0 because it's a value that's between 0 and 1
# and the nodata value is a large negative number.
valid_mask = (fractp >= 0) & ~numpy.isclose(precip, precip_nodata)
valid_mask = fractp >= 0
if precip_nodata is not None:
valid_mask &= ~numpy.isclose(precip, precip_nodata)
result[valid_mask] = fractp[valid_mask] * precip[valid_mask]
return result

Expand All @@ -759,8 +761,10 @@ def wyield_op(fractp, precip, precip_nodata, output_nodata):
"""
result = numpy.empty_like(fractp)
result[:] = output_nodata
valid_mask = (~numpy.isclose(fractp, output_nodata) &
~numpy.isclose(precip, precip_nodata))
# output_nodata is defined above, should never be None
valid_mask = ~numpy.isclose(fractp, output_nodata)
if precip_nodata is not None:
valid_mask &= ~numpy.isclose(precip, precip_nodata)
result[valid_mask] = (1.0 - fractp[valid_mask]) * precip[valid_mask]
return result

Expand Down Expand Up @@ -799,16 +803,21 @@ def fractp_op(
# Kc, root, & veg were created by reclassify_raster, which set nodata
# to out_nodata. All others are products of align_and_resize_raster_stack
# and retain their original nodata values.
# out_nodata is defined above and should never be None.
valid_mask = (
~numpy.isclose(Kc, nodata_dict['out_nodata']) &
~numpy.isclose(eto, nodata_dict['eto']) &
~numpy.isclose(precip, nodata_dict['precip']) &
~numpy.isclose(root, nodata_dict['out_nodata']) &
~numpy.isclose(soil, nodata_dict['depth_root']) &
~numpy.isclose(pawc, nodata_dict['pawc']) &
~numpy.isclose(veg, nodata_dict['out_nodata']) &
~numpy.isclose(precip, 0.0))

if nodata_dict['eto'] is not None:
valid_mask &= ~numpy.isclose(eto, nodata_dict['eto'])
if nodata_dict['precip'] is not None:
valid_mask &= ~numpy.isclose(precip, nodata_dict['precip'])
if nodata_dict['depth_root'] is not None:
valid_mask &= ~numpy.isclose(soil, nodata_dict['depth_root'])
if nodata_dict['pawc'] is not None:
valid_mask &= ~numpy.isclose(pawc, nodata_dict['pawc'])

# Compute Budyko Dryness index
# Use the original AET equation if the land cover type is vegetation
# If not vegetation (wetlands, urban, water, etc...) use
Expand Down Expand Up @@ -871,8 +880,10 @@ def pet_op(eto_pix, Kc_pix, eto_nodata, output_nodata):
"""
result = numpy.empty(eto_pix.shape, dtype=numpy.float32)
result[:] = output_nodata
valid_mask = (~numpy.isclose(eto_pix, eto_nodata) &
~numpy.isclose(Kc_pix, output_nodata))

valid_mask = ~numpy.isclose(Kc_pix, output_nodata)
if eto_nodata is not None:
valid_mask &= ~numpy.isclose(eto_pix, eto_nodata)
result[valid_mask] = eto_pix[valid_mask] * Kc_pix[valid_mask]
return result

Expand Down
37 changes: 25 additions & 12 deletions src/natcap/invest/ndr/ndr.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,11 @@ def _normalize_raster(base_raster_path_band, target_normalized_raster_path):
base_raster_path_band[0])['nodata'][base_raster_path_band[1]-1]
for _, raster_block in pygeoprocessing.iterblocks(
base_raster_path_band):
valid_block = raster_block[~numpy.isclose(raster_block, base_nodata)]
valid_mask = slice(None)
if base_nodata is not None:
valid_mask = ~numpy.isclose(raster_block, base_nodata)

valid_block = raster_block[valid_mask]
value_sum += numpy.sum(valid_block)
value_count += valid_block.size

Expand All @@ -876,7 +880,10 @@ def _normalize_raster_op(array):
"""Divide values by mean."""
result = numpy.empty(array.shape, dtype=numpy.float32)
result[:] = numpy.float32(base_nodata)
valid_mask = ~numpy.isclose(array, base_nodata)

valid_mask = slice(None)
if base_nodata is not None:
valid_mask = ~numpy.isclose(raster_block, base_nodata)
result[valid_mask] = array[valid_mask]
if value_mean != 0:
result[valid_mask] /= value_mean
Expand Down Expand Up @@ -954,10 +961,10 @@ def _mult_op(*array_nodata_list):
"""Multiply non-nodata stacks."""
result = numpy.empty(array_nodata_list[0].shape)
result[:] = target_nodata
valid_mask = ~numpy.isclose(
array_nodata_list[0], array_nodata_list[1])
for array, nodata in zip(*[iter(array_nodata_list[2:])]*2):
valid_mask &= ~numpy.isclose(array, nodata)
valid_mask = numpy.full(result.shape, True)
for array, nodata in zip(*[iter(array_nodata_list)]*2):
if nodata is not None:
valid_mask &= ~numpy.isclose(array, nodata)
result[valid_mask] = array_nodata_list[0][valid_mask]
for array in array_nodata_list[2::2]:
result[valid_mask] *= array[valid_mask]
Expand Down Expand Up @@ -1188,7 +1195,10 @@ def _inverse_op(base_val):
"""Calculate inverse of S factor."""
result = numpy.empty(base_val.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
valid_mask = ~numpy.isclose(base_val, base_nodata)
valid_mask = slice(None)
if base_nodata is not None:
valid_mask = ~numpy.isclose(base_val, base_nodata)

zero_mask = base_val == 0.0
result[valid_mask & ~zero_mask] = (
1.0 / base_val[valid_mask & ~zero_mask])
Expand Down Expand Up @@ -1260,6 +1270,8 @@ def _calculate_sub_ndr(

def _sub_ndr_op(dist_to_channel_array):
"""Calculate subsurface NDR."""
# nodata value from this ntermediate output should always be
# defined by pygeoprocessing, not None
valid_mask = ~numpy.isclose(
dist_to_channel_array, dist_to_channel_nodata)
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
Expand Down Expand Up @@ -1290,11 +1302,12 @@ def _calculate_export_op(
modified_load_array, ndr_array, modified_sub_load_array,
sub_ndr_array):
"""Combine NDR and subsurface NDR."""
valid_mask = (
~numpy.isclose(modified_load_array, load_nodata) &
~numpy.isclose(ndr_array, ndr_nodata) &
~numpy.isclose(modified_sub_load_array, subsurface_load_nodata) &
~numpy.isclose(sub_ndr_array, sub_ndr_nodata))
# these intermediate outputs should always have defined nodata
# values assigned by pygeoprocessing
valid_mask = ~(numpy.isclose(modified_load_array, load_nodata) |
numpy.isclose(ndr_array, ndr_nodata) |
numpy.isclose(modified_sub_load_array, subsurface_load_nodata) |
numpy.isclose(sub_ndr_array, sub_ndr_nodata))
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
result[valid_mask] = (
Expand Down
Loading