From 2354bdd1fad182c0c336f79291f6c2ef96841470 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 7 Apr 2021 17:20:31 -0400 Subject: [PATCH 01/13] Sentinel 2 L2A: Add band assets for lower spatial res versions --- .../stactools/sentinel2/constants.py | 54 +++++++++++++++++++ .../stactools/sentinel2/stac.py | 21 ++++---- tests/sentinel2/test_commands.py | 42 ++++++++++++++- 3 files changed, 106 insertions(+), 11 deletions(-) diff --git a/stactools_sentinel2/stactools/sentinel2/constants.py b/stactools_sentinel2/stactools/sentinel2/constants.py index 9ddf0002..9c1e7036 100644 --- a/stactools_sentinel2/stactools/sentinel2/constants.py +++ b/stactools_sentinel2/stactools/sentinel2/constants.py @@ -93,3 +93,57 @@ center_wavelength=2.190, full_width_half_max=0.242), } + +# A dict describing the resolutions that are +# available for each band as separate assets. +# The first resolution is the sensor gsd; others +# are downscaled versions. +BANDS_TO_RESOLUTIONS = { + 'B01': [ + 60, + ], + 'B02': [ + 10, + 20, + 60, + ], + 'B03': [ + 10, + 20, + 60, + ], + 'B04': [ + 10, + 20, + 60, + ], + 'B05': [ + 20, + 20, + 60, + ], + 'B06': [ + 20, + 60, + ], + 'B07': [ + 20, + 60, + ], + 'B08': [ + 10, + ], + 'B8A': [ + 20, + 60, + ], + 'B09': [60], + 'B11': [ + 20, + 60, + ], + 'B12': [ + 20, + 60, + ], +} diff --git a/stactools_sentinel2/stactools/sentinel2/stac.py b/stactools_sentinel2/stactools/sentinel2/stac.py index b41c9aad..a40d3325 100644 --- a/stactools_sentinel2/stactools/sentinel2/stac.py +++ b/stactools_sentinel2/stactools/sentinel2/stac.py @@ -12,12 +12,10 @@ from stactools.sentinel2.product_metadata import ProductMetadata from stactools.sentinel2.granule_metadata import GranuleMetadata from stactools.sentinel2.utils import extract_gsd -from stactools.sentinel2.constants import (DATASTRIP_METADATA_ASSET_KEY, - SENTINEL_PROVIDER, SENTINEL_LICENSE, - SENTINEL_BANDS, - SENTINEL_INSTRUMENTS, - SENTINEL_CONSTELLATION, - INSPIRE_METADATA_ASSET_KEY) +from stactools.sentinel2.constants import ( + BANDS_TO_RESOLUTIONS, DATASTRIP_METADATA_ASSET_KEY, SENTINEL_PROVIDER, + SENTINEL_LICENSE, SENTINEL_BANDS, SENTINEL_INSTRUMENTS, + SENTINEL_CONSTELLATION, INSPIRE_METADATA_ASSET_KEY) logger = logging.getLogger(__name__) @@ -184,17 +182,22 @@ def set_asset_properties(asset): # Handle band image - band_id_search = re.search(r'_(B\w{2})_', asset_href) + band_id_search = re.search(r'_(B\w{2})', asset_href) if band_id_search is not None: - band_id = band_id_search.group(1) + band_id, href_res = os.path.splitext(asset_href)[0].split('_')[-2:] + asset_res = int(href_res.replace('m', '')) band = SENTINEL_BANDS[band_id] + if asset_res == BANDS_TO_RESOLUTIONS[band_id][0]: + asset_key = band_id + else: + asset_key = f'{band_id}_{asset_res}m' asset = pystac.Asset(href=asset_href, media_type=asset_media_type, title=band.description, roles=['data']) item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset) set_asset_properties(asset) - return (band_id, asset) + return (asset_key, asset) # Handle auxiliary images diff --git a/tests/sentinel2/test_commands.py b/tests/sentinel2/test_commands.py index 59307611..972dbce3 100644 --- a/tests/sentinel2/test_commands.py +++ b/tests/sentinel2/test_commands.py @@ -1,3 +1,4 @@ +from collections import defaultdict import os from tempfile import TemporaryDirectory @@ -7,7 +8,7 @@ from stactools.core.projection import reproject_geom from stactools.sentinel2.commands import create_sentinel2_command -from stactools.sentinel2.constants import SENTINEL_BANDS +from 
stactools.sentinel2.constants import BANDS_TO_RESOLUTIONS, SENTINEL_BANDS from tests.utils import (TestData, CliTestCase) @@ -58,13 +59,50 @@ def check_proj_bbox(item): item.validate() bands_seen = set() + bands_to_assets = defaultdict(list) - for asset in item.assets.values(): + for key, asset in item.assets.items(): self.assertTrue(is_absolute_href(asset.href)) bands = item.ext.eo.get_bands(asset) if bands is not None: bands_seen |= set(b.name for b in bands) + if key.split('_')[0] in SENTINEL_BANDS: + for b in bands: + bands_to_assets[b.name].append( + (key, asset)) self.assertEqual(bands_seen, set(SENTINEL_BANDS.keys())) + # Check that multiple resolutions exist for assets that + # have them, and that they are named such that the highest + # resolution asset is the band name, and others are + # appended with the resolution. + + resolutions_seen = defaultdict(list) + + for band_name, assets in bands_to_assets.items(): + for (asset_key, asset) in assets: + resolutions = BANDS_TO_RESOLUTIONS[band_name] + + asset_split = asset_key.split('_') + self.assertLessEqual(len(asset_split), 2) + + href_band, href_res = os.path.splitext( + asset.href)[0].split('_')[-2:] + asset_res = int(href_res.replace('m', '')) + self.assertEqual(href_band, band_name) + if len(asset_split) == 1: + self.assertEqual(asset_res, resolutions[0]) + resolutions_seen[band_name].append(asset_res) + else: + self.assertNotEqual(asset_res, resolutions[0]) + self.assertIn(asset_res, resolutions) + resolutions_seen[band_name].append(asset_res) + + self.assertEqual(set(resolutions_seen.keys()), + set(BANDS_TO_RESOLUTIONS.keys())) + for band in resolutions_seen: + self.assertEqual(set(resolutions_seen[band]), + set(BANDS_TO_RESOLUTIONS[band])) + check_proj_bbox(item) From 0132cd4dabf6acf09ea1e8948d90fe080282ea00 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 8 Apr 2021 03:37:40 -0400 Subject: [PATCH 02/13] Include resolution in band asset title --- stactools_sentinel2/stactools/sentinel2/stac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stactools_sentinel2/stactools/sentinel2/stac.py b/stactools_sentinel2/stactools/sentinel2/stac.py index a40d3325..71dcfca2 100644 --- a/stactools_sentinel2/stactools/sentinel2/stac.py +++ b/stactools_sentinel2/stactools/sentinel2/stac.py @@ -193,7 +193,7 @@ def set_asset_properties(asset): asset_key = f'{band_id}_{asset_res}m' asset = pystac.Asset(href=asset_href, media_type=asset_media_type, - title=band.description, + title=f'{band.description} - {href_res}', roles=['data']) item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset) set_asset_properties(asset) From 5a10d7bdf7447eea8a5a83dd5544902f8c719ace Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 8 Apr 2021 03:37:54 -0400 Subject: [PATCH 03/13] Set GSD correctly for downsampled band assets --- stactools_sentinel2/stactools/sentinel2/stac.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/stactools_sentinel2/stactools/sentinel2/stac.py b/stactools_sentinel2/stactools/sentinel2/stac.py index 71dcfca2..57747d79 100644 --- a/stactools_sentinel2/stactools/sentinel2/stac.py +++ b/stactools_sentinel2/stactools/sentinel2/stac.py @@ -170,12 +170,15 @@ def image_asset_from_href( return ('preview', asset) # Extract gsd and proj info - gsd = extract_gsd(asset_href) - shape = list(resolution_to_shape[int(gsd)]) + filename_gsd = extract_gsd(asset_href) + shape = list(resolution_to_shape[int(filename_gsd)]) transform = transform_from_bbox(proj_bbox, shape) - def 
set_asset_properties(asset): - item.common_metadata.set_gsd(gsd, asset) + def set_asset_properties(asset: pystac.Asset, band_gsd: Optional[int] = None): + if filename_gsd is None: + item.common_metadata.set_gsd(filename_gsd, asset) + else: + item.common_metadata.set_gsd(band_gsd, asset) item.ext.projection.set_shape(shape, asset) item.ext.projection.set_bbox(proj_bbox, asset) item.ext.projection.set_transform(transform, asset) @@ -196,7 +199,7 @@ def set_asset_properties(asset): title=f'{band.description} - {href_res}', roles=['data']) item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset) - set_asset_properties(asset) + set_asset_properties(asset, band_gsd=BANDS_TO_RESOLUTIONS[band_id][0]) return (asset_key, asset) # Handle auxiliary images From 8284d6d4c1b5ef65107a7875d1e3644e8d213a73 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Fri, 9 Apr 2021 15:08:51 -0400 Subject: [PATCH 04/13] Fix formatting --- stactools_sentinel2/stactools/sentinel2/stac.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stactools_sentinel2/stactools/sentinel2/stac.py b/stactools_sentinel2/stactools/sentinel2/stac.py index 57747d79..0571aff3 100644 --- a/stactools_sentinel2/stactools/sentinel2/stac.py +++ b/stactools_sentinel2/stactools/sentinel2/stac.py @@ -174,7 +174,8 @@ def image_asset_from_href( shape = list(resolution_to_shape[int(filename_gsd)]) transform = transform_from_bbox(proj_bbox, shape) - def set_asset_properties(asset: pystac.Asset, band_gsd: Optional[int] = None): + def set_asset_properties(asset: pystac.Asset, + band_gsd: Optional[int] = None): if filename_gsd is None: item.common_metadata.set_gsd(filename_gsd, asset) else: From 948f56fd5d025bc28496ac12ff5b12163d63b25e Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 27 May 2021 11:33:32 -0400 Subject: [PATCH 05/13] Don't put GSD on assets where it differs from the spatial resolution --- .../stactools/sentinel2/stac.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/stactools_sentinel2/stactools/sentinel2/stac.py b/stactools_sentinel2/stactools/sentinel2/stac.py index 0571aff3..e5647c27 100644 --- a/stactools_sentinel2/stactools/sentinel2/stac.py +++ b/stactools_sentinel2/stactools/sentinel2/stac.py @@ -189,18 +189,34 @@ def set_asset_properties(asset: pystac.Asset, band_id_search = re.search(r'_(B\w{2})', asset_href) if band_id_search is not None: band_id, href_res = os.path.splitext(asset_href)[0].split('_')[-2:] - asset_res = int(href_res.replace('m', '')) band = SENTINEL_BANDS[band_id] + + # Get the asset resolution from the file name. + # If the asset resolution is the band GSD, then + # include the gsd information for that asset. Otherwise, + # do not include the GSD information in the asset + # as this may be confusing for users given that the + # raster spatial resolution and gsd will differ. + # See https://github.com/radiantearth/stac-spec/issues/1096 + asset_res = int(href_res.replace('m', '')) + band_gsd: Optional[int] = None if asset_res == BANDS_TO_RESOLUTIONS[band_id][0]: asset_key = band_id + band_gsd = asset_res else: + # If this isn't the default resolution, use the raster + # resolution in the asset key. + # TODO: Use the raster extension and spatial_resolution + # property to encode the spatial resolution of all assets. 
asset_key = f'{band_id}_{asset_res}m' + asset = pystac.Asset(href=asset_href, media_type=asset_media_type, title=f'{band.description} - {href_res}', roles=['data']) + item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset) - set_asset_properties(asset, band_gsd=BANDS_TO_RESOLUTIONS[band_id][0]) + set_asset_properties(asset, band_gsd=band_gsd) return (asset_key, asset) # Handle auxiliary images From 354d5a441f4562617ea800c8fca395e101fa41e8 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 27 May 2021 12:53:45 -0400 Subject: [PATCH 06/13] Fix typo in Sentinel 2 L2A items --- stactools_sentinel2/stactools/sentinel2/granule_metadata.py | 2 +- tests/sentinel2/test_metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stactools_sentinel2/stactools/sentinel2/granule_metadata.py b/stactools_sentinel2/stactools/sentinel2/granule_metadata.py index 41179abf..af29761b 100644 --- a/stactools_sentinel2/stactools/sentinel2/granule_metadata.py +++ b/stactools_sentinel2/stactools/sentinel2/granule_metadata.py @@ -149,7 +149,7 @@ def metadata_dict(self): float, self._image_content_node.find_text( 'HIGH_PROBA_CLOUDS_PERCENTAGE')), - 's2:thin_sirrus_percentage': + 's2:thin_cirrus_percentage': map_opt( float, self._image_content_node.find_text('THIN_CIRRUS_PERCENTAGE')), diff --git a/tests/sentinel2/test_metadata.py b/tests/sentinel2/test_metadata.py index 82272da9..71a4af01 100644 --- a/tests/sentinel2/test_metadata.py +++ b/tests/sentinel2/test_metadata.py @@ -49,7 +49,7 @@ def test_parses_product_metadata_properties(self): 's2:unclassified_percentage': 0.0, 's2:medium_proba_clouds_percentage': 14.61311, 's2:high_proba_clouds_percentage': 24.183494, - 's2:thin_sirrus_percentage': 12.783723, + 's2:thin_cirrus_percentage': 12.783723, 's2:snow_ice_percentage': 0.0, 's2:mean_solar_zenith': 32.707073851362, 's2:mean_solar_azimuth': 62.3286549448294 From 2c13ad7187de551b1281e498d3e3bfe7f91f632f Mon Sep 17 00:00:00 2001 From: Chris Holmes Date: Tue, 1 Jun 2021 10:35:28 -0700 Subject: [PATCH 07/13] Small typo fix active -> activate --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 20ecca86..7f39a443 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ If not using docker, itt's recommended to use [virtualenv](https://virtualenv.py - Make sure [virtualenv](https://virtualenv.pypa.io/en/latest/installation.html) is installed - Run `virtualenv venv` -- Activate the virtualenv with `source venv/bin/active` +- Activate the virtualenv with `source venv/bin/activate` #### Installing development requirements From 852d08f391e9e2940d8f9b2dcfec9fec7d26536f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 12 May 2021 07:30:24 -0600 Subject: [PATCH 08/13] Add cgls_lc100 cog check This fails due to a bad `cogify` call in cgls_lc100. Also, adds a subsetted local file to make sure we don't run the tests forever when we do fix `cogify`. 
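
For reference, the failing call lives in
`stactools_cgls_lc100/stactools/cgls_lc100/cog.py`, where `-co` is passed
without a creation option, so the input path ends up being treated as the
option value. A sketch of a well-formed invocation, mirroring the
deflate-compressed COG calls used by the other subpackages in this series:

    from stactools.core.utils.subprocess import call

    def cogify(input_path, output_path):
        # '-co' must be followed by a KEY=VALUE creation option
        return call(['gdal_translate', '-of', 'COG',
                     '-co', 'compress=deflate',
                     input_path, output_path])

A later commit in this series drops these per-package helpers in favor of a
shared `stactools.core.utils.convert.cogify`.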
--- tests/cgls_lc100/test_commands.py | 13 +++++++++++-- ...C100_global_v3.0.1_2019-nrt_ccl_subset.tif | Bin 0 -> 1019260 bytes tests/utils/test_data.py | 11 ----------- 3 files changed, 11 insertions(+), 13 deletions(-) create mode 100644 tests/data-files/cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl_subset.tif diff --git a/tests/cgls_lc100/test_commands.py b/tests/cgls_lc100/test_commands.py index da93fd5d..f4716671 100644 --- a/tests/cgls_lc100/test_commands.py +++ b/tests/cgls_lc100/test_commands.py @@ -6,7 +6,13 @@ from stactools.cgls_lc100.commands import create_cgls_lc100_command from tests.utils import (TestData, CliTestCase) -EXTERNAL_DATA_PATH = 'cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl.tif' +# Created from: +# +# https://zenodo.org/record/3939050/files/ +# PROBAV_LC100_global_v3.0.1_2019-nrt_Change-Confidence-layer_EPSG-4326.tif?download=1 +# +# and subsetted via `gdal_translate -projwin -180 80 -179 79` +DATA_PATH = 'data-files/cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl_subset.tif' class CreateItemTest(CliTestCase): @@ -14,7 +20,7 @@ def create_subcommand_functions(self): return [create_cgls_lc100_command] def test_create_item(self): - tif_href = TestData.get_external_data(EXTERNAL_DATA_PATH) + tif_href = TestData.get_path(DATA_PATH) with TemporaryDirectory() as tmp_dir: cmd = ['cgls_lc100', 'create-item', '--cogify', tif_href, tmp_dir] @@ -26,5 +32,8 @@ def test_create_item(self): item_path = os.path.join(tmp_dir, jsons[0]) item = pystac.read_file(item_path) + cog = item.assets["cog_image"] + self.assertTrue(os.path.exists(cog.href), + f"COG does not exist: {cog.href}") item.validate() diff --git a/tests/data-files/cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl_subset.tif b/tests/data-files/cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl_subset.tif new file mode 100644 index 0000000000000000000000000000000000000000..5b4cb292cdd4b66c54ad20e8d554b3fd3e2f9cc0 GIT binary patch literal 1019260 zcmeI#Uud1>9S86yX>6;l)=f4q-0&q1tg}f@l2&cjMPuzkolT|9y5WW4^yE!)DecrlnTVS^oTzjG36IuaFj zxlfzq`{bPG{r7o)&+k1sefo=Oe@f}oDGlsN12KAcPYkz>2e-z(S$np|y?J}LjXyOo zlpc%M{u*z5X4~tZ-L~zGG49QIJH9`+ZO-{^3p;b2NHT{@TsW4svSmoE+W&%TK921F3uKK$cvFr zMm`t$yT~^q8JFiOJ3z6%QPe=YT^0mnKB5&B6I+J@-_grKv@~Oy|BVUbtH}cTF)EV2Cx^t0@ z$R9=iEb`Bhe~Ub@KXq>3pSt%)J`(wZ$QL9382L`*zN=H`)~i$ZE0If)-;I1B^7oN% zMeeyKb#A#Pb?=S582LoxbCIt^z8U#`~F zFD#WS)mpXK7@odxdj4!Scj3fXZ`OZLuQXRTTGi#1VtD#wb2V=@s+F~Nc0O;dS1Wl| zZ7elg^>R^dHnNG*L}{0OwDNM%Yz+5b9xsg^o49Ly?9sf@ zTx^a`9-lm3I=0JsE>&xJ93f6Nu4j1qOkC9cYqe~AG8;cWH90;tdHf?Ta;4giYYfhw z!$|4Qk?~UYV5?cl+wE#&IjdD0dG^4`@lq*UKRP5hab+YHy7j3 zXP1_Gw`|why=sgq*(}Jf&7b+$iOa3(V${^^x%1iK^Le9)o!3StvLY5?lt=H~UCMZ? z1{MOg-LK2_$`z~zc<+$G7I@;}MMXh(C8-Z7?WFM}M=zyzP^nv=v zEW(R%+xq1dIQ7+9wC>U;ZhcF&^72AGFE^sL+QZv-&}e3relt0etu~8Z9Y(9!d?h

zz1nZjTU}}Up?7Ui?Rabzn@eK*{9Rh~&WBxVREsNa%UpA}UK5tp#f4U{|6j31fL^9N zF?Qt>KMf4cTt77P;N2f&|D*87{dj!omCY&h_iSeTHPHXNU|;Heq#lUR-tl37^R@W2 zJ&@kMeqjI0m*U;DxBvT(Z@GH^OP6}v9O!3C->(-x`TX;(-u~|S>HIfl;>n%eb+nV- z_m2Pp0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ SfB*pk1PBlyK!CviAn+e%Z4L4O literal 0 HcmV?d00001 diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index 4533c0bf..2924e1cb 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -5,10 +5,6 @@ import requests -CLGS_LC100_DIR = 'record/3939050/files' -CLGS_LC100_TIF = 'PROBAV_LC100_global_v3.0.1_2019-nrt_Change-Confidence-layer_EPSG-4326.tif' -CLGS_LC100_PARAM = 'download=1' - EXTERNAL_DATA = { 'aster/AST_L1T_00305032000040446_20150409135350_78838.hdf': { 'url': @@ -16,13 +12,6 @@ 'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'), 'compress': 'zip' - }, - 'cgls_lc100/PROBAV_LC100_global_v3.0.1_2019-nrt_ccl.tif': { - 'url': ('https://zenodo.org/' - '{}/{}?{}'.format(CLGS_LC100_DIR, CLGS_LC100_TIF, - CLGS_LC100_PARAM)), - 'compress': - 'none' } } From 2db683632cc6b552af651f7fe0eaa6d7ad0bc2b1 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Mon, 3 May 2021 14:20:27 -0600 Subject: [PATCH 09/13] Use `stactools.core.utils` when possible - Adds `stactools.core.utils.convert.cogify` - Uses `cogify` and `call` from core when possible instead of defining custom calls in subpackages --- requirements-dev.txt | 1 + 
stactools_aster/stactools/aster/cog.py | 10 +- .../stactools/cgls_lc100/cog.py | 17 +- .../stactools/core/utils/convert.py | 20 + stactools_corine/stactools/corine/cog.py | 20 +- .../stactools/sentinel2/cog.py | 22 +- tests/core/test_utils.py | 52 ++ tests/data-files/core/byte.tif | Bin 0 -> 736 bytes .../utils/validate_cloud_optimized_geotiff.py | 449 ++++++++++++++++++ 9 files changed, 529 insertions(+), 62 deletions(-) create mode 100644 stactools_core/stactools/core/utils/convert.py create mode 100644 tests/core/test_utils.py create mode 100644 tests/data-files/core/byte.tif create mode 100644 tests/utils/validate_cloud_optimized_geotiff.py diff --git a/requirements-dev.txt b/requirements-dev.txt index afd7a963..4e0ac2b9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,3 +10,4 @@ yapf==0.30.* ipython==7.16.1 nbsphinx==0.7.1 coverage==5.2.* + diff --git a/stactools_aster/stactools/aster/cog.py b/stactools_aster/stactools/aster/cog.py index 57375553..1d252280 100644 --- a/stactools_aster/stactools/aster/cog.py +++ b/stactools_aster/stactools/aster/cog.py @@ -10,6 +10,7 @@ from stactools.core.projection import reproject_geom from stactools.core.utils.subprocess import call +from stactools.core.utils.convert import cogify from stactools.aster.utils import AsterSceneId from stactools.aster.xml_metadata import XmlMetadata @@ -41,13 +42,6 @@ def set_band_names(href: str, band_names: List[str]) -> None: ds.descriptions = band_names -def cogify(input_path, output_path): - call([ - 'gdal_translate', '-of', 'COG', '-co', 'predictor=2', '-co', - 'compress=deflate', input_path, output_path - ]) - - def _create_cog_for_sensor(sensor: str, file_prefix: str, tmp_dir: str, output_dir: str, bounds: List[float], crs: str, subdataset_info: List[Tuple[Any, int]]) -> str: @@ -70,7 +64,7 @@ def _create_cog_for_sensor(sensor: str, file_prefix: str, tmp_dir: str, set_band_names(merged_path, band_names) - cogify(merged_path, sensor_cog_href) + cogify(merged_path, sensor_cog_href, extra_args=["-co", "predictor=2"]) return sensor_cog_href diff --git a/stactools_cgls_lc100/stactools/cgls_lc100/cog.py b/stactools_cgls_lc100/stactools/cgls_lc100/cog.py index e1cb07cf..7ec8d339 100644 --- a/stactools_cgls_lc100/stactools/cgls_lc100/cog.py +++ b/stactools_cgls_lc100/stactools/cgls_lc100/cog.py @@ -1,30 +1,15 @@ import logging import os -from subprocess import Popen, PIPE, STDOUT import pystac +from stactools.core.utils.convert import cogify from stactools.cgls_lc100.constants import (ITEM_COG_IMAGE_NAME, ITEM_TIF_IMAGE_NAME) logger = logging.getLogger(__name__) -def call(command): - def log_subprocess_output(pipe): - for line in iter(pipe.readline, b''): # b'\n'-separated lines - logger.info(line.decode("utf-8").strip('\n')) - - process = Popen(command, stdout=PIPE, stderr=STDOUT) - with process.stdout: - log_subprocess_output(process.stdout) - return process.wait() # 0 means success - - -def cogify(input_path, output_path): - call(['gdal_translate', '-of', 'COG', '-co', input_path, output_path]) - - def _create_cog(item, cog_directory): tif_asset = item.assets.get(ITEM_TIF_IMAGE_NAME) cogify(tif_asset.href, cog_directory) diff --git a/stactools_core/stactools/core/utils/convert.py b/stactools_core/stactools/core/utils/convert.py new file mode 100644 index 00000000..64d96927 --- /dev/null +++ b/stactools_core/stactools/core/utils/convert.py @@ -0,0 +1,20 @@ +from typing import List + +from stactools.core.utils.subprocess import call + +DEFAULT_COGIFY_ARGS = ["-co", "compress=deflate"] + + +def 
cogify(infile: str, + outfile: str, + args: List[str] = None, + extra_args: List[str] = None): + """Creates a COG from a GDAL-readable file.""" + if args is None: + args = DEFAULT_COGIFY_ARGS[:] + args = ["gdal_translate", "-of", "COG"] + args + if extra_args: + args.extend(extra_args) + args.append(infile) + args.append(outfile) + call(args) diff --git a/stactools_corine/stactools/corine/cog.py b/stactools_corine/stactools/corine/cog.py index da3e33bb..59c10f58 100644 --- a/stactools_corine/stactools/corine/cog.py +++ b/stactools_corine/stactools/corine/cog.py @@ -1,33 +1,15 @@ import logging import os -from subprocess import Popen, PIPE, STDOUT import pystac +from stactools.core.utils.convert import cogify from stactools.corine.constants import (ITEM_COG_IMAGE_NAME, ITEM_TIF_IMAGE_NAME) logger = logging.getLogger(__name__) -def call(command): - def log_subprocess_output(pipe): - for line in iter(pipe.readline, b''): # b'\n'-separated lines - logger.info(line.decode("utf-8").strip('\n')) - - process = Popen(command, stdout=PIPE, stderr=STDOUT) - with process.stdout: - log_subprocess_output(process.stdout) - return process.wait() # 0 means success - - -def cogify(input_path, output_path): - call([ - 'gdal_translate', '-of', 'COG', '-co', 'compress=deflate', input_path, - output_path - ]) - - def _create_cog(item, cog_directory): tif_asset = item.assets.get(ITEM_TIF_IMAGE_NAME) diff --git a/stactools_sentinel2/stactools/sentinel2/cog.py b/stactools_sentinel2/stactools/sentinel2/cog.py index d8bb24fd..2b22649e 100644 --- a/stactools_sentinel2/stactools/sentinel2/cog.py +++ b/stactools_sentinel2/stactools/sentinel2/cog.py @@ -2,11 +2,13 @@ import os import shutil from tempfile import TemporaryDirectory -from subprocess import Popen, PIPE, STDOUT import pystac from pystac.utils import make_absolute_href +from stactools.core.utils.subprocess import call +from stactools.core.utils.convert import cogify + logger = logging.getLogger(__name__) # TODO: Refactor this into general-purpose cogification of item asset @@ -52,23 +54,5 @@ def is_non_cog_image(asset): or asset.media_type == pystac.MediaType.JPEG2000) -def call(command): - def log_subprocess_output(pipe): - for line in iter(pipe.readline, b''): # b'\n'-separated lines - logger.info(line.decode("utf-8").strip('\n')) - - process = Popen(command, stdout=PIPE, stderr=STDOUT) - with process.stdout: - log_subprocess_output(process.stdout) - return process.wait() # 0 means success - - -def cogify(input_path, output_path): - call([ - 'gdal_translate', '-of', 'COG', '-co', 'compress=deflate', input_path, - output_path - ]) - - def reproject(input_path, output_path): call(['gdalwarp', '-t_srs', 'epsg:3857', input_path, output_path]) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py new file mode 100644 index 00000000..18d51165 --- /dev/null +++ b/tests/core/test_utils.py @@ -0,0 +1,52 @@ +from contextlib import contextmanager +import os.path +from tempfile import TemporaryDirectory +import unittest + +import rasterio + +from tests.utils import TestData, validate_cloud_optimized_geotiff +from stactools.core.utils.convert import cogify + + +class CogifyTest(unittest.TestCase): + @contextmanager + def cogify(self, **kwargs): + infile = TestData.get_path("data-files/core/byte.tif") + with TemporaryDirectory() as directory: + outfile = os.path.join(directory, "byte.tif") + cogify(infile, outfile, **kwargs) + yield outfile + + def test_default(self): + with self.cogify() as outfile: + self.assertTrue(os.path.exists(outfile)) + warnings, 
errors, _ = validate_cloud_optimized_geotiff.validate( + outfile, full_check=True) + self.assertEqual(len(warnings), 0) + self.assertEqual(len(errors), 0) + with rasterio.open(outfile) as dataset: + self.assertEqual(dataset.compression, + rasterio.enums.Compression.deflate) + + def test_override_default(self): + with self.cogify(args=["-co", "compress=lzw"]) as outfile: + self.assertTrue(os.path.exists(outfile)) + warnings, errors, _ = validate_cloud_optimized_geotiff.validate( + outfile, full_check=True) + self.assertEqual(len(warnings), 0) + self.assertEqual(len(errors), 0) + with rasterio.open(outfile) as dataset: + self.assertEqual(dataset.compression, + rasterio.enums.Compression.lzw) + + def test_extra_args(self): + with self.cogify( + extra_args=["-mo", "TIFFTAG_ARTIST=prince"]) as outfile: + validate_cloud_optimized_geotiff.validate(outfile) + warnings, errors, _ = validate_cloud_optimized_geotiff.validate( + outfile, full_check=True) + self.assertEqual(len(warnings), 0) + self.assertEqual(len(errors), 0) + with rasterio.open(outfile) as dataset: + self.assertEqual(dataset.tags()["TIFFTAG_ARTIST"], "prince") diff --git a/tests/data-files/core/byte.tif b/tests/data-files/core/byte.tif new file mode 100644 index 0000000000000000000000000000000000000000..6fddac1286241fd62383c20965a11c38f021d7bc GIT binary patch literal 736 zcmZuuJ&P1U5bfDr4}(P!ygQK#7K}8JGcmmblM`s4NH1($H)vMP%CnxvPmnJ1v6Ein6#x_x;;c%h zs8JDc6d4y&@I+WRn<|=Buvsz-idI5QL^Nxs#%x8`wdO1tePV^8<$}*^b~mUC=rN=! z7{PLA8RSW+C-^ZLEUFqI7n9w|2_p?2P*mI@R(R!%f>)$;a~u|><;qwwt!9b}@wzgk z$hmrsdlS@+90ovxH})3tE$rehVT`QHMIj>?P823vm34LR#9ERTIVj+9N)HEctB+yW zi*+zufB(5KcFopx8BfXBeP41ZrMGzTSVDX~rDmG&W718WW`rkOd=>fW76)%OJrrVa} zmuq>n!T;iA<$vht*X`rT|NQo3h=H5o23Jy=Z7}Q(xykHowHt(ae7}4>8{*wN%i~86 X9xm=LUOYctynpxh)#C7Q`62xUqeR@! literal 0 HcmV?d00001 diff --git a/tests/utils/validate_cloud_optimized_geotiff.py b/tests/utils/validate_cloud_optimized_geotiff.py new file mode 100644 index 00000000..f7daabde --- /dev/null +++ b/tests/utils/validate_cloud_optimized_geotiff.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# ***************************************************************************** +# $Id$ +# +# Project: GDAL +# Purpose: Validate Cloud Optimized GeoTIFF file structure +# Author: Even Rouault, +# +# ***************************************************************************** +# Copyright (c) 2017, Even Rouault +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# ***************************************************************************** + +import os.path +import struct +import sys +from osgeo import gdal + + +def Usage(): + print( + 'Usage: validate_cloud_optimized_geotiff.py [-q] [--full-check=yes/no/auto] test.tif' + ) + print('') + print('Options:') + print('-q: quiet mode') + print( + '--full-check=yes/no/auto: check tile/strip leader/trailer bytes. auto=yes for local files,' + ' and no for remote files') + return 1 + + +class ValidateCloudOptimizedGeoTIFFException(Exception): + pass + + +def full_check_band(f, band_name, band, errors, block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery): + + block_size = band.GetBlockSize() + mask_band = None + if mask_interleaved_with_imagery: + mask_band = band.GetMaskBand() + mask_block_size = mask_band.GetBlockSize() + if block_size != mask_block_size: + errors += [ + band_name + + ': mask block size is different from its imagery band' + ] + mask_band = None + + yblocks = (band.YSize + block_size[1] - 1) // block_size[1] + xblocks = (band.XSize + block_size[0] - 1) // block_size[0] + last_offset = 0 + for y in range(yblocks): + for x in range(xblocks): + + offset = band.GetMetadataItem('BLOCK_OFFSET_%d_%d' % (x, y), + 'TIFF') + offset = int(offset) if offset is not None else 0 + bytecount = band.GetMetadataItem('BLOCK_SIZE_%d_%d' % (x, y), + 'TIFF') + bytecount = int(bytecount) if bytecount is not None else 0 + + if offset > 0: + if block_order_row_major and offset < last_offset: + errors += [ + band_name + + ': offset of block (%d, %d) is smaller than previous block' + % (x, y) + ] + + if block_leader_size_as_uint4: + gdal.VSIFSeekL(f, offset - 4, 0) + leader_size = struct.unpack('= 4: + gdal.VSIFSeekL(f, offset + bytecount - 4, 0) + last_bytes = gdal.VSIFReadL(8, 1, f) + if last_bytes[0:4] != last_bytes[4:8]: + errors += [ + band_name + + ': for block (%d, %d), trailer bytes are invalid' + % (x, y) + ] + + if mask_band: + offset_mask = mask_band.GetMetadataItem( + 'BLOCK_OFFSET_%d_%d' % (x, y), 'TIFF') + offset_mask = int( + offset_mask) if offset_mask is not None else 0 + if offset > 0 and offset_mask > 0: + expected_offset_mask = offset + bytecount + \ + (4 if block_leader_size_as_uint4 else 0) + \ + (4 if block_trailer_last_4_bytes_repeated else 0) + if offset_mask != expected_offset_mask: + errors += [ + 'Mask of ' + band_name + + ': for block (%d, %d), offset is %d, whereas %d was expected' + % (x, y, offset_mask, expected_offset_mask) + ] + elif offset == 0 and offset_mask > 0: + if block_order_row_major and offset_mask < last_offset: + errors += [ + 'Mask of ' + band_name + + ': offset of block (%d, %d) is smaller than previous block' + % (x, y) + ] + + offset = offset_mask + + last_offset = offset + + +def validate(ds, check_tiled=True, full_check=False): + """Check if a file is a (Geo)TIFF with cloud optimized compatible structure. + + Args: + ds: GDAL Dataset for the file to inspect. + check_tiled: Set to False to ignore missing tiling. + full_check: Set to TRUe to check tile/strip leader/trailer bytes. Might be + slow on remote files + + Returns: + A tuple, whose first element is an array of error messages + (empty if there is no error), and the second element, a dictionary + with the structure of the GeoTIFF file. + + Raises: + ValidateCloudOptimizedGeoTIFFException: Unable to open the file or the + file is not a Tiff. 
+ """ + + if int(gdal.VersionInfo('VERSION_NUM')) < 2020000: + raise ValidateCloudOptimizedGeoTIFFException( + 'GDAL 2.2 or above required') + + unicode_type = type(''.encode('utf-8').decode('utf-8')) + if isinstance(ds, (str, unicode_type)): + gdal.PushErrorHandler() + ds = gdal.Open(ds) + gdal.PopErrorHandler() + if ds is None: + raise ValidateCloudOptimizedGeoTIFFException( + 'Invalid file : %s' % gdal.GetLastErrorMsg()) + if ds.GetDriver().ShortName != 'GTiff': + raise ValidateCloudOptimizedGeoTIFFException( + 'The file is not a GeoTIFF') + + details = {} + errors = [] + warnings = [] + filename = ds.GetDescription() + main_band = ds.GetRasterBand(1) + ovr_count = main_band.GetOverviewCount() + filelist = ds.GetFileList() + if filelist is not None and filename + '.ovr' in filelist: + errors += [ + 'Overviews found in external .ovr file. They should be internal' + ] + + if main_band.XSize > 512 or main_band.YSize > 512: + if check_tiled: + block_size = main_band.GetBlockSize() + if block_size[0] == main_band.XSize and block_size[0] > 1024: + errors += [ + 'The file is greater than 512xH or Wx512, but is not tiled' + ] + + if ovr_count == 0: + warnings += [ + 'The file is greater than 512xH or Wx512, it is recommended ' + 'to include internal overviews' + ] + + ifd_offset = int(main_band.GetMetadataItem('IFD_OFFSET', 'TIFF')) + ifd_offsets = [ifd_offset] + + block_order_row_major = False + block_leader_size_as_uint4 = False + block_trailer_last_4_bytes_repeated = False + mask_interleaved_with_imagery = False + + if ifd_offset not in (8, 16): + + # Check if there is GDAL hidden structural metadata + f = gdal.VSIFOpenL(filename, 'rb') + if not f: + raise ValidateCloudOptimizedGeoTIFFException("Cannot open file") + signature = struct.unpack('B' * 4, gdal.VSIFReadL(4, 1, f)) + bigtiff = signature in ((0x49, 0x49, 0x2B, 0x00), (0x4D, 0x4D, 0x00, + 0x2B)) + if bigtiff: + expected_ifd_pos = 16 + else: + expected_ifd_pos = 8 + gdal.VSIFSeekL(f, expected_ifd_pos, 0) + pattern = "GDAL_STRUCTURAL_METADATA_SIZE=%06d bytes\n" % 0 + got = gdal.VSIFReadL(len(pattern), 1, f).decode('LATIN1') + if len(got) == len(pattern) and got.startswith( + 'GDAL_STRUCTURAL_METADATA_SIZE='): + size = int(got[len('GDAL_STRUCTURAL_METADATA_SIZE='):][0:6]) + extra_md = gdal.VSIFReadL(size, 1, f).decode('LATIN1') + block_order_row_major = 'BLOCK_ORDER=ROW_MAJOR' in extra_md + block_leader_size_as_uint4 = 'BLOCK_LEADER=SIZE_AS_UINT4' in extra_md + block_trailer_last_4_bytes_repeated = 'BLOCK_TRAILER=LAST_4_BYTES_REPEATED' in extra_md + mask_interleaved_with_imagery = 'MASK_INTERLEAVED_WITH_IMAGERY=YES' in extra_md + if 'KNOWN_INCOMPATIBLE_EDITION=YES' in extra_md: + errors += [ + "KNOWN_INCOMPATIBLE_EDITION=YES is declared in the file" + ] + expected_ifd_pos += len(pattern) + size + expected_ifd_pos += expected_ifd_pos % 2 # IFD offset starts on a 2-byte boundary + gdal.VSIFCloseL(f) + + if expected_ifd_pos != ifd_offsets[0]: + errors += [ + 'The offset of the main IFD should be %d. 
It is %d instead' % + (expected_ifd_pos, ifd_offsets[0]) + ] + + details['ifd_offsets'] = {} + details['ifd_offsets']['main'] = ifd_offset + + for i in range(ovr_count): + # Check that overviews are by descending sizes + ovr_band = ds.GetRasterBand(1).GetOverview(i) + if i == 0: + if (ovr_band.XSize > main_band.XSize + or ovr_band.YSize > main_band.YSize): + errors += [ + 'First overview has larger dimension than main band' + ] + else: + prev_ovr_band = ds.GetRasterBand(1).GetOverview(i - 1) + if (ovr_band.XSize > prev_ovr_band.XSize + or ovr_band.YSize > prev_ovr_band.YSize): + errors += [ + 'Overview of index %d has larger dimension than ' + 'overview of index %d' % (i, i - 1) + ] + + if check_tiled: + block_size = ovr_band.GetBlockSize() + if block_size[0] == ovr_band.XSize and block_size[0] > 1024: + errors += ['Overview of index %d is not tiled' % i] + + # Check that the IFD of descending overviews are sorted by increasing + # offsets + ifd_offset = int(ovr_band.GetMetadataItem('IFD_OFFSET', 'TIFF')) + ifd_offsets.append(ifd_offset) + details['ifd_offsets']['overview_%d' % i] = ifd_offset + if ifd_offsets[-1] < ifd_offsets[-2]: + if i == 0: + errors += [ + 'The offset of the IFD for overview of index %d is %d, ' + 'whereas it should be greater than the one of the main ' + 'image, which is at byte %d' % + (i, ifd_offsets[-1], ifd_offsets[-2]) + ] + else: + errors += [ + 'The offset of the IFD for overview of index %d is %d, ' + 'whereas it should be greater than the one of index %d, ' + 'which is at byte %d' % + (i, ifd_offsets[-1], i - 1, ifd_offsets[-2]) + ] + + # Check that the imagery starts by the smallest overview and ends with + # the main resolution dataset + + def get_block_offset(band): + blockxsize, blockysize = band.GetBlockSize() + for y in range(int((band.YSize + blockysize - 1) / blockysize)): + for x in range(int((band.XSize + blockxsize - 1) / blockxsize)): + block_offset = band.GetMetadataItem( + 'BLOCK_OFFSET_%d_%d' % (x, y), 'TIFF') + if block_offset: + return int(block_offset) + return 0 + + block_offset = get_block_offset(main_band) + data_offsets = [block_offset] + details['data_offsets'] = {} + details['data_offsets']['main'] = block_offset + for i in range(ovr_count): + ovr_band = ds.GetRasterBand(1).GetOverview(i) + block_offset = get_block_offset(ovr_band) + data_offsets.append(block_offset) + details['data_offsets']['overview_%d' % i] = block_offset + + if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]: + if ovr_count > 0: + errors += [ + 'The offset of the first block of the smallest overview ' + 'should be after its IFD' + ] + else: + errors += [ + 'The offset of the first block of the image should ' + 'be after its IFD' + ] + for i in range(len(data_offsets) - 2, 0, -1): + if data_offsets[i] != 0 and data_offsets[i] < data_offsets[i + 1]: + errors += [ + 'The offset of the first block of overview of index %d should ' + 'be after the one of the overview of index %d' % (i - 1, i) + ] + if len(data_offsets) >= 2 and data_offsets[0] != 0 and data_offsets[ + 0] < data_offsets[1]: + errors += [ + 'The offset of the first block of the main resolution image ' + 'should be after the one of the overview of index %d' % + (ovr_count - 1) + ] + + if full_check and (block_order_row_major or block_leader_size_as_uint4 + or block_trailer_last_4_bytes_repeated + or mask_interleaved_with_imagery): + f = gdal.VSIFOpenL(filename, 'rb') + if not f: + raise ValidateCloudOptimizedGeoTIFFException("Cannot open file") + + full_check_band(f, 'Main resolution image', 
main_band, errors, + block_order_row_major, block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery) + if main_band.GetMaskFlags() == gdal.GMF_PER_DATASET and \ + (filename + '.msk') not in ds.GetFileList(): + full_check_band(f, 'Mask band of main resolution image', + main_band.GetMaskBand(), errors, + block_order_row_major, block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, False) + for i in range(ovr_count): + ovr_band = ds.GetRasterBand(1).GetOverview(i) + full_check_band(f, 'Overview %d' % i, ovr_band, errors, + block_order_row_major, block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery) + if ovr_band.GetMaskFlags() == gdal.GMF_PER_DATASET and \ + (filename + '.msk') not in ds.GetFileList(): + full_check_band(f, 'Mask band of overview %d' % i, + ovr_band.GetMaskBand(), errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, False) + gdal.VSIFCloseL(f) + + return warnings, errors, details + + +def main(): + """Return 0 in case of success, 1 for failure.""" + + i = 1 + filename = None + quiet = False + full_check = None + while i < len(sys.argv): + if sys.argv[i] == '-q': + quiet = True + elif sys.argv[i] == '--full-check=yes': + full_check = True + elif sys.argv[i] == '--full-check=no': + full_check = False + elif sys.argv[i] == '--full-check=auto': + full_check = None + elif sys.argv[i][0] == '-': + return Usage() + elif filename is None: + filename = sys.argv[i] + else: + return Usage() + + i += 1 + + if filename is None: + return Usage() + + if full_check is None: + full_check = filename.startswith('/vsimem/') or os.path.exists( + filename) + + try: + ret = 0 + warnings, errors, details = validate(filename, full_check=full_check) + if warnings: + if not quiet: + print('The following warnings were found:') + for warning in warnings: + print(' - ' + warning) + print('') + if errors: + if not quiet: + print('%s is NOT a valid cloud optimized GeoTIFF.' % filename) + print('The following errors were found:') + for error in errors: + print(' - ' + error) + print('') + ret = 1 + else: + if not quiet: + print('%s is a valid cloud optimized GeoTIFF' % filename) + + if not quiet and not warnings and not errors: + headers_size = min(details['data_offsets'][k] + for k in details['data_offsets']) + if headers_size == 0: + headers_size = gdal.VSIStatL(filename).size + print('\nThe size of all IFD headers is %d bytes' % headers_size) + except ValidateCloudOptimizedGeoTIFFException as e: + if not quiet: + print('%s is NOT a valid cloud optimized GeoTIFF : %s' % + (filename, str(e))) + ret = 1 + + return ret + + +if __name__ == '__main__': + sys.exit(main()) From 61deef2cf856d196d40891c2e927faa505e742ed Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 4 May 2021 08:43:44 -0600 Subject: [PATCH 10/13] Fix README typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f39a443..09148223 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Once the container is built, you can run the `scripts/` scripts inside a docker ### Using virtualenv -If not using docker, itt's recommended to use [virtualenv](https://virtualenv.pypa.io/en/latest/index.html) to keep isolate the python environment used to develop stactools. 
See virtualenv documentation for more detailed information, but as a shortcut here's some quick steps:
+If not using docker, it's recommended to use [virtualenv](https://virtualenv.pypa.io/en/latest/index.html) to keep isolate the python environment used to develop stactools. See virtualenv documentation for more detailed information, but as a shortcut here's some quick steps:
 
 - Make sure [virtualenv](https://virtualenv.pypa.io/en/latest/installation.html) is installed
 - Run `virtualenv venv`
 - Activate the virtualenv with `source venv/bin/activate`
 
 #### Installing development requirements

From d679cff5738300e2bd2325761ef457d1bdee960c Mon Sep 17 00:00:00 2001
From: Pete Gadomski
Date: Fri, 28 May 2021 11:32:31 -0600
Subject: [PATCH 11/13] Return the `call` value from `cogify`

---
 stactools_core/stactools/core/utils/convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stactools_core/stactools/core/utils/convert.py b/stactools_core/stactools/core/utils/convert.py
index 64d96927..9950d2b9 100644
--- a/stactools_core/stactools/core/utils/convert.py
+++ b/stactools_core/stactools/core/utils/convert.py
@@ -17,4 +17,4 @@ def cogify(infile: str,
         args.extend(extra_args)
     args.append(infile)
     args.append(outfile)
-    call(args)
+    return call(args)

From 56deddc598c597438ebc9beac85d2e862112eab2 Mon Sep 17 00:00:00 2001
From: Pete Gadomski
Date: Tue, 1 Jun 2021 07:24:09 -0600
Subject: [PATCH 12/13] Remove leftover debugging code from s2 test

---
 tests/sentinel2/test_commands.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/sentinel2/test_commands.py b/tests/sentinel2/test_commands.py
index e87418ac..043d7d9a 100644
--- a/tests/sentinel2/test_commands.py
+++ b/tests/sentinel2/test_commands.py
@@ -58,10 +58,6 @@ def check_proj_bbox(item):
         item.validate()
 
-        import json
-        with open('s2-item.py', 'w') as f:
-            json.dump(item.to_dict(), f, indent=2)
-
         bands_seen = set()
         bands_to_assets = defaultdict(list)

From e49a26d76fa248b5f2c41c28ee26e53cc7457b81 Mon Sep 17 00:00:00 2001
From: Pete Gadomski
Date: Wed, 2 Jun 2021 08:39:14 -0600
Subject: [PATCH 13/13] Offset boundaries from center of pixel for landsat

Fixes #117. We use the SR ground sample distance as the bounding box pixel
resolution. We do our best to be intelligent about the lat+lon offsets to
account for assets near the poles.
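
As a rough sketch of the projected-bbox math (illustrative only; the corner
values below are made up, and the real logic lives in the `proj_bbox` and
`bbox` properties in `mtl_metadata.py`):

    # USGS MTL corner coordinates are reported at pixel centers, while
    # GDAL/rasterio transforms reference pixel edges, so each corner is
    # pushed half a pixel outward.
    sr_gsd = 30.0                      # SR ground sample distance in meters
    offset = sr_gsd / 2                # half a pixel

    ul_x, ul_y = 204300.0, 9023100.0   # upper-left corner (pixel center)
    lr_x, lr_y = 439200.0, 8792100.0   # lower-right corner (pixel center)

    # [min_x, min_y, max_x, max_y], now aligned with the raster's edges
    proj_bbox = [ul_x - offset, lr_y - offset, lr_x + offset, ul_y + offset]

For the geographic bbox, the same half-pixel offset is converted from meters
into degrees using `pyproj.Geod` distances along each scene edge before being
applied to the corner longitudes and latitudes.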
--- .../stactools/landsat/mtl_metadata.py | 45 ++++++++++++++----- tests/landsat/test_create_stac.py | 10 ++++- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/stactools_landsat/stactools/landsat/mtl_metadata.py b/stactools_landsat/stactools/landsat/mtl_metadata.py index f2650800..7ad1bb6f 100644 --- a/stactools_landsat/stactools/landsat/mtl_metadata.py +++ b/stactools_landsat/stactools/landsat/mtl_metadata.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional from pystac.utils import str_to_datetime +from pyproj import Geod from stactools.core.utils import map_opt from stactools.core.projection import transform_from_bbox @@ -90,31 +91,55 @@ def bbox(self) -> List[float]: self._get_float("PROJECTION_ATTRIBUTES/CORNER_LL_LAT_PRODUCT"), self._get_float("PROJECTION_ATTRIBUTES/CORNER_LR_LAT_PRODUCT") ] - - return [min(lons), min(lats), max(lons), max(lats)] + geod = Geod(ellps="WGS84") + offset = self.sr_gsd / 2 + _, _, bottom_distance = geod.inv(lons[2], lats[2], lons[3], lats[3]) + bottom_offset = offset * (lons[3] - lons[2]) / bottom_distance + _, _, top_distance = geod.inv(lons[0], lats[0], lons[1], lats[1]) + top_offset = offset * (lons[1] - lons[0]) / top_distance + _, _, lat_distance = geod.inv(lons[0], lats[0], lons[2], lats[2]) + lat_offset = offset * (lats[0] - lats[2]) / lat_distance + return [ + min(lons) - bottom_offset, + min(lats) - lat_offset, + max(lons) + top_offset, + max(lats) + lat_offset + ] @property def proj_bbox(self) -> List[float]: + # USGS metadata provide bounds at the center of the pixel, but + # GDAL/rasterio transforms are to edge of pixel. + # https://github.com/stac-utils/stactools/issues/117 + offset = self.sr_gsd / 2 xs = [ self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_UL_PROJECTION_X_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_UL_PROJECTION_X_PRODUCT") - + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_UR_PROJECTION_X_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_UR_PROJECTION_X_PRODUCT") + + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_LL_PROJECTION_X_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_LL_PROJECTION_X_PRODUCT") - + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_LR_PROJECTION_X_PRODUCT") + "PROJECTION_ATTRIBUTES/CORNER_LR_PROJECTION_X_PRODUCT") + + offset ] ys = [ self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_UL_PROJECTION_Y_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_UL_PROJECTION_Y_PRODUCT") + + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_UR_PROJECTION_Y_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_UR_PROJECTION_Y_PRODUCT") + + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_LL_PROJECTION_Y_PRODUCT"), + "PROJECTION_ATTRIBUTES/CORNER_LL_PROJECTION_Y_PRODUCT") - + offset, self._get_float( - "PROJECTION_ATTRIBUTES/CORNER_LR_PROJECTION_Y_PRODUCT") + "PROJECTION_ATTRIBUTES/CORNER_LR_PROJECTION_Y_PRODUCT") - + offset ] return [min(xs), min(ys), max(xs), max(ys)] diff --git a/tests/landsat/test_create_stac.py b/tests/landsat/test_create_stac.py index 84a9a87b..4094bcdf 100644 --- a/tests/landsat/test_create_stac.py +++ b/tests/landsat/test_create_stac.py @@ -5,6 +5,7 @@ import pystac from pystac.utils import is_absolute_href from shapely.geometry import box, shape, mapping +import rasterio from stactools.core.projection import reproject_geom from stactools.landsat.assets import SR_ASSET_DEFS, THERMAL_ASSET_DEFS @@ -19,10 +20,11 @@ def create_subcommand_functions(self): return [create_landsat_command] def test_create_item(self): - def check_proj_bbox(item): + def 
check_proj_bbox(item, tif_bounds): bbox = item.bbox bbox_shp = box(*bbox) proj_bbox = item.ext.projection.bbox + self.assertEqual(proj_bbox, list(tif_bounds)) proj_bbox_shp = box(*proj_bbox) reproj_bbox_shp = shape( reproject_geom(f"epsg:{item.ext.projection.epsg}", "epsg:4326", @@ -33,6 +35,10 @@ def check_proj_bbox(item): for mtl_path in TEST_MTL_PATHS: with self.subTest(mtl_path): + base_path = "_".join(mtl_path.split("_")[:-1]) + tif_path = f"{base_path}_SR_B3.TIF" + with rasterio.open(tif_path) as dataset: + tif_bounds = dataset.bounds with TemporaryDirectory() as tmp_dir: cmd = [ 'landsat', 'create-item', '--mtl', mtl_path, @@ -64,7 +70,7 @@ def check_proj_bbox(item): else: self.assertEqual(bands_seen, set(L8_SR_BANDS.keys())) - check_proj_bbox(item) + check_proj_bbox(item, tif_bounds) def test_convert_and_create_agree(self): def get_item(output_dir: str) -> pystac.Item: