From 96ac7b0fc890d870568a101efbdc4aff9cf9cb74 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Thu, 13 Jan 2022 13:19:53 -0700 Subject: [PATCH 1/5] chore: sort `install_requires` --- setup.cfg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.cfg b/setup.cfg index 9fd967d2..218e6188 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,16 +31,16 @@ packages = find_namespace: zip_safe = False include_package_data = True install_requires = - pystac[validation] ~= 1.2 + GDAL ~= 3.3.2 + Shapely ~= 1.7 aiohttp ~= 3.7 click ~= 8.0 fsspec ~= 2021.7 - requests ~= 2.25 - Shapely ~= 1.7 + lxml ~= 4.6 pyproj ~= 3.0 + pystac[validation] ~= 1.2 rasterio ~= 1.2 - lxml ~= 4.6 - GDAL ~= 3.3.2 + requests ~= 2.25 [options.extras_require] all = From 0497418cef66c69c2a212a7e0ac3e50fda089a2a Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Thu, 13 Jan 2022 13:20:10 -0700 Subject: [PATCH 2/5] refactor!: refactor cogify This changes the API of the cogify function. --- src/stactools/core/utils/convert.py | 38 +++++++++++++++-------------- tests/core/test_utils.py | 10 ++------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/stactools/core/utils/convert.py b/src/stactools/core/utils/convert.py index 8948ab21..de4484c4 100644 --- a/src/stactools/core/utils/convert.py +++ b/src/stactools/core/utils/convert.py @@ -1,25 +1,27 @@ -from typing import List, Optional +from typing import Dict, Any, Optional -from stactools.core.utils.subprocess import call -from stactools.core.utils import gdal_driver_is_enabled +import rasterio +import rasterio.shutil +from rasterio.errors import DriverRegistrationError -DEFAULT_COGIFY_ARGS = ["-co", "compress=deflate"] +from stactools.core import utils + +DEFAULT_PROFILE = { + "compress": "deflate", + "driver": "COG", + "blocksize": 512, +} def cogify(infile: str, outfile: str, - args: Optional[List[str]] = None, - extra_args: Optional[List[str]] = None) -> int: + profile: Optional[Dict[str, Any]] = None) -> None: """Creates a COG from a GDAL-readable file.""" - if not gdal_driver_is_enabled("COG"): - raise Exception( - "GDAL's COG driver is not enabled. " - "Please make sure your GDAL version is 3.1 or greater.") - if args is None: - args = DEFAULT_COGIFY_ARGS[:] - args = ["gdal_translate", "-of", "COG"] + args - if extra_args: - args.extend(extra_args) - args.append(infile) - args.append(outfile) - return call(args) + if not utils.gdal_driver_is_enabled("COG"): + raise DriverRegistrationError( + "GDAL's COG driver is not enabled, make sure you're using GDAL >= 3.1" + ) + destination_profile = DEFAULT_PROFILE.copy() + if profile: + destination_profile.update(profile) + rasterio.shutil.copy(infile, outfile, **destination_profile) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index d2c5255e..de658aa2 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -26,15 +26,9 @@ def test_default(self): self.assertEqual(dataset.compression, rasterio.enums.Compression.deflate) - def test_override_default(self): - with self.cogify(args=["-co", "compress=lzw"]) as outfile: + def test_profile(self): + with self.cogify(profile={"compress": "lzw"}) as outfile: self.assertTrue(os.path.exists(outfile)) with rasterio.open(outfile) as dataset: self.assertEqual(dataset.compression, rasterio.enums.Compression.lzw) - - def test_extra_args(self): - with self.cogify( - extra_args=["-mo", "TIFFTAG_ARTIST=prince"]) as outfile: - with rasterio.open(outfile) as dataset: - self.assertEqual(dataset.tags()["TIFFTAG_ARTIST"], "prince") From a01e0a1b929a8464e03d5c78519d35c2b393704d Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 19 Jan 2022 09:45:41 -0700 Subject: [PATCH 3/5] feat: use assertDictEqual for addraster test This helps debug dict differences better. --- tests/cli/commands/test_addraster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cli/commands/test_addraster.py b/tests/cli/commands/test_addraster.py index 660bcd41..2b22fcd3 100644 --- a/tests/cli/commands/test_addraster.py +++ b/tests/cli/commands/test_addraster.py @@ -39,4 +39,6 @@ def test_add_raster_to_item(self): asset = item.get_assets().get("analytic") assert asset is not None expected = expected_json("rasterbands.json") - assert expected == asset.to_dict().get("raster:bands") + self.maxDiff = None + for a, b in zip(expected, asset.to_dict().get("raster:bands")): + self.assertDictEqual(a, b) From e29df2553f7f4ae8293995b03f0948ffb727ec39 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 19 Jan 2022 09:46:23 -0700 Subject: [PATCH 4/5] feat: remove GDAL Python bindings dependency This requires refactoring `add_raster_to_item` to use rasterio instead of GDAL bindings. During this refactor, I discovered that the original implementation was not correctly handling the top bound of the histogram calculation, and so the last bin value was one too low in the tests. The test case was corrected. --- setup.cfg | 1 - src/stactools/core/addraster.py | 48 +++++++++++--------- tests/cli/commands/expected/rasterbands.json | 8 ++-- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/setup.cfg b/setup.cfg index 218e6188..c2c1506e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,6 @@ packages = find_namespace: zip_safe = False include_package_data = True install_requires = - GDAL ~= 3.3.2 Shapely ~= 1.7 aiohttp ~= 3.7 click ~= 8.0 diff --git a/src/stactools/core/addraster.py b/src/stactools/core/addraster.py index 9b9be1f7..54aa8a05 100644 --- a/src/stactools/core/addraster.py +++ b/src/stactools/core/addraster.py @@ -1,14 +1,16 @@ import logging +from typing import List -from osgeo import gdal +import numpy from pystac import Item from pystac.utils import make_absolute_href from pystac.extensions.raster import (DataType, Histogram, RasterBand, RasterExtension, Statistics) +import rasterio logger = logging.getLogger(__name__) -NUM_BUCKETS = 256 +BINS = 256 def add_raster_to_item(item: Item) -> Item: @@ -25,27 +27,29 @@ def add_raster_to_item(item: Item) -> Item: for asset in item.assets.values(): if asset.roles and "data" in asset.roles: raster = RasterExtension.ext(asset) - bands = [] href = make_absolute_href(asset.href, item.get_self_href()) - dataset = gdal.Open(href, gdal.GA_ReadOnly) - for nband in range(dataset.RasterCount): - gdal_band = dataset.GetRasterBand(nband + 1) - band = RasterBand.create() - band.nodata = gdal_band.GetNoDataValue() - band.spatial_resolution = dataset.GetGeoTransform()[1] - band.data_type = DataType( - gdal.GetDataTypeName(gdal_band.DataType).lower()) - minimum = gdal_band.GetMinimum() - maximum = gdal_band.GetMaximum() - if not minimum or not max: - minimum, maximum = gdal_band.ComputeRasterMinMax(True) - band.statistics = Statistics.create(minimum=minimum, - maximum=maximum) - hist_data = gdal_band.GetHistogram(minimum, maximum, - NUM_BUCKETS) - band.histogram = Histogram.create(NUM_BUCKETS, minimum, - maximum, hist_data) - bands.append(band) + bands = _read_bands(href) if bands: raster.apply(bands) return item + + +def _read_bands(href: str) -> List[RasterBand]: + bands = [] + with rasterio.open(href) as dataset: + for (i, index) in enumerate(dataset.indexes): + data = dataset.read(index, masked=True) + band = RasterBand.create() + band.nodata = dataset.nodatavals[i] + band.spatial_resolution = dataset.transform[0] + band.data_type = DataType(dataset.dtypes[i]) + minimum = float(numpy.min(data)) # type: ignore + maximum = float(numpy.max(data)) # type: ignore + band.statistics = Statistics.create(minimum=minimum, + maximum=maximum) + hist_data, _ = numpy.histogram( # type: ignore + data, range=(minimum, maximum), bins=BINS) + band.histogram = Histogram.create(BINS, minimum, maximum, + hist_data.tolist()) + bands.append(band) + return bands diff --git a/tests/cli/commands/expected/rasterbands.json b/tests/cli/commands/expected/rasterbands.json index 6f430815..9e5c4956 100644 --- a/tests/cli/commands/expected/rasterbands.json +++ b/tests/cli/commands/expected/rasterbands.json @@ -267,7 +267,7 @@ 0, 0, 1, - 2 + 3 ] } }, @@ -539,7 +539,7 @@ 2, 7, 7, - 7 + 8 ] } }, @@ -811,7 +811,7 @@ 0, 1, 1, - 2 + 3 ] } }, @@ -1083,7 +1083,7 @@ 0, 1, 0, - 0 + 1 ] } } From 988c691fe9049de8695f49295dec66f73342a34f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 19 Jan 2022 10:03:39 -0700 Subject: [PATCH 5/5] chore: update CHANGELOG for #222 --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7619b5a..29c76674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Use [pytest](https://docs.pytest.org/) for unit testing instead of `unittest` ([#220](https://github.com/stac-utils/stactools/pull/220)) +- Signature of `stactools.core.utils.convert.cogify` ([#222](https://github.com/stac-utils/stactools/pull/222)) + +### Removed + +- GDAL Python bindings dependency ([#222](https://github.com/stac-utils/stactools/pull/222)) ## [0.2.5] - 2022-01-03