Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend cogify #318

Merged
merged 9 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Modified stactools.core.utils.antimeridian.fix_item to return the item and updated 2 unit tests ([#309](https://github.com/stac-utils/stactools/issues/309))
- Relaxed typing for cmd parameter for the CliTestCase.run_command in cli_test.py ([#306](https://github.com/stac-utils/stactools/issues/306))
- Extended stactools.core.utils.convert to export subdatasets from HDF files as separate COGs and
to extract single bands from multiband files ([#318](https://github.com/stac-utils/stactools/pull/318))
- Modified stactools.core.utils.antimeridian.fix_item to return the item and updated 2 unit tests ([#317](https://github.com/stac-utils/stactools/pull/317))
- Relaxed typing for cmd parameter for the CliTestCase.run_command in cli_test.py ([#312](https://github.com/stac-utils/stactools/pull/312))
- Cleaned up API documentation ([#315](https://github.com/stac-utils/stactools/pull/315))

### Removed
Expand All @@ -24,7 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Removed

- Dropped support for Python 3.7 ([#223](https://github.com/stac-utils/stactools/issues/223))
- Dropped support for Python 3.7 ([#313](https://github.com/stac-utils/stactools/pull/313))

## [v0.3.1]

Expand Down
100 changes: 90 additions & 10 deletions src/stactools/core/utils/convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Format conversion utilities."""

from typing import Any, Dict, Optional
import os
from typing import Any, Dict, List, Optional, Tuple, cast

import rasterio
import rasterio.shutil
Expand All @@ -16,25 +17,104 @@
"""The default profile to use when writing Cloud-Optimized GeoTIFFs (COGs)."""


def cogify(infile: str, outfile: str, profile: Optional[Dict[str, Any]] = None) -> None:
def assert_cog_driver_is_enabled() -> None:
    """Ensures that GDAL's COG driver is available before writing COGs.

    Raises:
        DriverRegistrationError: If ``utils.gdal_driver_is_enabled("COG")``
            reports that the driver is not enabled.
    """
    if utils.gdal_driver_is_enabled("COG"):
        return
    raise DriverRegistrationError(
        "GDAL's COG driver is not enabled, make sure you're using GDAL >= 3.1"
    )


def cogify(
    infile: str,
    outfile: str,
    band: Optional[int] = None,
    profile: Optional[Dict[str, Any]] = None,
) -> None:
    """Creates a Cloud-Optimized GeoTIFF (COG) from a GDAL-readable file.

    A band number can optionally be provided to extract a single band from a
    multiband file. To create COGs from subdatasets, use
    :py:meth:`stactools.core.utils.convert.cogify_subdatasets`.

    The output profile starts from
    :py:const:`stactools.core.utils.convert.DEFAULT_PROFILE`, takes its
    ``width``, ``height``, ``crs`` and ``transform`` from the input file, and
    is finally overlaid with ``profile`` when one is given.

    Args:
        infile (str): The input file.
        outfile (str): The output COG to be written.
        band (Optional[int]): The band number in the input file to extract
            (rasterio band numbers start at 1). If not provided (or falsy),
            a multi-band COG will be created with
            :py:meth:`rasterio.shutil.copy`.
        profile (Optional[dict[str, Any]]):
            An optional profile to use on the
            output file. If not provided,
            :py:const:`stactools.core.utils.convert.DEFAULT_PROFILE` will be
            used.

    Raises:
        DriverRegistrationError: If GDAL's COG driver is not enabled.
    """
    assert_cog_driver_is_enabled()

    # Open the source as a context manager so the dataset handle is always
    # released, even if reading or writing fails part-way through.
    with rasterio.open(infile) as src:
        dest_profile = DEFAULT_PROFILE.copy()
        dest_profile.update(
            {
                "width": src.width,
                "height": src.height,
                "crs": src.crs,
                "transform": src.transform,
            }
        )

        # Caller-supplied options take precedence over the defaults above.
        if profile:
            dest_profile.update(profile)

        # If a band number was provided, create a single-band COG
        if band:
            single_band = src.read(band)
            dest_profile.update({"count": 1, "dtype": single_band.dtype})
            with rasterio.open(outfile, "w", **dest_profile) as dest:
                dest.write(single_band, 1)
        # If no band numbers were provided, create a multi-band COG
        else:
            dest_profile.update({"count": src.count, "dtype": src.dtypes[0]})
            rasterio.shutil.copy(infile, outfile, **dest_profile)


def cogify_subdatasets(
    infile: str, outdir: str, subdataset_names: Optional[List[str]] = None
) -> Tuple[List[str], List[str]]:
    """Creates Cloud-Optimized GeoTIFFs for all subdatasets in a multi-dataset raster file.

    The created files will be named the same as the source file, with a ``_SUBDATASET`` suffix.
    E.g. if the source file is named ``foo.hdf`` and the subdataset is named ``bar``, the output
    COG will be named ``foo_bar.tif``. Spaces and forward slashes in the subdataset name are
    replaced with underscores. Only 2D (and not 3D) subdatasets are supported; others are
    skipped.

    Args:
        infile (str): The input file containing subdatasets.
        outdir (str): The output directory where the COGs will be created.
        subdataset_names (Optional[List[str]]): If provided and non-empty, only
            subdatasets whose name (the last ``:``-separated component of the
            subdataset identifier, before sanitization) is in this list are
            converted. Otherwise every 2D subdataset is converted.

    Returns:
        Tuple[List[str], List[str]]:
            A two tuple (paths, names):
                - The first element is a list of the output COG paths
                - The second element is a list of the sanitized subdataset
                  names, in the same order as the paths

    Raises:
        DriverRegistrationError: If GDAL's COG driver is not enabled.
    """
    assert_cog_driver_is_enabled()

    with rasterio.open(infile) as dataset:
        subdatasets = cast(List[str], dataset.subdatasets)

    base_file_name = os.path.splitext(os.path.basename(infile))[0]
    paths: List[str] = []
    names: List[str] = []
    for subdataset in subdatasets:
        # Only the shape is needed from the opened subdataset, so close it
        # again right away instead of leaking one handle per subdataset.
        with rasterio.open(subdataset) as subd:
            is_two_dimensional = len(subd.shape) == 2
        if not is_two_dimensional:
            continue

        subdataset_name = subdataset.split(":")[-1]
        if subdataset_names and subdataset_name not in subdataset_names:
            continue

        # Make the name safe to embed in a file name.
        sanitized_subdataset_name = subdataset_name.replace(" ", "_").replace(
            "/", "_"
        )
        names.append(sanitized_subdataset_name)

        file_name = f"{base_file_name}_{sanitized_subdataset_name}.tif"
        outfile = os.path.join(outdir, file_name)
        destination_profile = DEFAULT_PROFILE.copy()
        rasterio.shutil.copy(subdataset, outfile, **destination_profile)
        paths.append(outfile)
    return (paths, names)
20 changes: 19 additions & 1 deletion tests/core/utils/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import rasterio

from stactools.core.utils.convert import cogify
from stactools.core.utils.convert import cogify, cogify_subdatasets
from tests import test_data


Expand All @@ -31,3 +31,21 @@ def test_profile(self):
self.assertTrue(os.path.exists(outfile))
with rasterio.open(outfile) as dataset:
self.assertEqual(dataset.compression, rasterio.enums.Compression.lzw)

def test_subdataset(self):
    """Checks the names returned by cogify_subdatasets and the written files.

    Every returned path must exist and be readable as a deflate-compressed
    dataset.
    """
    source = test_data.get_path("data-files/hdf/AMSR_E_L3_RainGrid_B05_200707.h5")
    expected_names = [
        "__MonthlyRainTotal_GeoGrid_Data_Fields_RrLandRain",
        "__MonthlyRainTotal_GeoGrid_Data_Fields_TbOceanRain",
    ]
    with TemporaryDirectory() as outdir:
        cog_paths, cog_names = cogify_subdatasets(source, outdir)
        self.assertEqual(cog_names, expected_names)
        for cog_path in cog_paths:
            self.assertTrue(os.path.exists(cog_path))
            with rasterio.open(cog_path) as cog:
                self.assertEqual(
                    cog.compression, rasterio.enums.Compression.deflate
                )
Binary file not shown.