-
Notifications
You must be signed in to change notification settings - Fork 19
/
georaster.py
1773 lines (1446 loc) · 65.8 KB
/
georaster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
geoutils.georaster provides a toolset for working with raster data.
"""
from __future__ import annotations
import copy
import os
import warnings
from collections import abc
from numbers import Number
from typing import IO, Any, Callable, TypeVar
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pyproj
import rasterio as rio
import rasterio.mask
import rasterio.transform
import rasterio.warp
import rasterio.windows
from affine import Affine
from matplotlib import cm, colors
from rasterio.crs import CRS
from rasterio.io import MemoryFile
from rasterio.plot import show as rshow
from rasterio.warp import Resampling
from scipy.ndimage import map_coordinates
from shapely.geometry.polygon import Polygon
from geoutils.geovector import Vector
try:
import rioxarray
except ImportError:
_has_rioxarray = False
else:
_has_rioxarray = True
RasterType = TypeVar("RasterType", bound="Raster")
def _resampling_from_str(resampling: str) -> Resampling:
"""
Match a rio.warp.Resampling enum from a string representation.
:param resampling: A case-sensitive string matching the resampling enum (e.g. 'cubic_spline')
:raises ValueError: If no matching Resampling enum was found.
:returns: A rio.warp.Resampling enum that matches the given string.
"""
# Try to match the string version of the resampling method with a rio Resampling enum name
for method in rio.warp.Resampling:
if str(method).replace("Resampling.", "") == resampling:
resampling_method = method
break
# If no match was found, raise an error.
else:
raise ValueError(
f"'{resampling}' is not a valid rasterio.warp.Resampling method. "
f"Valid methods: {[str(method).replace('Resampling.', '') for method in rio.warp.Resampling]}"
)
return resampling_method
class Raster:
"""
Create a Raster object from a rasterio-supported raster dataset.
If not otherwise specified below, attribute types and contents correspond
to the attributes defined by rasterio.
Attributes:
filename : str
The path/filename of the loaded, file, only set if a disk-based file is read in.
data : np.array
Loaded image. Dimensions correspond to (bands, height, width).
nbands : int
Number of bands loaded into .data
bands : tuple
The indexes of the opened dataset which correspond to the bands loaded into data.
is_loaded : bool
True if the image data have been loaded into this Raster.
ds : rio.io.DatasetReader
Link to underlying DatasetReader object.
bounds
count
crs
dataset_mask
driver
dtypes
height
indexes
name
nodata
res
shape
transform
width
"""
# This only gets set if a disk-based file is read in.
# If the Raster is created with from_array, from_mem etc, this stays as None.
filename = None
_is_modified: bool | None = None
_disk_hash: int | None = None
# Rasterio-inherited names and types are defined here to get proper type hints.
# Maybe these don't have to be hard-coded in the future?
_data: np.ndarray | np.ma.masked_array
transform: Affine
crs: CRS
nodata: int | float | None
res: tuple[float | int, float | int]
bounds: rio.coords.BoundingBox
height: int
width: int
shape: tuple[int, int]
indexes: list[int]
count: int
dataset_mask: np.ndarray | None
driver: str
dtypes: list[str]
name: str
def __init__(
self,
filename_or_dataset: str | RasterType | rio.io.DatasetReader | rio.io.MemoryFile,
bands: None | int | list[int] = None,
load_data: bool = True,
downsample: int | float = 1,
masked: bool = True,
attrs: list[str] | None = None,
as_memfile: bool = False,
) -> None:
"""
Load a rasterio-supported dataset, given a filename.
:param filename_or_dataset: The filename of the dataset.
:param bands: The band(s) to load into the object. Default is to load all bands.
:param load_data: Load the raster data into the object. Default is True.
:param downsample: Reduce the size of the image loaded by this factor. Default is 1
:param masked: the data is loaded as a masked array, with no data values masked. Default is True.
:param attrs: Additional attributes from rasterio's DataReader class to add to the Raster object.
Default list is ['bounds', 'count', 'crs', 'dataset_mask', 'driver', 'dtypes', 'height', 'indexes',
'name', 'nodata', 'res', 'shape', 'transform', 'width'] - if no attrs are specified, these will be added.
:param as_memfile: open the dataset via a rio.MemoryFile.
:return: A Raster object
"""
# If Raster is passed, simply point back to Raster
if isinstance(filename_or_dataset, Raster):
for key in filename_or_dataset.__dict__:
setattr(self, key, filename_or_dataset.__dict__[key])
return
# Image is a file on disk.
elif isinstance(filename_or_dataset, str):
# Save the absolute on-disk filename
self.filename = os.path.abspath(filename_or_dataset)
if as_memfile:
# open the file in memory
memfile = MemoryFile(open(filename_or_dataset, "rb"))
# Read the file as a rasterio dataset
self.ds = memfile.open()
else:
self.ds = rio.open(filename_or_dataset, "r")
# If rio.Dataset is passed
elif isinstance(filename_or_dataset, rio.io.DatasetReader):
self.filename = filename_or_dataset.files[0]
self.ds = filename_or_dataset
# Or, image is already a Memory File.
elif isinstance(filename_or_dataset, rio.io.MemoryFile):
self.ds = filename_or_dataset.open()
# Provide a catch in case trying to load from data array
elif isinstance(filename_or_dataset, np.ndarray):
raise ValueError("np.array provided as filename. Did you mean to call Raster.from_array(...) instead? ")
# Don't recognise the input, so stop here.
else:
raise ValueError("filename argument not recognised.")
self._read_attrs(attrs)
# Save _masked attribute to be used by self.load()
self._masked = masked
# Check number of bands to be loaded
if bands is None:
nbands = self.count
elif isinstance(bands, int):
nbands = 1
elif isinstance(bands, abc.Iterable):
nbands = len(bands)
# Downsampled image size
if not isinstance(downsample, (int, float)):
raise ValueError("downsample must be of type int or float")
if downsample == 1:
out_shape = (nbands, self.height, self.width)
else:
down_width = int(np.ceil(self.width / downsample))
down_height = int(np.ceil(self.height / downsample))
out_shape = (nbands, down_height, down_width)
if load_data:
self.load(bands=bands, out_shape=out_shape)
self.nbands = self._data.shape[0]
self.is_loaded = True
if isinstance(filename_or_dataset, str):
self._is_modified = False
self._disk_hash = hash((self._data.tobytes(), self.transform, self.crs, self.nodata))
else:
self._data = None
self.nbands = None
self.is_loaded = False
# update attributes when downsample is not 1
if downsample != 1:
# Original attributes
meta = self.ds.meta
# width and height must be same as data
meta.update({"width": down_width, "height": down_height})
# Resolution is set, transform must be updated accordingly
res = tuple(np.asarray(self.res) * downsample)
transform = rio.transform.from_origin(self.bounds.left, self.bounds.top, res[0], res[1])
meta.update({"transform": transform})
# Update metadata
self._update(self.data, metadata=meta)
@classmethod
def from_array(
cls: type[RasterType],
data: np.ndarray | np.ma.masked_array,
transform: tuple[float, ...] | Affine,
crs: CRS | int,
nodata: int | float | None = None,
) -> RasterType:
"""Create a Raster from a numpy array and some geo-referencing information.
:param data: data array
:param transform: the 2-D affine transform for the image mapping.
Either a tuple(x_res, 0.0, top_left_x, 0.0, y_res, top_left_y) or
an affine.Affine object.
:param crs: Coordinate Reference System for image. Either a rasterio CRS,
or the EPSG integer.
:param nodata: nodata value
:returns: A Raster object containing the provided data.
Example:
You have a data array in EPSG:32645. It has a spatial resolution of
30 m in x and y, and its top left corner is X=478000, Y=3108140.
>>> transform = (30.0, 0.0, 478000.0, 0.0, -30.0, 3108140.0)
>>> myim = Raster.from_array(data, transform, 32645)
"""
if not isinstance(transform, Affine):
if isinstance(transform, tuple):
transform = Affine(*transform)
else:
raise ValueError("transform argument needs to be Affine or tuple.")
# Enable shortcut to create CRS from an EPSG ID.
if isinstance(crs, int):
crs = CRS.from_epsg(crs)
# If a 2-D ('single-band') array is passed in, give it a band dimension.
if len(data.shape) < 3:
data = np.expand_dims(data, 0)
# Preserves input mask
if isinstance(data, np.ma.masked_array):
if nodata is None:
if np.sum(data.mask) > 0:
raise ValueError("For masked arrays, a nodata value must be set")
else:
data.data[data.mask] = nodata
# Open handle to new memory file
mfh = MemoryFile()
# Create the memory file
with rio.open(
mfh,
"w",
height=data.shape[1],
width=data.shape[2],
count=data.shape[0],
dtype=data.dtype,
crs=crs,
transform=transform,
nodata=nodata,
driver="GTiff",
) as ds:
ds.write(data)
# Initialise a Raster object created with MemoryFile.
# (i.e., __init__ will now be run.)
return cls(mfh)
def __repr__(self) -> str:
"""Convert object to formal string representation."""
L = [getattr(self, item) for item in self._saved_attrs]
s: str = "{}.{}({})".format(type(self).__module__, type(self).__qualname__, ", ".join(map(str, L)))
return s
def __str__(self) -> str:
"""Provide string of information about Raster."""
return self.info()
def __eq__(self, other: object) -> bool:
"""Check if a Raster's data and georeferencing is equal to another."""
if not isinstance(other, type(self)): # TODO: Possibly add equals to SatelliteImage?
return NotImplemented
return all(
[
np.array_equal(self.data, other.data, equal_nan=True),
self.transform == other.transform,
self.crs == other.crs,
self.nodata == other.nodata,
]
)
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
def __add__(self: RasterType, other: RasterType | np.ndarray | Number) -> RasterType:
"""
Sum up the data of two rasters or a raster and a numpy array, or a raster and single number.
If other is a Raster, it must have the same data.shape, transform and crs as self.
If other is a np.ndarray, it must have the same shape.
Otherwise, other must be a single number.
"""
# Check that other is of correct type
if not isinstance(other, (Raster, np.ndarray, Number)):
raise ValueError("Addition possible only with a Raster, np.ndarray or single number.")
# Case 1 - other is a Raster
if isinstance(other, Raster):
# Check that both data are loaded
if not (self.is_loaded & other.is_loaded):
raise ValueError("Raster's data must be loaded with self.load().")
# Check that both rasters have the same shape and georeferences
if (self.data.shape == other.data.shape) & (self.transform == other.transform) & (self.crs == other.crs):
pass
else:
raise ValueError("Both rasters must have the same shape, transform and CRS.")
other_data = other.data
# Case 2 - other is a numpy array
elif isinstance(other, np.ndarray):
# Check that both array have the same shape
if self.data.shape == other.shape:
pass
else:
raise ValueError("Both rasters must have the same shape.")
other_data = other
# Case 3 - other is a single number
else:
other_data = other
# Calculate the sum of arrays
data = self.data + other_data
# Save as a new Raster
out_rst = self.from_array(data, self.transform, self.crs, nodata=self.nodata)
return out_rst
def __neg__(self: RasterType) -> RasterType:
"""Return self with self.data set to -self.data"""
out_rst = self.copy()
out_rst.data = -out_rst.data
return out_rst
def __sub__(self, other: Raster | np.ndarray | Number) -> Raster:
"""
Subtract two rasters. Both rasters must have the same data.shape, transform and crs.
"""
if isinstance(other, Raster):
# Need to convert both rasters to a common type before doing the negation
ctype: np.dtype = np.find_common_type([*self.dtypes, *other.dtypes], [])
other = other.astype(ctype)
return self + -other # type: ignore
def astype(self, dtype: np.dtype | type | str) -> Raster:
"""
Converts the data type of a Raster object.
:param dtype: Any numpy dtype or string accepted by numpy.astype
:returns: the output Raster with dtype changed.
"""
out_data = self.data.astype(dtype)
return self.from_array(out_data, self.transform, self.crs)
def _get_rio_attrs(self) -> list[str]:
"""Get the attributes that have the same name in rio.DatasetReader and Raster."""
rio_attrs: list[str] = []
for attr in Raster.__annotations__.keys():
if "__" in attr or attr not in dir(self.ds):
continue
rio_attrs.append(attr)
return rio_attrs
def _read_attrs(self, attrs: list[str] | str | None = None) -> None:
# Copy most used attributes/methods
rio_attrs = self._get_rio_attrs()
for attr in self.__annotations__.keys():
if "__" in attr or attr not in dir(self.ds):
continue
rio_attrs.append(attr)
if attrs is None:
self._saved_attrs = rio_attrs
attrs = rio_attrs
else:
if isinstance(attrs, str):
attrs = [attrs]
for attr in rio_attrs:
if attr not in attrs:
attrs.append(attr)
self._saved_attrs = attrs
for attr in attrs:
setattr(self, attr, getattr(self.ds, attr))
@property
def is_modified(self) -> bool:
"""Check whether file has been modified since it was created/opened.
:returns: True if Raster has been modified.
"""
if not self._is_modified:
new_hash = hash((self._data.tobytes(), self.transform, self.crs, self.nodata))
self._is_modified = not (self._disk_hash == new_hash)
return self._is_modified
@property
def data(self) -> np.ndarray | np.ma.masked_array:
"""
Get data.
:returns: data array.
"""
return self._data
@data.setter
def data(self, new_data: np.ndarray | np.ma.masked_array) -> None:
"""
Set the contents of .data.
new_data must have the same shape as existing data! (bands dimension included)
:param new_data: New data to assign to this instance of Raster
"""
# Check that new_data is a Numpy array
if not isinstance(new_data, np.ndarray):
raise ValueError("New data must be a numpy array.")
# Check that new_data has correct shape
if self.is_loaded:
orig_shape = self._data.shape
else:
orig_shape = (self.count, self.height, self.width)
if new_data.shape != orig_shape:
raise ValueError(f"New data must be of the same shape as existing data: {orig_shape}.")
# Check that new_data has the right type
if new_data.dtype != self._data.dtype:
raise ValueError(
"New data must be of the same type as existing\
data: {}".format(
self.data.dtype
)
)
self._data = new_data
def _update(
self,
imgdata: np.ndarray | None = None,
metadata: dict[str, Any] | None = None,
vrt_to_driver: str = "GTiff",
) -> None:
"""
Update the object with a new image or metadata.
:param imgdata: image data to update with.
:param metadata: metadata to update with.
:param vrt_to_driver: name of driver to coerce a VRT to. This is required
because rasterio does not support writing to to a VRTSourcedRasterBand.
"""
memfile = MemoryFile()
if imgdata is None:
imgdata = self.data
if metadata is None:
metadata = self.ds.meta
if metadata["driver"] == "VRT":
metadata["driver"] = vrt_to_driver
with memfile.open(**metadata) as ds:
ds.write(imgdata)
self.ds = memfile.open()
self._read_attrs()
if self.is_loaded:
self.load()
self._is_modified = True
def info(self, stats: bool = False) -> str:
"""
Returns string of information about the raster (filename, coordinate system, number of columns/rows, etc.).
:param stats: Add statistics for each band of the dataset (max, min, median, mean, std. dev.). Default is to
not calculate statistics.
:returns: text information about Raster attributes.
"""
as_str = [
f"Driver: {self.driver} \n",
f"Opened from file: {self.filename} \n",
f"Filename: {self.name} \n",
f"Raster modified since disk load? {self._is_modified} \n",
f"Size: {self.width}, {self.height}\n",
f"Number of bands: {self.count:d}\n",
f"Data types: {self.dtypes}\n",
f"Coordinate System: EPSG:{self.crs.to_epsg()}\n",
f"NoData Value: {self.nodata}\n",
"Pixel Size: {}, {}\n".format(*self.res),
"Upper Left Corner: {}, {}\n".format(*self.bounds[:2]),
"Lower Right Corner: {}, {}\n".format(*self.bounds[2:]),
]
if stats:
if self.data is not None:
if self.nbands == 1:
as_str.append(f"[MAXIMUM]: {np.nanmax(self.data):.2f}\n")
as_str.append(f"[MINIMUM]: {np.nanmin(self.data):.2f}\n")
as_str.append(f"[MEDIAN]: {np.ma.median(self.data):.2f}\n")
as_str.append(f"[MEAN]: {np.nanmean(self.data):.2f}\n")
as_str.append(f"[STD DEV]: {np.nanstd(self.data):.2f}\n")
else:
for b in range(self.nbands):
# try to keep with rasterio convention.
as_str.append(f"Band {b + 1}:")
as_str.append(f"[MAXIMUM]: {np.nanmax(self.data[b, :, :]):.2f}\n")
as_str.append(f"[MINIMUM]: {np.nanmin(self.data[b, :, :]):.2f}\n")
as_str.append(f"[MEDIAN]: {np.ma.median(self.data[b, :, :]):.2f}\n")
as_str.append(f"[MEAN]: {np.nanmean(self.data[b, :, :]):.2f}\n")
as_str.append(f"[STD DEV]: {np.nanstd(self.data[b, :, :]):.2f}\n")
return "".join(as_str)
def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType:
"""
Copy the Raster object in memory
:param new_array: New array to use for the copied Raster
:return:
"""
if new_array is not None:
data = new_array
else:
data = self.data
cp = self.from_array(data=data, transform=self.transform, crs=self.crs, nodata=self.nodata)
return cp
@property
def __array_interface__(self) -> dict[str, Any]:
if self._data is None:
self.load()
return self._data.__array_interface__ # type: ignore
def load(self, bands: int | list[int] | None = None, **kwargs: Any) -> None:
r"""
Load specific bands of the dataset, using rasterio.read().
Ensure that self.data.ndim = 3 for ease of use (needed e.g. in show)
:param bands: The band(s) to load. Note that rasterio begins counting at 1, not 0.
\*\*kwargs: any additional arguments to rasterio.io.DatasetReader.read.
Useful ones are:
.. hlist::
* out_shape : to load a subsampled version
* window : to load a cropped version
* resampling : to set the resampling algorithm
"""
if bands is None:
self._data = self.ds.read(masked=self._masked, **kwargs)
bands = self.ds.indexes
else:
self._data = self.ds.read(bands, masked=self._masked, **kwargs)
if type(bands) is int:
bands = bands
# If ndim is 2, expand to 3
if self._data.ndim == 2:
self._data = np.expand_dims(self._data, 0)
self.nbands = self._data.shape[0]
self.is_loaded = True
self.bands = bands
def crop(
self: RasterType,
cropGeom: Raster | Vector | list[float] | tuple[float, ...],
mode: str = "match_pixel",
inplace: bool = True,
) -> RasterType | None:
"""
Crop the Raster to a given extent.
:param cropGeom: Geometry to crop raster to, as either a Raster object, a Vector object, or a list of
coordinates. If cropGeom is a Raster, crop() will crop to the boundary of the raster as returned by
Raster.ds.bounds. If cropGeom is a Vector, crop() will crop to the bounding geometry. If cropGeom is a
list of coordinates, the order is assumed to be [xmin, ymin, xmax, ymax].
:param mode: one of 'match_pixel' (default) or 'match_extent'. 'match_pixel' will preserve the original pixel
resolution, cropping to the extent that most closely aligns with the current coordinates. 'match_extent'
will match the extent exactly, adjusting the pixel resolution to fit the extent.
:param inplace: Update the raster inplace or return copy.
:returns: None if inplace=True and a new Raster if inplace=False
"""
assert mode in [
"match_extent",
"match_pixel",
], "mode must be one of 'match_pixel', 'match_extent'"
if isinstance(cropGeom, (Raster, Vector)):
xmin, ymin, xmax, ymax = cropGeom.bounds
elif isinstance(cropGeom, (list, tuple)):
xmin, ymin, xmax, ymax = cropGeom
else:
raise ValueError("cropGeom must be a Raster, Vector, or list of coordinates.")
meta = copy.copy(self.ds.meta)
if mode == "match_pixel":
crop_bbox = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
crop_img, tfm = rio.mask.mask(self.ds, [crop_bbox], crop=True, all_touched=True)
meta.update(
{
"height": crop_img.shape[1],
"width": crop_img.shape[2],
"transform": tfm,
}
)
else:
window = rio.windows.from_bounds(xmin, ymin, xmax, ymax, transform=self.transform)
new_height = int(window.height)
new_width = int(window.width)
new_tfm = rio.transform.from_bounds(xmin, ymin, xmax, ymax, width=new_width, height=new_height)
if self.is_loaded:
new_img = np.zeros((self.nbands, new_height, new_width), dtype=self.data.dtype)
else:
new_img = np.zeros((self.count, new_height, new_width), dtype=self.data.dtype)
crop_img, tfm = rio.warp.reproject(
self.data,
new_img,
src_transform=self.transform,
dst_transform=new_tfm,
src_crs=self.crs,
dst_crs=self.crs,
)
meta.update({"height": new_height, "width": new_width, "transform": tfm})
if inplace:
self._update(crop_img, meta)
return None
else:
return self.from_array(crop_img, meta["transform"], meta["crs"], meta["nodata"])
def reproject(
self: RasterType,
dst_ref: RasterType | rio.io.Dataset | str | None = None,
dst_crs: CRS | str | None = None,
dst_size: tuple[int, int] | None = None,
dst_bounds: dict[str, float] | rio.coords.BoundingBox | None = None,
dst_res: float | abc.Iterable[float] | None = None,
nodata: int | float | None = None,
dtype: np.dtype | None = None,
resampling: Resampling | str = Resampling.nearest,
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
) -> RasterType:
"""
Reproject raster to a specified grid.
The output grid can either be given by a reference Raster (using `dst_ref`),
or by manually providing the output CRS (`dst_crs`), dimensions (`dst_size`),
resolution (with `dst_size`) and/or bounds (`dst_bounds`).
Any resampling algorithm implemented in rasterio can be used.
Currently: requires image data to have been loaded into memory.
NOT SUITABLE for large datasets yet! This requires work...
To reproject a Raster with different source bounds, first run Raster.crop.
:param dst_ref: a reference raster. If set will use the attributes of this
raster for the output grid. Can be provided as Raster/rasterio data set or as path to the file.
:param crs: Specify the Coordinate Reference System to reproject to. If dst_ref not set, defaults to self.crs.
:param dst_size: Raster size to write to (x, y). Do not use with dst_res.
:param dst_bounds: a BoundingBox object or a dictionary containing\
left, bottom, right, top bounds in the source CRS.
:param dst_res: Pixel size in units of target CRS. Either 1 value or (xres, yres). Do not use with dst_size.
:param nodata: nodata value in reprojected data.
:param resampling: A rasterio Resampling method
:param silent: If True, will not print warning statements
:param n_threads: The number of worker threads. Defaults to (os.cpu_count() - 1).
:param memory_limit: The warp operation memory limit in MB. Larger values may perform better.
:returns: Raster
"""
# Check that either dst_ref or dst_crs is provided
if dst_ref is not None:
if dst_crs is not None:
raise ValueError("Either of `dst_ref` or `dst_crs` must be set. Not both.")
else:
# In case dst_res or dst_size is set, use original CRS
if dst_crs is None:
dst_crs = self.crs
# Case a raster is provided as reference
if dst_ref is not None:
# Check that dst_ref type is either str, Raster or rasterio data set
# Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45
if isinstance(dst_ref, Raster):
ds_ref = dst_ref
elif isinstance(dst_ref, rio.io.MemoryFile) or isinstance(dst_ref, rasterio.io.DatasetReader):
ds_ref = dst_ref
elif isinstance(dst_ref, str):
assert os.path.exists(dst_ref), "Reference raster does not exist"
ds_ref = Raster(dst_ref, load_data=False)
else:
raise ValueError(
"Type of dst_ref not understood, must be path to file (str), Raster or rasterio data set"
)
# Read reprojecting params from ref raster
dst_crs = ds_ref.crs
dst_size = (ds_ref.width, ds_ref.height)
dst_res = None
dst_bounds = ds_ref.bounds
else:
# Determine target CRS
dst_crs = CRS.from_user_input(dst_crs)
# Set output dtype
if dtype is None:
# Warning: this will not work for multiple bands with different dtypes
dtype = self.dtypes[0]
if nodata is None:
nodata = self.nodata
# If no data was set, need to set one by default, in case reprojection is done outside original bounds
# Otherwise, output nodata will be 99999 by default which will not work as expected for uint8 data.
if nodata is None:
if not silent:
warnings.warn("No nodata set, will use 0")
nodata = 0
# Basic reprojection options, needed in all cases.
reproj_kwargs = {
"src_transform": self.transform,
"src_crs": self.crs,
"dst_crs": dst_crs,
"resampling": resampling if isinstance(resampling, Resampling) else _resampling_from_str(resampling),
"dst_nodata": nodata,
}
# If dst_ref is None, check other input arguments
if dst_size is not None and dst_res is not None:
raise ValueError("dst_size and dst_res both specified. Specify only one.")
# Create a BoundingBox if required
if dst_bounds is not None:
if not isinstance(dst_bounds, rio.coords.BoundingBox):
dst_bounds = rio.coords.BoundingBox(
dst_bounds["left"],
dst_bounds["bottom"],
dst_bounds["right"],
dst_bounds["top"],
)
# Determine target raster size/resolution
dst_transform = None
if dst_res is not None:
if dst_bounds is None:
# Let rasterio determine the maximum bounds of the new raster.
reproj_kwargs.update({"dst_resolution": dst_res})
else:
# Bounds specified. First check if xres and yres are different.
if isinstance(dst_res, tuple):
xres = dst_res[0]
yres = dst_res[1]
else:
xres = dst_res
yres = dst_res
# Calculate new raster size which ensures that pixels have
# precisely the resolution specified.
dst_width = np.ceil((dst_bounds.right - dst_bounds.left) / xres)
dst_height = np.ceil(np.abs(dst_bounds.bottom - dst_bounds.top) / yres)
dst_size = (int(dst_width), int(dst_height))
# As a result of precise pixel size, the destination bounds may
# have to be adjusted.
x1 = dst_bounds.left + (xres * dst_width)
y1 = dst_bounds.top - (yres * dst_height)
dst_bounds = rio.coords.BoundingBox(top=dst_bounds.top, left=dst_bounds.left, bottom=y1, right=x1)
# Set output shape (Note: dst_size is (ncol, nrow))
if dst_size is not None:
dst_shape = (self.count, dst_size[1], dst_size[0])
dst_data = np.ones(dst_shape, dtype=dtype)
reproj_kwargs.update({"destination": dst_data})
else:
dst_shape = (self.count, self.height, self.width)
# If dst_bounds is set, will enforce dst_bounds
if dst_bounds is not None:
if dst_size is None:
# Calculate new raster size which ensures that pixels resolution is as close as possible to original
# Raster size is increased by up to one pixel if needed
yres, xres = self.res
dst_width = int(np.ceil((dst_bounds.right - dst_bounds.left) / xres))
dst_height = int(np.ceil(np.abs(dst_bounds.bottom - dst_bounds.top) / yres))
dst_size = (dst_width, dst_height)
# Calculate associated transform
dst_transform = rio.transform.from_bounds(*dst_bounds, width=dst_size[0], height=dst_size[1])
# Specify the output bounds and shape, let rasterio handle the rest
reproj_kwargs.update({"dst_transform": dst_transform})
dst_data = np.ones((dst_size[1], dst_size[0]), dtype=dtype)
reproj_kwargs.update({"destination": dst_data})
# Check that reprojection is actually needed
# Caution, dst_size is (width, height) while shape is (height, width)
if all(
[
(dst_transform == self.transform) or (dst_transform is None),
(dst_crs == self.crs) or (dst_crs is None),
(dst_size == self.shape[::-1]) or (dst_size is None),
(dst_res == self.res) or (dst_res == self.res[0] == self.res[1]) or (dst_res is None),
]
):
if nodata == self.nodata:
if not silent:
warnings.warn("Output projection, bounds and size are identical -> return self (not a copy!)")
return self
else:
warnings.warn("Only nodata is different, running self.set_ndv instead")
dst_r = self.copy()
dst_r.set_ndv(nodata)
return dst_r
# Set the performance keywords
if n_threads == 0:
# Default to cpu count minus one. If the cpu count is undefined, num_threads will be 1
cpu_count = os.cpu_count() or 2
num_threads = cpu_count - 1
else:
num_threads = n_threads
reproj_kwargs.update({"num_threads": num_threads, "warp_mem_limit": memory_limit})
# Currently reprojects all in-memory bands at once.
# This may need to be improved to allow reprojecting from-disk.
# See rio.warp.reproject docstring for more info.
dst_data, dst_transformed = rio.warp.reproject(self.data, **reproj_kwargs)
# Enforce output type
dst_data = dst_data.astype(dtype)
# Check for funny business.
if dst_transform is not None:
assert dst_transform == dst_transformed
# Write results to a new Raster.
dst_r = self.from_array(dst_data, dst_transformed, dst_crs, nodata)
return dst_r
def shift(self, xoff: float, yoff: float) -> None:
"""
Translate the Raster by a given x,y offset.
:param xoff: Translation x offset.
:param yoff: Translation y offset.
"""
# Check that data is loaded, as it is necessary for this method
assert self.is_loaded, "Data must be loaded, use self.load"
meta = self.ds.meta
dx, b, xmin, d, dy, ymax = list(self.transform)[:6]
meta.update({"transform": rio.transform.Affine(dx, b, xmin + xoff, d, dy, ymax + yoff)})
self._update(metadata=meta)
def set_ndv(self, ndv: abc.Iterable[int | float] | int | float, update_array: bool = False) -> None:
"""
Set new nodata values for bands (and possibly update arrays)
:param ndv: nodata values
:param update_array: change the existing nodata in array
"""
if not isinstance(ndv, (abc.Iterable, int, float, np.integer, np.floating)):
raise ValueError("Type of ndv not understood, must be list or float or int")
elif (isinstance(ndv, (int, float, np.integer, np.floating))) and self.count > 1:
print("Several raster band: using nodata value for all bands")
ndv = [ndv] * self.count
elif isinstance(ndv, abc.Iterable) and self.count == 1:
print("Only one raster band: using first nodata value provided")
ndv = list(ndv)[0]
meta = self.ds.meta
imgdata = self.data
pre_ndv = self.nodata
meta.update({"nodata": ndv})
if update_array and pre_ndv is not None:
# nodata values are specific to each band
# let's do a loop then
if self.count == 1:
if np.ma.isMaskedArray(imgdata):
imgdata.data[imgdata.mask] = ndv # type: ignore
else:
ind = imgdata[:] == pre_ndv