Update stable with changes from develop #1273

Merged 10 commits into stable from develop on Oct 9, 2024
93 changes: 41 additions & 52 deletions How_to_guides/Pansharpening.ipynb

Large diffs are not rendered by default.

2,130 changes: 2,130 additions & 0 deletions Real_world_examples/Intertidal_exposure.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Real_world_examples/README.rst
@@ -13,6 +13,7 @@ More complex case study workflows demonstrating how DEA can be used to address real-world problems
Coastal_erosion.ipynb
Estimate_climate_driver_influence_on_rainfall.ipynb
Intertidal_elevation.ipynb
Intertidal_exposure.ipynb
Mapping_inundation_using_stream_gauges.ipynb
Radar_water_detection.ipynb
Scalable_machine_learning/0_README
2,241 changes: 2,021 additions & 220 deletions Real_world_examples/Radar_water_detection.ipynb

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions Tests/dea_tools/test_coastal.py
@@ -337,7 +337,7 @@ def test_model_tides_ensemble():
assert modelled_tides_df.columns.tolist() == ["tide_model", "tide_m"]
assert set(modelled_tides_df.tide_model) == set(models)
assert np.allclose(
modelled_tides_df.tide_m,
modelled_tides_df.tide_m.values,
[
-2.819,
-1.850,
@@ -352,7 +352,7 @@ def test_model_tides_ensemble():
-0.071,
-0.0158,
],
rtol=0.02,
atol=0.10,
)

# One-to-one mode
@@ -420,7 +420,7 @@ def test_model_tides_ensemble():

# Check values are expected
assert np.allclose(
modelled_tides_df.ensemble, [-2.819, 0.0730, -1.850, -0.069], rtol=0.01
modelled_tides_df.ensemble.values, [-2.819, 0.0730, -1.850, -0.069], atol=0.10
)

# Wide mode, custom functions
@@ -558,9 +558,9 @@ def test_pixel_tides(satellite_ds, measured_tides_ds, resolution):
longitude=x_coords, latitude=y_coords, time="2020-02-14", method="nearest"
)

# Test if extracted tides match expected results (to within ~3 cm)
# Test if extracted tides match expected results (to within ~12 cm)
expected_tides = [-1.82249, -1.977088, -1.973618, -2.071242]
assert np.allclose(extracted_tides.values, expected_tides, atol=0.03)
assert np.allclose(extracted_tides.values, expected_tides, atol=0.12)


def test_pixel_tides_quantile(satellite_ds):
@@ -603,7 +603,7 @@ def test_pixel_tides_quantile(satellite_ds):
longitude=x_coords, latitude=y_coords, method="nearest"
)

# Test if extracted tides match expected results (to within ~3 cm)
# Test if extracted tides match expected results (to within ~10 cm)
expected_tides = np.array(
[
[-1.83, -1.98, -1.98, -2.07],
@@ -614,7 +614,7 @@ def test_pixel_tides_quantile(satellite_ds):
[1.58, 1.61, 1.62, 1.64],
]
)
assert np.allclose(extracted_tides.values, expected_tides, atol=0.03)
assert np.allclose(extracted_tides.values, expected_tides, atol=0.10)


# Run test with quantile calculation off and on
@@ -793,7 +793,7 @@ def test_tidal_stats(satellite_ds, modelled_freq):
# Calculate tidal stats
tidal_stats_df = tidal_stats(satellite_ds, modelled_freq=modelled_freq)

# Compare outputs to expected results (within 5% or 0.05 m)
# Compare outputs to expected results (within 10% or 0.10 m)
expected_results = pd.Series(
{
"tidepost_lat": -18.001,
@@ -811,7 +811,7 @@ def test_tidal_stats(satellite_ds, modelled_freq):
"high_tide_offset": 0.308,
}
)
assert np.allclose(tidal_stats_df, expected_results, atol=0.05)
assert np.allclose(tidal_stats_df, expected_results, atol=0.10)


def test_glint_angle(angle_metadata_ds):
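The changes above relax the test tolerances and switch `np.allclose` from a relative tolerance (`rtol`) to an absolute one (`atol`), which behaves more predictably when expected tide heights sit near zero. A minimal sketch of the difference, using illustrative values rather than numbers taken from the tests:

```python
import numpy as np

# Expected and modelled tide heights in metres (illustrative values only)
expected = np.array([-2.819, -0.0158])
modelled = np.array([-2.840, -0.0410])

# A relative tolerance scales with the magnitude of each expected value,
# so a ~2.5 cm difference on the near-zero tide fails even at rtol=0.02
print(np.allclose(modelled, expected, rtol=0.02, atol=0))  # False

# An absolute tolerance accepts any difference up to 0.10 m,
# no matter how small the expected value is
print(np.allclose(modelled, expected, atol=0.10))  # True
```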
47 changes: 35 additions & 12 deletions Tools/dea_tools/classification.py
@@ -226,6 +226,7 @@ def predict_xr(
chunk_size=None,
persist=False,
proba=False,
max_proba=True,
clean=False,
return_input=False,
):
@@ -255,6 +256,11 @@ def predict_xr(
distributed RAM.
proba : bool
If True, predict probabilities
max_proba : bool
If True, the probabilities array will be flattened to contain
only the probability for the "Predictions" class. If False,
the "Probabilities" object will be an array of prediction
probabilities for each class.
clean : bool
If True, remove Infs and NaNs from input and output arrays
return_input : bool
@@ -282,7 +288,7 @@ def predict_xr(
input_xr.chunks["y"][0]
)

def _predict_func(model, input_xr, persist, proba, clean, return_input):
def _predict_func(model, input_xr, persist, proba, max_proba, clean, return_input):
x, y, crs = input_xr.x, input_xr.y, input_xr.geobox.crs

input_data = []
@@ -330,18 +336,35 @@ def _predict_func(model, input_xr, persist, proba, clean, return_input):
print(" probabilities...")
out_proba = model.predict_proba(input_data_flattened)

# convert to %
out_proba = da.max(out_proba, axis=1) * 100.0
# return either one band with the max probability, or the whole probability array
if max_proba == True:
print(" returning single probability band.")
out_proba = da.max(out_proba, axis=1) * 100.0
out_proba = out_proba.reshape(len(y), len(x))
out_proba = xr.DataArray(
out_proba, coords={"x": x, "y": y}, dims=["y", "x"]
)
output_xr["Probabilities"] = out_proba
else:
print(" returning class probability array.")
out_proba = out_proba * 100.0
class_names = model.classes_ # Get the unique class names from the fitted classifier

# Loop through each class (band)
probabilities_dataset = xr.Dataset()
for i, class_name in enumerate(class_names):
reshaped_band = out_proba[:, i].reshape(len(y), len(x))
reshaped_da = xr.DataArray(
reshaped_band, coords={"x": x, "y": y}, dims=["y", "x"]
)
probabilities_dataset[f"prob_{class_name}"] = reshaped_da

# merge in the probabilities
output_xr = xr.merge([output_xr, probabilities_dataset])

if clean == True:
out_proba = da.where(da.isfinite(out_proba), out_proba, 0)

out_proba = out_proba.reshape(len(y), len(x))

out_proba = xr.DataArray(
out_proba, coords={"x": x, "y": y}, dims=["y", "x"]
)
output_xr["Probabilities"] = out_proba


if return_input == True:
print(" input features...")
@@ -391,12 +414,12 @@ def _predict_func(model, input_xr, persist, proba, clean, return_input):
model = ParallelPostFit(model)
with joblib.parallel_backend("dask"):
output_xr = _predict_func(
model, input_xr, persist, proba, clean, return_input
model, input_xr, persist, proba, max_proba, clean, return_input
)

else:
output_xr = _predict_func(
model, input_xr, persist, proba, clean, return_input
model, input_xr, persist, proba, max_proba, clean, return_input
).compute()

return output_xr
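For context, here is a minimal usage sketch of the new `max_proba` option. It assumes a fitted scikit-learn classifier `model`, a dask-backed `xarray.Dataset` `ds` of feature layers, and the usual `predict_xr(model, input_xr, ...)` call signature; none of these appear in the diff above.

```python
from dea_tools.classification import predict_xr

# Default behaviour (max_proba=True): "Probabilities" is a single band
# holding the probability (in %) of the winning class for each pixel
predicted = predict_xr(model, ds, proba=True, max_proba=True)
print(predicted.Probabilities)

# max_proba=False: the output instead gains one "prob_<class>" band per
# class the classifier was fitted on, each holding that class's probability
predicted = predict_xr(model, ds, proba=True, max_proba=False)
print([var for var in predicted.data_vars if str(var).startswith("prob_")])
```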