Merge pull request #51 from Project-AgML/dev

Version 0.5.1
Project-AgML · Jul 14, 2023 · 0c380be · 0c380be
2 parents f09cabd + 8213539
commit 0c380be
Show file tree

Hide file tree

Showing 16 changed files with 152 additions and 12 deletions.
diff --git a/.github/workflows/update-datasets.yml b/.github/workflows/update-datasets.yml
@@ -7,6 +7,8 @@ on:
   push:
     paths:
       - agml/_assets/public_datasources.json
+    branches:
+      - dev
 
 permissions: write-all
 

diff --git a/README.md b/README.md
@@ -137,6 +137,7 @@ You're now ready to use AgML for training your own models!
 [riseholme_strawberry_classification_2021](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/riseholme_strawberry_classification_2021.md) | Image Classification | 3520 |
 [ghai_broccoli_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_broccoli_detection.md) | Object Detection | 500 |
 [bean_synthetic_earlygrowth_aerial](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/bean_synthetic_earlygrowth_aerial.md) | Semantic Segmentation | 2500 |
+[ghai_strawberry_fruit_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_strawberry_fruit_detection.md) | Object Detection | 500 |
 
 ## Usage Information
 
@@ -167,4 +168,4 @@ a bug or feature that you would like to see implemented, please don't hesitate t
 See the [contributing guidelines](/CONTRIBUTING.md) for more information.
 
 ## Funding
-This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.edu
+This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.edu).
diff --git a/agml/__init__.py b/agml/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = '0.5.0'
+__version__ = '0.5.1'
 __all__ = ['data', 'synthetic', 'backend', 'viz', 'io']
 
 

diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
@@ -1230,5 +1230,45 @@
                 0.08992248773574829
             ]
         }
+    },
+    "ghai_strawberry_fruit_detection": {
+        "classes": {
+            "1": "Bud",
+            "2": "Calyx",
+            "3": "Detached Fruit",
+            "4": "Flower",
+            "5": "Large green",
+            "6": "Leaf",
+            "7": "Ripe fruit",
+            "8": "Small Green",
+            "9": "Stem",
+            "10": "Unripe fruit"
+        },
+        "ml_task": "object_detection",
+        "ag_task": "crop_detection",
+        "location": {
+            "continent": "north_america",
+            "country": "usa"
+        },
+        "sensor_modality": "rgb",
+        "real_synthetic": "real",
+        "platform": "handheld/ground",
+        "input_data_format": "jpg",
+        "annotation_format": "coco_json",
+        "n_images": "500",
+        "docs_url": "https://github.com/AxisAg/GHAIDatasets/blob/main/datasets/strawberry.md",
+        "external_image_sources": [],
+        "stats": {
+            "mean": [
+                0.49159616231918335,
+                0.5238277316093445,
+                0.4485996663570404
+            ],
+            "std": [
+                0.18163496255874634,
+                0.16137710213661194,
+                0.18042609095573425
+            ]
+        }
     }
 }
diff --git a/agml/_assets/shape_info.pickle b/agml/_assets/shape_info.pickle
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
@@ -138,5 +138,9 @@
     "bean_synthetic_earlygrowth_aerial": {
         "license": "MIT",
         "citation": "@ARTICLE{10.3389/fpls.2019.01185,\n  \nAUTHOR={Bailey, Brian N.},   \n\t \nTITLE={Helios: A Scalable 3D Plant and Environmental Biophysical Modeling Framework},      \n\t\nJOURNAL={Frontiers in Plant Science},      \n\t\nVOLUME={10},      \n\t\nYEAR={2019},      \n\t  \nURL={https://www.frontiersin.org/article/10.3389/fpls.2019.01185},       \n\t\nDOI={10.3389/fpls.2019.01185},      \n\t\nISSN={1664-462X},   \n   \nABSTRACT={This article presents an overview of Helios, a new three-dimensional (3D) plant and environmental modeling framework. Helios is a model coupling framework designed to provide maximum flexibility in integrating and running arbitrary 3D environmental system models. Users interact with Helios through a well-documented open-source C++ API. Version 1.0 comes with model plug-ins for radiation transport, the surface energy balance, stomatal conductance, photosynthesis, solar position, and procedural tree generation. Additional plug-ins are also available for visualizing model geometry and data and for processing and integrating LiDAR scanning data. Many of the plug-ins perform calculations on the graphics processing unit, which allows for efficient simulation of very large domains with high detail. An example modeling study is presented in which leaf-level heterogeneity in water usage and photosynthesis of an orchard is examined to understand how this leaf-scale variability contributes to whole-tree and -canopy fluxes.}\n}"
+    },
+    "ghai_strawberry_fruit_detection": {
+        "license": "CC BY-SA 4.0",
+        "citation": ""
     }
 }
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
@@ -1016,6 +1016,19 @@ def ghai_broccoli_detection(self, dataset_name):
         shutil.move(os.path.join(original_dir, 'coco.json'),
                     os.path.join(processed_dir, 'annotations.json'))
 
+    def ghai_strawberry_fruit_detection(self, dataset_name):
+        # Preprocess the downloaded dataset into the processed layout: the
+        # `.jpg` files go into `<processed_dir>/images` and the `coco.json`
+        # annotation file becomes `<processed_dir>/annotations.json`.
+        # `dataset_name` is the sub-directory name used under both
+        # `self.data_original_dir` and `self.data_processed_dir`.
+        # Create processed directories
+        original_dir = os.path.join(self.data_original_dir, dataset_name)
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        processed_image_dir = os.path.join(processed_dir, 'images')
+        os.makedirs(processed_image_dir, exist_ok = True)
+
+        # Move images. The glob pattern is not recursive, so only `.jpg` files
+        # sitting directly inside `original_dir` are picked up. They are moved
+        # rather than copied, so they no longer exist in `original_dir` after.
+        for image in tqdm(glob.glob(os.path.join(original_dir, '*.jpg'))):
+            shutil.move(image, processed_image_dir)
+        # The annotation file is renamed to `annotations.json` as it is moved.
+        shutil.move(os.path.join(original_dir, 'coco.json'),
+                    os.path.join(processed_dir, 'annotations.json'))
+
 
 if __name__ == '__main__':
     # Initialize program arguments.

diff --git a/agml/data/loader.py b/agml/data/loader.py
@@ -15,6 +15,7 @@
 import os
 import json
 import copy
+import glob
 from typing import Union
 from collections.abc import Sequence
 from decimal import getcontext, Decimal
@@ -296,6 +297,38 @@ def helios(cls, name, dataset_path = None):
         information which is provided in the `.metadata` directory of the Helios
         generated dataset, allowing it to contain potentially even more info.
         """
+        # Instantiate from a list of datasets.
+        if isinstance(name, (list, tuple)):
+            if dataset_path is None:
+                dataset_path = [None] * len(name)
+            elif isinstance(dataset_path, str):
+                dataset_path = [dataset_path] * len(name)
+            else:
+                if not len(dataset_path) == len(name):
+                    raise ValueError("The number of dataset paths must be "
+                                     "the same as the number of dataset names.")
+            datasets = [cls.helios(n, dataset_path = dp)
+                        for n, dp in zip(name, dataset_path)]
+            return cls.merge(*datasets)
+
+        # Instantiate from a wildcard pattern.
+        if isinstance(name, str) and '*' in name:
+            if dataset_path is None:
+                dataset_path = os.path.abspath(synthetic_data_save_path())
+            elif not os.path.exists(dataset_path):
+                raise NotADirectoryError(
+                    f"Existing directory '{dataset_path}' for dataset of name "
+                    f"{name} not found, pass a custom path if you want to use "
+                    f"a custom dataset path for the dataset.")
+
+            # Get the list of datasets.
+            possible_datasets = glob.glob(os.path.join(dataset_path, name))
+            if len(possible_datasets) == 0:
+                raise ValueError(f"No datasets found for pattern: {name}.")
+            datasets = [cls.helios(os.path.basename(p), dataset_path = dataset_path)
+                        for p in sorted(possible_datasets)]
+            return cls.merge(*datasets)
+
         # Locate the path to the dataset, using synthetic semantics.
         if dataset_path is None:
             dataset_path = os.path.abspath(

diff --git a/agml/io.py b/agml/io.py
@@ -15,6 +15,8 @@
 import random
 import inspect
 
+import cv2
+
 from agml.utils.io import (
     get_file_list as _get_file_list,
     get_dir_list as _get_dir_list,
@@ -112,5 +114,15 @@ def random_file(path, **kwargs):
     return random.choice(get_file_list(path, **kwargs))
 
 
+def read_image(path, **kwargs):
+    """Reads an image from a file.
+
+    Args:
+        path (str): The path to the image file.
+        **kwargs: Keyword arguments to pass to `cv2.imread`.
 
+    Returns:
+        numpy.ndarray: The image.
+    """
+    # Thin pass-through: whatever `cv2.imread` returns is handed back as-is.
+    # NOTE(review): OpenCV documents `imread` as returning an empty result
+    # (`None` in Python) rather than raising when the file cannot be read, and
+    # as loading color images in BGR channel order -- confirm callers expect
+    # both, since the docstring only promises a `numpy.ndarray`.
+    return cv2.imread(path, **kwargs)
 
diff --git a/agml/models/segmentation.py b/agml/models/segmentation.py
@@ -32,7 +32,7 @@
 from agml.data.public import source
 from agml.utils.general import resolve_list_value
 from agml.utils.image import resolve_image_size
-from agml.viz.masks import show_image_with_overlaid_mask, show_image_and_mask
+from agml.viz.masks import show_image_and_overlaid_mask, show_image_and_mask
 
 # This is last since `agml.models.base` will check for PyTorch Lightning,
 # and PyTorch Lightning automatically installed torchmetrics with it.
@@ -250,7 +250,7 @@ def show_prediction(self, image, overlay = False, **kwargs):
         image = self._expand_input_images(image)[0]
         mask = self.predict(image, **kwargs)
         if overlay:
-            return show_image_with_overlaid_mask(image, mask, **kwargs)
+            return show_image_and_overlaid_mask(image, mask, **kwargs)
         return show_image_and_mask(image, mask, **kwargs)
 
     def load_benchmark(self, dataset):

diff --git a/agml/synthetic/generator.py b/agml/synthetic/generator.py
@@ -155,7 +155,7 @@ def _convert_options_to_xml(self):
 
         # The `scan` tag is used for LiDAR generation. This must be added later
         # because there can be multiple origins and thus multiple `scan` tags.
-        if self._generation_options.simulation_type == SimulationType.LiDAR:
+        if self._generation_options.simulation_type == SimulationType.LiDAR or self._generation_options.simulation_type == SimulationType.Both:
             scan_tags = []
             if isinstance(parameters['lidar']['origin'][0], list):
                 for origin in parameters['lidar']['origin']:
@@ -173,15 +173,15 @@ def _convert_options_to_xml(self):
             self._canopy + "Parameters": parameters['canopy'],
             'Ground': parameters['Ground']}
         xml_params = {'canopygenerator': canopy_parameters}
-        if self._generation_options.simulation_type == SimulationType.RGB:
+        if self._generation_options.simulation_type == SimulationType.RGB or self._generation_options.simulation_type == SimulationType.Both:
             xml_params[''] = parameters['camera']
 
         # Convert all of the parameters to XML format.
         tree = ET.parse(io.StringIO(dict2xml({'helios': xml_params})))
         root = tree.getroot()
 
         # Add the `scan` tags if necessary for LiDAR generation.
-        if self._generation_options.simulation_type == SimulationType.LiDAR:
+        if self._generation_options.simulation_type == SimulationType.LiDAR or self._generation_options.simulation_type == SimulationType.Both:
             for scan_tag in scan_tags: # noqa
                 scan_tag_contents = ET.parse(
                     io.StringIO(dict2xml({'scan': scan_tag}))).getroot()

diff --git a/agml/synthetic/options.py b/agml/synthetic/options.py
@@ -35,6 +35,7 @@ class SimulationType(Enum):
     """The simulation render (RGB vs. LiDAR) that is generated."""
     RGB: str = "rgb"
     LiDAR: str = "lidar"
+    Both: str = "rgb lidar"
 
 
 NumberOrMaybeList = TypeVar('NumberOrMaybeList', Number, List[Number])

diff --git a/agml/synthetic/synthetic_data_generation/generate.cpp b/agml/synthetic/synthetic_data_generation/generate.cpp
@@ -12,7 +12,7 @@ struct SyntheticAnnotationConfig {
 public:
     int num_images;
     vector<string> annotation_type;
-    string simulation_type;
+    vector<string> simulation_type;
     vector<string> labels;
     string xml_path;
     string output_path;
@@ -53,7 +53,14 @@ void SyntheticAnnotationConfig::load_config(const char* path) {
             }
             this->annotation_type.push_back(line);
         } else if (i == 2) {
-            this->simulation_type = line;
+            string delimeter = " "; size_t pos;
+            vector<string> simulation_type;
+            while ((pos = line.find(' ')) != string::npos)
+            {
+                this -> simulation_type.push_back(line.substr(0,pos));
+                line.erase(0, pos + delimeter.length());
+            }
+            this->simulation_type.push_back(line);
         } else if (i == 3) {
             string delimeter = " "; size_t pos;
             vector<string> labels;
@@ -150,7 +157,7 @@ int main(int argc, char** argv) {
         SyntheticAnnotation annotation(&context);
 
         // Choose either the LiDAR or RGB image simulation.
-        if (config.simulation_type == "lidar") {
+        if (!config.simulation_type.empty() && config.simulation_type[1] == "lidar") {
             // Get the UUID of all the elements on the scene
             vector<uint> UUID_trunk = cgen.getTrunkUUIDs();
             vector<uint> UUID_shoot = cgen.getBranchUUIDs();
@@ -190,7 +197,9 @@ int main(int argc, char** argv) {
             string cloud_export = this_image_dir + "/" + string("point_cloud_" + to_string(i) + ".xyz");
             std::cout << "Writing LiDAR Point cloud to " << cloud_export << " " << std::endl;
             lidarcloud.exportPointCloud(cloud_export.c_str());
-        } else {
+        }
+        if (!config.simulation_type.empty() && config.simulation_type[0] == "rgb")
+        {
             if (!config.annotation_type.empty() && config.annotation_type[0] != "none") {
                 // Set the annotation type based on the configuration.
                 vector<string> va = config.annotation_type;
@@ -207,7 +216,7 @@ int main(int argc, char** argv) {
                 // Add labels according to whatever scheme we want.
                 vector<string> vl = config.labels;
                 for (int p = 0; p < cgen.getPlantCount(); p++) { // loop over vines
-                    if (config.simulation_type == "rgb") {
+                    if (!config.simulation_type.empty() && config.simulation_type[0] == "rgb") {
                         if (contains(vl, "trunks")) {
                             annotation.labelPrimitives(cgen.getTrunkUUIDs(p), "trunks");
                         }

diff --git a/agml/viz/boxes.py b/agml/viz/boxes.py
@@ -110,6 +110,8 @@ def annotate_object_detection(image,
                     "either `bbox` or `bboxes` for bounding boxes.")
     if bbox_format is not None:
         bboxes = convert_bbox_format(bboxes, bbox_format)
+    if labels is None:
+        labels = [0] * len(bboxes)
 
     # Run a few final checks in order to ensure data is formatted properly.
     image = format_image(image, mask = False)

diff --git a/docs/datasets/ghai_strawberry_fruit_detection.md b/docs/datasets/ghai_strawberry_fruit_detection.md
@@ -0,0 +1,23 @@
+
+# `ghai_strawberry_fruit_detection`
+
+## Dataset Metadata
+
+| Metadata | Value |
+| --- | --- |
+| **Classes** | Bud, Calyx, Detached Fruit, Flower, Large green, Leaf, Ripe fruit, Small Green, Stem, Unripe fruit |
+| **Machine Learning Task** | object_detection |
+| **Agricultural Task** | crop_detection |
+| **Location** | United States, North America |
+| **Sensor Modality** | RGB |
+| **Real or Synthetic** | real |
+| **Platform** | handheld/ground |
+| **Input Data Format** | JPG |
+| **Annotation Format** | coco_json |
+| **Number of Images** | 500 |
+| **Documentation** | https://github.com/AxisAg/GHAIDatasets/blob/main/datasets/strawberry.md |
+
+
+## Examples
+
+![Example Images for ghai_strawberry_fruit_detection](https://github.com/Project-AgML/AgML/blob/main/docs/sample_images/ghai_strawberry_fruit_detection_examples.png)
diff --git a/docs/sample_images/ghai_strawberry_fruit_detection_examples.png b/docs/sample_images/ghai_strawberry_fruit_detection_examples.png