Landan/deeplabv3 ade20k example (#1593)

Adds an example to train Deeplabv3+ on ADE20k using argparse only. In addition, the local ADE20k class required some unnecessary tweaks to the dataset, so these were removed.
mosaicml · Oct 14, 2022 · 9404bc0 · 9404bc0
1 parent 07428c4
commit 9404bc0
Show file tree

Hide file tree

Showing 6 changed files with 412 additions and 10 deletions.
diff --git a/composer/datasets/ade20k.py b/composer/datasets/ade20k.py
@@ -336,7 +336,7 @@ class ADE20k(Dataset):
 
     Args:
         datadir (str): the path to the ADE20k folder.
-        split (str): the dataset split to use, either 'train', 'val', or 'test'. Default: ``'train'``.
+        split (str): the dataset split to use, either 'training', 'validation', or 'test'. Default: ``'training'``.
         both_transforms (torch.nn.Module): transformations to apply to the image and target simultaneously.
             Default: ``None``.
         image_transforms (torch.nn.Module): transformations to apply to the image only. Default: ``None``.
@@ -345,7 +345,7 @@ class ADE20k(Dataset):
 
     def __init__(self,
                  datadir: str,
-                 split: str = 'train',
+                 split: str = 'training',
                  both_transforms: Optional[torch.nn.Module] = None,
                  image_transforms: Optional[torch.nn.Module] = None,
                  target_transforms: Optional[torch.nn.Module] = None):
@@ -363,8 +363,8 @@ def __init__(self,
             raise FileNotFoundError(f'datadir path does not exist: {self.datadir}')
 
         # Check split value
-        if self.split not in ['train', 'val', 'test']:
-            raise ValueError(f'split must be one of [`train`, `val`, `test`] but is: {self.split}')
+        if self.split not in ['training', 'validation', 'test']:
+            raise ValueError(f'split must be one of [`training`, `validation`, `test`] but is: {self.split}')
 
         self.image_dir = os.path.join(self.datadir, 'images', self.split)
         if not os.path.exists(self.image_dir):
@@ -376,7 +376,7 @@ def __init__(self,
         self.image_files = [f for f in self.image_files if f[:3] == 'ADE']
 
         # Remove grayscale samples
-        if self.split == 'train':
+        if self.split == 'training':
             corrupted_samples = ['00003020', '00001701', '00013508', '00008455']
             for sample in corrupted_samples:
                 sample_file = f'ADE_train_{sample}.jpg'
@@ -390,7 +390,7 @@ def __getitem__(self, index):
         image = Image.open(image_path)
 
         # Load annotation target if using either train or val splits
-        if self.split in ['train', 'val']:
+        if self.split in ['training', 'validation']:
             target_path = os.path.join(self.datadir, 'annotations', self.split, image_file.split('.')[0] + '.png')
             target = Image.open(target_path)
 
@@ -403,7 +403,7 @@ def __getitem__(self, index):
         if self.image_transforms:
             image = self.image_transforms(image)
 
-        if self.split in ['train', 'val']:
+        if self.split in ['training', 'validation']:
             return image, target  # type: ignore
         else:
             return image

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -139,7 +139,9 @@ def _get_commit_sha() -> str:
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'examples/imagenet/README.md']
+exclude_patterns = [
+    '_build', 'Thumbs.db', '.DS_Store', 'examples/imagenet/README.md', 'examples/segmentation/README.md'
+]
 
 napoleon_custom_sections = [('Returns', 'params_style')]
 

diff --git a/examples/imagenet/README.md b/examples/imagenet/README.md
@@ -3,7 +3,7 @@
 These examples illustrate how to train models on ImageNet-1k using Composer.
 
 Prerequisites:
-- Install Compose: `pip install mosaicml`
+- Install Composer: `pip install mosaicml`
 - ImageNet already downloaded on the system used for training
 
 ## ResNet

diff --git a/examples/imagenet/train_resnet_imagenet1k.py b/examples/imagenet/train_resnet_imagenet1k.py
@@ -100,7 +100,6 @@
                     type=Time.from_timestring,
                     default='1ep')
 
-# Local storage checkpointing
 args = parser.parse_args()
 
 

diff --git a/examples/segmentation/README.md b/examples/segmentation/README.md
@@ -0,0 +1,41 @@
+# Semantic Segmentation Example
+
+This example illustrates how to train a semantic segmentation model in Composer.
+
+## Installation
+
+First, install [Composer](https://github.com/mosaicml/composer) with `pip install mosaicml`. Additionally, our models are pulled from [MMSegmentation](https://github.com/open-mmlab/mmsegmentation), so follow the [MMCV install instructions](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) (which depend on your CUDA and PyTorch versions), then install MMSegmentation with `pip install mmsegmentation`.
+
+Alternatively, we have publicly available Docker images to reproduce our results. Use `mosaicml/pytorch_vision:1.12.1_cu116-python3.9-ubuntu20.04` for running on GPUs or `mosaicml/pytorch_vision:1.12.1_cpu-python3.9-ubuntu20.04` for running on CPUs.
+
+## DeepLabv3+ on ADE20k
+
+The `train_deeplabv3_ade20k.py` script trains a DeepLabv3+ model with either a ResNet-50 or ResNet-101 backbone on the ADE20k semantic segmentation benchmark. To download ADE20k locally (~1 GB), specify the `--download` option when running the script; the dataset will then be downloaded to the data directory path, i.e. the first argument.
+
+We designed the script to be hackable, so try our recipes on your own models and datasets!
+### Example configurations
+
+<!--pytest.mark.skip-->
+
+```bash
+# Downloads ADE20k and does single GPU/CPU training depending on torch.cuda.is_available():
+python train_deeplabv3_ade20k.py /path/to/ade20k --download
+
+# Log experiments to Weights and Biases:
+python train_deeplabv3_ade20k.py /path/to/ade20k --wandb_logger --wandb_entity my_username --wandb_project my_project --run_name my_run_name
+
+# Single/Multi GPU training (infers the number of GPUs available):
+composer train_deeplabv3_ade20k.py /path/to/ade20k
+
+# Manually specify number of GPUs to use:
+composer -n $N_GPUS train_deeplabv3_ade20k.py /path/to/ade20k
+
+# Mild DeepLabv3+ recipe for fastest training to 45.6 mIoU:
+composer train_deeplabv3_ade20k.py /path/to/ade20k/ --recipe_name mild --max_duration 25ep
+
+# Medium DeepLabv3+ recipe for highest mIoU (49.15) with similar training time as baseline:
+composer train_deeplabv3_ade20k.py /path/to/ade20k/ --recipe_name medium --max_duration 90ep
+
+# Hot DeepLabv3+ recipe for highest mIoU (49.83) with a long training schedule:
+composer train_deeplabv3_ade20k.py /path/to/ade20k --recipe_name hot --max_duration 256ep
+```