diff --git a/CHANGELOG.md b/CHANGELOG.md
index c650373fab..92ca4b6736 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,19 +5,56 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
-## [0.1.0] - 02/02/2021
+## [Unreleased] - 2021-MM-DD
 
 ### Added
 
-- Added flash_notebook examples ([#9](https://github.com/PyTorchLightning/lightning-flash/pull/9))
-- Added `strategy` to `trainer.finetune` with `NoFreeze`, `Freeze`, `FreezeUnfreeze`, `UnfreezeMilestones` Callbacks([#39](https://github.com/PyTorchLightning/lightning-flash/pull/39))
-- Added `SummarizationData`, `SummarizationTask` and `TranslationData`, `TranslationTask` ([#37](https://github.com/PyTorchLightning/lightning-flash/pull/37))
-- Added `ImageEmbedder` ([#36](https://github.com/PyTorchLightning/lightning-flash/pull/36))
+
+
+### Changed
+
+
+
+### Fixed
+
+
+
+### Removed
+
+
+
+
+## [0.2.0] - 2021-02-12
+
+### Added
+
+- Added `ObjectDetector` Task ([#56](https://github.com/PyTorchLightning/lightning-flash/pull/56))
+- Added TabNet for tabular classification ([#101](https://github.com/PyTorchLightning/lightning-flash/pull/101))
+- Added support for more backbones (mobilenet, vgg, densenet, resnext) ([#45](https://github.com/PyTorchLightning/lightning-flash/pull/45))
+- Added backbones for image embedding model ([#63](https://github.com/PyTorchLightning/lightning-flash/pull/63))
+- Added SwAV and SimCLR models to `ImageClassifier` + backbone reorg ([#68](https://github.com/PyTorchLightning/lightning-flash/pull/68))
 
 ### Changed
 
+- Applied transform in `FilePathDataset` ([#97](https://github.com/PyTorchLightning/lightning-flash/pull/97))
+- Moved classification integration from vision root to folder ([#86](https://github.com/PyTorchLightning/lightning-flash/pull/86))
 
 ### Fixed
 
+- Unfreeze default number of workers in datamodule ([#57](https://github.com/PyTorchLightning/lightning-flash/pull/57))
+- Fixed wrong label in `FilePathDataset` ([#94](https://github.com/PyTorchLightning/lightning-flash/pull/94))
 
 ### Removed
+
+- Removed `densenet161` duplicate in `DENSENET_MODELS` ([#76](https://github.com/PyTorchLightning/lightning-flash/pull/76))
+- Removed redundant `num_features` arg from Classification model ([#88](https://github.com/PyTorchLightning/lightning-flash/pull/88))
+
+
+## [0.1.0] - 2021-02-02
+
+### Added
+
+- Added flash_notebook examples ([#9](https://github.com/PyTorchLightning/lightning-flash/pull/9))
+- Added `strategy` to `trainer.finetune` with `NoFreeze`, `Freeze`, `FreezeUnfreeze`, `UnfreezeMilestones` Callbacks ([#39](https://github.com/PyTorchLightning/lightning-flash/pull/39))
+- Added `SummarizationData`, `SummarizationTask` and `TranslationData`, `TranslationTask` ([#37](https://github.com/PyTorchLightning/lightning-flash/pull/37))
+- Added `ImageEmbedder` ([#36](https://github.com/PyTorchLightning/lightning-flash/pull/36))
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 181e779497..b895278749 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -44,6 +44,7 @@
     'sphinx.ext.intersphinx',
     # 'sphinx.ext.todo',
     # 'sphinx.ext.coverage',
+    'sphinx.ext.viewcode',
     'sphinx.ext.autosummary',
     'sphinx.ext.napoleon',
     'sphinx.ext.imgmath',
diff --git a/docs/source/index.rst b/docs/source/index.rst
index d9eaa37540..e84c2aee51 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -26,6 +26,7 @@ Lightning Flash
    reference/text_classification
    reference/tabular_classification
    reference/translation
+   reference/object_detection
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index 5216730768..f15e7b4199 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -29,7 +29,7 @@ Use the :class:`~flash.vision.ImageEmbedder` pretrained model for inference on a
     embedder = ImageEmbedder(backbone="resnet18")
 
     # 2. Perform inference on an image file
-    embeddings = model.predict("path/to/image.png")
+    embeddings = embedder.predict("path/to/image.png")
     print(embeddings)
 
 Or on a random image tensor
@@ -91,13 +91,12 @@ By default, we use the encoder from `SwAV
 
 .. note::
 
-    When changing the backbone, make sure you pass in the same backbone to the Task and the Data object!
+    When changing the backbone, make sure you pass in the same backbone to the Task!
 
 .. code-block:: python
 
     # 1. organize the data
     data = ImageClassificationData.from_folders(
-        backbone="resnet34",
         train_folder="data/hymenoptera_data/train/",
         valid_folder="data/hymenoptera_data/val/"
     )
diff --git a/docs/source/reference/object_detection.rst b/docs/source/reference/object_detection.rst
new file mode 100644
index 0000000000..6b9ae98d06
--- /dev/null
+++ b/docs/source/reference/object_detection.rst
@@ -0,0 +1,132 @@
+
+.. _object_detection:
+
+################
+Object Detection
+################
+
+********
+The task
+********
+
+The object detection task identifies instances of objects of a certain class within an image.
+
+------
+
+*********
+Inference
+*********
+
+The :class:`~flash.vision.ObjectDetector` is already pre-trained on `COCO train2017 `_, a dataset with `91 classes `_ (123,287 images, 886,284 instances). Annotations and categories follow the COCO schema:
+
+.. code-block::
+
+    annotation{
+        "id": int,
+        "image_id": int,
+        "category_id": int,
+        "segmentation": RLE or [polygon],
+        "area": float,
+        "bbox": [x,y,width,height],
+        "iscrowd": 0 or 1,
+    }
+
+    categories[{
+        "id": int,
+        "name": str,
+        "supercategory": str,
+    }]
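+
+As a minimal sketch, such an annotation file can be inspected with ``pycocotools`` (the path below is hypothetical; the keys match the schema above):
+
+.. code-block:: python
+
+    from pycocotools.coco import COCO
+
+    # load a COCO-format annotation file
+    coco = COCO("annotations/instances_train2017.json")
+
+    # each category dict carries "id", "name" and "supercategory"
+    categories = coco.loadCats(coco.getCatIds())
+    print([c["name"] for c in categories][:5])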
+
+Use the :class:`~flash.vision.ObjectDetector` pretrained model for inference on any image tensor or image path using :func:`~flash.vision.ObjectDetector.predict`:
+
+.. code-block:: python
+
+    from flash.vision import ObjectDetector
+
+    # 1. Load the model
+    detector = ObjectDetector()
+
+    # 2. Perform inference on an image file
+    predictions = detector.predict("path/to/image.png")
+    print(predictions)
+
+Or on a random image tensor
+
+.. code-block:: python
+
+    # Perform inference on a random image tensor
+    import torch
+    images = torch.rand(32, 3, 1080, 1920)
+    predictions = detector.predict(images)
+    print(predictions)
+
+For more advanced inference options, see :ref:`predictions`.
+
+------
+
+**********
+Finetuning
+**********
+
+To tailor the object detector to your dataset, your data needs to be in `COCO Format `_; you can then finetune the model.
+
+.. code-block:: python
+
+    import flash
+    from flash.core.data import download_data
+    from flash.vision import ObjectDetectionData, ObjectDetector
+
+    # 1. Download the data
+    # Dataset Credit: https://www.kaggle.com/ultralytics/coco128
+    download_data("https://github.com/zhiqwang/yolov5-rt-stack/releases/download/v0.3.0/coco128.zip", "data/")
+
+    # 2. Load the Data
+    datamodule = ObjectDetectionData.from_coco(
+        train_folder="data/coco128/images/train2017/",
+        train_ann_file="data/coco128/annotations/instances_train2017.json",
+        batch_size=2
+    )
+
+    # 3. Build the model
+    model = ObjectDetector(num_classes=datamodule.num_classes)
+
+    # 4. Create the trainer. Run thrice on data
+    trainer = flash.Trainer(max_epochs=3)
+
+    # 5. Finetune the model
+    trainer.finetune(model, datamodule)
+
+    # 6. Save it!
+    trainer.save_checkpoint("object_detection_model.pt")
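+
+To reload the saved checkpoint for inference, a minimal sketch (``load_from_checkpoint`` is inherited from Lightning's ``LightningModule``):
+
+.. code-block:: python
+
+    # 7. Load the checkpoint back and predict
+    model = ObjectDetector.load_from_checkpoint("object_detection_model.pt")
+    predictions = model.predict("path/to/image.png")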
+
+------
+
+*****
+Model
+*****
+
+By default, we use the `Faster R-CNN `_ model with a ResNet-50 FPN backbone. The inputs can be images of different sizes. The model behaves differently in training and evaluation: during training it expects both the input tensors and the targets, while during evaluation it expects only the input tensors and returns one prediction per image, given as boxes, labels and scores.
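+
+As an illustration of that contract, here is a minimal sketch using torchvision directly (assuming the default ``fasterrcnn_resnet50_fpn``; the task wraps this for you):
+
+.. code-block:: python
+
+    import torch
+    from torchvision.models.detection import fasterrcnn_resnet50_fpn
+
+    model = fasterrcnn_resnet50_fpn(pretrained=True)
+
+    # evaluation: inputs only, returns one dict of boxes/labels/scores per image
+    model.eval()
+    images = [torch.rand(3, 720, 1280), torch.rand(3, 1080, 1920)]  # sizes may differ
+    predictions = model(images)
+
+    # training: inputs and targets, returns a dict of losses
+    model.train()
+    targets = [{"boxes": torch.tensor([[10.0, 20.0, 100.0, 200.0]]), "labels": torch.tensor([1])} for _ in images]
+    losses = model(images, targets)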
+
+------
+
+*************
+API reference
+*************
+
+.. _object_detector:
+
+ObjectDetector
+--------------
+
+.. autoclass:: flash.vision.ObjectDetector
+    :members:
+    :exclude-members: forward
+
+.. _object_detection_data:
+
+ObjectDetectionData
+-------------------
+
+.. autoclass:: flash.vision.ObjectDetectionData
+
+.. automethod:: flash.vision.ObjectDetectionData.from_coco
diff --git a/flash/__init__.py b/flash/__init__.py
index 0fab82aec9..76589297c7 100644
--- a/flash/__init__.py
+++ b/flash/__init__.py
@@ -14,7 +14,7 @@ """Root package info."""
 
 import os
 
-__version__ = "0.2.0rc1"
+__version__ = "0.2.1-dev"
 __author__ = "PyTorchLightning et al."
 __author_email__ = "name@pytorchlightning.ai"
 __license__ = 'Apache-2.0'
@@ -56,5 +56,13 @@
 from flash.core.trainer import Trainer
 
 __all__ = [
-    "Task", "ClassificationTask", "DataModule", "vision", "text", "tabular", "data", "utils", "download_data"
+    "Task",
+    "ClassificationTask",
+    "DataModule",
+    "vision",
+    "text",
+    "tabular",
+    "data",
+    "utils",
+    "download_data",
 ]
diff --git a/flash/tabular/classification/model.py b/flash/tabular/classification/model.py
index 989ac160b8..166a35a1d5 100644
--- a/flash/tabular/classification/model.py
+++ b/flash/tabular/classification/model.py
@@ -11,12 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, List, Optional, Tuple, Type
 
 import torch
 from pytorch_lightning.metrics import Metric
 from pytorch_tabnet.tab_network import TabNet
-from torch import nn
 from torch.nn import functional as F
 
 from flash.core.classification import ClassificationTask
diff --git a/flash/vision/detection/model.py b/flash/vision/detection/model.py
index 5c19b341e7..dead995515 100644
--- a/flash/vision/detection/model.py
+++ b/flash/vision/detection/model.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, Mapping, Optional, Sequence, Type, Union
+from typing import Any, Callable, Mapping, Sequence, Type, Union
 
 import torch
 import torchvision
@@ -20,7 +20,6 @@
 from torchvision.ops import box_iou
 
 from flash.core import Task
-from flash.core.data import DataPipeline
 from flash.vision.detection.data import ObjectDetectionDataPipeline
 from flash.vision.detection.finetuning import ObjectDetectionFineTuning
 
@@ -29,8 +28,7 @@ def _evaluate_iou(target, pred):
     """
-    Evaluate intersection over union (IOU) for target from dataset and output prediction
-    from model
+    Evaluate intersection over union (IOU) for target from dataset and output prediction from model
     """
     if pred["boxes"].shape[0] == 0:
         # no box detected, 0 IOU
@@ -42,17 +40,16 @@ class ObjectDetector(Task):
     """Image detection task
 
     Ref: Lightning Bolts https://github.com/PyTorchLightning/pytorch-lightning-bolts
+
     Args:
         num_classes: the number of classes for detection, including background
         model: either a string of :attr`_models` or a custom nn.Module.
             Defaults to 'fasterrcnn_resnet50_fpn'.
         loss: the function(s) to update the model with. Has no effect for torchvision detection models.
         metrics: The provided metrics. All metrics here will be logged to progress bar and the respective logger.
-            Defaults to None.
         optimizer: The optimizer to use for training. Can either be the actual class or the class name.
-            Defaults to Adam.
         pretrained: Whether the model from torchvision should be loaded with it's pretrained weights.
-            Has no effect for custom models. Defaults to True.
+            Has no effect for custom models.
         learning_rate: The learning rate to use for training
 
     """
@@ -89,8 +86,7 @@ def __init__(
         )
 
     def training_step(self, batch, batch_idx) -> Any:
-        """The training step.
-        Overrides Task.training_step
+        """The training step. Overrides ``Task.training_step``
         """
         images, targets = batch
         targets = [{k: v for k, v in t.items()} for t in targets]
diff --git a/flash/vision/embedding/image_embedder_model.py b/flash/vision/embedding/image_embedder_model.py
index 28036f0d02..e388cffd96 100644
--- a/flash/vision/embedding/image_embedder_model.py
+++ b/flash/vision/embedding/image_embedder_model.py
@@ -14,7 +14,6 @@
 from typing import Any, Callable, Mapping, Optional, Sequence, Type, Union
 
 import torch
-import torchvision
 from pytorch_lightning.metrics import Accuracy
 from pytorch_lightning.utilities.distributed import rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
diff --git a/tests/vision/classification/test_model.py b/tests/vision/classification/test_model.py
index c419a22a96..6570df4c2e 100644
--- a/tests/vision/classification/test_model.py
+++ b/tests/vision/classification/test_model.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 import pytest
 import torch
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 from flash import Trainer
 from flash.vision import ImageClassifier
diff --git a/tests/vision/detection/test_data.py b/tests/vision/detection/test_data.py
index bb40e56c16..bf4ba2a170 100644
--- a/tests/vision/detection/test_data.py
+++ b/tests/vision/detection/test_data.py
@@ -3,16 +3,12 @@
 from pathlib import Path
 
 import pytest
-import torch
 from PIL import Image
 from pytorch_lightning.utilities import _module_available
-from torchvision import transforms as T
 
 from flash.vision.detection.data import ObjectDetectionData
 
 _COCO_AVAILABLE = _module_available("pycocotools")
-if _COCO_AVAILABLE:
-    from pycocotools.coco import COCO
 
 
 def _create_dummy_coco_json(dummy_json_path):
diff --git a/tests/vision/detection/test_data_model_integration.py b/tests/vision/detection/test_data_model_integration.py
index de3c56fffa..ac814c7616 100644
--- a/tests/vision/detection/test_data_model_integration.py
+++ b/tests/vision/detection/test_data_model_integration.py
@@ -23,8 +23,6 @@
 from tests.vision.detection.test_data import _create_synth_coco_dataset
 
 _COCO_AVAILABLE = _module_available("pycocotools")
-if _COCO_AVAILABLE:
-    from pycocotools.coco import COCO
 
 
 @pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing")