Skip to content

Commit

Permalink
[CodeCamp2023-xxx] Add new configs of RTMDet (#10840)
Browse files Browse the repository at this point in the history
  • Loading branch information
RangeKing authored Aug 28, 2023
1 parent 8a5b70f commit 6f5971e
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 6 deletions.
2 changes: 1 addition & 1 deletion demo/inference_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
" </div>\n",
" <div>&nbsp;</div>\n",
"\n",
"<a href=\"https://colab.research.google.com/github/open-mmlab/mmdetection/blob/dev-3.x/demo/MMDet_Tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"<a href=\"https://colab.research.google.com/github/open-mmlab/mmdetection/blob/dev-3.x/demo/inference_demo.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"\n",
"[![PyPI](https://img.shields.io/pypi/v/mmdet)](https://pypi.org/project/mmdet)\n",
"[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmdetection.readthedocs.io/en/latest/)\n",
Expand Down
12 changes: 7 additions & 5 deletions mmdet/apis/det_inferencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import copy
import os.path as osp
import warnings
from typing import Dict, Iterable, List, Optional, Sequence, Union
from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union

import mmcv
import mmengine
import numpy as np
import torch.nn as nn
from mmcv.transforms import LoadImageFromFile
from mmengine.dataset import Compose
from mmengine.fileio import (get_file_backend, isdir, join_path,
list_dir_or_file)
Expand Down Expand Up @@ -165,21 +166,22 @@ def _init_pipeline(self, cfg: ConfigType) -> Compose:
meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
if meta_key != 'img_id')

load_img_idx = self._get_transform_idx(pipeline_cfg,
'LoadImageFromFile')
load_img_idx = self._get_transform_idx(
pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
if load_img_idx == -1:
raise ValueError(
'LoadImageFromFile is not found in the test pipeline')
pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
return Compose(pipeline_cfg)

def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
def _get_transform_idx(self, pipeline_cfg: ConfigType,
name: Union[str, Tuple[str, type]]) -> int:
"""Returns the index of the transform in a pipeline.
If the transform is not found, returns -1.
"""
for i, transform in enumerate(pipeline_cfg):
if transform['type'] == name:
if transform['type'] in name:
return i
return -1

Expand Down
134 changes: 134 additions & 0 deletions mmdet/configs/rtmdet/rtmdet_ins_l_8xb32_300e_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
from .rtmdet_l_8xb32_300e_coco import *

from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook
from torch.nn.modules.activation import SiLU

from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.dense_heads.rtmdet_ins_head import RTMDetInsSepBNHead
from mmdet.models.layers.ema import ExpMomentumEMA
from mmdet.models.losses.dice_loss import DiceLoss
from mmdet.models.losses.gfocal_loss import QualityFocalLoss
from mmdet.models.losses.iou_loss import GIoULoss
from mmdet.models.task_modules.coders.distance_point_bbox_coder import \
DistancePointBBoxCoder
from mmdet.models.task_modules.prior_generators.point_generator import \
MlvlPointGenerator

# Override the `model` config pulled in from `.rtmdet_l_8xb32_300e_coco`:
# swap the bbox head for `RTMDetInsSepBNHead` and set test-time options.
model.merge(
dict(
bbox_head=dict(
# `_delete_=True` is the mmengine config convention for discarding the
# inherited `bbox_head` keys rather than merging into them.
_delete_=True,
type=RTMDetInsSepBNHead,
num_classes=80,
in_channels=256,
stacked_convs=2,
share_conv=True,
pred_kernel_size=1,
feat_channels=256,
act_cfg=dict(type=SiLU, inplace=True),
# NOTE(review): 'SyncBN' is a string while the other `type` values in this
# file are imported classes — presumably resolved through the registry;
# confirm.
norm_cfg=dict(type='SyncBN', requires_grad=True),
anchor_generator=dict(
type=MlvlPointGenerator, offset=0, strides=[8, 16, 32]),
bbox_coder=dict(type=DistancePointBBoxCoder),
loss_cls=dict(
type=QualityFocalLoss,
use_sigmoid=True,
beta=2.0,
loss_weight=1.0),
loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
# Mask loss for the instance-segmentation branch.
loss_mask=dict(
type=DiceLoss, loss_weight=2.0, eps=5e-6, reduction='mean')),
# Merged into the inherited `model.test_cfg`; `mask_thr_binary` is the only
# mask-specific key set here.
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr_binary=0.5),
))

# First-stage training pipeline. It loads masks as well as boxes
# (`with_mask=True`, `poly2mask=False`) and includes the CachedMosaic and
# CachedMixUp augmentations that `train_pipeline_stage2` leaves out.
# `backend_args` comes from the base config imported under `read_base()`.
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]

# Point the inherited train dataloader at the pipeline defined above and
# enable `pin_memory`.
train_dataloader.update(
dict(pin_memory=True, dataset=dict(pipeline=train_pipeline)))

# Second-stage pipeline handed to `PipelineSwitchHook` in `custom_hooks`.
# Compared with `train_pipeline` it drops CachedMosaic and CachedMixUp,
# resizes around (640, 640) instead of (1280, 1280), and runs
# FilterAnnotations right after the crop.
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.1, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]
# Hooks for this config: an EMA hook using `ExpMomentumEMA`, and a hook that
# switches the training pipeline to `train_pipeline_stage2` at epoch 280.
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]

# Evaluate both box and mask metrics; the test evaluator reuses the same
# (already updated) validation evaluator object.
val_evaluator.update(dict(metric=['bbox', 'segm']))
test_evaluator = val_evaluator
17 changes: 17 additions & 0 deletions mmdet/configs/rtmdet/rtmdet_ins_m_8xb32_300e_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *

# Medium variant: start from the RTMDet-Ins-L config imported above and change
# only the backbone scaling factors and the neck / head channel widths.
# NOTE(review): this relies on `model.update` merging nested dicts rather than
# replacing `backbone` / `neck` / `bbox_head` wholesale — confirm against
# mmengine's ConfigDict.update.
model.update(
dict(
backbone=dict(deepen_factor=0.67, widen_factor=0.75),
neck=dict(
in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
bbox_head=dict(in_channels=192, feat_channels=192)))
101 changes: 101 additions & 0 deletions mmdet/configs/rtmdet/rtmdet_ins_s_8xb32_300e_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
from .rtmdet_ins_l_8xb32_300e_coco import *

from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize
from mmengine.hooks.ema_hook import EMAHook

from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)
from mmdet.engine.hooks.pipeline_switch_hook import PipelineSwitchHook
from mmdet.models.layers.ema import ExpMomentumEMA

# URL of the pretrained weights used to initialise the backbone below.
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
# Small variant: start from the RTMDet-Ins-L config imported above, change the
# backbone scaling factors and channel widths, and load `checkpoint` into the
# backbone via `init_cfg`.
# NOTE(review): this relies on `model.update` merging nested dicts rather than
# replacing them wholesale — confirm against mmengine's ConfigDict.update.
model.update(
dict(
backbone=dict(
deepen_factor=0.33,
widen_factor=0.5,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1),
bbox_head=dict(in_channels=128, feat_channels=128)))

# First-stage training pipeline for the small variant. It redefines the
# `train_pipeline` name imported from the L config; the only value that
# differs from that file's list is RandomResize `ratio_range`
# ((0.5, 2.0) here versus (0.1, 2.0) there).
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(type=CachedMosaic, img_scale=(640, 640), pad_val=114.0),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=20,
pad_val=(114, 114, 114)),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]

# Second-stage pipeline for the small variant, referenced by the
# `PipelineSwitchHook` entry in `custom_hooks` below. It has no CachedMosaic
# or CachedMixUp step and resizes around (640, 640).
train_pipeline_stage2 = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=RandomResize,
scale=(640, 640),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
dict(
type=RandomCrop,
crop_size=(640, 640),
recompute_bbox=True,
allow_negative_crop=True),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(type=PackDetInputs)
]

# Make the inherited train dataloader use the pipeline defined in this file.
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))

# Redefine the hooks so that `PipelineSwitchHook` points at this file's
# `train_pipeline_stage2`; the EMA settings and the switch epoch carry the
# same values as in the L config.
custom_hooks = [
dict(
type=EMAHook,
ema_type=ExpMomentumEMA,
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type=PipelineSwitchHook,
switch_epoch=280,
switch_pipeline=train_pipeline_stage2)
]
67 changes: 67 additions & 0 deletions mmdet/configs/rtmdet/rtmdet_ins_tiny_8xb32_300e_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
# mmcv >= 2.0.1
# mmengine >= 0.8.0

from mmengine.config import read_base

with read_base():
from .rtmdet_ins_s_8xb32_300e_coco import *

from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize

from mmdet.datasets.transforms.formatting import PackDetInputs
from mmdet.datasets.transforms.loading import (FilterAnnotations,
LoadAnnotations)
from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
Pad, RandomCrop, RandomFlip,
Resize, YOLOXHSVRandomAug)

# URL of the pretrained weights used to initialise the backbone below.
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa

# Tiny variant: start from the RTMDet-Ins-S config imported above, shrink the
# backbone scaling factors and channel widths, and load `checkpoint` into the
# backbone via `init_cfg`.
# NOTE(review): this relies on `model.update` merging nested dicts rather than
# replacing them wholesale — confirm against mmengine's ConfigDict.update.
model.update(
dict(
backbone=dict(
deepen_factor=0.167,
widen_factor=0.375,
init_cfg=dict(
type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
neck=dict(
in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1),
bbox_head=dict(in_channels=96, feat_channels=96)))

# First-stage training pipeline for the tiny variant. Relative to the S
# config it sets explicit cache options on CachedMosaic, and uses a smaller
# cache plus `prob=0.5` on CachedMixUp. No `train_pipeline_stage2` or
# `custom_hooks` is defined in this file, so those come from the imported
# S config.
train_pipeline = [
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
dict(
type=CachedMosaic,
img_scale=(640, 640),
pad_val=114.0,
max_cached_images=20,
random_pop=False),
dict(
type=RandomResize,
scale=(1280, 1280),
ratio_range=(0.5, 2.0),
resize_type=Resize,
keep_ratio=True),
# NOTE(review): unlike the L and S configs, this RandomCrop does not pass
# `recompute_bbox=True` / `allow_negative_crop=True` — confirm the omission
# is intentional.
dict(type=RandomCrop, crop_size=(640, 640)),
dict(type=YOLOXHSVRandomAug),
dict(type=RandomFlip, prob=0.5),
dict(type=Pad, size=(640, 640), pad_val=dict(img=(114, 114, 114))),
dict(
type=CachedMixUp,
img_scale=(640, 640),
ratio_range=(1.0, 1.0),
max_cached_images=10,
random_pop=False,
pad_val=(114, 114, 114),
prob=0.5),
dict(type=FilterAnnotations, min_gt_bbox_wh=(1, 1)),
dict(type=PackDetInputs)
]

# Make the inherited train dataloader use the pipeline defined in this file.
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
Loading

0 comments on commit 6f5971e

Please sign in to comment.