diff --git a/README.md b/README.md
index 8a014a1..fde6093 100644
--- a/README.md
+++ b/README.md
@@ -40,64 +40,53 @@ cd YOLO-NAS
```
conda create -n yolo-nas python=3.9 -y
conda activate yolo-nas
-conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
+pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
+pip install pytorch-quantization==2.1.2 --extra-index-url https://pypi.ngc.nvidia.com
pip install super-gradients==3.1.3
+pip install urllib3==1.25.9
```
#### OR
```
pip3 install -r requirements.txt
```
### 🎒 Prepare Dataset
-Your custom dataset should be in **COCO** data format.
+Your custom dataset should be in **COCO JSON** data format.
+To convert **YOLO (.txt) / PASCAL VOC (.xml)** annotations to **COCO JSON** format,
+use the JSON converter at https://github.com/naseemap47/autoAnnoter#10-yolo_to_jsonpy
**COCO Data Format**:
```
├── Dataset
+| ├── annotations
+│ │ ├── train.json
+│ │ ├── valid.json
+│ │ ├── test.json
│ ├── train
-│ │ ├── images
-│ │ │ ├── 1.jpg
-│ │ │ ├── abc.png
-| | | ├── ....
-│ │ ├── labels
-│ │ │ ├── 1.txt
-│ │ │ ├── abc.txt
-| | | ├── ....
+│ │ ├── 1.jpg
+│ │ ├── abc.png
+| | ├── ....
│ ├── val
-│ │ ├── images
-│ │ │ ├── 2.jpg
-│ │ │ ├── fram.png
-| | | ├── ....
-│ │ ├── labels
-│ │ │ ├── 2.txt
-│ │ │ ├── fram.txt
-| | | ├── ....
+│ │ ├── 2.jpg
+│ │ ├── fram.png
+| | ├── ....
│ ├── test
-│ │ ├── images
-│ │ │ ├── img23.jpeg
-│ │ │ ├── 50.jpg
-| | | ├── ....
-│ │ ├── labels
-│ │ │ ├── img23.txt
-│ │ │ ├── 50.txt
-| | | ├── ....
+│ │ ├── img23.jpeg
+│ │ ├── 50.jpg
+| | ├── ....
```
To training custom model using your custom data.
You need to create [data.yaml](https://github.com/naseemap47/YOLO-NAS/blob/master/data.yaml)
Example:
```
-names:
-- class1
-- class2
-- class3
-Dir: '/home/user/my_data' # path to your custom data
+Dir: 'Data'
images:
- test: test/images
- train: train/images
- val: valid/images
+ test: test
+ train: train
+ val: valid
labels:
- test: test/labels
- train: train/labels
- val: valid/labels
+ test: annotations/test.json
+ train: annotations/train.json
+ val: annotations/valid.json
```
## 🤖 Train
diff --git a/data.yaml b/data.yaml
index 836a396..8fee902 100644
--- a/data.yaml
+++ b/data.yaml
@@ -1,13 +1,9 @@
-names:
-- Paper
-- Rock
-- Scissors
Dir: 'Data'
images:
- test: test/images
- train: train/images
- val: valid/images
+ test: test
+ train: train
+ val: valid
labels:
- test: test/labels
- train: train/labels
- val: valid/labels
+ test: annotations/test.json
+ train: annotations/train.json
+ val: annotations/valid.json
diff --git a/train.py b/train.py
index 27f43b0..aa7d967 100644
--- a/train.py
+++ b/train.py
@@ -1,16 +1,23 @@
-from super_gradients.training.dataloaders.dataloaders import coco_detection_yolo_format_train, coco_detection_yolo_format_val
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback
+from super_gradients.training.datasets.detection_datasets.coco_format_detection import COCOFormatDetectionDataset
+from super_gradients.training.transforms.transforms import DetectionMosaic, DetectionRandomAffine, DetectionHSV, \
+ DetectionHorizontalFlip, DetectionPaddedRescale, DetectionStandardize, DetectionTargetsFormatTransform
+from super_gradients.training.datasets.datasets_utils import worker_init_reset_seed
+from super_gradients.training.utils.detection_utils import CrowdDetectionCollateFN
from super_gradients.training.metrics import DetectionMetrics_050
from super_gradients.training.losses import PPYoloELoss
+from super_gradients.training import dataloaders
from super_gradients.training import Trainer
from super_gradients.training import models
import argparse
import torch
import time
import yaml
+import json
import os
+
if __name__ == '__main__':
ap = argparse.ArgumentParser()
@@ -52,15 +59,15 @@
help="LR Mode")
ap.add_argument("--cosine_final_lr_ratio", type=float, default=0.1,
help="Cosine Final LR Ratio")
- ap.add_argument("--optimizer", type=str, default='Adam',
+ ap.add_argument("--optimizer", type=str, default='AdamW',
help="Optimizer")
ap.add_argument("--weight_decay", type=float, default=0.0001,
help="Weight Decay")
args = vars(ap.parse_args())
+ # Start Time
s_time = time.time()
-
if args['name'] is None:
name = 'train'
else:
@@ -77,8 +84,8 @@
break
else:
n += 1
-
print(f"[INFO] Checkpoints saved in \033[1m{os.path.join('runs', name)}\033[0m")
+
# Training on GPU or CPU
if args['cpu']:
print('[INFO] Training on \033[1mCPU\033[0m')
@@ -90,67 +97,100 @@
print(f'[INFO] Training on GPU: \033[1m{torch.cuda.get_device_name()}\033[0m')
trainer = Trainer(experiment_name=name, ckpt_root_dir='runs')
+ # Load Path Params
yaml_params = yaml.safe_load(open(args['data'], 'r'))
+ with open(os.path.join(yaml_params['Dir'], yaml_params['labels']['train'])) as f:
+ no_class = len(json.load(f)['categories'])
+ f.close()
+ print(f"\033[1m[INFO] Number of Classes: {no_class}\033[0m")
+
+    # Train Dataset
+ trainset = COCOFormatDetectionDataset(data_dir=yaml_params['Dir'],
+ images_dir=yaml_params['images']['train'],
+ json_annotation_file=yaml_params['labels']['train'],
+ input_dim=(args['size'], args['size']),
+ ignore_empty_annotations=False,
+ transforms=[
+ DetectionMosaic(prob=1., input_dim=(args['size'], args['size'])),
+ DetectionRandomAffine(degrees=0., scales=(0.5, 1.5), shear=0.,
+ target_size=(args['size'], args['size']),
+ filter_box_candidates=False, border_value=128),
+ DetectionHSV(prob=1., hgain=5, vgain=30, sgain=30),
+ DetectionHorizontalFlip(prob=0.5),
+ DetectionPaddedRescale(input_dim=(args['size'], args['size']), max_targets=300),
+ DetectionStandardize(max_value=255),
+ DetectionTargetsFormatTransform(max_targets=300, input_dim=(args['size'], args['size']),
+ output_format="LABEL_CXCYWH")
+ ])
+ train_loader = dataloaders.get(dataset=trainset, dataloader_params={
+ "shuffle": True,
+ "batch_size": args['batch'],
+ "drop_last": False,
+ "pin_memory": True,
+ "collate_fn": CrowdDetectionCollateFN(),
+ "worker_init_fn": worker_init_reset_seed,
+ "min_samples": 512
+ })
+ # Valid Data
+ valset = COCOFormatDetectionDataset(data_dir=yaml_params['Dir'],
+ images_dir=yaml_params['images']['val'],
+ json_annotation_file=yaml_params['labels']['val'],
+ input_dim=(args['size'], args['size']),
+ ignore_empty_annotations=False,
+ transforms=[
+ DetectionPaddedRescale(input_dim=(args['size'], args['size']), max_targets=300),
+ DetectionStandardize(max_value=255),
+ DetectionTargetsFormatTransform(max_targets=300, input_dim=(args['size'], args['size']),
+ output_format="LABEL_CXCYWH")
+ ])
+ valid_loader = dataloaders.get(dataset=valset, dataloader_params={
+ "shuffle": False,
+ "batch_size": int(args['batch']*2),
+ "num_workers": args['worker'],
+ "drop_last": False,
+ "pin_memory": True,
+ "collate_fn": CrowdDetectionCollateFN(),
+ "worker_init_fn": worker_init_reset_seed
+ })
- train_data = coco_detection_yolo_format_train(
- dataset_params={
- 'data_dir': yaml_params['Dir'],
- 'images_dir': yaml_params['images']['train'],
- 'labels_dir': yaml_params['labels']['train'],
- 'classes': yaml_params['names'],
- 'input_dim': (args['size'], args['size'])
- },
- dataloader_params={
- 'batch_size': args['batch'],
- 'num_workers': args['worker']
- }
- )
-
- val_data = coco_detection_yolo_format_val(
- dataset_params={
- 'data_dir': yaml_params['Dir'],
- 'images_dir': yaml_params['images']['val'],
- 'labels_dir': yaml_params['labels']['val'],
- 'classes': yaml_params['names'],
- 'input_dim': (args['size'], args['size'])
- },
- dataloader_params={
- 'batch_size':args['batch'],
- 'num_workers': args['worker']
- }
- )
-
+ # Test Data
if 'test' in (yaml_params['images'].keys() or yaml_params['labels'].keys()):
- test_data = coco_detection_yolo_format_val(
- dataset_params={
- 'data_dir': yaml_params['Dir'],
- 'images_dir': yaml_params['images']['test'],
- 'labels_dir': yaml_params['labels']['test'],
- 'classes': yaml_params['names'],
- 'input_dim': (args['size'], args['size'])
- },
- dataloader_params={
- 'batch_size':args['batch'],
- 'num_workers': args['worker']
- }
- )
+ testset = COCOFormatDetectionDataset(data_dir=yaml_params['Dir'],
+ images_dir=yaml_params['images']['test'],
+ json_annotation_file=yaml_params['labels']['test'],
+ input_dim=(args['size'], args['size']),
+ ignore_empty_annotations=False,
+ transforms=[
+ DetectionPaddedRescale(input_dim=(args['size'], args['size']), max_targets=300),
+ DetectionStandardize(max_value=255),
+ DetectionTargetsFormatTransform(max_targets=300, input_dim=(args['size'], args['size']),
+ output_format="LABEL_CXCYWH")
+ ])
+ test_loader = dataloaders.get(dataset=testset, dataloader_params={
+ "shuffle": False,
+ "batch_size": int(args['batch']*2),
+ "num_workers": args['worker'],
+ "drop_last": False,
+ "pin_memory": True,
+ "collate_fn": CrowdDetectionCollateFN(),
+ "worker_init_fn": worker_init_reset_seed
+ })
# To Resume Training
if args['resume']:
model = models.get(
args['model'],
- num_classes=len(yaml_params['names']),
+ num_classes=no_class,
checkpoint_path=args["weight"]
)
else:
model = models.get(
args['model'],
- num_classes=len(yaml_params['names']),
+ num_classes=no_class,
pretrained_weights=args["weight"]
)
train_params = {
- # ENABLING SILENT MODE
'silent_mode': False,
"average_best_models":True,
"warmup_mode": args['warmup_mode'],
@@ -168,15 +208,14 @@
"mixed_precision": True,
"loss": PPYoloELoss(
use_static_assigner=False,
- num_classes=len(yaml_params['names']),
+ num_classes=no_class,
reg_max=16
),
"valid_metrics_list": [
DetectionMetrics_050(
score_thres=0.1,
top_k_predictions=300,
- # NOTE: num_classes needs to be defined here
- num_cls=len(yaml_params['names']),
+ num_cls=no_class,
normalize_targets=True,
post_prediction_callback=PPYoloEPostPredictionCallback(
score_threshold=0.01,
@@ -196,24 +235,25 @@
# Print Training Params
print('[INFO] Training Params:\n', train_params)
+ # Model Training...
trainer.train(
model=model,
training_params=train_params,
- train_loader=train_data,
- valid_loader=val_data
+ train_loader=train_loader,
+ valid_loader=valid_loader
)
# Load best model
best_model = models.get(args['model'],
- num_classes=len(yaml_params['names']),
+ num_classes=no_class,
checkpoint_path=os.path.join('runs', name, 'ckpt_best.pth'))
# Evaluating on Val Dataset
eval_model = trainer.test(model=best_model,
- test_loader=val_data,
+ test_loader=valid_loader,
test_metrics_list=DetectionMetrics_050(score_thres=0.1,
top_k_predictions=300,
- num_cls=len(yaml_params['names']),
+ num_cls=no_class,
normalize_targets=True,
post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.01,
nms_top_k=1000,
@@ -227,10 +267,10 @@
# Evaluating on Test Dataset
if 'test' in (yaml_params['images'].keys() or yaml_params['labels'].keys()):
test_result = trainer.test(model=best_model,
- test_loader=test_data,
+ test_loader=test_loader,
test_metrics_list=DetectionMetrics_050(score_thres=0.1,
top_k_predictions=300,
- num_cls=len(yaml_params['names']),
+ num_cls=no_class,
normalize_targets=True,
post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.01,
nms_top_k=1000,
@@ -241,4 +281,3 @@
for i in test_result:
print(f"{i}: {float(test_result[i])}")
print(f'[INFO] Training Completed in \033[1m{(time.time()-s_time)/3600} Hours\033[0m')
-
\ No newline at end of file