From 5c165bf563e32611d5d0f29e44a6fd40d7864ca3 Mon Sep 17 00:00:00 2001
From: ahmadmustafaanis <ahmadanis5050@gmail.com>
Date: Tue, 10 Aug 2021 22:43:49 +0500
Subject: [PATCH 1/6] Add read_data_from_cache option to LoadImagesAndLabels

---
 utils/datasets.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 1c780cdbac4b..315e7c957a3f 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -361,7 +361,7 @@ def img2label_paths(img_paths):
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
+                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=True):
         self.img_size = img_size
         self.augment = augment
         self.hyp = hyp
@@ -397,12 +397,18 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         # Check cache
         self.label_files = img2label_paths(self.img_files)  # labels
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
-        try:
-            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
-            assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
-        except:
+
+        if read_data_from_cache: #if false, data would be read from scratch even if cache exists
+            try:
+                cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+                assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
+            except:
+                print("No Cache Exists, Reading from Disc instead")
+                cache, exists = self.cache_labels(cache_path, prefix), False  # cache
+        else:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
 
+
         # Display cache
         nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
         if exists:

From 2525e78cfdc0a83774a915e1cf89f862c6bf93f7 Mon Sep 17 00:00:00 2001
From: ahmadmustafaanis <ahmadanis5050@gmail.com>
Date: Tue, 10 Aug 2021 23:05:52 +0500
Subject: [PATCH 2/6] Pass read_data_from_cache through train.py and create_dataloader

---
 train.py          | 8 +++++---
 utils/datasets.py | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 23f4971b1758..11632355aca4 100644
--- a/train.py
+++ b/train.py
@@ -55,9 +55,9 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           device,
           callbacks=Callbacks()
           ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, read_data_from_cache = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
-        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
+        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.read_data_from_cache
 
     # Directories
     w = save_dir / 'weights'  # weights dir
@@ -203,7 +203,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                               hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK,
                                               workers=workers, image_weights=opt.image_weights, quad=opt.quad,
-                                              prefix=colorstr('train: '))
+                                              prefix=colorstr('train: '), read_data_from_cache=read_data_from_cache)
     mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
     nb = len(train_loader)  # number of batches
     assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
@@ -452,6 +452,8 @@ def parse_opt(known=False):
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
     parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
     parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
+    parser.add_argument('--cache', action='store_true', help='Read data from Cache if exists. Default=True')
+
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
 
diff --git a/utils/datasets.py b/utils/datasets.py
index 315e7c957a3f..499abd030cb6 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -89,7 +89,7 @@ def exif_transpose(image):
 
 
 def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
-                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
+                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=True):
     # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
     with torch_distributed_zero_first(rank):
         dataset = LoadImagesAndLabels(path, imgsz, batch_size,
@@ -101,7 +101,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non
                                       stride=int(stride),
                                       pad=pad,
                                       image_weights=image_weights,
-                                      prefix=prefix)
+                                      prefix=prefix, read_data_from_cache=read_data_from_cache)
 
     batch_size = min(batch_size, len(dataset))
     nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers])  # number of workers

From 83bbce71e52fb2a747ae2e4b59409155a56d79dd Mon Sep 17 00:00:00 2001
From: ahmadmustafaanis <ahmadanis5050@gmail.com>
Date: Wed, 11 Aug 2021 11:50:31 +0500
Subject: [PATCH 3/6] Set explicit default=False on --cache argument

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 11632355aca4..997d540097ff 100644
--- a/train.py
+++ b/train.py
@@ -452,7 +452,7 @@ def parse_opt(known=False):
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
     parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
     parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
-    parser.add_argument('--cache', action='store_true', help='Read data from Cache if exists. Default=True')
+    parser.add_argument('--cache', default=False, action='store_true', help='Read data from Cache if exists. Default=True')
 
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt

From 46d82a3295b71a69a0da04365ee496c887e65234 Mon Sep 17 00:00:00 2001
From: ahmadmustafaanis <ahmadanis5050@gmail.com>
Date: Wed, 11 Aug 2021 15:49:56 +0500
Subject: [PATCH 4/6] Use 0/1 instead of True/False for the cache-read flag

---
 train.py          | 2 +-
 utils/datasets.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/train.py b/train.py
index 997d540097ff..f5fa2bfae20e 100644
--- a/train.py
+++ b/train.py
@@ -452,7 +452,7 @@ def parse_opt(known=False):
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
     parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
     parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
-    parser.add_argument('--cache', default=False, action='store_true', help='Read data from Cache if exists. Default=True')
+    parser.add_argument('--cache', default=1, type=int, help='Read data from Cache if exists. Default=1, Change to 0 for reading data from disc instead of cache')
 
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
diff --git a/utils/datasets.py b/utils/datasets.py
index 499abd030cb6..30009126c4e3 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -89,7 +89,7 @@ def exif_transpose(image):
 
 
 def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
-                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=True):
+                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=1):
     # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
     with torch_distributed_zero_first(rank):
         dataset = LoadImagesAndLabels(path, imgsz, batch_size,
@@ -361,7 +361,7 @@ def img2label_paths(img_paths):
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=True):
+                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=1):
         self.img_size = img_size
         self.augment = augment
         self.hyp = hyp
@@ -398,7 +398,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         self.label_files = img2label_paths(self.img_files)  # labels
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
 
-        if read_data_from_cache: #if false, data would be read from scratch even if cache exists
+        if read_data_from_cache==1: #if false, data would be read from scratch even if cache exists
             try:
                 cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
                 assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)

From 27bad8d71000c8a9d6e55f059b75736d9629dbf7 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 13 Aug 2021 13:23:56 +0200
Subject: [PATCH 5/6] Update train.py: remove read_data_from_cache, cast mlc to int

---
 train.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/train.py b/train.py
index f5fa2bfae20e..24152f1a1198 100644
--- a/train.py
+++ b/train.py
@@ -55,9 +55,9 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           device,
           callbacks=Callbacks()
           ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, read_data_from_cache = \
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
-        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.read_data_from_cache
+        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
 
     # Directories
     w = save_dir / 'weights'  # weights dir
@@ -203,8 +203,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                               hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK,
                                               workers=workers, image_weights=opt.image_weights, quad=opt.quad,
-                                              prefix=colorstr('train: '), read_data_from_cache=read_data_from_cache)
-    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
+                                              prefix=colorstr('train: '))
+    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
     nb = len(train_loader)  # number of batches
     assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
 
@@ -452,8 +452,6 @@ def parse_opt(known=False):
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
     parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
     parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
-    parser.add_argument('--cache', default=1, type=int, help='Read data from Cache if exists. Default=1, Change to 0 for reading data from disc instead of cache')
-
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
 

From 752da74468beac489fdd95e144dd10e670270746 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 13 Aug 2021 13:24:16 +0200
Subject: [PATCH 6/6] Update datasets.py: remove read_data_from_cache option

---
 utils/datasets.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 30009126c4e3..1c780cdbac4b 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -89,7 +89,7 @@ def exif_transpose(image):
 
 
 def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
-                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=1):
+                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
     # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
     with torch_distributed_zero_first(rank):
         dataset = LoadImagesAndLabels(path, imgsz, batch_size,
@@ -101,7 +101,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non
                                       stride=int(stride),
                                       pad=pad,
                                       image_weights=image_weights,
-                                      prefix=prefix, read_data_from_cache=read_data_from_cache)
+                                      prefix=prefix)
 
     batch_size = min(batch_size, len(dataset))
     nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers])  # number of workers
@@ -361,7 +361,7 @@ def img2label_paths(img_paths):
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=1):
+                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
         self.img_size = img_size
         self.augment = augment
         self.hyp = hyp
@@ -397,18 +397,12 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         # Check cache
         self.label_files = img2label_paths(self.img_files)  # labels
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
-
-        if read_data_from_cache==1: #if false, data would be read from scratch even if cache exists
-            try:
-                cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
-                assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
-            except:
-                print("No Cache Exists, Reading from Disc instead")
-                cache, exists = self.cache_labels(cache_path, prefix), False  # cache
-        else:
+        try:
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+            assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files)
+        except:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
 
-
         # Display cache
         nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
         if exists: