From 5c165bf563e32611d5d0f29e44a6fd40d7864ca3 Mon Sep 17 00:00:00 2001 From: ahmadmustafaanis Date: Tue, 10 Aug 2021 22:43:49 +0500 Subject: [PATCH 1/6] no cache option --- utils/datasets.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 1c780cdbac4b..315e7c957a3f 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -361,7 +361,7 @@ def img2label_paths(img_paths): class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=True): self.img_size = img_size self.augment = augment self.hyp = hyp @@ -397,12 +397,18 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r # Check cache self.label_files = img2label_paths(self.img_files) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') - try: - cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict - assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files) - except: + + if read_data_from_cache: #if false, data would be read from scratch even if cache exists + try: + cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict + assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files) + except: + print("No Cache Exists, Reading from Disc instead") + cache, exists = self.cache_labels(cache_path, prefix), False # cache + else: cache, exists = self.cache_labels(cache_path, prefix), False # cache + # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total if exists: From 2525e78cfdc0a83774a915e1cf89f862c6bf93f7 Mon Sep 17 00:00:00 2001 From: ahmadmustafaanis Date: Tue, 10 Aug 2021 23:05:52 +0500 Subject: [PATCH 2/6] no cache option --- train.py | 8 +++++--- utils/datasets.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/train.py b/train.py index 23f4971b1758..11632355aca4 100644 --- a/train.py +++ b/train.py @@ -55,9 +55,9 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary device, callbacks=Callbacks() ): - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \ + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, read_data_from_cache = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ - opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze + opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.read_data_from_cache # Directories w = save_dir / 'weights' # weights dir @@ -203,7 +203,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK, workers=workers, image_weights=opt.image_weights, quad=opt.quad, - prefix=colorstr('train: ')) + prefix=colorstr('train: '), read_data_from_cache=read_data_from_cache) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' @@ -452,6 +452,8 @@ def parse_opt(known=False): parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24') + parser.add_argument('--cache', action='store_true', help='Read data from Cache if exists. Default=True') + opt = parser.parse_known_args()[0] if known else parser.parse_args() return opt diff --git a/utils/datasets.py b/utils/datasets.py index 315e7c957a3f..499abd030cb6 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -89,7 +89,7 @@ def exif_transpose(image): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''): + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=True): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): dataset = LoadImagesAndLabels(path, imgsz, batch_size, @@ -101,7 +101,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix) + prefix=prefix, read_data_from_cache=read_data_from_cache) batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers From 83bbce71e52fb2a747ae2e4b59409155a56d79dd Mon Sep 17 00:00:00 2001 From: ahmadmustafaanis Date: Wed, 11 Aug 2021 11:50:31 +0500 Subject: [PATCH 3/6] bit change --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 11632355aca4..997d540097ff 100644 --- a/train.py +++ b/train.py @@ -452,7 +452,7 @@ def parse_opt(known=False): parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24') - parser.add_argument('--cache', action='store_true', help='Read data from Cache if exists. Default=True') + parser.add_argument('--cache', default=False, action='store_true', help='Read data from Cache if exists. Default=True') opt = parser.parse_known_args()[0] if known else parser.parse_args() return opt From 46d82a3295b71a69a0da04365ee496c887e65234 Mon Sep 17 00:00:00 2001 From: ahmadmustafaanis Date: Wed, 11 Aug 2021 15:49:56 +0500 Subject: [PATCH 4/6] changed to 0,1 instead of True False --- train.py | 2 +- utils/datasets.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/train.py b/train.py index 997d540097ff..f5fa2bfae20e 100644 --- a/train.py +++ b/train.py @@ -452,7 +452,7 @@ def parse_opt(known=False): parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24') - parser.add_argument('--cache', default=False, action='store_true', help='Read data from Cache if exists. Default=True') + parser.add_argument('--cache', default=1, type=int, help='Read data from Cache if exists. Default=1, Change to 0 for reading data from disc instead of cache') opt = parser.parse_known_args()[0] if known else parser.parse_args() return opt diff --git a/utils/datasets.py b/utils/datasets.py index 499abd030cb6..30009126c4e3 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -89,7 +89,7 @@ def exif_transpose(image): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=True): + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=1): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): dataset = LoadImagesAndLabels(path, imgsz, batch_size, @@ -361,7 +361,7 @@ def img2label_paths(img_paths): class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=True): + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=1): self.img_size = img_size self.augment = augment self.hyp = hyp @@ -398,7 +398,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.label_files = img2label_paths(self.img_files) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') - if read_data_from_cache: #if false, data would be read from scratch even if cache exists + if read_data_from_cache==1: #if false, data would be read from scratch even if cache exists try: cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files) From 27bad8d71000c8a9d6e55f059b75736d9629dbf7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 13 Aug 2021 13:23:56 +0200 Subject: [PATCH 5/6] Update train.py --- train.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/train.py b/train.py index f5fa2bfae20e..24152f1a1198 100644 --- a/train.py +++ b/train.py @@ -55,9 +55,9 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary device, callbacks=Callbacks() ): - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, read_data_from_cache = \ + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ - opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.read_data_from_cache + opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze # Directories w = save_dir / 'weights' # weights dir @@ -203,8 +203,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=RANK, workers=workers, image_weights=opt.image_weights, quad=opt.quad, - prefix=colorstr('train: '), read_data_from_cache=read_data_from_cache) - mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class + prefix=colorstr('train: ')) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' @@ -452,8 +452,6 @@ def parse_opt(known=False): parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24') - parser.add_argument('--cache', default=1, type=int, help='Read data from Cache if exists. Default=1, Change to 0 for reading data from disc instead of cache') - opt = parser.parse_known_args()[0] if known else parser.parse_args() return opt From 752da74468beac489fdd95e144dd10e670270746 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 13 Aug 2021 13:24:16 +0200 Subject: [PATCH 6/6] Update datasets.py --- utils/datasets.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 30009126c4e3..1c780cdbac4b 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -89,7 +89,7 @@ def exif_transpose(image): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', read_data_from_cache=1): + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): dataset = LoadImagesAndLabels(path, imgsz, batch_size, @@ -101,7 +101,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix, read_data_from_cache=read_data_from_cache) + prefix=prefix) batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers @@ -361,7 +361,7 @@ def img2label_paths(img_paths): class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', read_data_from_cache=1): + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp @@ -397,18 +397,12 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r # Check cache self.label_files = img2label_paths(self.img_files) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') - - if read_data_from_cache==1: #if false, data would be read from scratch even if cache exists - try: - cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict - assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files) - except: - print("No Cache Exists, Reading from Disc instead") - cache, exists = self.cache_labels(cache_path, prefix), False # cache - else: + try: + cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict + assert cache['version'] == 0.4 and cache['hash'] == get_hash(self.label_files + self.img_files) + except: cache, exists = self.cache_labels(cache_path, prefix), False # cache - # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total if exists: