Merge pull request #2 from LielinJiang/multiprocess-dataset

Support multiprocess dict dataset
AnnaTrainingG · Jul 6, 2020 · 0ea2997 · 0ea2997
2 parents d700d09 + 2e3f9c3
commit 0ea2997
Show file tree

Hide file tree

Showing 13 changed files with 147 additions and 133 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,41 @@
+-   repo: local
+    hooks:
+    -   id: yapf
+        name: yapf
+        entry: yapf
+        language: system
+        args: [-i, --style .style.yapf]
+        files: \.py$
+
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: a11d9314b22d8f8c7556443875b731ef05965464
+    hooks:
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: detect-private-key
+    -   id: check-symlinks
+    -   id: check-added-large-files
+
+-   repo: local
+    hooks:
+    -   id: flake8
+        name: flake8
+        entry: flake8
+        language: system
+        args:
+        -   --count
+        -   --select=E9,F63,F7,F82
+        -   --show-source
+        -   --statistics
+        files: \.py$
+
+-   repo: local
+    hooks:
+    -   id: copyright_checker
+        name: copyright_checker
+        entry: python ./.copyright.hook
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+        exclude: (?!.*third_party)^.*$
diff --git a/.style.yapf b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
diff --git a/configs/cyclegan-cityscapes.yaml → configs/cyclegan_cityscapes.yaml b/configs/cyclegan-cityscapes.yaml → configs/cyclegan_cityscapes.yaml
@@ -26,7 +26,7 @@ model:
 
 dataset:
   train:
-    name: UnalignedDataset
+    name: UnpairedDataset
     dataroot: data/cityscapes
     phase: train
     max_dataset_size: inf

diff --git a/configs/cyclegan-horse2zebra.yaml → configs/cyclegan_horse2zebra.yaml b/configs/cyclegan-horse2zebra.yaml → configs/cyclegan_horse2zebra.yaml
@@ -26,7 +26,7 @@ model:
 
 dataset:
   train:
-    name: UnalignedDataset
+    name: UnpairedDataset
     dataroot: data/horse2zebra
     phase: train
     max_dataset_size: inf

diff --git a/configs/pix2pix-cityscapes.yaml → configs/pix2pix_cityscapes.yaml b/configs/pix2pix-cityscapes.yaml → configs/pix2pix_cityscapes.yaml
@@ -23,7 +23,7 @@ model:
 
 dataset:
   train:
-    name: AlignedDataset
+    name: PairedDataset
     dataroot: data/cityscapes
     phase: train
     max_dataset_size: inf
@@ -38,7 +38,7 @@ dataset:
       preprocess: resize_and_crop
       no_flip: False
   test:
-    name: AlignedDataset
+    name: PairedDataset
     dataroot: data/cityscapes/
     phase: test
     max_dataset_size: inf

diff --git a/configs/pix2pix-cityscapes-2gpus.yaml → configs/pix2pix_cityscapes_2gpus.yaml b/configs/pix2pix-cityscapes-2gpus.yaml → configs/pix2pix_cityscapes_2gpus.yaml
@@ -23,7 +23,7 @@ model:
 
 dataset:
   train:
-    name: AlignedDataset
+    name: PairedDataset
     dataroot: data/cityscapes
     phase: train
     max_dataset_size: inf
@@ -38,7 +38,7 @@ dataset:
       preprocess: resize_and_crop
       no_flip: False
   test:
-    name: AlignedDataset
+    name: PairedDataset
     dataroot: data/cityscapes/
     phase: test
     max_dataset_size: inf

diff --git a/ppgan/datasets/__init__.py b/ppgan/datasets/__init__.py
@@ -1,3 +1,3 @@
-from .unaligned_dataset import UnalignedDataset
+from .unpaired_dataset import UnpairedDataset
 from .single_dataset import SingleDataset
-from .aligned_dataset import AlignedDataset
+from .paired_dataset import PairedDataset
diff --git a/ppgan/datasets/builder.py b/ppgan/datasets/builder.py
@@ -1,3 +1,4 @@
+import time
 import paddle
 import numbers
 import numpy as np
@@ -23,7 +24,7 @@ def __init__(self, dataset):
 
         for k, v in single_item.items():
             if not isinstance(v, (numbers.Number, np.ndarray)):
-                self.non_tensor_dict.update({k: {}})
+                setattr(self, k, Manager().dict())
                 self.non_tensor_keys_set.add(k)
             else:
                 self.tensor_keys_set.add(k)
@@ -38,9 +39,7 @@ def __getitem__(self, index):
             if isinstance(v, (numbers.Number, np.ndarray)):
                 tmp_list.append(v)
             else:
-                tmp_dict = self.non_tensor_dict[k]
-                tmp_dict.update({index: v})
-                self.non_tensor_dict[k] = tmp_dict
+                getattr(self, k).update({index: v})
 
         tmp_list.append(index)
         return tuple(tmp_list)
@@ -50,11 +49,11 @@ def __len__(self):
 
     def reset(self):
         for k in self.non_tensor_keys_set:
-            self.non_tensor_dict[k] = {}
+            setattr(self, k, Manager().dict())
 
 
 class DictDataLoader():
-    def __init__(self, dataset, batch_size, is_train, num_workers=0):
+    def __init__(self, dataset, batch_size, is_train, num_workers=4):
 
         self.dataset = DictDataset(dataset)
 
@@ -97,14 +96,15 @@ def get_items_by_indexs(self, key, indexs):
         if isinstance(indexs, paddle.Variable):
             indexs = indexs.numpy()
         current_items = []
-        items = self.dataset.non_tensor_dict[key]
+        items = getattr(self.dataset, key)
 
         for index in indexs:
             current_items.append(items[index])
 
         return current_items
 
 
+
 def build_dataloader(cfg, is_train=True):
     dataset = DATASETS.get(cfg.name)(cfg)
 

diff --git a/ppgan/datasets/aligned_dataset.py → ppgan/datasets/paired_dataset.py b/ppgan/datasets/aligned_dataset.py → ppgan/datasets/paired_dataset.py
@@ -8,19 +8,19 @@
 
 
 @DATASETS.register()
-class AlignedDataset(BaseDataset):
+class PairedDataset(BaseDataset):
     """A dataset class for paired image dataset.
     """
 
-    def __init__(self, opt):
+    def __init__(self, cfg):
         """Initialize this dataset class.
 
         Args:
             cfg (dict) -- stores all the experiment flags
         """
-        BaseDataset.__init__(self, opt)
-        self.dir_AB = os.path.join(opt.dataroot, opt.phase)  # get the image directory
-        self.AB_paths = sorted(make_dataset(self.dir_AB, opt.max_dataset_size))  # get image paths
+        BaseDataset.__init__(self, cfg)
+        self.dir_AB = os.path.join(cfg.dataroot, cfg.phase)  # get the image directory
+        self.AB_paths = sorted(make_dataset(self.dir_AB, cfg.max_dataset_size))  # get image paths
         assert(self.cfg.transform.load_size >= self.cfg.transform.crop_size)   # crop_size should be smaller than the size of loaded image
         self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
         self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc

diff --git a/ppgan/datasets/unaligned_dataset.py → ppgan/datasets/unpaired_dataset.py b/ppgan/datasets/unaligned_dataset.py → ppgan/datasets/unpaired_dataset.py
@@ -8,7 +8,7 @@
 
 
 @DATASETS.register()
-class UnalignedDataset(BaseDataset):
+class UnpairedDataset(BaseDataset):
     """
     """
 

diff --git a/ppgan/models/cycle_gan_model.py b/ppgan/models/cycle_gan_model.py
@@ -5,7 +5,7 @@
 from .generators.builder import build_generator
 from .discriminators.builder import build_discriminator
 from .losses import GANLoss
-# from ..modules.nn import L1Loss
+
 from ..solver import build_optimizer
 from ..utils.image_pool import ImagePool
 
@@ -27,20 +27,23 @@ def __init__(self, opt):
         """Initialize the CycleGAN class.
 
         Parameters:
-            opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
+            opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
         """
         BaseModel.__init__(self, opt)
         # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
         self.loss_names = ['D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B']
         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
         visual_names_A = ['real_A', 'fake_B', 'rec_A']
         visual_names_B = ['real_B', 'fake_A', 'rec_B']
-        if self.isTrain and self.opt.lambda_identity > 0.0:  # if identity loss is used, we also visualize idt_B=G_A(B) ad idt_A=G_A(B)
+
+        # if identity loss is used, we also visualize idt_B=G_A(B) and idt_A=G_A(B)
+        if self.isTrain and self.opt.lambda_identity > 0.0:
             visual_names_A.append('idt_B')
             visual_names_B.append('idt_A')
 
-        self.visual_names = visual_names_A + visual_names_B  # combine visualizations for A and B
-        # specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>.
+        # combine visualizations for A and B
+        self.visual_names = visual_names_A + visual_names_B
+        # specify the models you want to save to the disk.
         if self.isTrain:
             self.model_names = ['G_A', 'G_B', 'D_A', 'D_B']
         else:  # during test time, only load Gs
@@ -59,22 +62,22 @@ def __init__(self, opt):
         if self.isTrain:
             if opt.lambda_identity > 0.0:  # only works when input and output images have the same number of channels
                 assert(opt.dataset.train.input_nc == opt.dataset.train.output_nc)
-            self.fake_A_pool = ImagePool(opt.dataset.train.pool_size)  # create image buffer to store previously generated images
-            self.fake_B_pool = ImagePool(opt.dataset.train.pool_size)  # create image buffer to store previously generated images
+            # create image buffer to store previously generated images
+            self.fake_A_pool = ImagePool(opt.dataset.train.pool_size)
+            # create image buffer to store previously generated images
+            self.fake_B_pool = ImagePool(opt.dataset.train.pool_size)
             # define loss functions
-            self.criterionGAN = GANLoss(opt.model.gan_mode, [[[[1.0]]]], [[[[0.0]]]])#.to(self.device)  # define GAN loss.
+            self.criterionGAN = GANLoss(opt.model.gan_mode)
             self.criterionCycle = paddle.nn.L1Loss() 
             self.criterionIdt = paddle.nn.L1Loss()
 
             self.optimizer_G =  build_optimizer(opt.optimizer, parameter_list=self.netG_A.parameters() + self.netG_B.parameters())
             self.optimizer_D = build_optimizer(opt.optimizer, parameter_list=self.netD_A.parameters() + self.netD_B.parameters())
-            # self.optimizer_DA = build_optimizer(opt.optimizer, parameter_list=self.netD_A.parameters()) 
-            # self.optimizer_DB = build_optimizer(opt.optimizer, parameter_list=self.netD_B.parameters()) 
+
             self.optimizers.append(self.optimizer_G)
             self.optimizers.append(self.optimizer_D)
-            # self.optimizers.append(self.optimizer_DA)
-            # self.optimizers.append(self.optimizer_DB)
-            self.optimizer_names.extend(['optimizer_G', 'optimizer_D'])#A', 'optimizer_DB'])
+
+            self.optimizer_names.extend(['optimizer_G', 'optimizer_D'])
 
     def set_input(self, input):
         """Unpack input data from the dataloader and perform necessary pre-processing steps.
@@ -102,7 +105,7 @@ def set_input(self, input):
             self.image_paths = input['A_paths']
         elif 'B_paths' in input:
             self.image_paths = input['B_paths']
-        # self.image_paths = input['A_paths' if AtoB else 'B_paths']
+
 
     def forward(self):
         """Run forward pass; called by both functions <optimize_parameters> and <test>."""
@@ -115,20 +118,6 @@ def forward(self):
             self.rec_B = self.netG_A(self.fake_A)   # G_A(G_B(B))
 
 
-    # def forward_test(self, input):
-    #     input = paddle.imperative.to_variable(input)
-    #     net_g = getattr(self, 'netG_' + self.opt.dataset.test.direction[0])
-    #     return net_g(input)
-
-    # def test(self, input):
-    #     """Forward function used in test time.
-
-    #     This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
-    #     It also calls <compute_visuals> to produce additional visualization results
-    #     """
-    #     with paddle.imperative.no_grad():
-    #         return self.forward_test(input)
-
     def backward_D_basic(self, netD, real, fake):
         """Calculate GAN loss for the discriminator
 
@@ -193,27 +182,26 @@ def backward_G(self):
     def optimize_parameters(self):
         """Calculate losses, gradients, and update network weights; called in every training iteration"""
         # forward
-        self.forward()      # compute fake images and reconstruction images.
+        # compute fake images and reconstruction images.
+        self.forward()
         # G_A and G_B
-        self.set_requires_grad([self.netD_A, self.netD_B], False)  # Ds require no gradients when optimizing Gs
-        self.optimizer_G.clear_gradients() #zero_grad()  # set G_A and G_B's gradients to zero
-        self.backward_G()             # calculate gradients for G_A and G_B
-        self.optimizer_G.minimize(self.loss_G) #step()       # update G_A and G_B's weights
-        # self.optimizer_G.clear_gradients()
-        # self.optimizer_G.clear_gradients()
+        # Ds require no gradients when optimizing Gs
+        self.set_requires_grad([self.netD_A, self.netD_B], False)
+        # set G_A and G_B's gradients to zero
+        self.optimizer_G.clear_gradients()
+        # calculate gradients for G_A and G_B
+        self.backward_G()
+        # update G_A and G_B's weights
+        self.optimizer_G.minimize(self.loss_G)
         # D_A and D_B
         self.set_requires_grad([self.netD_A, self.netD_B], True)
-        # self.set_requires_grad(self.netD_A, True)
-        self.optimizer_D.clear_gradients() #zero_grad()   # set D_A and D_B's gradients to zero
-        self.backward_D_A()      # calculate gradients for D_A
-        self.backward_D_B()      # calculate graidents for D_B
-        self.optimizer_D.minimize(self.loss_D_A + self.loss_D_B)  # update D_A and D_B's weights
-        # self.backward_D_A()      # calculate gradients for D_A
-        # self.optimizer_DA.minimize(self.loss_D_A) #step()  # update D_A and D_B's weights
-        # self.optimizer_DA.clear_gradients() #zero_g
-        # self.set_requires_grad(self.netD_B, True)
-        # self.optimizer_DB.clear_gradients() #zero_grad()   # set D_A and D_B's gradients to zero
-
-        # self.backward_D_B()      # calculate graidents for D_B
-        # self.optimizer_DB.minimize(self.loss_D_B) #step()  # update D_A and D_B's weights
-        # self.optimizer_DB.clear_gradients() #zero_grad()   # set D_A and D_B's gradients to zero
+
+        # set D_A and D_B's gradients to zero
+        self.optimizer_D.clear_gradients()
+        # calculate gradients for D_A
+        self.backward_D_A()
+        # calculate gradients for D_B
+        self.backward_D_B()
+        # update D_A and D_B's weights
+        self.optimizer_D.minimize(self.loss_D_A + self.loss_D_B)
+
diff --git a/ppgan/models/losses.py b/ppgan/models/losses.py
@@ -4,6 +4,7 @@
 
 from ..modules.nn import BCEWithLogitsLoss
 
+
 class GANLoss(paddle.fluid.dygraph.Layer):
     """Define different GAN objectives.
 
@@ -23,16 +24,14 @@ def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
         LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
         """
         super(GANLoss, self).__init__()
-        self.real_label = paddle.fluid.dygraph.to_variable(np.array(target_real_label))
-        self.fake_label = paddle.fluid.dygraph.to_variable(np.array(target_fake_label))
-        # self.real_label.stop_gradients = True
-        # self.fake_label.stop_gradients = True
+        self.target_real_label = target_real_label
+        self.target_fake_label = target_fake_label
 
         self.gan_mode = gan_mode
         if gan_mode == 'lsgan':
             self.loss = nn.MSELoss()
         elif gan_mode == 'vanilla':
-            self.loss = BCEWithLogitsLoss()#nn.BCEWithLogitsLoss()
+            self.loss = BCEWithLogitsLoss()
         elif gan_mode in ['wgangp']:
             self.loss = None
         else:
@@ -50,14 +49,16 @@ def get_target_tensor(self, prediction, target_is_real):
         """
 
         if target_is_real:
-            target_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=1.0, dtype='float32')#self.real_label
+            if not hasattr(self, 'target_real_tensor'):
+                self.target_real_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_real_label, dtype='float32')
+            target_tensor = self.target_real_tensor
         else:
-            target_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=0.0, dtype='float32')#self.fake_label
+            if not hasattr(self, 'target_fake_tensor'):
+                self.target_fake_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_fake_label, dtype='float32')
+            target_tensor = self.target_fake_tensor
 
-        # target_tensor = paddle.cast(target_tensor, prediction.dtype)
-        # target_tensor = paddle.expand_as(target_tensor, prediction)
         # target_tensor.stop_gradient = True
-        return target_tensor#paddle.expand_as(target_tensor, prediction)
+        return target_tensor
 
     def __call__(self, prediction, target_is_real):
         """Calculate loss given Discriminator's output and grount truth labels.