refine psgan code (PaddlePaddle#67)

* update psgan pretrained model link in the tutorial doc
AnnaTrainingG · Nov 2, 2020 · e54169d · e54169d
1 parent 2533587
commit e54169d
Show file tree

Hide file tree

Showing 16 changed files with 150 additions and 84 deletions.
diff --git a/docs/en_US/tutorials/motion_driving.md b/docs/en_US/tutorials/motion_driving.md
@@ -13,9 +13,10 @@
 Users can upload the prepared source image and driving video, then substitute the path of source image and driving video for the `source_image` and `driving_video` parameter in the following running command. It will generate a video file named `result.mp4` in the `output` folder, which is the animated video file.
 
 ```
-python -u tools/first-order-demo.py \
-     --driving_video ./ravel_10.mp4  \
-     --source_image ./sudaqiang.png \
+cd applications/
+python -u tools/first-order-demo.py  \
+     --driving_video ../docs/imgs/fom_dv.mp4 \
+     --source_image ../docs/imgs/fom_source_image.png \
      --relative --adapt_scale
 ```
 

diff --git a/docs/en_US/tutorials/psgan.md b/docs/en_US/tutorials/psgan.md
@@ -10,15 +10,17 @@ This paper is to address the makeup transfer task, which aims to transfer the ma
 
 ## 2. How to use
 ### 2.1 Test
+The pretrained model can be downloaded from the following link: [psgan_weight](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
+
 Run the following command to complete the makeup transfer task. It will generate the transferred image in the current path when the program runs successfully.
 
 ```
-cd applications
-python tools/ps_demo.py \  
+python tools/psgan_infer.py \
   --config-file configs/makeup.yaml \
   --model_path /your/model/path \
-  --source_path  /your/source/image/path  \
-  --reference_dir /your/ref/image/path
+  --source_path  docs/imgs/ps_source.png  \
+  --reference_dir docs/imgs/ref \
+  --evaluate-only True
 ```
 **params:**
 - config-file: PSGAN network configuration file, yaml format
@@ -77,7 +79,7 @@ Notation: In train phase, the `isTrain` value in makeup.yaml file is `True`, but
 
 Model|Dataset|BatchSize|Inference speed|Download
 ---|:--:|:--:|:--:|:--:
-PSGAN|MT-Dataset| 1 | 1.9s/image (GPU:P40) | [model]()
+PSGAN|MT-Dataset| 1 | 1.9s/image (GPU:P40) | [model](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 
 ## 3. Result
 ![](../../imgs/makeup_shifter.png)

diff --git a/docs/imgs/fom_dv.mp4 b/docs/imgs/fom_dv.mp4
diff --git a/docs/imgs/fom_source_image.png b/docs/imgs/fom_source_image.png
diff --git a/docs/imgs/ps_source.png b/docs/imgs/ps_source.png
diff --git a/docs/imgs/ref/ps_ref.png b/docs/imgs/ref/ps_ref.png
diff --git a/docs/zh_CN/tutorials/motion_driving.md b/docs/zh_CN/tutorials/motion_driving.md
@@ -17,9 +17,10 @@ First order motion model的任务是image animation，给定一张源图片，
 用户可以上传自己准备的视频和图片，并在如下命令中的source_image参数和driving_video参数分别换成自己的图片和视频路径，然后运行如下命令，就可以完成动作表情迁移，程序运行成功后，会在output文件夹生成名为result.mp4的视频文件，该文件即为动作迁移后的视频。本项目中提供了原始图片和驱动视频供展示使用。运行的命令如下所示：
 
 ```
+cd applications/
 python -u tools/first-order-demo.py  \
-     --driving_video ./ravel_10.mp4 \
-     --source_image ./sudaqiang.png \
+     --driving_video ../docs/imgs/fom_dv.mp4 \
+     --source_image ../docs/imgs/fom_source_image.png \
      --relative --adapt_scale
 ```
 

diff --git a/docs/zh_CN/tutorials/psgan.md b/docs/zh_CN/tutorials/psgan.md
@@ -10,15 +10,17 @@
 
 ## 2. 使用方法
 ### 2.1 测试
+预训练模型可以从如下地址下载: [psgan_weight](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
+
 运行如下命令，就可以完成妆容迁移，程序运行成功后，会在当前文件夹生成妆容迁移后的图片文件。本项目中提供了原始图片和参考供展示使用，具体命令如下所示：
 
 ```
-cd applications/
-python tools/ps_demo.py \  
+python tools/psgan_infer.py \
   --config-file configs/makeup.yaml \
   --model_path /your/model/path \
-  --source_path  /your/source/image/path  \
-  --reference_dir /your/ref/image/path
+  --source_path  docs/imgs/ps_source.png  \
+  --reference_dir docs/imgs/ref \
+  --evaluate-only True
 ```
 **参数说明:**
 - config-file: PSGAN网络的参数配置文件，格式为yaml
@@ -73,7 +75,7 @@ data
 ### 2.3 模型
 Model|Dataset|BatchSize|Inference speed|Download
 ---|:--:|:--:|:--:|:--:
-PSGAN|MT-Dataset| 1 | 1.9s(GPU:P40) | [model]()
+PSGAN|MT-Dataset| 1 | 1.9s(GPU:P40) | [model](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 
 ## 3. 妆容迁移结果展示
 

diff --git a/applications/tools/ps_demo.py → ppgan/apps/psgan_predictor.py b/applications/tools/ps_demo.py → ppgan/apps/psgan_predictor.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@
 from ppgan.engine.trainer import Trainer
 from ppgan.models.builder import build_model
 from ppgan.utils.preprocess import *
+from .base_predictor import BasePredictor
 
 
 def toImage(net_output):
@@ -52,14 +53,17 @@ def mask2image(mask: np.array, format="HWC"):
     return canvas
 
 
+PS_WEIGHT_URL = "https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl"
+
+
 class PreProcess:
     def __init__(self, config, need_parser=True):
         self.img_size = 256
         self.transform = transform = T.Compose([
             T.Resize(size=256),
-            T.Permute(to_rgb=False),
+            T.ToTensor(),
         ])
-        self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
+        self.norm = T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
         if need_parser:
             self.face_parser = futils.mask.FaceParser()
         self.up_ratio = 0.6 / 0.85
@@ -82,8 +86,6 @@ def __call__(self, image):
         mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
                           interpolation=cv2.INTER_NEAREST)
         mask = mask.astype(np.uint8)
-        mask_color = mask2image(mask)
-        cv2.imwrite('mask_temp.png', mask_color)
         mask_tensor = paddle.to_tensor(mask)
 
         lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
@@ -97,7 +99,7 @@ def __call__(self, image):
         image = self.transform(np_image)
 
         return [
-            self.norm(image),
+            self.norm(image).unsqueeze(0),
             np.float32(mask_aug),
             np.float32(P_np),
             np.float32(mask)
@@ -145,11 +147,12 @@ def transfer(self, source, reference, with_face=False):
             if with_face:
                 return None, None
             return
-        for i in range(len(source_input) - 1):
+
+        for i in range(1, len(source_input) - 1):
             source_input[i] = paddle.to_tensor(
                 np.expand_dims(source_input[i], 0))
 
-        for i in range(len(reference_input) - 1):
+        for i in range(1, len(reference_input) - 1):
             reference_input[i] = paddle.to_tensor(
                 np.expand_dims(reference_input[i], 0))
 
@@ -163,49 +166,52 @@ def transfer(self, source, reference, with_face=False):
             'consis_mask': consis_mask
         }
         state_dicts = load(self.model_path)
-        net = getattr(self.model, 'netG')
-        net.set_dict(state_dicts['netG'])
+        for net_name, net in self.model.nets.items():
+            net.set_state_dict(state_dicts[net_name])
         result, _ = self.model.test(input_data)
-        print('result shape: ', result.shape)
         min_, max_ = result.min(), result.max()
         result += -min_
         result = paddle.divide(result, max_ - min_ + 1e-5)
         img = toImage(result)
 
         if with_face:
             return img, crop_face
-        img.save('before.png')
 
         return img
 
 
-def main(args, cfg, save_path='transferred_image.png'):
-
-    setup(args, cfg)
-
-    inference = Inference(cfg, args.model_path)
-    postprocess = PostProcess(cfg)
-
-    source = Image.open(args.source_path).convert("RGB")
-    reference_paths = list(Path(args.reference_dir).glob("*"))
-    np.random.shuffle(reference_paths)
-    for reference_path in reference_paths:
-        if not reference_path.is_file():
-            print(reference_path, "is not a valid file.")
-            continue
-
-        reference = Image.open(reference_path).convert("RGB")
-
-        # Transfer the psgan from reference to source.
-        image, face = inference.transfer(source, reference, with_face=True)
-        image.save('before.png')
-        source_crop = source.crop(
-            (face.left(), face.top(), face.right(), face.bottom()))
-        image = postprocess(source_crop, image)
-        image.save(save_path)
-
-
-if __name__ == '__main__':
-    args = parse_args()
-    cfg = get_config(args.config_file)
-    main(args, cfg)
+class PSGANPredictor(BasePredictor):
+    def __init__(self, args, cfg, output_path='output'):
+        self.args = args
+        self.cfg = cfg
+        self.weight_path = self.args.model_path
+        if self.weight_path is None:
+            cur_path = os.path.abspath(os.path.dirname(__file__))
+            self.weight_path = get_path_from_url(PS_WEIGHT_URL, cur_path)
+        self.output_path = output_path
+
+    def run(self):
+        setup(self.args, self.cfg)
+        inference = Inference(self.cfg, self.weight_path)
+        postprocess = PostProcess(self.cfg)
+
+        source = Image.open(self.args.source_path).convert("RGB")
+        reference_paths = list(Path(self.args.reference_dir).glob("*"))
+        np.random.shuffle(reference_paths)
+        for reference_path in reference_paths:
+            if not reference_path.is_file():
+                print(reference_path, "is not a valid file.")
+                continue
+
+            reference = Image.open(reference_path).convert("RGB")
+
+            # Transfer the psgan from reference to source.
+            image, face = inference.transfer(source, reference, with_face=True)
+            source_crop = source.crop(
+                (face.left(), face.top(), face.right(), face.bottom()))
+            image = postprocess(source_crop, image)
+
+            ref_img_name = os.path.split(reference_path)[1]
+            save_path = os.path.join(self.output_path,
+                                     'transfered_ref_' + ref_img_name)
+            image.save(save_path)
diff --git a/ppgan/faceutils/dlibutils/lms.dat b/ppgan/faceutils/dlibutils/lms.dat
diff --git a/ppgan/faceutils/mask/face_parser.py b/ppgan/faceutils/mask/face_parser.py
@@ -23,7 +23,7 @@
 import pickle
 from .model import BiSeNet
 
-BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/bisnet.pdparams'
+BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/bisenet.pdparams'
 
 
 class FaceParser:
@@ -65,7 +65,7 @@ def parse(self, image):
         image = image.transpose((2, 0, 1))
         image = self.transforms(image)
 
-        state_dict, _ = paddle.load(self.save_pth)
+        state_dict = paddle.load(self.save_pth)
         self.net.set_dict(state_dict)
         self.net.eval()
 
@@ -75,8 +75,6 @@ def parse(self, image):
             out = self.net(image)[0]
             parsing = out.squeeze(0).argmax(0)  #argmax(0).astype('float32')
 
-        #parsing = paddle.nn.functional.embedding(x=self.dict, weight=parsing)
-
         parse_np = parsing.numpy()
         h, w = parse_np.shape
         result = np.zeros((h, w))

diff --git a/ppgan/models/generators/makeup.py b/ppgan/models/generators/makeup.py
@@ -296,31 +296,65 @@ def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y):
         # x -> src img
         x = self.encoder(x)
         _, c, h, w = x.shape
-        x_flat = x.reshape([-1, c, h * w])
-        x_flat = self.w * x_flat
-        if x_p is not None:
-            x_flat = paddle.concat([x_flat, x_p], axis=1)
 
         _, c2, h2, w2 = y.shape
-        y_flat = y.reshape([-1, c2, h2 * w2])
-        y_flat = self.w * y_flat
-        if y_p is not None:
-            y_flat = paddle.concat([y_flat, y_p], axis=1)
-        a_ = paddle.matmul(x_flat, y_flat, transpose_x=True) * 200.0
-
-        # mask softmax
-        if consistency_mask is not None:
-            a_ = a_ - 100.0 * (1 - consistency_mask)
+
+        mask_x = F.interpolate(mask_x, size=(64, 64))
+        mask_x = mask_x.transpose((1, 0, 2, 3))
+        mask_x_re = mask_x.tile([1, x.shape[1], 1, 1])
+        mask_x_diff_re = mask_x.tile([1, x_p.shape[1], 1, 1])
+        mask_y = F.interpolate(mask_y, size=(64, 64))
+        mask_y = mask_y.transpose((1, 0, 2, 3))
+        mask_y_re = mask_y.tile([1, y.shape[1], 1, 1])
+        mask_y_diff_re = mask_y.tile([1, y_p.shape[1], 1, 1])
+
+        x_re = x.tile([3, 1, 1, 1])
+        y_re = y.tile([3, 1, 1, 1])
+        x_flat = x_re * mask_x_re
+        y_flat = y_re * mask_y_re
+
+        x_p = x_p.tile([3, 1, 1, 1]) * mask_x_diff_re
+        y_p = y_p.tile([3, 1, 1, 1]) * mask_y_diff_re
+
+        norm_x = paddle.norm(x_p, axis=1,
+                             keepdim=True).tile([1, x_p.shape[1], 1, 1])
+        norm_x = paddle.where(norm_x == 0, paddle.to_tensor(1e10), norm_x)
+        x_p = x_p / norm_x
+        norm_y = paddle.norm(y_p, axis=1,
+                             keepdim=True).tile([1, y_p.shape[1], 1, 1])
+        norm_y = paddle.where(norm_y == 0, paddle.to_tensor(1e10), norm_y)
+        y_p = y_p / norm_y
+
+        x_flat = paddle.concat([x_flat * 0.01, x_p], axis=1)
+        y_flat = paddle.concat([y_flat * 0.01, y_p], axis=1)
+
+        x_flat_re = x_flat.reshape([3, x_flat.shape[1], h * w])
+        y_flat_re = y_flat.reshape([3, y_flat.shape[1], h2 * w2])
+
+        a_ = paddle.matmul(x_flat_re, y_flat_re, transpose_x=True)
+
+        with paddle.no_grad():
+            a_mask = a_ != 0
+
+        a_ *= 200
         a = F.softmax(a_, axis=-1)
+        a = a * a_mask
 
         gamma, beta = self.simple_spade(y)
+        gamma = gamma.tile([3, 1, 1, 1]) * mask_y
+        beta = beta.tile([3, 1, 1, 1]) * mask_y
 
         beta = beta.reshape([-1, h2 * w2, 1])
         beta = paddle.matmul(a, beta)
+        beta = beta.transpose((0, 2, 1))
         beta = beta.reshape([-1, 1, h2, w2])
         gamma = gamma.reshape([-1, h2 * w2, 1])
         gamma = paddle.matmul(a, gamma)
+        gamma = gamma.transpose((0, 2, 1))
         gamma = gamma.reshape([-1, 1, h2, w2])
+
+        beta = (beta[0] + beta[1] + beta[2]).unsqueeze(0)
+        gamma = (gamma[0] + gamma[1] + gamma[2]).unsqueeze(0)
         x = x * (1 + gamma) + beta
 
         for i in range(self.repeat_num):

diff --git a/ppgan/models/makeup_model.py b/ppgan/models/makeup_model.py
@@ -323,9 +323,9 @@ def backward_G(self):
         g_B_eye_loss_his = self.criterionL1(fake_B_eye_masked, fake_match_eye_B)
 
         self.loss_G_A_his = (g_A_eye_loss_his + g_A_lip_loss_his +
-                             g_A_skin_loss_his * 0.1) * 0.01
+                             g_A_skin_loss_his * 0.1) * 0.1
         self.loss_G_B_his = (g_B_eye_loss_his + g_B_lip_loss_his +
-                             g_B_skin_loss_his * 0.1) * 0.01
+                             g_B_skin_loss_his * 0.1) * 0.1
 
         self.losses['G_A_his_loss'] = self.loss_G_A_his
         self.losses['G_B_his_loss'] = self.loss_G_A_his
@@ -343,9 +343,9 @@ def backward_G(self):
         self.loss_B_vgg = self.criterionL2(vgg_fake_B,
                                            vgg_r) * lambda_B * lambda_vgg
 
-        self.loss_rec = (self.loss_cycle_A + self.loss_cycle_B +
-                         self.loss_A_vgg + self.loss_B_vgg) * 0.2
-        self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.2
+        self.loss_rec = (self.loss_cycle_A * 0.2 + self.loss_cycle_B * 0.2 +
+                         self.loss_A_vgg + self.loss_B_vgg) * 0.5
+        self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.1
 
         self.losses['G_A_vgg_loss'] = self.loss_A_vgg
         self.losses['G_B_vgg_loss'] = self.loss_B_vgg

diff --git a/ppgan/utils/options.py b/ppgan/utils/options.py
@@ -57,7 +57,7 @@ def parse_args():
     parser.add_argument("--reference_dir",
                         default="",
                         help="path to reference images")
-    parser.add_argument("--model_path", default="", help="model for loading")
+    parser.add_argument("--model_path", default=None, help="model for loading")
 
     args = parser.parse_args()
 

diff --git a/ppgan/utils/preprocess.py b/ppgan/utils/preprocess.py
@@ -30,11 +30,9 @@ def generate_P_from_lmks(lmks, resize, w, h):
     diff = fix - lmks
     diff = diff.transpose(1, 2, 0)
     diff = cv2.resize(diff, diff_size, interpolation=cv2.INTER_NEAREST)
-    diff = diff.transpose(2, 0, 1).reshape(136, -1)
-    norm = np.linalg.norm(diff, axis=0)
-    P_np = diff / norm
+    diff = diff.transpose(2, 0, 1)
 
-    return P_np
+    return diff
 
 
 def copy_area(tar, src, lms):