Skip to content

Commit

Permalink
Integrate with visualdl (PaddlePaddle#115)
Browse files Browse the repository at this point in the history
* Integrate with visualdl
* visualdl: add doc and control paramter
* visual dl: move import, add install doc
  • Loading branch information
zhen8838 authored Dec 16, 2020
1 parent 6c81cc3 commit 202dce6
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 29 deletions.
6 changes: 6 additions & 0 deletions docs/en_US/get_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ output_dir
├── epoch002_rec_A.png
└── epoch002_rec_B.png
```

You can also add the parameter ```enable_visualdl: true``` to the configuration file to use [PaddlePaddle VisualDL](https://github.com/PaddlePaddle/VisualDL) to record the metrics and images generated during training, and then run the following command to monitor the training process:
```
visualdl --logdir output_dir/CycleGANModel-2020-10-29-09-21/
```

#### Recovery of training

By default, the checkpoint of the previous epoch is saved during the training process to facilitate the recovery of training.
Expand Down
9 changes: 8 additions & 1 deletion docs/en_US/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Note: command above will install paddle with cuda10.2,if your installed cuda i

Visit home page of [paddlepaddle](https://www.paddlepaddle.org.cn/install/quick) for support of other systems, such as Windows10.

### 2. Install paddleGAN
### 2. Install paddleGAN

#### 2.1 Install through pip

Expand Down Expand Up @@ -59,3 +59,10 @@ If you need to use ppgan to handle video-related tasks, you need to install ffmp
```
conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge
```

#### 4.2 Visual DL
If you want to use [PaddlePaddle VisualDL](https://github.com/PaddlePaddle/VisualDL) to monitor the training process, please install `VisualDL` (for usage details, refer to [get_started](./get_started.md)):

```
python -m pip install visualdl -i https://mirror.baidu.com/pypi/simple
```
4 changes: 4 additions & 0 deletions docs/zh_CN/get_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ output_dir
├── epoch002_rec_A.png
└── epoch002_rec_B.png
```
同时可以通过在配置文件中添加参数```enable_visualdl: true```使用[飞桨VisualDL](https://github.com/PaddlePaddle/VisualDL)对训练过程产生的指标或生成的图像进行记录,并运行相应命令对训练过程进行实时监控:
```
visualdl --logdir output_dir/CycleGANModel-2020-10-29-09-21/
```

#### 恢复训练

Expand Down
8 changes: 8 additions & 0 deletions docs/zh_CN/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,11 @@ pip install -v -e . # or "python setup.py develop"
```
conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge
```

#### 4.2 Visual DL

如果需要使用[飞桨VisualDL](https://github.com/PaddlePaddle/VisualDL)对训练过程进行可视化监控,请安装`VisualDL`(使用方法请参考[这里](./get_started.md)):

```
python -m pip install visualdl -i https://mirror.baidu.com/pypi/simple
```
71 changes: 57 additions & 14 deletions ppgan/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,18 @@ def __init__(self, cfg):
self.distributed_data_parallel()

self.logger = logging.getLogger(__name__)
self.enable_visualdl = cfg.get('enable_visualdl', False)
if self.enable_visualdl:
import visualdl
self.vdl_logger = visualdl.LogWriter(logdir=cfg.output_dir)

# base config
self.output_dir = cfg.output_dir
self.epochs = cfg.epochs
self.start_epoch = 1
self.current_epoch = 1
self.batch_id = 0
self.global_steps = 0
self.weight_interval = cfg.snapshot_config.interval
self.log_interval = cfg.log_config.interval
self.visual_interval = cfg.log_config.visiual_interval
Expand Down Expand Up @@ -106,7 +111,7 @@ def train(self):

if i % self.visual_interval == 0:
self.visual('visual_train')

self.global_steps += 1
step_start_time = time.time()

self.logger.info(
Expand Down Expand Up @@ -165,7 +170,9 @@ def validate(self):
tensor2img(current_visuals['gt'][j], (0., 1.)),
**self.cfg.validate.metrics.ssim)

self.visual('visual_val', visual_results=visual_results)
self.visual('visual_val',
visual_results=visual_results,
step=self.batch_id)

if i % self.log_interval == 0:
self.logger.info('val iter: [%d/%d]' %
Expand Down Expand Up @@ -201,7 +208,10 @@ def test(self):
name = '%s_%s' % (basename, k)
visual_results.update({name: img_tensor[j]})

self.visual('visual_test', visual_results=visual_results)
self.visual('visual_test',
visual_results=visual_results,
step=self.batch_id,
is_save_image=True)

if i % self.log_interval == 0:
self.logger.info('Test iter: [%d/%d]' %
Expand All @@ -215,6 +225,8 @@ def print_log(self):

for k, v in losses.items():
message += '%s: %.3f ' % (k, v)
if self.enable_visualdl:
self.vdl_logger.add_scalar(k, v, step=self.global_steps)

if hasattr(self, 'step_time'):
message += 'batch_cost: %.5f sec ' % self.step_time
Expand All @@ -240,26 +252,48 @@ def current_learning_rate(self):
for optimizer in self.model.optimizers.values():
return optimizer.get_lr()

def visual(self, results_dir, visual_results=None):
def visual(self,
results_dir,
visual_results=None,
step=None,
is_save_image=False):
"""
visual the images, use visualdl or directly write to the directory
Parameters:
results_dir (str) -- directory name which contains saved images
visual_results (dict) -- the results images dict
step (int) -- global steps, used in visualdl
is_save_image (bool) -- weather write to the directory or visualdl
"""
self.model.compute_visuals()

if visual_results is None:
visual_results = self.model.get_current_visuals()

if self.cfg.is_train:
msg = 'epoch%.3d_' % self.current_epoch
else:
msg = ''

makedirs(os.path.join(self.output_dir, results_dir))
min_max = self.cfg.get('min_max', None)
if min_max is None:
min_max = (-1., 1.)
image_num = self.cfg.get('image_num', None)
if (image_num is None) or (not self.enable_visualdl):
image_num = 1
for label, image in visual_results.items():
image_numpy = tensor2img(image, min_max)
img_path = os.path.join(self.output_dir, results_dir,
msg + '%s.png' % (label))
save_image(image_numpy, img_path)
image_numpy = tensor2img(image, min_max, image_num)
if (not is_save_image) and self.enable_visualdl:
self.vdl_logger.add_image(
results_dir + '/' + label,
image_numpy,
step=step if step else self.global_steps,
dataformats="HWC" if image_num == 1 else "NCHW")
else:
if self.cfg.is_train:
msg = 'epoch%.3d_' % self.current_epoch
else:
msg = ''
makedirs(os.path.join(self.output_dir, results_dir))
img_path = os.path.join(self.output_dir, results_dir,
msg + '%s.png' % (label))
save_image(image_numpy, img_path)

def save(self, epoch, name='checkpoint', keep=1):
if self.local_rank != 0:
Expand Down Expand Up @@ -299,6 +333,7 @@ def resume(self, checkpoint_path):
state_dicts = load(checkpoint_path)
if state_dicts.get('epoch', None) is not None:
self.start_epoch = state_dicts['epoch'] + 1
self.global_steps = self.steps_per_epoch * state_dicts['epoch']

for net_name, net in self.model.nets.items():
net.set_state_dict(state_dicts[net_name])
Expand All @@ -311,3 +346,11 @@ def load(self, weight_path):

for net_name, net in self.model.nets.items():
net.set_state_dict(state_dicts[net_name])

def close(self):
"""
when finish the training need close file handler or other.
"""
if self.enable_visualdl:
self.vdl_logger.close()
54 changes: 42 additions & 12 deletions ppgan/utils/visual.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from PIL import Image

irange = range


def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False):
"""Make a grid of images.
Args:
Expand Down Expand Up @@ -82,35 +84,63 @@ def norm_range(t, range):
ymaps = int(math.ceil(float(nmaps) / xmaps))
height, width = int(tensor.shape[2]), int(tensor.shape[3])
num_channels = tensor.shape[1]
canvas = paddle.zeros((num_channels, height * ymaps, width * xmaps), dtype=tensor.dtype)
canvas = paddle.zeros((num_channels, height * ymaps, width * xmaps),
dtype=tensor.dtype)
k = 0
for y in irange(ymaps):
for x in irange(xmaps):
if k >= nmaps:
break
canvas[:, y * height:(y + 1) * height, x * width:(x + 1) * width] = tensor[k]
canvas[:, y * height:(y + 1) * height,
x * width:(x + 1) * width] = tensor[k]
k = k + 1
return canvas


def tensor2img(input_image, min_max=(-1., 1.), imtype=np.uint8):
def tensor2img(input_image, min_max=(-1., 1.), image_num=1, imtype=np.uint8):
""""Converts a Tensor array into a numpy image array.
Parameters:
input_image (tensor) -- the input image tensor array
image_num (int) -- the convert iamge numbers
imtype (type) -- the desired type of the converted numpy array
"""
def processing(im, transpose=True):
""""processing one numpy image.
Parameters:
im (tensor) -- the input image numpy array
"""
if im.shape[0] == 1: # grayscale to RGB
im = np.tile(im, (3, 1, 1))
im = im.clip(min_max[0], min_max[1])
im = (im - min_max[0]) / (min_max[1] - min_max[0])
im = im * 255.0 # scaling
im = np.transpose(im, (1, 2, 0)) if transpose else im # tranpose
return im

if not isinstance(input_image, np.ndarray):
image_numpy = input_image.numpy() # convert it into a numpy array
if len(image_numpy.shape) == 4:
image_numpy = image_numpy[0]
if image_numpy.shape[0] == 1: # grayscale to RGB
image_numpy = np.tile(image_numpy, (3, 1, 1))
image_numpy = image_numpy.clip(min_max[0], min_max[1])
image_numpy = (image_numpy - min_max[0]) / (min_max[1] - min_max[0])
image_numpy = (np.transpose(
image_numpy,
(1, 2, 0))) * 255.0 # post-processing: tranpose and scaling
ndim = image_numpy.ndim
if ndim == 4:
image_numpy = image_numpy[0:image_num]
elif ndim == 3:
# NOTE for eval mode, need add dim
image_numpy = np.expand_dims(image_numpy, 0)
image_num = 1
else:
raise ValueError(
"Image numpy ndim is {} not 3 or 4, Please check data".format(
ndim))

if image_num == 1:
# for one image, log HWC image
image_numpy = processing(image_numpy[0])
else:
# for more image, log NCHW image
image_numpy = np.stack(
[processing(im, transpose=False) for im in image_numpy])

else: # if it is a numpy array, do nothing
image_numpy = input_image
return image_numpy.astype(imtype)
Expand Down
8 changes: 6 additions & 2 deletions tools/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,12 @@ def main(args, cfg):
if args.evaluate_only:
trainer.test()
return

trainer.train()
# training, when keyboard interrupt save weights
try:
trainer.train()
except KeyboardInterrupt as e:
trainer.save(trainer.current_epoch)
trainer.close()


if __name__ == '__main__':
Expand Down

0 comments on commit 202dce6

Please sign in to comment.