-
Notifications
You must be signed in to change notification settings - Fork 45
/
train.sh
54 lines (41 loc) · 1.08 KB
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env bash
# Launch a training run in the background.
#
# Usage: train.sh <degradation> <model>
#   <degradation>  selects the experiments_<degradation> directory under root_dir
#   <model>        sub-directory of that directory; together they form exp_dir
#
# Effects:
#   - creates <exp_dir>/train
#   - optionally copies <root_dir>/codes into it (when debug > 0)
#   - starts codes/main.py in the background, with stdout and stderr redirected
#     to <exp_dir>/train/train<suffix>.log
# Exit status: 2 on bad usage, 1 if a from-scratch run would reuse an existing
# train directory, otherwise 0 once the background job has been started.
set -euo pipefail

if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <degradation> <model>\n' "${0##*/}" >&2
  exit 2
fi

# basic settings
root_dir=.
degradation=$1
model=$2
gpu_ids=0,1 # set to -1 to use cpu
master_port=4321
debug=0

# resume training (start_iter > 0) or train from scratch (start_iter = 0)
start_iter=0
if (( start_iter > 0 )); then
  # Resumed runs get their own option file, log file and code backup names.
  suffix="_iter${start_iter}"
else
  suffix=''
fi

exp_dir="${root_dir}/experiments_${degradation}/${model}"
train_dir="${exp_dir}/train"

# check
# Only a from-scratch run must start with no train directory. A resumed run
# writes its suffixed files into the directory left by the earlier run, so the
# guard must not reject it.
if (( start_iter <= 0 )) && [[ -d "${train_dir}" ]]; then
  echo ">> Experiment dir already exists: $exp_dir/train" >&2
  echo ">> Please delete it for retraining" >&2
  exit 1
fi

# make dir
mkdir -p -- "${train_dir}"

# backup codes
if (( debug > 0 )); then
  cp -r -- "${root_dir}/codes" "${train_dir}/codes_backup${suffix}"
fi

# run
# The number of comma-separated entries in gpu_ids decides whether the
# distributed launcher is used.
num_gpus=$(printf '%s\n' "${gpu_ids}" | awk -F',' '{ print NF }')

# Build the command as an array so that every argument stays a single word,
# even when a path contains spaces.
cmd=(python)
if (( num_gpus > 1 )); then
  cmd+=(
    -m torch.distributed.launch
    --nproc_per_node "${num_gpus}"
    --master_port "${master_port}"
  )
fi
cmd+=(
  "${root_dir}/codes/main.py"
  --exp_dir "${exp_dir}"
  --mode train
  --opt "train${suffix}.yml"
  --gpu_ids "${gpu_ids}"
)

# Start the job in the background; all of its output goes to the log file.
CUDA_VISIBLE_DEVICES="${gpu_ids}" \
  "${cmd[@]}" \
  > "${train_dir}/train${suffix}.log" 2>&1 &