-
Notifications
You must be signed in to change notification settings - Fork 12
/
reranker_train.sh
executable file
·65 lines (47 loc) · 1.9 KB
/
reranker_train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
#
# Trains n-best reranker weights on a development set.
#
# Arguments:
#   $1  outputs_dir        directory holding the n-best output of the dev set
#   $2  dev_data           name of the development set
#   $3  optional-gpu-num   optional GPU number
#
# Stop at the first failing command and trace every command as it runs.
set -e
set -x

# The first two arguments are mandatory; print the usage line and give up
# when they are missing.
if [ $# -lt 2 ]; then
  # ${0##*/} strips the directory part of the script path without spawning
  # basename, and is safe for paths that contain spaces.
  echo "Usage: ${0##*/} <outputs_dir> <dev_data> <optional-gpu-num>" >&2
  exit 1
fi
# Path to the Moses SMT checkout, relative to the directory the script is run
# from. Edit this assignment when Moses lives somewhere else.
moses_dir=tools/mosesdecoder

# Guard clause: nothing below can work without Moses, so fail right away.
[ -e "$moses_dir" ] || {
  echo "Moses not found at path: $moses_dir"
  echo "Set variable moses_dir to Moses SMT path"
  exit 1
}
# Locations of the reranker toolkit and of the test/dev data, relative to the
# directory the script is run from.
nbest_reranker=tools/nbest-reranker
test_data_dir=data/test

###############
# training: paths and initial configuration
###############
# $1 is the directory that holds the n-best output, $2 the dev set name.
fairseq_outputs_dir=$1
dev_set=$2

# n-best file of the dev set, and the directories this script writes to.
input_dev=$fairseq_outputs_dir/$dev_set.out
output_dir=$fairseq_outputs_dir/reranking.$dev_set
train_dir=$output_dir/training/
lm_file='models/ngramlm/cclm.trie'

# Quoted so that an outputs directory containing spaces or glob characters
# is still treated as a single path.
mkdir -p "$train_dir"

# Initial feature weights. The quoted delimiter keeps the text literal.
# NOTE(review): F0 has no entry in featstring below; presumably it is the
# score already present in the n-best list - confirm against the reranker.
cat > "$train_dir/rerank_config.ini" <<'EOF'
[weight]
F0= 0.5
EditOps0= 0.2 0.2 0.2
LM0= 0.5
BERT0= 0.5
WordPenalty0= -1
EOF

# Feature specification handed to augmenter.py through its -f option; the
# feature names match the weight keys written above.
featstring="EditOps(name='EditOps0'), LM('LM0', '$lm_file', normalize=False), BERT(name='BERT0', cased=True, large=False), WordPenalty(name='WordPenalty0')"
########################
##### TRAINING #########
########################
# Reformat the n-best file; the result is written next to the input with a
# .mosesfmt suffix. Paths are quoted because they derive from the
# user-supplied outputs directory.
# NOTE(review): --debpe presumably undoes BPE segmentation - confirm in
# scripts/nbest_reformat.py.
python2.7 scripts/nbest_reformat.py -i "$input_dev" --debpe > "$input_dev.mosesfmt"
# # augmenting the dev nbest
# GPU selection: take the optional third argument when it is supplied and
# non-empty, otherwise fall back to GPU 0.
device=${3:-0}
# Run the augmenter on the reformatted n-best list with the configured
# features, restricted to the selected GPU. Every path is quoted because it
# derives from the user-supplied outputs directory and dev set name.
CUDA_VISIBLE_DEVICES="$device" python3 "$nbest_reranker/augmenter.py" \
  -s "$test_data_dir/$dev_set/$dev_set.tok.src" \
  -i "$input_dev.mosesfmt" \
  -o "$train_dir/$dev_set.moses-nbest.augmented.txt" \
  -f "$featstring"

# # training the nbest to obtain the weights
# Starts from the weights in rerank_config.ini and tunes against the m2
# reference of the dev set; output goes to the training directory.
python3 "$nbest_reranker/train.py" \
  -i "$train_dir/$dev_set.moses-nbest.augmented.txt" \
  -r "$test_data_dir/$dev_set/$dev_set.m2" \
  -c "$train_dir/rerank_config.ini" \
  --threads 12 \
  --tuning-metric m2 \
  --predictable-seed \
  -o "$train_dir" \
  --moses-dir "$moses_dir" \
  --no-add-weight

# Copy the weights file from the training directory to the reranking output
# directory under a name that carries the dev set.
cp "$train_dir/weights.txt" "$output_dir/weights.$dev_set.txt"