-
Notifications
You must be signed in to change notification settings - Fork 21
/
prepare_data.sh
executable file
·62 lines (54 loc) · 1.23 KB
/
prepare_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
#######################################
# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   the usage text, on stderr
# Returns:   never returns; always exits with status 1
#######################################
usage() {
  # The function always exits with a failure status, so the text is a
  # diagnostic: send it to stderr and keep stdout clean.
  cat >&2 <<EOF
Usage: $0 -v <AMR corpus version. Possible values: 1 or 2> -p <Path to AMR corpus>
 Make sure your AMR corpus is untouched.
 It should be organized like below:
 <AMR corpus>
 data/
 docs/
 index.html
EOF
  exit 1
}
#######################################
# Parse the command-line options into the globals v and p.
# Globals:   v      (written) - AMR corpus version, exactly "1" or "2"
#            p      (written) - path to the AMR corpus
#            OPTIND (written by getopts)
# Arguments: the script's command-line arguments
# Returns:   0 once all options are consumed; every invalid input is
#            handed to usage
#######################################
parse_args() {
  local opt
  # Leading ':' selects silent error reporting. 'h' takes no argument,
  # so a bare -h reaches its own arm instead of being reported as an
  # option with a missing argument.
  while getopts ":hv:p:" opt; do
    case "${opt}" in
      h)
        usage
        ;;
      v)
        # Compare as a string. Arithmetic evaluation of raw user input
        # would accept values such as "02" or "1+0", which then fail the
        # later string comparison against "2".
        case "${OPTARG}" in
          1 | 2) v=${OPTARG} ;;
          *) usage ;;
        esac
        ;;
      p)
        p=${OPTARG}
        ;;
      :)
        # -v or -p was given without its argument.
        usage
        ;;
      \?)
        # Unknown option.
        usage
        ;;
    esac
  done
}

# Start from empty values so same-named variables inherited from the
# environment cannot stand in for missing options.
v=
p=
parse_args "$@"
shift $((OPTIND - 1))
# Both the corpus version and the corpus path are mandatory. The
# expansions are quoted inside [[ ]] so a path containing spaces or glob
# characters is tested as a single word instead of breaking the test.
if [[ -z "${v}" || -z "${p}" ]]; then
  usage
fi
# Resolve the input split directories and the output directory.
# The split locations under the corpus path are the same for either
# version; only the output directory depends on the selected version.
SPLIT_DIR="${p}/data/amrs/split"
TRAIN="${SPLIT_DIR}/training"
DEV="${SPLIT_DIR}/dev"
TEST="${SPLIT_DIR}/test"
case "${v}" in
  2) DATA_DIR=data/AMR/amr_2.0 ;;
  *) DATA_DIR=data/AMR/amr_1.0 ;;
esac
#######################################
# Concatenate every file of one corpus split into a single output file,
# dropping the first line of each input file.
# Arguments: $1 - directory holding the split's files
#            $2 - output file to create or overwrite
# Outputs:   error messages on stderr
# Returns:   1 if the directory is missing or has no entries,
#            otherwise the exit status of awk
#######################################
merge_split() {
  local src_dir=$1
  local out_file=$2
  local -a inputs

  if [[ ! -d "${src_dir}" ]]; then
    echo "Error: split directory not found: ${src_dir}" >&2
    return 1
  fi

  # An unmatched glob stays as the literal pattern, so check that the
  # first expanded entry really exists before handing the list to awk.
  inputs=("${src_dir}"/*)
  if [[ ! -e "${inputs[0]}" ]]; then
    echo "Error: no files found in ${src_dir}" >&2
    return 1
  fi

  # FNR restarts at 1 for each input file, so this skips the first line
  # of every file and prints everything else.
  awk 'FNR!=1' "${inputs[@]}" > "${out_file}"
}

echo "Preparing data in ${DATA_DIR}...$(date)"
# Stop at the first failure so "Done" is never printed after a split
# could not be produced.
mkdir -p "${DATA_DIR}" || exit 1
merge_split "${TRAIN}" "${DATA_DIR}/train.txt" || exit 1
merge_split "${DEV}" "${DATA_DIR}/dev.txt" || exit 1
merge_split "${TEST}" "${DATA_DIR}/test.txt" || exit 1
echo "Done..$(date)"