Skip to content

Commit

Permalink
fix: rw scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
engisalor committed Oct 11, 2024
1 parent 212f88a commit ba35b18
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions rw_corpora_update.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,13 +10,13 @@ fi
# NOTE(review): this span is a rendered diff hunk without +/- markers, so
# pre-change and post-change versions of the same assignments appear together.
# The unquoted `find -name` lines appear to be the 4 deleted lines and the
# quoted ones the 4 added lines — confirm against the commit.

# Step 1: run the Python updater, forwarding this script's first two
# positional arguments unchanged; exit with status 1 if it fails.
echo "... get and process source data"
python3 rw_corpora_update.py "${1}" "${2}" || exit 1
# Step 2: collect the per-language source .txt files and pass them to the
# Stanza pipeline's `to-conll` subcommand.
echo "... convert source files to .conllu (run Stanza NLP)"
# Pre-change form (presumably removed by this commit): the -name pattern is
# unquoted, so the shell may expand the glob against files in the current
# directory before find ever sees it.
ES_FILES=$(find -name reliefweb_es*.txt)
FR_FILES=$(find -name reliefweb_fr*.txt)
EN_FILES=$(find -name reliefweb_en*.txt)
# Post-change form (presumably added by this commit): the pattern is quoted so
# find receives the glob literally and does the matching itself.
# NOTE(review): no start directory is given; GNU find defaults to the current
# directory, but other find implementations may reject this — confirm target.
ES_FILES=$(find -name "reliefweb_es*.txt")
FR_FILES=$(find -name "reliefweb_fr*.txt")
EN_FILES=$(find -name "reliefweb_en*.txt")
# The *_FILES variables are expanded unquoted so that each path found becomes
# its own argument. NOTE(review): paths containing whitespace or glob
# characters would be split/expanded incorrectly, and an empty result passes
# no file arguments at all.
# `-l es|fr|en` presumably selects the language for the pipeline — confirm in
# base_pipeline.py. Each call aborts the script with status 1 on failure.
python3 ./pipeline/stanza/base_pipeline.py to-conll -l es $ES_FILES || exit 1
python3 ./pipeline/stanza/base_pipeline.py to-conll -l fr $FR_FILES || exit 1
python3 ./pipeline/stanza/base_pipeline.py to-conll -l en $EN_FILES || exit 1
# Step 3: collect every generated .conllu file (all languages) and pass them
# to the pipeline's `conll-to-vert` subcommand.
echo "... convert files to .vert and compress"
# Pre-change form (unquoted pattern, subject to shell glob expansion).
CONLLU_FILES=$(find -name reliefweb_*.txt.conllu)
# Post-change form (quoted pattern, matched by find).
CONLLU_FILES=$(find -name "reliefweb_*.txt.conllu")
python3 ./pipeline/stanza/base_pipeline.py conll-to-vert $CONLLU_FILES || exit 1
# Reached only if every step above succeeded.
echo "... rw_corpora_update.sh completed"

0 comments on commit ba35b18

Please sign in to comment.