-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_fast-align.sh
28 lines (21 loc) · 1015 Bytes
/
run_fast-align.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env bash
#
# Word-align a parallel corpus with fast_align.
#
# Usage:
#   run_fast-align.sh LG1 LG2 LG1_WP_TOK LG2_WP_TOK OUTPUT_DIR
#
# Arguments:
#   $1 LG1         label of the first language; only used in output file names
#   $2 LG2         label of the second language; only used in output file names
#   $3 LG1_WP_TOK  text file for LG1 (presumably word-piece tokenized, going by
#                  the name -- confirm against the caller)
#   $4 LG2_WP_TOK  text file for LG2; line i is paired with line i of LG1_WP_TOK
#   $5 OUTPUT_DIR  directory for all generated files (created when missing)
#
# Files written to OUTPUT_DIR:
#   text.LG1-LG2      bitext, one "LG1 line ||| LG2 line" pair per line
#   cleaned.LG1-LG2   bitext after utils/clean_bitext.py
#   forward.LG1-LG2   fast_align output, forward direction
#   reverse.LG1-LG2   fast_align output, reverse direction (-r)
#   align.LG1-LG2     symmetrized alignment produced by atools
#
# A stage is skipped when its output file already exists. utils/ and tools/ are
# referenced by relative path, so run this from the directory containing them.

set -euo pipefail

if [[ $# -lt 5 ]]; then
  echo "Usage: ${0##*/} LG1 LG2 LG1_WP_TOK LG2_WP_TOK OUTPUT_DIR" >&2
  exit 2
fi

lg1=$1
lg2=$2
LG1_WP_TOK=$3
LG2_WP_TOK=$4
OUTPUT_DIR=$5

bitext="$OUTPUT_DIR/text.$lg1-$lg2"
cleaned="$OUTPUT_DIR/cleaned.$lg1-$lg2"
forward="$OUTPUT_DIR/forward.$lg1-$lg2"
reverse="$OUTPUT_DIR/reverse.$lg1-$lg2"
align="$OUTPUT_DIR/align.$lg1-$lg2"

# Path of the cache file the current stage is producing. If the script exits
# while it is set, the file is removed so that a truncated output is not
# mistaken for a finished stage by the -f checks on the next run.
in_progress=""
cleanup() {
  if [[ -n "$in_progress" ]]; then
    rm -f -- "$in_progress"
  fi
}
trap cleanup EXIT

mkdir -p -- "$OUTPUT_DIR"

if [[ ! -f "$bitext" ]]; then
  in_progress=$bitext
  # paste cycles through the characters of the -d list (' ', '|', '|', '|', ' ')
  # between consecutive inputs. The four '-' operands read an empty stdin and
  # contribute nothing, so the two real files end up joined by " ||| ".
  paste -d ' ||| ' "$LG1_WP_TOK" - - - - "$LG2_WP_TOK" < /dev/null > "$bitext"
  in_progress=""
fi
echo "Bitext saved in $bitext"

if [[ ! -f "$cleaned" ]]; then
  in_progress=$cleaned
  python3 utils/clean_bitext.py --bitxt "$bitext" --save "$cleaned"
  in_progress=""
fi
echo "Cleaned bitext saved in $cleaned"

if [[ ! -f "$align" ]]; then
  # Only align.* is checked as the cache marker; forward.* and reverse.* are
  # regenerated whenever this stage runs.
  in_progress=$align
  # Same options in both directions; -r selects the reverse model and
  # -I 10 sets the number of training iterations (see the fast_align README).
  tools/fast_align/build/fast_align -i "$cleaned" -d -o -v -I 10 > "$forward"
  tools/fast_align/build/fast_align -i "$cleaned" -d -o -v -r -I 10 > "$reverse"
  # Symmetrize the two directional alignments with grow-diag-final-and.
  tools/fast_align/build/atools -i "$forward" -j "$reverse" -c grow-diag-final-and > "$align"
  in_progress=""
fi
echo "Alignments computed using fast-align in $align"