Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding yomdle_fa egs #2702

Merged
merged 24 commits into from
Oct 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions egs/cifar/v1/image/ocr/make_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
parser.add_argument('--padding', type=int, default=5,
help='Number of white pixels to pad on the left'
'and right side of the image.')
parser.add_argument('--num-channels', type=int, default=1,
help='Number of color channels')
parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False,
help="Flip the image left-right for right to left languages")
parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False,
Expand Down Expand Up @@ -84,9 +86,9 @@ def horizontal_pad(im, allowed_lengths = None):
left_padding = int(padding // 2)
right_padding = padding - left_padding
dim_y = im.shape[0] # height
im_pad = np.concatenate((255 * np.ones((dim_y, left_padding),
im_pad = np.concatenate((255 * np.ones((dim_y, left_padding, args.num_channels),
dtype=int), im), axis=1)
im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding),
im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding, args.num_channels),
dtype=int)), axis=1)
return im_pad1

Expand Down Expand Up @@ -150,7 +152,13 @@ def get_scaled_image_aug(im, mode='normal'):
if im_horizontal_padded is None:
num_fail += 1
continue
data = np.transpose(im_horizontal_padded, (1, 0))
if args.num_channels == 1:
data = np.transpose(im_horizontal_padded, (1, 0))
elif args.num_channels == 3:
H = im_horizontal_padded.shape[0]
W = im_horizontal_padded.shape[1]
C = im_horizontal_padded.shape[2]
data = np.reshape(np.transpose(im_horizontal_padded, (1, 0, 2)), (W, H * C))
data = np.divide(data, 255.0)
num_ok += 1
write_kaldi_matrix(out_fh, data, image_id)
Expand Down
3 changes: 3 additions & 0 deletions egs/yomdle_fa/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This directory contains example scripts for OCR on the Yomdle and Slam datasets.
Training is done on the Yomdle dataset and testing is done on Slam.
LM rescoring is also done with extra corpus data obtained from various newswires (e.g. Hamshahri)
13 changes: 13 additions & 0 deletions egs/yomdle_fa/v1/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export cmd="queue.pl"
1 change: 1 addition & 0 deletions egs/yomdle_fa/v1/image
35 changes: 35 additions & 0 deletions egs/yomdle_fa/v1/local/augment_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora

# Apache 2.0
# This script performs data augmentation.

nj=4
cmd=run.pl
feat_dim=40
fliplr=false
echo "$0 $@"

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

srcdir=$1
outdir=$2
datadir=$3

mkdir -p $datadir/augmentations
echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp"

for set in aug1; do
image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \
$srcdir $datadir/augmentations/$set
cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
--fliplr $fliplr --augment true $datadir/augmentations/$set
done

echo " combine original data and data from different augmentations"
utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1
cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt
58 changes: 58 additions & 0 deletions egs/yomdle_fa/v1/local/bidi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
# Copyright 2018 Chun-Chieh Chang

# This script is largely written by Stephen Rawls
# and uses the python package https://pypi.org/project/PyICU_BiDi/
# The code leaves right to left text alone and reverses left to right text.

import icu_bidi
import io
import sys
import unicodedata
# R=strong right-to-left; AL=strong arabic right-to-left
rtl_set = set(chr(i) for i in range(sys.maxunicode)
if unicodedata.bidirectional(chr(i)) in ['R','AL'])
def determine_text_direction(text):
# Easy case first
for char in text:
if char in rtl_set:
return icu_bidi.UBiDiLevel.UBIDI_RTL
# If we made it here we did not encounter any strongly rtl char
return icu_bidi.UBiDiLevel.UBIDI_LTR

def utf8_visual_to_logical(text):
text_dir = determine_text_direction(text)

bidi = icu_bidi.Bidi()
bidi.inverse = True
bidi.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_INVERSE_LIKE_DIRECT
bidi.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT # icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS

bidi.set_para(text, text_dir, None)

res = bidi.get_reordered(0 | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING)

return res

def utf8_logical_to_visual(text):
text_dir = determine_text_direction(text)

bidi = icu_bidi.Bidi()

bidi.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_DEFAULT
bidi.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT #icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS

bidi.set_para(text, text_dir, None)

res = bidi.get_reordered(0 | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING)

return res


##main##
sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf8")
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf8")
for line in sys.stdin:
line = line.strip()
line = utf8_logical_to_visual(line)[::-1]
sys.stdout.write(line + '\n')
67 changes: 67 additions & 0 deletions egs/yomdle_fa/v1/local/chain/compare_wer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash

# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}

# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora

if [ $# == 0 ]; then
echo "Usage: $0: <dir1> [<dir2> ... ]"
echo "e.g.: $0 exp/chain/cnn{1a,1b}"
exit 1
fi

echo "# $0 $*"
used_epochs=false

echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
echo

echo -n "# WER "
for x in $*; do
wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}')
printf "% 10s" $wer
done
echo

echo -n "# CER "
for x in $*; do
cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}')
printf "% 10s" $cer
done
echo


if $used_epochs; then
exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

echo -n "# Final train prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train prob (xent) "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob (xent) "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo
Loading