Skip to content

Commit

Permalink
[egs] Added tri3b and chain training for Aurora4 (#3638)
Browse files Browse the repository at this point in the history
  • Loading branch information
desh2608 authored and danpovey committed Oct 4, 2019
1 parent 68cfbba commit 743eb23
Show file tree
Hide file tree
Showing 8 changed files with 764 additions and 127 deletions.
19 changes: 15 additions & 4 deletions egs/aurora4/s5/RESULTS
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
for x in exp/{mono,tri,sgmm,nnet,dnn}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
for x in exp/{mono,tri,sgmm,nnet,dnn,chain/tdnn*}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

%WER 19.61 [ 14698 / 74942, 1233 ins, 3759 del, 9706 sub ] exp/tri2b_multi/decode_tgpr_5k_eval92/wer_13
%WER 13.93 [ 10437 / 74942, 732 ins, 2695 del, 7010 sub ] exp/tri3a_dnn/decode_tgpr_5k_eval92/wer_10
%WER 13.61 [ 10202 / 74942, 660 ins, 2987 del, 6555 sub ] exp/tri4a_dnn/decode_tgpr_5k_eval92/wer_11
# mono
%WER 37.42 [ 14223 / 38010, 1030 ins, 2613 del, 10580 sub ] exp/mono0a_multi/decode_tgpr_0166/wer_10
%WER 38.18 [ 28612 / 74942, 1919 ins, 5319 del, 21374 sub ] exp/mono0a_multi/decode_tgpr_eval92/wer_10

# tri2b
%WER 20.42 [ 7763 / 38010, 827 ins, 1905 del, 5031 sub ] exp/tri2b_multi/decode_tgpr_5k_0166/wer_12
%WER 19.61 [ 14728 / 74942, 1411 ins, 3548 del, 9769 sub ] exp/tri2b_multi/decode_tgpr_5k_eval92/wer_12

# tri3b
%WER 15.71 [ 5970 / 38010, 641 ins, 1403 del, 3926 sub ] exp/tri3b_multi/decode_tgpr_0166/wer_13
%WER 15.28 [ 11454 / 74942, 1082 ins, 2633 del, 7739 sub ] exp/tri3b_multi/decode_tgpr_eval92/wer_13

# chain
%WER 7.88 [ 2994 / 38010, 216 ins, 1045 del, 1733 sub ] exp/chain/tdnn1a_sp/decode_tgpr_5k_0166/wer_15
%WER 7.67 [ 5745 / 74942, 392 ins, 1758 del, 3595 sub ] exp/chain/tdnn1a_sp/decode_tgpr_5k_eval92/wer_13

for x in /mnt/matylda3/qmallidi/Karels_New-Parametric-ReLU/kaldi/egs/aurora4/s5_PReLU/exp/{mono,tri,sgmm,nnet,dnn}*/decode*; do [ -d $x ] && grep WER $x/wer_* | /mnt/matylda5/iveselyk/DEVEL/kaldi-official/egs/aurora4/s5/utils/best_wer.sh; done
10 changes: 10 additions & 0 deletions egs/aurora4/s5/conf/mfcc_hires.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
1 change: 1 addition & 0 deletions egs/aurora4/s5/conf/online_cmvn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
137 changes: 137 additions & 0 deletions egs/aurora4/s5/local/chain/compare_wer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#!/bin/bash

# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
# For use with discriminatively trained systems you specify the epochs after a colon:
# for instance,
# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}


# With no experiment directories given there is nothing to compare;
# print usage and exit non-zero.
if [ $# == 0 ]; then
echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]"
echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
echo "or (with epoch numbers for discriminative training):"
echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
exit 1
fi

# Echo the invocation so the output is self-documenting when pasted into
# a RESULTS file (the leading '#' keeps it a comment there).
echo "# $0 $*"

# Optional leading flags. Note: they are only recognized in this order
# (--looped before --online), each consuming one positional argument.
include_looped=false
include_online=false
if [[ "$1" == "--looped" ]]; then
  include_looped=true
  shift
fi
if [[ "$1" == "--online" ]]; then
  include_online=true
  shift
fi


# Becomes true if any argument carries an ":<epoch>" suffix
# (discriminatively trained system); set by set_names below.
used_epochs=false

# this function set_names is used to separate the epoch-related parts of the name
# [for discriminative training] and the regular parts of the name.
# If called with a colon-free directory name, like:
# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
# If called with something like:
# set_names exp/chain/tdnn_d_sp_smbr:3
# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"


# set_names <dir>[:<epoch>]
# Splits an argument like exp/chain/tdnn_d_sp_smbr:3 into its directory part
# and epoch part, setting the globals:
#   dirname      - part before the first ':' (whole arg if no colon)
#   epoch_infix  - "_epoch<N>" if an epoch was given, "" otherwise
#   used_epochs  - set to true (globally) whenever an epoch was given
# Uses parameter expansion instead of echo|cut: no subprocesses, and safe
# for arguments containing spaces or glob characters (the old unquoted
# `echo $1 | cut` and `[ -z $epoch ]` were not).
set_names() {
  if [ $# != 1 ]; then
    echo "compare_wer_general.sh: internal error"
    exit 1 # exit the program
  fi
  dirname="${1%%:*}"            # everything before the first ':'
  if [[ "$1" == *:* ]]; then
    epoch="${1#*:}"             # drop up to the first ':' ...
    epoch="${epoch%%:*}"        # ... and keep only the next field (matches `cut -f2`)
  else
    epoch=""                    # no colon: matches `cut -s` suppressing the line
  fi
  if [ -z "$epoch" ]; then
    epoch_infix=""
  else
    used_epochs=true
    epoch_infix=_epoch${epoch}
  fi
}



# Header row: one right-padded column per system (basename of each dir).
echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
echo

# Row labels for the two test sets; index n below selects both the label
# and the matching decode directory suffix.
strings=(
"# WER eval92 (tgpr_5k) "
"# WER 0166 (tgpr_5k) ")

# One pass per test set: print the label, then one WER column per system.
for n in 0 1; do
echo -n "${strings[$n]}"
for x in $*; do
set_names $x # sets $dirname and $epoch_infix
# decode_names is (re)assigned here; the looped/online branches below
# reuse the value left by this loop for the same $n.
decode_names=(tgpr_5k_eval92 tgpr_5k_0166)

# Pick the best WER across all wer_* files (best_wer.sh); field 2 is the number.
wer=$(cat $dirname/decode_${decode_names[$n]}/wer* | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
done
echo
# Optional extra row: looped decoding results (decode_looped_* dirs).
if $include_looped; then
echo -n "# [looped:] "
for x in $*; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer* | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
done
echo
fi
# Optional extra row: online decoding results (from the ${dirname}_online dir).
if $include_online; then
echo -n "# [online:] "
for x in $*; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer* | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
done
echo
fi
done


if $used_epochs; then
exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi


# Objective-function rows, one column per system. Field $8 of the "Overall"
# line in the compute_prob logs holds the probability; `grep -v xent` picks
# the chain objective, `grep -w xent` the cross-entropy one.
echo -n "# Final train prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train prob (xent)"
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob (xent)"
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

# Model size row, taken from the first training-progress log.
echo -n "# Num-params "
for x in $*; do
printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
done
echo
1 change: 1 addition & 0 deletions egs/aurora4/s5/local/chain/run_tdnn.sh
Loading

0 comments on commit 743eb23

Please sign in to comment.