-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[egs] Added tri3b and chain training for Aurora4 (#3638)
- Loading branch information
Showing
8 changed files
with
764 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,19 @@ | ||
for x in exp/{mono,tri,sgmm,nnet,dnn}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done | ||
for x in exp/{mono,tri,sgmm,nnet,dnn,chain/tdnn*}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done | ||
|
||
%WER 19.61 [ 14698 / 74942, 1233 ins, 3759 del, 9706 sub ] exp/tri2b_multi/decode_tgpr_5k_eval92/wer_13 | ||
%WER 13.93 [ 10437 / 74942, 732 ins, 2695 del, 7010 sub ] exp/tri3a_dnn/decode_tgpr_5k_eval92/wer_10 | ||
%WER 13.61 [ 10202 / 74942, 660 ins, 2987 del, 6555 sub ] exp/tri4a_dnn/decode_tgpr_5k_eval92/wer_11 | ||
# mono | ||
%WER 37.42 [ 14223 / 38010, 1030 ins, 2613 del, 10580 sub ] exp/mono0a_multi/decode_tgpr_0166/wer_10 | ||
%WER 38.18 [ 28612 / 74942, 1919 ins, 5319 del, 21374 sub ] exp/mono0a_multi/decode_tgpr_eval92/wer_10 | ||
|
||
# tri2b | ||
%WER 20.42 [ 7763 / 38010, 827 ins, 1905 del, 5031 sub ] exp/tri2b_multi/decode_tgpr_5k_0166/wer_12 | ||
%WER 19.61 [ 14728 / 74942, 1411 ins, 3548 del, 9769 sub ] exp/tri2b_multi/decode_tgpr_5k_eval92/wer_12 | ||
|
||
# tri3b | ||
%WER 15.71 [ 5970 / 38010, 641 ins, 1403 del, 3926 sub ] exp/tri3b_multi/decode_tgpr_0166/wer_13 | ||
%WER 15.28 [ 11454 / 74942, 1082 ins, 2633 del, 7739 sub ] exp/tri3b_multi/decode_tgpr_eval92/wer_13 | ||
|
||
# chain | ||
%WER 7.88 [ 2994 / 38010, 216 ins, 1045 del, 1733 sub ] exp/chain/tdnn1a_sp/decode_tgpr_5k_0166/wer_15 | ||
%WER 7.67 [ 5745 / 74942, 392 ins, 1758 del, 3595 sub ] exp/chain/tdnn1a_sp/decode_tgpr_5k_eval92/wer_13 | ||
|
||
for x in /mnt/matylda3/qmallidi/Karels_New-Parametric-ReLU/kaldi/egs/aurora4/s5_PReLU/exp/{mono,tri,sgmm,nnet,dnn}*/decode*; do [ -d $x ] && grep WER $x/wer_* | /mnt/matylda5/iveselyk/DEVEL/kaldi-official/egs/aurora4/s5/utils/best_wer.sh; done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# config for high-resolution MFCC features, intended for neural network training | ||
# Note: we keep all cepstra, so it has the same info as filterbank features, | ||
# but MFCC is more easily compressible (because less correlated) which is why | ||
# we prefer this method. | ||
--use-energy=false # use average of log energy, not energy. | ||
--num-mel-bins=40 # similar to Google's setup. | ||
--num-ceps=40 # there is no dimensionality reduction. | ||
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so | ||
# there might be some information at the low end. | ||
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#!/bin/bash | ||
|
||
# this script is used for comparing decoding results between systems. | ||
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp | ||
# For use with discriminatively trained systems you specify the epochs after a colon: | ||
# for instance, | ||
# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} | ||
|
||
|
||
# With no arguments at all there is nothing to compare: print usage and stop.
if [ $# -eq 0 ]; then
  echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
  echo "or (with epoch numbers for discriminative training):"
  echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
  exit 1
fi

# Echo the full command line as a comment line ahead of the table.
echo "# $0 $*"

# Consume the leading option flags.  They are accepted in either order
# (previously only "--looped" followed by "--online" was recognised, so
# "--online --looped" silently treated "--looped" as a directory).
# Parsing stops at the first argument that is not a known flag; everything
# from there on is left in the positional parameters as system directories.
include_looped=false
include_online=false
while [ $# -gt 0 ]; do
  case "$1" in
    --looped) include_looped=true ;;
    --online) include_online=true ;;
    *) break ;;
  esac
  shift
done
|
||
|
||
used_epochs=false | ||
|
||
# this function set_names is used to separate the epoch-related parts of the name | ||
# [for discriminative training] and the regular parts of the name. | ||
# If called with a colon-free directory name, like: | ||
# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr | ||
# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" | ||
# If called with something like: | ||
# set_names exp/chain/tdnn_d_sp_smbr:3 | ||
# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" | ||
|
||
|
||
# set_names <dir>[:<epoch>]
#
# Splits its single argument at the colon and sets these global variables:
#   dirname      text before the first colon (the whole argument if there is
#                no colon)
#   epoch        second colon-separated field, or empty if there is no colon
#   epoch_infix  "_epoch<epoch>" when an epoch was given, otherwise ""
#   used_epochs  set to true as soon as any argument carries an epoch
# Exits the whole program with status 1 if not called with exactly one
# argument.
set_names() {
  if [ $# != 1 ]; then
    echo "compare_wer_general.sh: internal error"
    exit 1  # exit the program
  fi
  # "$1" is quoted so that whitespace or glob characters in the path reach
  # cut unchanged.
  dirname=$(printf '%s\n' "$1" | cut -d: -f1)
  # cut -s prints nothing when the input has no delimiter, so a colon-free
  # argument yields an empty epoch.
  epoch=$(printf '%s\n' "$1" | cut -s -d: -f2)
  # Quoted so that an epoch containing whitespace cannot break the test.
  if [ -z "$epoch" ]; then
    epoch_infix=""
  else
    used_epochs=true
    epoch_infix=_epoch${epoch}
  fi
}
|
||
|
||
|
||
# Header row: one 10-wide column per system, labelled with the basename of
# each argument as given (any ":<epoch>" suffix is kept in the label).
echo -n "# System "
for x in "$@"; do printf "% 10s" " $(basename "$x")"; done
echo

# Row labels and the decode-directory suffixes they belong to; the two arrays
# are indexed in parallel.
strings=(
  "# WER eval92 (tgpr_5k) "
  "# WER 0166 (tgpr_5k) ")
decode_names=(tgpr_5k_eval92 tgpr_5k_0166)

# print_wer_row <dir-suffix> <decode-prefix> <test-set> <system>...
#
# For every system argument, reads
#   <dirname><dir-suffix>/<decode-prefix><test-set>/wer*
# pipes it through utils/best_wer.sh, and prints the second field of the
# result right-aligned in a 10-wide column; finishes the row with a newline.
# Relies on set_names to strip an optional ":<epoch>" from each argument.
print_wer_row() {
  local dir_suffix=$1 decode_prefix=$2 test_set=$3
  shift 3
  local sys wer
  for sys in "$@"; do
    set_names "$sys"  # sets $dirname and $epoch_infix
    # Only the directory part is quoted; wer* must stay a live glob.
    wer=$(cat "${dirname}${dir_suffix}/${decode_prefix}${test_set}"/wer* \
            | utils/best_wer.sh | awk '{print $2}')
    printf "% 10s" "$wer"
  done
  echo
}

for n in 0 1; do
  echo -n "${strings[$n]}"
  print_wer_row "" decode_ "${decode_names[$n]}" "$@"
  if $include_looped; then
    echo -n "# [looped:] "
    print_wer_row "" decode_looped_ "${decode_names[$n]}" "$@"
  fi
  if $include_online; then
    echo -n "# [online:] "
    print_wer_row _online decode_ "${decode_names[$n]}" "$@"
  fi
done
|
||
|
||
if $used_epochs; then
  exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

# print_final_prob <label> <train|valid> <grep-flag> <system>...
#
# Prints <label>, then for every system directory the 8th field (formatted
# "%.4f") of the "Overall" lines in <dir>/log/compute_prob_<subset>.final.log,
# right-aligned in a 10-wide column, then a newline.
# <grep-flag> is handed to a second grep on the word "xent":
#   -v  keeps the lines that do not mention xent
#   -w  keeps the lines that contain xent as a whole word
print_final_prob() {
  local label=$1 subset=$2 xent_flag=$3
  shift 3
  local sys prob
  echo -n "$label"
  for sys in "$@"; do
    prob=$(grep Overall "$sys/log/compute_prob_${subset}.final.log" \
             | grep "$xent_flag" xent | awk '{printf("%.4f", $8)}')
    printf "% 10s" "$prob"
  done
  echo
}

print_final_prob "# Final train prob " train -v "$@"
print_final_prob "# Final valid prob " valid -v "$@"
print_final_prob "# Final train prob (xent)" train -w "$@"
print_final_prob "# Final valid prob (xent)" valid -w "$@"

echo -n "# Num-params "
for x in "$@"; do
  # The substitution is deliberately left unquoted: if the log has several
  # matching lines, each value still gets its own 10-wide field, as before.
  # shellcheck disable=SC2046
  printf "% 10s" $(grep num-parameters "$x/log/progress.1.log" | awk '{print $2}')
done
echo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
tuning/run_tdnn_1a.sh |
Oops, something went wrong.