-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,8 @@ | |
# 2016 Vimal Manohar | ||
# Apache 2.0 | ||
|
||
# This script is similar to steps/cleanup/segment_long_utterances.sh, but | ||
|
||
# This script is similar to steps/cleanup/segment_long_utterances.sh, but | ||
# uses nnet3 acoustic model instead of GMM acoustic model for decoding. | ||
# This script performs segmentation of the input data based on the transcription | ||
# and outputs segmented data along with the corresponding aligned transcription. | ||
|
@@ -13,7 +14,7 @@ | |
# are of manageable length for further processing, along with the portion of the | ||
# transcript that seems to match (aligns with) each segment. | ||
# This the light-supervised training scenario where the input transcription is | ||
# not expected to be completely clean and may have significant errors. | ||
# not expected to be completely clean and may have significant errors. | ||
# See "JHU Kaldi System for Arabic MGB-3 ASR Challenge using Diarization, | ||
# Audio-transcript Alignment and Transfer Learning": Vimal Manohar, Daniel | ||
# Povey, Sanjeev Khudanpur, ASRU 2017 | ||
|
@@ -39,24 +40,22 @@ seconds_per_spk_max=30 | |
|
||
# Decode options | ||
graph_opts= | ||
scale_opts= # for making the graphs | ||
beam=15.0 | ||
lattice_beam=1.0 | ||
lmwt=10 | ||
|
||
acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. | ||
post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the | ||
# regular scoring script works. | ||
|
||
# Contexts must ideally match training | ||
extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) | ||
extra_right_context=0 | ||
extra_right_context=0 | ||
extra_left_context_initial=-1 | ||
extra_right_context_final=-1 | ||
frames_per_chunk=150 | ||
|
||
# i-vector options | ||
extractor= # i-Vector extractor. If provided, will extract i-vectors. | ||
# Required if the network was trained with i-vector extractor. | ||
extractor= # i-Vector extractor. If provided, will extract i-vectors. | ||
# Required if the network was trained with i-vector extractor. | ||
use_vad=false # Use energy-based VAD for i-vector extraction | ||
|
||
# TF-IDF similarity search options | ||
|
@@ -116,12 +115,12 @@ it and eliminate data where the transcript doesn't seem to match. | |
--segmentation-extra-opts 'opts' # Additional options to segment_ctm_edits_mild.py. | ||
# Please run steps/cleanup/internal/segment_ctm_edits_mild.py | ||
# without arguments to see allowed options. | ||
--align-full-hyp <true|false> # If true, align full hypothesis | ||
i.e. trackback from the end to get the alignment. | ||
This is different from the normal | ||
--align-full-hyp <true|false> # If true, align full hypothesis | ||
i.e. trackback from the end to get the alignment. | ||
This is different from the normal | ||
Smith-Waterman alignment, where the | ||
traceback will be from the maximum score. | ||
--extractor <extractor> # i-vector extractor directory if i-vector is | ||
--extractor <extractor> # i-vector extractor directory if i-vector is | ||
# to be used during decoding. Must match | ||
# the extractor used for training neural-network. | ||
--use-vad <true|false> # If true, uses energy-based VAD to apply frame weights | ||
|
@@ -168,6 +167,23 @@ cp $srcdir/cmvn_opts $dir | |
cp $srcdir/{splice_opts,delta_opts,final.mat,final.alimdl} $dir 2>/dev/null || true | ||
cp $srcdir/frame_subsampling_factor $dir 2>/dev/null || true | ||
|
||
if [ -f $srcdir/frame_subsampling_factor ]; then | ||
echo "$0: guessing that this is a chain system, checking parameters." | ||
if [ -z $scale_opts ]; then | ||
echo "$0: setting scale_opts" | ||
scale_opts="--self-loop-scale=1.0 --transition-scale=1.0" | ||
fi | ||
if [ $acwt == 0.1 ]; then | ||
echo "$0: setting acwt=1.0" | ||
acwt=1.0 | ||
fi | ||
if [ $lmwt == 10 ]; then | ||
echo "$0: setting lmwt=1.0" | ||
lmwt=1 | ||
fi | ||
fi | ||
|
||
|
||
utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
danpovey
via email
Author
Contributor
|
||
cp $lang/phones.txt $dir | ||
|
||
|
@@ -221,6 +237,7 @@ if [ $stage -le 3 ]; then | |
|
||
# Make graphs w.r.t. to the original text (usually recording-level) | ||
steps/cleanup/make_biased_lm_graphs.sh $graph_opts \ | ||
--scale-opts "$scale_opts" \ | ||
--nj $nj --cmd "$cmd" $text \ | ||
$lang $dir $dir/graphs | ||
if [ -z "$utt2text" ]; then | ||
|
@@ -267,7 +284,7 @@ if [ $stage -le 5 ]; then | |
echo "$0: Decoding with biased language models..." | ||
|
||
steps/cleanup/decode_segmentation_nnet3.sh \ | ||
--acwt $acwt --post-decode-acwt $post_decode_acwt \ | ||
--acwt $acwt \ | ||
--beam $beam --lattice-beam $lattice_beam --nj $nj --cmd "$cmd --mem 4G" \ | ||
--skip-scoring true --allow-partial false \ | ||
--extra-left-context $extra_left_context \ | ||
|
Question) I have been using this script with --lmwt 12 and --post-decode-acwt 10.
Then I have to set lmwt by 1.2 right?
Is this for simple options?