[egs,scripts] Fix bugs in Dihard 2018 (#2897)
HuangZiliAndy authored and danpovey committed Dec 3, 2018
1 parent 40a9dc6 commit ff514e3
Showing 6 changed files with 15 additions and 15 deletions.
4 changes: 2 additions & 2 deletions egs/dihard_2018/README.txt
@@ -1,10 +1,10 @@
 
 This is a Kaldi recipe for The First DIHARD Speech Diarization Challenge.
-DIHARD is a new annual challenge focusing on hard diarization; that is,
+DIHARD is a new annual challenge focusing on "hard" diarization; that is,
 speech diarization for challenging corpora where there is an expectation that
 the current state-of-the-art will fare poorly, including, but not limited
 to: clinical interviews, extended child language acquisition recordings,
-YouTube videos and speech in the wild (e.g., recordings in restaurants)
+YouTube videos and "speech in the wild" (e.g., recordings in restaurants)
 See https://coml.lscp.ens.fr/dihard/index.html for details.
 
 The subdirectories "v1" and so on are different speaker diarization
2 changes: 1 addition & 1 deletion egs/dihard_2018/v1/path.sh
@@ -1,5 +1,5 @@
 export KALDI_ROOT=`pwd`/../../..
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
 . $KALDI_ROOT/tools/config/common_path.sh
 export LC_ALL=C
2 changes: 1 addition & 1 deletion egs/dihard_2018/v2/path.sh
@@ -1,5 +1,5 @@
 export KALDI_ROOT=`pwd`/../../..
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
 . $KALDI_ROOT/tools/config/common_path.sh
 export LC_ALL=C
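
The only change in both path.sh files is appending $KALDI_ROOT/tools/sctk/bin to PATH, presumably so the NIST SCTK tools built under tools/sctk are found when scoring is run. A minimal sanity check, written as a hypothetical Python sketch rather than part of the recipe, for confirming the tools are visible after path.sh has been sourced:

    import shutil

    # Assumption: sclite is one of the binaries that Kaldi's tools/Makefile
    # installs into tools/sctk/bin; substitute whichever tool your scoring uses.
    if shutil.which("sclite") is None:
        print("warning: sclite not on PATH; build sctk and source path.sh first")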
10 changes: 5 additions & 5 deletions egs/dihard_2018/v2/run.sh
@@ -98,7 +98,7 @@ if [ $stage -le 2 ]; then
 
 # Make a reverberated version of the training data. Note that we don't add any
 # additive noise here.
-python steps/data/reverberate_data_dir.py \
+steps/data/reverberate_data_dir.py \
 "${rvb_opts[@]}" \
 --speech-rvb-probability 1 \
 --pointsource-noise-addition-probability 0 \
@@ -123,11 +123,11 @@ if [ $stage -le 2 ]; then
 done
 
 # Augment with musan_noise
-python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise
+steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise
 # Augment with musan_music
-python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music
+steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music
 # Augment with musan_speech
-python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble
+steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble
 
 # Combine reverb, noise, music, and babble into one directory.
 utils/combine_data.sh data/train_aug data/train_reverb data/train_noise data/train_music data/train_babble
@@ -185,7 +185,7 @@ if [ $stage -le 5 ]; then
 fi
 
 # Stages 6 through 8 are handled in run_xvector.sh, a TDNN embedding extractor is trained.
-local/nnet3/xvector/run_xvector.sh --stage $stage \
+local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \
 --data data/train_combined_no_sil --nnet-dir $nnet_dir \
 --egs-dir $nnet_dir/egs
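
The run.sh hunks above drop the hard-coded `python` in front of the data-preparation scripts and invoke them directly, so each script's own shebang line decides which interpreter runs it (the scripts are made executable in the mode change shown below). A minimal sketch, using a hypothetical script name, of why this matters when the bare `python` command points at Python 2:

    #!/usr/bin/env python3
    # hypothetical_script.py -- illustration only, not part of the recipe.
    # "python hypothetical_script.py" runs under whatever interpreter the bare
    # `python` command resolves to (often Python 2 on older systems), while
    # "./hypothetical_script.py" honours the shebang above and runs Python 3.
    import sys
    print("running under Python %d" % sys.version_info.major)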
10 changes: 5 additions & 5 deletions egs/wsj/s5/steps/data/augment_data_dir.py
File mode changed 100644 → 100755 (script made executable)
@@ -103,8 +103,8 @@ def AugmentWav(utt, wav, dur, fg_snr_opts, bg_snr_opts, fg_noise_utts, \
 tot_noise_dur += noise_dur + interval
 noises.append(noise)
 
-start_times_str = "--start-times='" + ",".join(map(str,start_times)) + "'"
-snrs_str = "--snrs='" + ",".join(map(str,snrs)) + "'"
+start_times_str = "--start-times='" + ",".join(list(map(str,start_times))) + "'"
+snrs_str = "--snrs='" + ",".join(list(map(str,snrs))) + "'"
 noises_str = "--additive-signals='" + ",".join(noises).strip() + "'"
 
 # If the wav is just a file
@@ -130,11 +130,11 @@ def CopyFileIfExists(utt_suffix, filename, input_dir, output_dir):
 
 def main():
 args = GetArgs()
-fg_snrs = map(int, args.fg_snr_str.split(":"))
-bg_snrs = map(int, args.bg_snr_str.split(":"))
+fg_snrs = list(map(int, args.fg_snr_str.split(":")))
+bg_snrs = list(map(int, args.bg_snr_str.split(":")))
 input_dir = args.input_dir
 output_dir = args.output_dir
-num_bg_noises = map(int, args.num_bg_noises.split(":"))
+num_bg_noises = list(map(int, args.num_bg_noises.split(":")))
 reco2dur = ParseFileToDict(input_dir + "/reco2dur",
 value_processor = lambda x: float(x[0]))
 wav_scp_file = open(input_dir + "/wav.scp", 'r').readlines()
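
The augment_data_dir.py edits are Python 3 compatibility fixes: under Python 3, map() returns a lazy iterator rather than a list, so any result that is indexed, measured with len(), or consumed more than once must be materialized with list(). A standalone illustration of the difference (not taken from the script):

    snr_strings = "15:10:5:0".split(":")

    snrs_lazy = map(int, snr_strings)   # an iterator under Python 3
    print(list(snrs_lazy))              # [15, 10, 5, 0]
    print(list(snrs_lazy))              # [] -- the iterator is already exhausted

    snrs = list(map(int, snr_strings))  # materialized once,
    print(snrs[0], len(snrs))           # so it can be indexed and reused: 15 4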
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/train_raw_dnn.py
@@ -321,7 +321,7 @@ def train(args, run_opts):
 num_archives_expanded = num_archives * args.frames_per_eg
 num_archives_to_process = int(args.num_epochs * num_archives_expanded)
 num_archives_processed = 0
-num_iters = ((num_archives_to_process * 2)
+num_iters = int((num_archives_to_process * 2)
 / (args.num_jobs_initial + args.num_jobs_final))
 
 # If do_final_combination is True, compute the set of models_to_combine.
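
The train_raw_dnn.py fix addresses another Python 2/3 difference: in Python 3 the / operator performs true division and returns a float, so the expression is wrapped in int() to keep num_iters an integer (for these positive operands this matches Python 2's integer division). A worked example with made-up numbers:

    num_archives_to_process = 300
    num_jobs_initial, num_jobs_final = 2, 8

    # Without int(): a float under Python 3, which anything expecting an
    # iteration count (e.g. range()) will reject.
    print((num_archives_to_process * 2) / (num_jobs_initial + num_jobs_final))       # 60.0

    # With int(): truncated to an integer, as the training loop expects.
    print(int((num_archives_to_process * 2) / (num_jobs_initial + num_jobs_final)))  # 60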
