Adding/modifying calling-scripts for testing basis-fMLLR.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1155 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8

Adding/modifying calling-scripts for testing basis-fMLLR.
e617f848 · Dan Povey · 42c91e81 · e617f848 · e617f848
Commit e617f848 authored Jul 29, 2012 by Dan Povey
--- a/egs/wsj/s5/local/run_basis_fmllr.sh
+++ b/egs/wsj/s5/local/run_basis_fmllr.sh
+#!/bin/bash
+
+# Basis-fMLLR experiments on the tri3b system.  This first part builds
+# "per-utterance" copies of the test sets; the decoding commands follow.
+# Run it from the recipe directory (it uses relative data/, exp/ and
+# steps/ paths).
+
+# Source the job-submission settings ($train_cmd, $decode_cmd) explicitly
+# from the current directory rather than relying on a PATH lookup.
+. ./cmd.sh
+
+# Directory handed to steps/compute_cmvn_stats.sh as its third argument.
+# Nothing in this script sets it, so fall back to a default unless the
+# environment (or cmd.sh) already provides one.
+# NOTE(review): "mfcc" is assumed to match the value used by run.sh -- confirm.
+mfccdir=${mfccdir:-mfcc}
+
+# Make "per-utterance" versions of the test sets where the speaker
+# information corresponds to utterances-- to demonstrate adaptation on
+# short utterances, particularly for basis fMLLR
+for x in test_eval92 test_eval93 test_dev93; do
+  y=${x}_utt
+  # Start from a clean slate: if data/$y already exists, "cp -r" would
+  # copy data/$x *into* it instead of replacing it.
+  rm -rf "data/$y"
+  cp -r "data/$x" "data/$y" || exit 1
+  # Drop any split* directories copied over from the source set
+  # (-f keeps this quiet when there are none).
+  rm -rf "data/$y"/split*
+  # Map every utterance-id to itself, i.e. one "speaker" per utterance.
+  awk '{print $1, $1;}' "data/$x/utt2spk" > "data/$y/utt2spk" || exit 1
+  # With exactly one utterance per speaker, spk2utt has the same content
+  # as utt2spk.
+  cp "data/$y/utt2spk" "data/$y/spk2utt" || exit 1
+  steps/compute_cmvn_stats.sh "data/$y" "exp/make_mfcc/$y" "$mfccdir" || exit 1
+done
+
+
+# Basis fMLLR experiments.
+# First a baseline: decode the per-utterance test sets with normal fMLLR.
+steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_utt || exit 1;
+steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_utt || exit 1;
+
+# Get the fMLLR basis from the si84 training data.  Stop here on failure,
+# like every other step, rather than carrying on to the basis-fMLLR
+# decodes below.
+steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train_si84 data/lang exp/tri3b || exit 1;
+
+# Decoding tri3b with basis fMLLR, using the original speaker information.
+steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_basis || exit 1;
+steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92_basis || exit 1;
+
+# The same, per-utterance.
+steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_basis_utt || exit 1;
+steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \
+  exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_basis_utt || exit 1;
+
--- a/egs/wsj/s5/run.sh
+++ b/egs/wsj/s5/run.sh
@@ -65,6 +65,7 @@ for x in test_eval92 test_eval93 test_dev93 train_si284; do
 steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
 done

+
 utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1

 # Now make subset with the shortest 2k utterances from si-84.
@@ -194,11 +195,11 @@ steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
  exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92 || exit 1;

-# decoding train3b with basis fMLLR
-steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \
-  exp/tri3b/graph_tgpr data/train_si84 data/test_dev93 exp/tri3b/decode_tgpr_dev93 || exit 1;
-steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \
-  exp/tri3b/graph_tgpr data/train_si84 data/test_eval92 exp/tri3b/decode_tgpr_eval92 || exit 1;
+# At this point you could run the command below; it produces
+# results that demonstrate basis-fMLLR adaptation (i.e. adaptation
+# on small amounts of data).
+# local/run_basis_fmllr.sh
+

 # steps/decode_fmllr_thresh.sh --nj 10 --cmd "$decode_cmd" \
 #   exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_thresh || exit 1;