Skip to content
Snippets Groups Projects
Commit 6275a3a8 authored by David Snyder's avatar David Snyder
Browse files

trunk: adding multithreaded and scoring options to...

trunk: adding multithreaded and scoring options to steps/decode_basis_fmllr.sh. Also adding the thread_str suffix to an invocation of lattice-determinize-pruned in steps/decode_fmllr.sh

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4481 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 5f470f15
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@
# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao)
# Johns Hopkins University (Author: Daniel Povey)
# 2014 David Snyder
# Decoding script that does basis fMLLR. This can be on top of delta+delta-delta,
# or LDA+MLLT features.
......@@ -47,6 +48,10 @@ nj=4
silence_weight=0.01
cmd=run.pl
si_dir=
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
skip_scoring=false
scoring_opts=
# End configuration section
echo "$0 $@" # Print the command line for logging
......@@ -68,7 +73,9 @@ if [ $# != 3 ]; then
echo " --si-dir <speaker-indep-decoding-dir> # use this to skip 1st pass of decoding"
echo " # Caution-- must be with same tree"
echo " --acwt <acoustic-weight> # default 0.08333 ... used to get posteriors"
echo " --scoring-opts <string> # options to local/score.sh"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
exit 1;
fi
......@@ -80,6 +87,9 @@ dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash.
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
sdata=$data/split$nj;
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
......@@ -106,7 +116,11 @@ fi
if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
if [ $stage -le 0 ]; then
steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model --max-active $first_max_active $graphdir $data $si_dir || exit 1;
steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
--num-threads $num_threads --skip-scoring $skip_scoring \
--acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
--model $alignment_model --max-active \
$first_max_active $graphdir $data $si_dir || exit 1;
fi
fi
##
......@@ -156,7 +170,7 @@ pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/p
if [ $stage -le 2 ]; then
echo "$0: doing main lattice generation phase"
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt \
--determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \
......@@ -170,7 +184,7 @@ fi
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
......@@ -194,13 +208,15 @@ if [ $stage -le 4 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
fi
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
fi
rm $dir/{trans_tmp,pre_trans}.*
......
......@@ -175,7 +175,7 @@ fi
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment