Commit 34128c51 authored by Hossein Hadian, committed by Daniel Povey
[scripts,egs] sMBR on LFR xent system with shifted feats (#1477)

parent 5bad9b2d
tuning/run_tdnn_lfr1c_disc.sh
#!/bin/bash
# This script does discriminative training on top of the CE nnet3 LFR system
# from run_tdnn_lfr1c. To simplify things, this assumes you are using the
# "speed-perturbed" data (--speed_perturb true, which is the default) in the
# baseline run_tdnn_lfr1c.sh script.
#
# note: this relies on having a cluster with plenty of CPUs as well as GPUs,
# since the lattice generation runs at about real-time, and so takes on the
# order of 1000 hours of CPU time.
# Comparing effect of shift:
# System tdnn_lfr1c_sp_smbr:1 tdnn_lfr1c_sp_smbr:2 tdnn_lfr1c_sp_smbr:3 tdnn_lfr1c_sp_fs_smbr:1 tdnn_lfr1c_sp_fs_smbr:2 tdnn_lfr1c_sp_fs_smbr:3
# WER on train_dev(tg) 16.26 16.11 16.02 16.02 15.77 15.78
# WER on train_dev(fg) 15.01 14.91 14.80 14.79 14.58 14.50
# WER on eval2000(tg) 18.9 18.7 18.6 18.6 18.5 18.5
# WER on eval2000(fg) 17.4 17.2 17.1 17.1 17.0 16.9
set -e
set -uo pipefail
stage=0
train_stage=-10 # can be used to start training in the middle.
get_egs_stage=0
use_gpu=true # for training
cleanup=false # run with --cleanup true --stage 5 to clean up (remove large things like
# alignments and degs).
degs_dir= # set this to use preexisting degs.
nj=65 # have a high number of jobs because this could take a while, and we might
# have some stragglers.
## Objective options
criterion=smbr
one_silence_class=true
# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b"
# originally ran with no affix, with effective_learning_rate=0.0000125;
# reran by mistake with no affix with effective_learning_rate=0.000005 [this was
# a bit better, see NOTES, but the best result was still after the 1st epoch].
# reran again with affix=slow and effective_learning_rate=0.0000025
# reran again with affix=slow2 and effective_learning_rate=0.00000125 (this was
# about the best).
# before checking in the script, removed the slow2 affix but left with
# the lowest learning rate.
disc_affix=
## Egs options. Give quite a few choices of chunk length,
## so it can split utterances without much gap or overlap.
frames_per_eg=300,280,150,120,100
frames_overlap_per_eg=0
frames_per_chunk_decoding=200
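# For example: a 420-frame utterance can be covered exactly as one 300-frame
# chunk plus one 120-frame chunk with no gap or overlap, since both lengths
# are in the list above.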
## these context options should match the training condition. (chunk_left_context,
## chunk_right_context)
## We set --extra-left-context-initial 0 and --extra-right-context-final 0
## directly in the script below, but this should also match the training condition.
## Note: extra-left-context and extra-right-context are 0 because this is a TDNN,
## it's not a recurrent model like an LSTM or BLSTM.
extra_left_context=0
extra_right_context=0
## Nnet training options
effective_learning_rate=0.00000125
max_param_change=1
num_jobs_nnet=4
num_epochs=3
regularization_opts= # for passing the --xent-regularize and --l2-regularize
# options (applicable to chain models).
minibatch_size="300=32,16/150=64,32" # rule says: if the chunk size is closer to 300, use a minibatch size of 32 (or 16 for mop-up);
# if the chunk size is closer to 150, use a minibatch size of 64 (or 32 for mop-up).
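# Below is a simplified sketch (not used by this recipe; the real parsing in
# the nnet3 training scripts is more general) of how a rule string like the
# one above can be interpreted: pick the rule whose chunk size is closest to
# the actual chunk length, and use its first size (the second is for the
# smaller "mop-up" minibatches).
pick_minibatch() {
  local chunk=$1 rule key sizes dist best_dist=1000000 best_size=
  for rule in ${minibatch_size//\// }; do  # split "300=32,16/150=64,32" on '/'
    key=${rule%%=*}     # chunk size this rule applies to, e.g. 300
    sizes=${rule#*=}    # e.g. "32,16"
    dist=$(( chunk > key ? chunk - key : key - chunk ))
    if [ $dist -lt $best_dist ]; then best_dist=$dist; best_size=${sizes%%,*}; fi
  done
  echo $best_size
}
# e.g. pick_minibatch 280 -> 32 (280 is nearest 300); pick_minibatch 120 -> 64.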
shift_feats=false
## Decode options
decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
srcdir=exp/nnet3/tdnn_lfr1c_sp
graph_dir=$srcdir/graph_sw1_tg
train_data_dir=data/train_nodup_sp_hires
online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp
dir=${srcdir}_${criterion}${disc_affix}
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false.
EOF
fi
num_threads=1
else
# Use 4 nnet jobs, as in run_4d_gpu.sh, so the results should be
# almost the same, though training on CPU may be a bit slower.
num_threads=16
fi
if [ ! -f ${srcdir}/final.mdl ]; then
echo "$0: expected ${srcdir}/final.mdl to exist"
exit 1;
fi
frame_subsampling_factor=1
if [ -f $srcdir/frame_subsampling_factor ]; then
frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor)
fi
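# (For this LFR setup the factor is expected to be 3 -- an assumption, but
# consistent with the acoustic/self-loop scales of 0.333 used below.)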
affix= # Will be set if doing input frame shift
if [[ "$shift_feats" = true && $frame_subsampling_factor -ne 1 ]]; then
if [ $stage -le 0 ]; then
utils/data/shift_and_combine_feats.sh --write-utt2orig $dir/utt2orig \
$frame_subsampling_factor $train_data_dir ${train_data_dir}_fs
steps/online/nnet2/copy_ivector_dir.sh --utt2orig $dir/utt2orig \
$online_ivector_dir ${online_ivector_dir}_fs
rm $dir/utt2orig
fi
online_ivector_dir=${online_ivector_dir}_fs
train_data_dir=${train_data_dir}_fs
affix=_fs
fi
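# At this point (illustration, assuming frame_subsampling_factor=3) the _fs
# data directory contains the original utterances plus copies shifted by -1
# and +1 frames, and the _fs ivector directory maps each shifted copy back
# to the i-vectors of its original utterance via the utt2orig file.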
if [ $stage -le 1 ]; then
# Hardcode no-GPU for alignment; you could use a GPU, but you wouldn't get
# good GPU utilization.
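# The acoustic and self-loop scales of 0.333 (about 1/3) are assumed to match
# the frame-subsampling factor of the LFR baseline; they should agree with
# whatever scales the source system was built with.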
steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \
--scale-opts '--transition-scale=1.0 --acoustic-scale=0.333 --self-loop-scale=0.333' \
--frames-per-chunk $frames_per_chunk_decoding \
--extra-left-context $extra_left_context --extra-right-context $extra_right_context \
--extra-left-context-initial 0 --extra-right-context-final 0 \
--online-ivector-dir $online_ivector_dir \
--nj $nj $train_data_dir data/lang $srcdir ${srcdir}_ali${affix} ;
fi
if [ -z "$degs_dir" ]; then
if [ $stage -le 2 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
utils/create_split_dir.pl \
/export/b{09,10,11,12}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
fi
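# (create_split_dir.pl spreads the large degs archives over several
# filesystems and symlinks them into ${srcdir}_degs/storage; the /export/b*
# paths above are CLSP-specific and should be edited for other clusters.)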
if [ -d ${srcdir}_degs/storage ]; then max_copy_jobs=10; else max_copy_jobs=5; fi
steps/nnet3/get_degs.sh \
--cmd "$decode_cmd --mem 10G" --num-threads 3 \
--self-loop-scale 0.333 --acwt 0.333 \
--max-copy-jobs $max_copy_jobs \
--extra-left-context $extra_left_context \
--extra-right-context $extra_right_context \
--extra-left-context-initial 0 --extra-right-context-final 0 \
--frames-per-chunk-decoding "$frames_per_chunk_decoding" \
--stage $get_egs_stage \
--online-ivector-dir $online_ivector_dir \
--frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \
$train_data_dir data/lang ${srcdir} ${srcdir}_ali${affix} ${srcdir}_degs${affix} || exit 1
fi
fi
if [ $stage -le 3 ]; then
[ -z "$degs_dir" ] && degs_dir=${srcdir}_degs${affix}
steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \
--stage $train_stage \
--acoustic-scale 0.333 \
--effective-lrate $effective_learning_rate --max-param-change $max_param_change \
--criterion $criterion --drop-frames true \
--num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \
--num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
--regularization-opts "$regularization_opts" \
${degs_dir} $dir
fi
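# Note on the decoding options below (an inference from the option values,
# not a statement from the original authors): --acwt 0.333 matches the LFR
# system, and --post-decode-acwt 3.0 rescales the acoustic scores written to
# the lattices back to the conventional scale of ~1.0 (0.333 * 3.0), keeping
# the usual LM-weight range for scoring.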
if [ $stage -le 4 ]; then
for x in `seq $decode_start_epoch $num_epochs`; do
for decode_set in train_dev eval2000; do
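# one decode job per speaker in the test set: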
num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
for iter in epoch$x epoch${x}_adj; do
(
steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
--acwt 0.333 --post-decode-acwt 3.0 \
--online-ivector-dir exp/nnet3/ivectors_${decode_set} \
$graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_sw1_tg_${iter} || exit 1;
steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
$dir/decode_${decode_set}_sw1_{tg,fsh_fg}_${iter} || exit 1;
) &
done
done
done
fi
wait;
if [ $stage -le 5 ] && $cleanup; then
# if you run with "--cleanup true --stage 5" you can clean up.
# actually, keep the alignments in case we need them later.. they're slow to
# create, and quite big.
# rm ${srcdir}_ali/ali.*.gz || true
steps/nnet2/remove_egs.sh ${srcdir}_degs${affix} || true
fi
wait;
exit 0;
steps/online/nnet2/copy_ivector_dir.sh
#!/bin/bash
# Copyright 2017 Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0
# This script copies the necessary parts of an online ivector directory,
# optionally applying an utterance-id mapping to the ivector_online.scp file.
utt2orig=
. utils/parse_options.sh
if [ $# != 2 ]; then
echo "Usage: "
echo " $0 [options] <srcdir> <destdir>"
echo "e.g.:"
echo " $0 exp/nnet3/online_ivector_train exp/nnet3/online_ivector_train_fs"
echo "Options"
echo " --utt2orig=<file> # utterance id mapping to use"
exit 1;
fi
srcdir=$1
destdir=$2
if [ ! -f $srcdir/ivector_period ]; then
echo "$0: no such file $srcdir/ivector_period"
exit 1;
fi
if [ "$destdir" == "$srcdir" ]; then
echo "$0: this script requires <srcdir> and <destdir> to be different."
exit 1
fi
set -e;
mkdir -p $destdir
cp -r $srcdir/{conf,ivector_period} $destdir
if [ -z "$utt2orig" ]; then
cp $srcdir/ivector_online.scp $destdir
else
utils/apply_map.pl -f 2 $srcdir/ivector_online.scp < $utt2orig > $destdir/ivector_online.scp
fi
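# Illustration with a hypothetical utterance id: a utt2orig line
#   fs1-sw02001-A sw02001-A
# becomes an scp entry whose key is fs1-sw02001-A but whose value points at
# the i-vectors of the original utterance sw02001-A, so shifted copies reuse
# the original i-vectors.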
cp $srcdir/final.ie.id $destdir
echo "$0: Copied necessary parts of online ivector directory $srcdir to $destdir"
utils/data/shift_and_combine_feats.sh
@@ -4,6 +4,11 @@
# Apache 2.0
write_utt2orig= # if provided, this script will write
# a mapping of shifted utterance ids
# to the original ones into the file
# specified by this option
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh
@@ -34,11 +39,18 @@ if [ -f $destdir/feats.scp ]; then
exit 1
fi
if [ ! -z "$write_utt2orig" ]; then
awk '{print $1 " " $1}' $srcdir/feats.scp >$write_utt2orig
fi
tmp_shift_destdirs=()
for frame_shift in `seq $((-(frame_subsampling_factor/2))) $((-(frame_subsampling_factor/2) + frame_subsampling_factor - 1))`; do
if [ "$frame_shift" == 0 ]; then continue; fi
utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1
tmp_shift_destdirs+=("${destdir}_fs$frame_shift")
if [ ! -z "$write_utt2orig" ]; then
awk -v prefix="fs$frame_shift-" '{printf("%s%s %s\n", prefix, $1, $1);}' $srcdir/feats.scp >>$write_utt2orig
fi
done
utils/data/combine_data.sh $destdir $srcdir "${tmp_shift_destdirs[@]}" || exit 1
rm -r "${tmp_shift_destdirs[@]}"
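# Illustration (hypothetical utterance id "utt1", frame_subsampling_factor=3):
# the loop above generates shifts -1 and +1, so $write_utt2orig would contain
#   utt1 utt1
#   fs-1-utt1 utt1
#   fs1-utt1 utt1
# and $destdir combines the original data with the two shifted copies.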