Skip to content
Snippets Groups Projects
Commit f8cb7d99 authored by Dan Povey's avatar Dan Povey
Browse files

trunk: various minor script extensions (supporting max-count option in iVector extraction script)

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4870 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent a2893ea0
Branches
No related tags found
No related merge requests found
......@@ -34,6 +34,13 @@ posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
# with the neural nets trained with these iVectors.
compress=true # If true, compress the iVectors stored on disk (it's lossy
# compression, as used for feature matrices).
max_count=0 # The use of this option (e.g. --max-count 100) can make
# iVectors more consistent for different lengths of
# utterance, by scaling up the prior term when the
# data-count exceeds this value. The data-count is after
# posterior-scaling, so assuming the posterior-scale is 0.1,
# --max-count 100 starts having effect after 1000 frames, or
# 10 seconds of data.
# End configuration section.
......@@ -95,6 +102,7 @@ echo "--num-gselect=$num_gselect" >>$ieconf
echo "--min-post=$min_post" >>$ieconf
echo "--posterior-scale=$posterior_scale" >>$ieconf
echo "--max-remembered-frames=1000" >>$ieconf # the default
echo "--max-count=$max_count" >>$ieconf
......
......@@ -23,6 +23,12 @@ posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
# caution: you should use the same value in the online-estimation
# code.
max_count=100 # This max-count of 100 can make iVectors more consistent for
# different lengths of utterance, by scaling up the prior term
# when the data-count exceeds this value. The data-count is
# after posterior-scaling, so assuming the posterior-scale is
# 0.1, --max-count 100 starts having effect after 1000 frames,
# or 10 seconds of data.
iter=final
# End configuration.
......@@ -138,6 +144,7 @@ if [ ! -z "$iedir" ]; then
echo "--min-post=$min_post" >>$ieconf
echo "--posterior-scale=$posterior_scale" >>$ieconf # this is currently the default in the scripts.
echo "--max-remembered-frames=1000" >>$ieconf # the default
echo "--max-count=$max_count" >>$ieconf
fi
if $add_pitch; then
......
......@@ -49,6 +49,12 @@ rm $data/feats.scp 2>/dev/null
# use "name" as part of name of the archive.
name=`basename $data`
for j in $(seq $nj); do
# the next command does nothing unless $ark_dir/storage/ exists, see
# utils/create_data_link.pl for more info.
utils/create_data_link.pl $ark_dir/pasted_$name.$j.ark
done
$cmd JOB=1:$nj $logdir/append.JOB.log \
select-feats "$selector" scp:$data_in/split$nj/JOB/feats.scp ark:- \| \
copy-feats --compress=$compress ark:- \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment