Skip to content
Snippets Groups Projects
Commit 467cb2e6 authored by Karel Vesely's avatar Karel Vesely
Browse files

trunk: minor changes,

* nnet-forward.cc - disable GPU memory caching
* cmvn-to-nnet.cc - add variance flooring
* paste-feats.cc - cosmetic change option description
* train_nnet.sh - import cmvn settings from pre-training
* pretrain_dbn.sh - allow generating input transform from a prototype



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3449 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 8f5a2c4b
No related branches found
No related tags found
No related merge requests found
......@@ -30,6 +30,8 @@
# nnet config
nn_depth=6 #number of hidden layers
hid_dim=2048 #number of units per layer
param_stddev_first=0.1 #init parameters in 1st RBM
param_stddev=0.1 #init parameters in other RBMs
# number of iterations
rbm_iter=1 #number of pre-training epochs (Gaussian-Bernoulli RBM has 2x more)
rbm_drop_data=0.0 #sample the training set, 1.0 drops all the data, 0.0 keeps all
......@@ -42,6 +44,7 @@ rbm_extra_opts=
copy_feats=true # resave the features randomized consecutively to tmpdir
# feature config
feature_transform= # Optionally reuse feature processing front-end (override splice,etc.)
feature_transform_proto= # Optionally pass prototype of feature transform
delta_order= # Optionally use deltas on the input features
apply_cmvn=false # Optionally do CMVN of the input features
norm_vars=false # When apply_cmvn=true, this enables CVN
......@@ -161,10 +164,16 @@ if [ ! -z "$feature_transform" ]; then
echo Using already prepared feature_transform: $feature_transform
cp $feature_transform $dir/final.feature_transform
else
# Generate the splice transform
echo "Using splice +/- $splice , step $splice_step"
feature_transform=$dir/tr_splice$splice-$splice_step.nnet
utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice --splice-step=$splice_step > $feature_transform
# Build the input feature_transform: either initialize it from a user-supplied
# network prototype, or fall back to generating a frame-splicing transform.
if [ ! -z "$feature_transform_proto" ]; then
  # Initialize from the prototype (quote expansions: paths may contain spaces,
  # and an unquoted empty value would silently drop the argument)
  feature_transform=$dir/tr_$(basename "$feature_transform_proto")
  log=$dir/log/feature-transform-initialize.log
  nnet-initialize --binary=false "$feature_transform_proto" "$feature_transform" 2>$log || { cat $log; exit 1; }
else
  # Generate the splice transform
  echo "Using splice +/- $splice , step $splice_step"
  feature_transform=$dir/tr_splice$splice-$splice_step.nnet
  utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice --splice-step=$splice_step > $feature_transform
fi
# Renormalize the MLP input to zero mean and unit variance
feature_transform_old=$feature_transform
......@@ -201,7 +210,7 @@ for depth in $(seq 1 $nn_depth); do
#initialize
echo "Initializing '$RBM.init'"
echo "<NnetProto>
<Rbm> <InputDim> $num_fea <OutputDim> $num_hid <VisibleType> gauss <HiddenType> bern
<Rbm> <InputDim> $num_fea <OutputDim> $num_hid <VisibleType> gauss <HiddenType> bern <ParamStddev> $param_stddev_first
</NnetProto>
" > $RBM.proto
nnet-initialize $RBM.proto $RBM.init 2>$dir/log/nnet-initialize.$depth.log || exit 1
......@@ -229,7 +238,7 @@ for depth in $(seq 1 $nn_depth); do
#initialize
echo "Initializing '$RBM.init'"
echo "<NnetProto>
<Rbm> <InputDim> $num_hid <OutputDim> $num_hid <VisibleType> bern <HiddenType> bern <VisibleBiasCmvnFilename> $dir/$depth.cmvn
<Rbm> <InputDim> $num_hid <OutputDim> $num_hid <VisibleType> bern <HiddenType> bern <ParamStddev> $param_stddev <VisibleBiasCmvnFilename> $dir/$depth.cmvn
</NnetProto>
" > $RBM.proto
nnet-initialize $RBM.proto $RBM.init 2>$dir/log/nnet-initialize.$depth.log || exit 1
......
......@@ -144,6 +144,15 @@ head -n 10000 $dir/train.scp > $dir/train.scp.10k
feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
# CMVN:
# Optionally import the CMVN config from the pre-training directory, so that
# training applies the same input normalization as the pre-trained front-end.
if [ ! -z "$feature_transform" ]; then
  # Quote expansions so directories with spaces and empty values are handled;
  # an unquoted empty $feature_transform would make the test misbehave.
  norm_vars_file=$(dirname "$feature_transform")/norm_vars
  if [ -e "$norm_vars_file" ]; then
    # norm_vars file stores 'true'/'false' for variance normalization
    apply_cmvn=true; norm_vars=$(cat "$norm_vars_file");
  fi
  echo "Imported CMVN config from pre-training: apply_cmvn=$apply_cmvn; norm_vars=$norm_vars"
fi
# optionally add per-speaker CMVN
if [ $apply_cmvn == "true" ]; then
echo "Will use CMVN statistics : $data/cmvn.scp, $data_cv/cmvn.scp"
......
......@@ -86,7 +86,7 @@ int main(int argc, char *argv[]) {
int32 length_tolerance = 0;
bool binary = true;
po.Register("length-tolerance", &length_tolerance,
"Tolerate small length differences of feats (warn and trim at end)");
"If length is different, trim as shortest up to a frame difference of length-tolerance, otherwise exclude segment.");
po.Register("binary", &binary, "If true, output files in binary "
"(only relevant for single-file operation, i.e. no tables)");
......
......@@ -37,10 +37,12 @@ int main(int argc, char *argv[]) {
bool binary_write = false;
bool tied_normalzation = false;
float var_floor = 1e-10;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("tied-normalization", &tied_normalzation, "The normalization is tied accross all the input dimensions");
po.Register("var-floor", &var_floor, "Floor the variance, so the factors in <Rescale> are bounded.");
po.Read(argc, argv);
......@@ -73,6 +75,10 @@ int main(int argc, char *argv[]) {
// Convert accumulated CMVN statistics into per-dimension shift (-mean) and
// scale (1/stddev) factors. Row 0 holds per-dim sums and row 1 sums of
// squares; the last column is excluded from the loop (presumably it holds
// the frame count used as 'count' — Kaldi CMVN-stats convention, verify).
for(int32 d=0; d<cmvn_stats.NumCols()-1; d++) {
BaseFloat mean = cmvn_stats(0,d)/count;
BaseFloat var = cmvn_stats(1,d)/count - mean*mean;
// Floor tiny variances so the 1/sqrt(var) rescale factor stays bounded.
if (var <= var_floor) {
KALDI_WARN << "Very small variance " << var << " flooring to " << var_floor;
var = var_floor;
}
shift(d) = -mean;
scale(d) = 1.0 / sqrt(var);
}
......
......@@ -72,6 +72,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu);
CuDevice::Instantiate().DisableCaching();
#endif
Nnet nnet_transf;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment