Commit 5abfb0ed authored by Dan Povey

Changed the neural net combination code, adding option to select starting point.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan@1713 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 3016313c
@@ -40,20 +40,20 @@ static void CombineNnets(const Vector<BaseFloat> &scale_params,
   }
 }
 
-// This function chooses from among the neural nets, the one
-// which has the best validation set objective function.
-static void GetInitialScaleParams(
+/// Returns an integer saying which model to use:
+/// either 0 ... num-models - 1 for the best individual model,
+/// or (#models) for the average of all of them.
+static int32 GetInitialModel(
     const std::vector<NnetTrainingExample> &validation_set,
-    const std::vector<Nnet> &nnets,
-    Vector<double> *scale_params) {
+    const std::vector<Nnet> &nnets) {
   int32 minibatch_size = 1024;
+  int32 num_nnets = static_cast<int32>(nnets.size());
   KALDI_ASSERT(!nnets.empty());
   BaseFloat tot_frames = validation_set.size();
   int32 best_n = -1;
   BaseFloat best_objf;
   Vector<BaseFloat> objfs(nnets.size());
-  for (int32 n = 0; n < static_cast<int32>(nnets.size()); n++) {
+  for (int32 n = 0; n < num_nnets; n++) {
     BaseFloat objf = ComputeNnetObjf(nnets[n], validation_set,
                                      minibatch_size) / tot_frames;
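The loop above scores each source model on the held-out validation set; the rest of GetInitialModel (next hunk) then compares the best individual model against the uniform average of all models and returns an index encoding the winner. A minimal standalone sketch of that return convention, in plain C++ with illustrative names (the real code operates on Kaldi Nnet objects via ComputeNnetObjf):

#include <cassert>
#include <vector>

// Given each model's validation objective and the objective of the uniform
// average of all models, return the best individual model's index, or
// objfs.size() (i.e. #models) if the average wins -- the same convention
// GetInitialModel uses. 'PickInitialModel' is a hypothetical name.
int PickInitialModel(const std::vector<double> &objfs, double average_objf) {
  assert(!objfs.empty());
  int best_n = 0;
  for (int n = 1; n < static_cast<int>(objfs.size()); n++)
    if (objfs[n] > objfs[best_n])
      best_n = n;
  return (average_objf > objfs[best_n]) ? static_cast<int>(objfs.size())
                                        : best_n;
}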
@@ -64,34 +64,55 @@ static void GetInitialScaleParams(
     objfs(n) = objf;
   }
   KALDI_LOG << "Objective functions for the source neural nets are " << objfs;
 
   int32 num_uc = nnets[0].NumUpdatableComponents();
 
   { // Now try a version where all the neural nets have the same weight.
-    scale_params->Resize(num_uc * nnets.size());
-    scale_params->Set(1.0 / nnets.size());
+    Vector<BaseFloat> scale_params(num_uc * num_nnets);
+    scale_params.Set(1.0 / num_nnets);
     Nnet average_nnet;
-    Vector<BaseFloat> scale_params_float(*scale_params);
-    CombineNnets(scale_params_float, nnets, &average_nnet);
+    CombineNnets(scale_params, nnets, &average_nnet);
     BaseFloat objf = ComputeNnetObjf(average_nnet, validation_set,
                                      minibatch_size) / tot_frames;
-    KALDI_LOG << "Objf with all neural nets averaged is "
-              << objf;
+    KALDI_LOG << "Objf with all neural nets averaged is " << objf;
     if (objf > best_objf) {
-      KALDI_LOG << "Initializing with all neural nets averaged.";
-      return;
+      return num_nnets;
+    } else {
+      return best_n;
     }
   }
+}
 
-  KALDI_LOG << "Using neural net with index " << best_n
-            << ", objective function was " << best_objf;
+// This function chooses from among the neural nets, the one
+// which has the best validation set objective function.
+static void GetInitialScaleParams(
+    const NnetCombineConfig &combine_config,
+    const std::vector<NnetTrainingExample> &validation_set,
+    const std::vector<Nnet> &nnets,
+    Vector<double> *scale_params) {
+  int32 initial_model = combine_config.initial_model,
+      num_nnets = static_cast<int32>(nnets.size());
+  if (initial_model < 0 || initial_model > num_nnets)
+    initial_model = GetInitialModel(validation_set, nnets);
+
+  KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
+  int32 num_uc = nnets[0].NumUpdatableComponents();
+
+  if (initial_model < num_nnets) {
+    KALDI_LOG << "Initializing with neural net with index " << initial_model;
     // At this point we're using the best of the individual neural nets.
     scale_params->Set(0.0);
     // Set the block of parameters corresponding to the "best" of the
     // source neural nets to
-    SubVector<double> best_block(*scale_params, num_uc * best_n, num_uc);
+    SubVector<double> best_block(*scale_params, num_uc * initial_model, num_uc);
     best_block.Set(1.0);
+  } else { // initial_model == num_nnets
+    KALDI_LOG << "Initializing with all neural nets averaged.";
+    scale_params->Resize(num_uc * num_nnets);
+    scale_params->Set(1.0 / num_nnets);
+  }
 }
 
 static BaseFloat ComputeObjfAndGradient(
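The scale vector that GetInitialScaleParams fills holds one block of NumUpdatableComponents() scales per source model, concatenated in model order. A minimal plain-C++ sketch of the initialization rule under that layout (illustrative names, and std::vector in place of Kaldi's Vector/SubVector):

#include <cassert>
#include <vector>

// Build initial scales for 'num_nnets' models with 'num_uc' updatable
// components each. initial_model in [0, num_nnets) selects one model: its
// block is set to 1.0 and all others to 0.0; initial_model == num_nnets
// selects the average: every scale becomes 1.0 / num_nnets.
std::vector<double> InitialScales(int initial_model, int num_nnets, int num_uc) {
  assert(initial_model >= 0 && initial_model <= num_nnets);
  std::vector<double> scales(num_uc * num_nnets, 0.0);
  if (initial_model < num_nnets) {
    for (int i = 0; i < num_uc; i++)
      scales[num_uc * initial_model + i] = 1.0;  // the chosen model's block
  } else {
    scales.assign(scales.size(), 1.0 / num_nnets);  // uniform average
  }
  return scales;
}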
@@ -143,7 +164,8 @@ void CombineNnets(const NnetCombineConfig &combine_config,
   Vector<double> scale_params;
 
-  GetInitialScaleParams(validation_set,
+  GetInitialScaleParams(combine_config,
+                        validation_set,
                         nnets,
                         &scale_params);
@@ -29,6 +29,8 @@ namespace kaldi {
    combination of the different neural-net parameters.
 */
 struct NnetCombineConfig {
+  int32 initial_model; // If provided, the index of the initial model to start
+  // the optimization from.
   int32 num_bfgs_iters; // The dimension is small (e.g. 3 to 5 times the
   // number of neural nets we were given, e.g. 10) so we do
   // BFGS. We actually implement this as L-BFGS but setting the number of
@@ -38,10 +40,14 @@ struct NnetCombineConfig {
   BaseFloat initial_step;
   BaseFloat min_objf_change;
-  NnetCombineConfig(): num_bfgs_iters(30), initial_step(0.1),
-                       min_objf_change(1.0e-05) { }
+  NnetCombineConfig(): initial_model(-1), num_bfgs_iters(30),
+                       initial_step(0.1), min_objf_change(1.0e-05) { }
   void Register(ParseOptions *po) {
+    po->Register("initial-model", &initial_model, "Specifies where to start the "
+                 "optimization from. If 0 ... #models-1, then specifies the model; "
+                 "if #models, then the average of all inputs; otherwise, chosen "
+                 "automatically from the previous options.");
     po->Register("num-bfgs-iters", &num_bfgs_iters, "Maximum number of function "
                  "evaluations for BFGS to use when optimizing combination weights");
     po->Register("initial-step", &initial_step, "Parameter in the optimization, "
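For context, this is how the new option would surface on the command line, as a minimal sketch assuming the usual Kaldi ParseOptions flow; the binary name, usage string, and include path below are illustrative, not taken from this commit:

#include "util/parse-options.h"
#include "nnet-cpu/combine-nnet.h"  // hypothetical path to NnetCombineConfig

int main(int argc, char *argv[]) {
  using namespace kaldi;
  ParseOptions po("Usage: nnet-combine [options] <model-in-1> ... <model-out>");
  NnetCombineConfig combine_config;
  combine_config.Register(&po);  // now also registers --initial-model
  po.Read(argc, argv);
  // With 4 input models: --initial-model=2 starts the optimization from the
  // third model; --initial-model=4 starts from their average; the default
  // of -1 lets GetInitialModel() choose on the validation set.
  return 0;
}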
@@ -97,7 +97,6 @@ int main(int argc, char *argv[]) {
       KALDI_ASSERT(validation_set.size() > 0);
     }
 
     CombineNnets(combine_config,
                  validation_set,
                  nnets,