Commit 5abfb0ed authored by Dan Povey

Changed the neural net combination code, adding option to select starting point.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan@1713 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 3016313c
@@ -40,20 +40,20 @@ static void CombineNnets(const Vector<BaseFloat> &scale_params,
   }
 }
 
-// This function chooses from among the neural nets, the one
-// which has the best validation set objective function.
-static void GetInitialScaleParams(
+/// Returns an integer saying which model to use:
+/// either 0 ... num-models - 1 for the best individual model,
+/// or (#models) for the average of all of them.
+static int32 GetInitialModel(
     const std::vector<NnetTrainingExample> &validation_set,
-    const std::vector<Nnet> &nnets,
-    Vector<double> *scale_params) {
+    const std::vector<Nnet> &nnets) {
   int32 minibatch_size = 1024;
+  int32 num_nnets = static_cast<int32>(nnets.size());
   KALDI_ASSERT(!nnets.empty());
   BaseFloat tot_frames = validation_set.size();
   int32 best_n = -1;
   BaseFloat best_objf;
   Vector<BaseFloat> objfs(nnets.size());
-  for (int32 n = 0; n < static_cast<int32>(nnets.size()); n++) {
+  for (int32 n = 0; n < num_nnets; n++) {
     BaseFloat objf = ComputeNnetObjf(nnets[n], validation_set,
                                      minibatch_size) / tot_frames;
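The loop above scores each source model on the held-out validation set; the rest of GetInitialModel (next hunk) then compares the best individual model against the uniform average of all models and returns an index encoding the winner. A minimal standalone sketch of that return convention, in plain C++ with illustrative names (the real code operates on Kaldi Nnet objects via ComputeNnetObjf):

#include <cassert>
#include <vector>

// Given each model's validation objective and the objective of the uniform
// average of all models, return the best individual model's index, or
// objfs.size() (i.e. #models) if the average wins -- the same convention
// GetInitialModel uses. 'PickInitialModel' is a hypothetical name.
int PickInitialModel(const std::vector<double> &objfs, double average_objf) {
  assert(!objfs.empty());
  int best_n = 0;
  for (int n = 1; n < static_cast<int>(objfs.size()); n++)
    if (objfs[n] > objfs[best_n])
      best_n = n;
  return (average_objf > objfs[best_n]) ? static_cast<int>(objfs.size())
                                        : best_n;
}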
@@ -64,34 +64,55 @@ static void GetInitialScaleParams(
     objfs(n) = objf;
   }
   KALDI_LOG << "Objective functions for the source neural nets are " << objfs;
 
   int32 num_uc = nnets[0].NumUpdatableComponents();
 
   { // Now try a version where all the neural nets have the same weight.
-    scale_params->Resize(num_uc * nnets.size());
-    scale_params->Set(1.0 / nnets.size());
+    Vector<BaseFloat> scale_params(num_uc * num_nnets);
+    scale_params.Set(1.0 / num_nnets);
     Nnet average_nnet;
-    Vector<BaseFloat> scale_params_float(*scale_params);
-    CombineNnets(scale_params_float, nnets, &average_nnet);
+    CombineNnets(scale_params, nnets, &average_nnet);
     BaseFloat objf = ComputeNnetObjf(average_nnet, validation_set,
                                      minibatch_size) / tot_frames;
-    KALDI_LOG << "Objf with all neural nets averaged is "
-              << objf;
+    KALDI_LOG << "Objf with all neural nets averaged is " << objf;
     if (objf > best_objf) {
-      KALDI_LOG << "Initializing with all neural nets averaged.";
-      return;
+      return num_nnets;
+    } else {
+      return best_n;
     }
   }
+}
 
-  KALDI_LOG << "Using neural net with index " << best_n
-            << ", objective function was " << best_objf;
+// This function chooses from among the neural nets, the one
+// which has the best validation set objective function.
+static void GetInitialScaleParams(
+    const NnetCombineConfig &combine_config,
+    const std::vector<NnetTrainingExample> &validation_set,
+    const std::vector<Nnet> &nnets,
+    Vector<double> *scale_params) {
+  int32 initial_model = combine_config.initial_model,
+      num_nnets = static_cast<int32>(nnets.size());
+  if (initial_model < 0 || initial_model > num_nnets)
+    initial_model = GetInitialModel(validation_set, nnets);
+
+  KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
+  int32 num_uc = nnets[0].NumUpdatableComponents();
+
+  if (initial_model < num_nnets) {
+    KALDI_LOG << "Initializing with neural net with index " << initial_model;
     // At this point we're using the best of the individual neural nets.
     scale_params->Set(0.0);
     // Set the block of parameters corresponding to the "best" of the
     // source neural nets to
-    SubVector<double> best_block(*scale_params, num_uc * best_n, num_uc);
+    SubVector<double> best_block(*scale_params, num_uc * initial_model, num_uc);
     best_block.Set(1.0);
+  } else { // initial_model == num_nnets
+    KALDI_LOG << "Initializing with all neural nets averaged.";
+    scale_params->Resize(num_uc * num_nnets);
+    scale_params->Set(1.0 / num_nnets);
+  }
 }
 
 static BaseFloat ComputeObjfAndGradient(
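The scale vector that GetInitialScaleParams fills holds one block of NumUpdatableComponents() scales per source model, concatenated in model order. A minimal plain-C++ sketch of the initialization rule under that layout (illustrative names, and std::vector in place of Kaldi's Vector/SubVector):

#include <cassert>
#include <vector>

// Build initial scales for 'num_nnets' models with 'num_uc' updatable
// components each. initial_model in [0, num_nnets) selects one model: its
// block is set to 1.0 and all others to 0.0; initial_model == num_nnets
// selects the average: every scale becomes 1.0 / num_nnets.
std::vector<double> InitialScales(int initial_model, int num_nnets, int num_uc) {
  assert(initial_model >= 0 && initial_model <= num_nnets);
  std::vector<double> scales(num_uc * num_nnets, 0.0);
  if (initial_model < num_nnets) {
    for (int i = 0; i < num_uc; i++)
      scales[num_uc * initial_model + i] = 1.0;  // the chosen model's block
  } else {
    scales.assign(scales.size(), 1.0 / num_nnets);  // uniform average
  }
  return scales;
}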
@@ -143,7 +164,8 @@ void CombineNnets(const NnetCombineConfig &combine_config,
   Vector<double> scale_params;
 
-  GetInitialScaleParams(validation_set,
+  GetInitialScaleParams(combine_config,
+                        validation_set,
                         nnets,
                         &scale_params);
@@ -29,6 +29,8 @@ namespace kaldi {
    combination of the different neural-net parameters.
 */
 struct NnetCombineConfig {
+  int32 initial_model; // If provided, the index of the initial model to start
+  // the optimization from.
   int32 num_bfgs_iters; // The dimension is small (e.g. 3 to 5 times the
   // number of neural nets we were given, e.g. 10) so we do
   // BFGS. We actually implement this as L-BFGS but setting the number of
@@ -38,10 +40,14 @@ struct NnetCombineConfig {
   BaseFloat initial_step;
   BaseFloat min_objf_change;
-  NnetCombineConfig(): num_bfgs_iters(30), initial_step(0.1),
-                       min_objf_change(1.0e-05) { }
+  NnetCombineConfig(): initial_model(-1), num_bfgs_iters(30),
+                       initial_step(0.1), min_objf_change(1.0e-05) { }
   void Register(ParseOptions *po) {
+    po->Register("initial-model", &initial_model, "Specifies where to start the "
+                 "optimization from. If 0 ... #models-1, then specifies the model; "
+                 "if #models, then the average of all inputs; otherwise, chosen "
+                 "automatically from the previous options.");
     po->Register("num-bfgs-iters", &num_bfgs_iters, "Maximum number of function "
                  "evaluations for BFGS to use when optimizing combination weights");
     po->Register("initial-step", &initial_step, "Parameter in the optimization, "
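For context, this is how the new option would surface on the command line, as a minimal sketch assuming the usual Kaldi ParseOptions flow; the binary name, usage string, and include path below are illustrative, not taken from this commit:

#include "util/parse-options.h"
#include "nnet-cpu/combine-nnet.h"  // hypothetical path to NnetCombineConfig

int main(int argc, char *argv[]) {
  using namespace kaldi;
  ParseOptions po("Usage: nnet-combine [options] <model-in-1> ... <model-out>");
  NnetCombineConfig combine_config;
  combine_config.Register(&po);  // now also registers --initial-model
  po.Read(argc, argv);
  // With 4 input models: --initial-model=2 starts the optimization from the
  // third model; --initial-model=4 starts from their average; the default
  // of -1 lets GetInitialModel() choose on the validation set.
  return 0;
}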
@@ -97,7 +97,6 @@ int main(int argc, char *argv[]) {
       KALDI_ASSERT(validation_set.size() > 0);
     }
 
     CombineNnets(combine_config,
                  validation_set,
                  nnets,