Minor fixes to MMI code [+add mincount] and UBM clustering code [fix test...

Minor fixes to MMI code [+add mincount] and UBM clustering code [fix test failure]; add program to get A matrix of exponential transform. git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@656 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8

Minor fixes to MMI code [+add mincount] and UBM clustering code [fix test...
8859328d · Dan Povey · 80462b98 · 8859328d · 8859328d · 8859328d
Commit 8859328d authored 13 years ago by Dan Povey
--- a/src/TODO
+++ b/src/TODO
@@ -12,6 +12,7 @@ major:


 minor:
+ Refactor UBM-building code "ClusterGaussiansToUbm"
 add combination scripts for wsj/s3
 look into why results not reproducible?   check if reproducible?
 change UBM-building in WSJ to match RM, and test the effect of this.

--- a/src/doc/install.dox
+++ b/src/doc/install.dox
@@ -37,7 +37,7 @@
    svn update
  \endverbatim
 If "svn update" prints out scary looking messages about conflicts (caused by
- you changing parts of files that were later modified centerally),
+ you changing parts of files that were later modified centrally),
 you may have to resolve the conflicts; for that, we recommend that you
 read about how svn works.


--- a/src/gmm/am-diag-gmm-test.cc
+++ b/src/gmm/am-diag-gmm-test.cc
@@ -70,7 +70,7 @@ void TestSplitStates(const AmDiagGmm &am_gmm) {
  int32 target_comp = 2 * am_gmm.NumGauss();
  kaldi::Vector<BaseFloat> occs(am_gmm.NumPdfs());
  for (int32 i = 0; i < occs.Dim(); ++i)
-    occs(i) = std::fabs(kaldi::RandGauss()) * (kaldi::RandUniform()+1);
+    occs(i) = std::fabs(kaldi::RandGauss()) * (kaldi::RandUniform()+1) * 4;
  AmDiagGmm *am_gmm1 = new AmDiagGmm();
  am_gmm1->CopyFromAmDiagGmm(am_gmm);
  am_gmm1->SplitByCount(occs, target_comp, 0.01, 0.2, 0.0);
@@ -92,9 +92,9 @@ void TestClustering(const AmDiagGmm &am_gmm) {
      interm_comp = am_gmm.NumGauss() / 2;
  kaldi::Vector<BaseFloat> occs(am_gmm.NumPdfs());
  for (int32 i = 0; i < occs.Dim(); ++i)
-    occs(i) = std::fabs(kaldi::RandGauss()) * (kaldi::RandUniform()+1);
+    occs(i) = std::fabs(kaldi::RandGauss()) * (kaldi::RandUniform()+1) * 4;

-  kaldi::UbmClusteringOptions ubm_opts(target_comp, 0.2, interm_comp, 0.01);
+  kaldi::UbmClusteringOptions ubm_opts(target_comp, 0.2, interm_comp, 0.01, 30);
  kaldi::DiagGmm ubm;
  ClusterGaussiansToUbm(am_gmm, occs, ubm_opts, &ubm);
 }

--- a/src/gmm/am-diag-gmm.cc
+++ b/src/gmm/am-diag-gmm.cc
@@ -254,7 +254,7 @@ void UbmClusteringOptions::Check() {

 void ClusterGaussiansToUbm(const AmDiagGmm& am,
                           const Vector<BaseFloat> &state_occs,
-                           const UbmClusteringOptions &opts,
+                           UbmClusteringOptions opts,
                           DiagGmm *ubm_out) {
  if (am.NumGauss() > opts.max_am_gauss) {
    KALDI_LOG << "ClusterGaussiansToUbm: first reducing num-gauss from " << am.NumGauss()
@@ -264,13 +264,13 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
    BaseFloat power = 1.0, min_count = 1.0; // Make the power 1, which I feel
    // is appropriate to the way we're doing the overall clustering procedure.
    tmp_am.MergeByCount(state_occs, opts.max_am_gauss, power, min_count);
-    UbmClusteringOptions opts_tmp(opts);
+
    if (tmp_am.NumGauss() > opts.max_am_gauss) {
      KALDI_LOG << "Clustered down to " << tmp_am.NumGauss()
                << "; will not cluster further";
-      opts_tmp.max_am_gauss = tmp_am.NumGauss();
+      opts.max_am_gauss = tmp_am.NumGauss();
    }
-    ClusterGaussiansToUbm(tmp_am, state_occs, opts_tmp, ubm_out);
+    ClusterGaussiansToUbm(tmp_am, state_occs, opts, ubm_out);
    return;
  }
  
@@ -330,6 +330,13 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
    }
  }

+  if (opts.intermediate_numcomps > am.NumGauss()) {
+    KALDI_WARN << "Intermediate numcomps " << opts.intermediate_numcomps
+               << " is more than num-gauss " << am.NumGauss()
+               << ", reducing it to " << am.NumGauss();
+    opts.intermediate_numcomps = am.NumGauss();
+  }
+    
  KALDI_VLOG(1) << "Merging from " << am.NumGauss() << " Gaussians in the "
                << "acoustic model, down to " << opts.intermediate_numcomps
                << " Gaussians.";
@@ -353,6 +360,7 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
      GaussClusterable *this_cluster = static_cast<GaussClusterable*>(
          gauss_clusters_out[clust_index][i]);
      BaseFloat weight = this_cluster->count();
+      KALDI_ASSERT(weight > 0);
      tmp_weights(gauss_index) = weight;
      tmp_vec.CopyFromVec(this_cluster->x_stats());
      tmp_vec.Scale(1/weight);
@@ -372,8 +380,13 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
  tmp_gmm.SetInvVarsAndMeans(tmp_vars, tmp_means);

  // Finally, cluster to the desired number of Gaussians in the UBM.
-  tmp_gmm.Merge(opts.ubm_numcomps);
-  KALDI_VLOG(1) << "Merged down to " << tmp_gmm.NumGauss() << " Gaussians.";
+  if (opts.ubm_numcomps < tmp_gmm.NumGauss()) {
+    tmp_gmm.Merge(opts.ubm_numcomps);
+    KALDI_VLOG(1) << "Merged down to " << tmp_gmm.NumGauss() << " Gaussians.";
+  } else {  // Target count is not below the current size: nothing to merge.
+    KALDI_WARN << "Not merging Gaussians since " << opts.ubm_numcomps
+               << " >= " << tmp_gmm.NumGauss();
+  }
  ubm_out->CopyFromDiagGmm(tmp_gmm);
 }


--- a/src/gmm/am-diag-gmm.h
+++ b/src/gmm/am-diag-gmm.h
@@ -173,9 +173,10 @@ struct UbmClusteringOptions {
        intermediate_numcomps(4000), cluster_varfloor(0.01),
        max_am_gauss(20000) {}
  UbmClusteringOptions(int32 ncomp, BaseFloat red, int32 interm_comps,
-                       BaseFloat vfloor)
+                       BaseFloat vfloor, int32 max_am_gauss)
        : ubm_numcomps(ncomp), reduce_state_factor(red),
-          intermediate_numcomps(interm_comps), cluster_varfloor(vfloor) {}
+          intermediate_numcomps(interm_comps), cluster_varfloor(vfloor),
+          max_am_gauss(max_am_gauss) {}
  void Register(ParseOptions *po) {
    std::string module = "UbmClusteringOptions: ";
    po->Register("max-am-gauss", &max_am_gauss, module+
@@ -206,7 +207,7 @@ struct UbmClusteringOptions {
 */
 void ClusterGaussiansToUbm(const AmDiagGmm& am,
                           const Vector<BaseFloat> &state_occs,
-                           const UbmClusteringOptions &opts,
+                           UbmClusteringOptions opts,
                           DiagGmm *ubm_out);

 }  // namespace kaldi

--- a/src/gmm/diag-gmm.cc
+++ b/src/gmm/diag-gmm.cc
@@ -175,6 +175,7 @@ void DiagGmm::Split(int32 target_components, float perturb_factor, std::vector<i
 }

 void DiagGmm::Merge(int32 target_components, std::vector<int32> *history) {
+  
  if (target_components <= 0 || NumGauss() < target_components) {
    KALDI_ERR << "Invalid argument for target number of Gaussians (="
              << target_components << "), #Gauss = " << NumGauss();
@@ -265,7 +266,7 @@ void DiagGmm::Merge(int32 target_components, std::vector<int32> *history) {
    // Search for the least significant change in likelihood
    // (maximum of negative delta_likes)
    BaseFloat max_delta_like = -std::numeric_limits<BaseFloat>::max();
-    int32 max_i = 0, max_j = 0;
+    int32 max_i = -1, max_j = -1;
    for (int32 i = 0; i < NumGauss(); ++i) {
      if (discarded_component[i]) continue;
      for (int32 j = 0; j < i; ++j) {
@@ -277,9 +278,9 @@ void DiagGmm::Merge(int32 target_components, std::vector<int32> *history) {
        }
      }
    }
-
+    
    // make sure that different components will be merged
-    assert(max_i != max_j);
+    KALDI_ASSERT(max_i != max_j && max_i != -1 && max_j != -1);

    // remember the merge candidates
    if (history != NULL) {
@@ -383,7 +384,7 @@ BaseFloat DiagGmm::ComponentLogLikelihood(const VectorBase<BaseFloat> &data,
    KALDI_ERR << "Must call ComputeGconsts() before computing likelihood";
  if (static_cast<int32>(data.Dim()) != Dim()) {
    KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
-        << "mismatch" << (data.Dim()) << "vs. "<< (Dim());
+        << "mismatch" << (data.Dim()) << " vs. "<< (Dim());
  }
  BaseFloat loglike;
  Vector<BaseFloat> data_sq(data);
@@ -414,7 +415,7 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
  loglikes->CopyFromVec(gconsts_);
  if (static_cast<int32>(data.Dim()) != Dim()) {
    KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
-        << "mismatch" << (data.Dim()) << "vs. "<< (Dim());
+        << "mismatch" << (data.Dim()) << " vs. "<< (Dim());
  }
  Vector<BaseFloat> data_sq(data);
  data_sq.ApplyPow(2.0);

--- a/src/gmm/mmie-diag-gmm-test.cc
+++ b/src/gmm/mmie-diag-gmm-test.cc
@@ -132,12 +132,7 @@ void UnitTestEstimateMmieDiagGmm() {
    
  mmie_gmm.Resize(gmm->NumGauss(), gmm->Dim(), flags);

-
-// iterate
  size_t iteration = 0;
-  float lastloglike = 0.0;
-  int32 lastloglike_nM = 0;
-
  while (iteration < maxiterations) {
    Vector<BaseFloat> featvec_num(dim);
    Vector<BaseFloat> featvec_den(dim);

--- a/src/gmm/mmie-diag-gmm.cc
+++ b/src/gmm/mmie-diag-gmm.cc
@@ -329,7 +329,8 @@ void MmieAccumDiagGmm::Update(const MmieDiagGmmOptions &config,
  }

  // Now update weights...
-  if (flags & kGmmWeights) {
+  if (flags & kGmmWeights && num_comp > 1 &&
+      num_occupancy_.Sum() > config.min_count_weight_update) {
    double weight_auxf_at_start = 0.0, weight_auxf_at_end = 0.0;
    Vector<double> weights(diaggmmnormal.weights_);
    for (int32 g = 0; g < num_comp; g++) {   // c.f. eq. 4.32 in Dan Povey's thesis.

--- a/src/gmm/mmie-diag-gmm.h
+++ b/src/gmm/mmie-diag-gmm.h
@@ -33,16 +33,20 @@ namespace kaldi {
 *  needed in the estimation process.
 */
 struct MmieDiagGmmOptions : public MleDiagGmmOptions {
-  BaseFloat i_smooth_tau;
+  BaseFloat i_smooth_tau;  
  BaseFloat ebw_e;
+  BaseFloat min_count_weight_update;
  MmieDiagGmmOptions() : MleDiagGmmOptions() {
    i_smooth_tau = 100.0;
    ebw_e = 2.0;
+    min_count_weight_update = 10.0;
  }
  void Register(ParseOptions *po) {
    std::string module = "MmieDiagGmmOptions: ";
    po->Register("min-gaussian-weight", &min_gaussian_weight,
                 module+"Min Gaussian weight before we remove it.");
+    po->Register("min-count-weight-update", &min_count_weight_update,
+                 module+"Minimum state-level numerator count required to do the weight update");
    po->Register("min-variance", &min_variance,
                 module+"Variance floor (absolute variance).");
    po->Register("remove-low-count-gaussians", &remove_low_count_gaussians,

--- a/src/gmmbin/Makefile
+++ b/src/gmmbin/Makefile
@@ -10,7 +10,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
           gmm-acc-stats gmm-init-lvtln gmm-est-lvtln-trans gmm-train-lvtln-special \
           gmm-acc-mllt gmm-mixup gmm-init-model \
           gmm-acc-hlda gmm-est-hlda gmm-transform-means gmm-init-et gmm-est-et \
-           gmm-et-acc-a gmm-et-est-a gmm-copy-et gmm-et-get-b \
+           gmm-et-acc-a gmm-et-est-a gmm-copy-et gmm-et-get-b gmm-et-get-a \
           gmm-make-regtree gmm-decode-faster-regtree-fmllr gmm-post-to-gpost \
           gmm-est-fmllr-gpost gmm-est-fmllr gmm-est-regtree-fmllr-ali \
           gmm-est-regtree-mllr gmm-decode-kaldi gmm-compute-likes \

--- a/src/gmmbin/gmm-et-get-a.cc
+++ b/src/gmmbin/gmm-et-get-a.cc
+// gmmbin/gmm-et-get-a.cc
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABILITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "transform/exponential-transform.h"
+
+
+// Entry point of gmm-et-get-a: reads an ExponentialTransform object from
+// <et-object-in> and writes its A matrix to <matrix-out>.
+// Returns 0 on success and -1 if a std::exception is caught; exits with
+// status 1 (after printing the usage message) on a wrong argument count.
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using kaldi::int32;
+
+    const char *usage =
+        "Write out the A matrix from the exponential transform\n"
+        " to a single file\n"
+        "Usage:  gmm-et-get-a [options] <et-object-in> <matrix-out>\n"
+        "e.g.: \n"
+        " gmm-et-get-a --binary=false 12.et A.mat\n";
+
+    bool binary = true;
+
+    ParseOptions po(usage);
+    po.Register("binary", &binary, "Write output in binary mode");
+
+    po.Read(argc, argv);
+
+    // Exactly two positional arguments are required: input object, output file.
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string et_rxfilename = po.GetArg(1),
+        a_wxfilename = po.GetArg(2);
+
+    ExponentialTransform et;
+    {
+      // Inner scope: the Input object is destroyed as soon as reading is done.
+      bool binary_in;
+      Input ki(et_rxfilename, &binary_in);
+      et.Read(ki.Stream(), binary_in);
+    }
+    Matrix<BaseFloat> A;
+    // Use the accessor that copies A_, so the output is the A matrix that the
+    // usage message advertises (GetDefaultTransform is a separate accessor).
+    et.GetATransform(&A);
+    Output ko(a_wxfilename, binary);
+    A.Write(ko.Stream(), binary);
+    KALDI_LOG << "Wrote A  to " << a_wxfilename;
+    return 0;
+  } catch(const std::exception& e) {
+    // Report the exception text on stderr and signal failure to the caller.
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
--- a/src/transform/exponential-transform.h
+++ b/src/transform/exponential-transform.h
@@ -95,6 +95,10 @@ class ExponentialTransform {
  /// that we have.
  void GetDefaultTransform(Matrix<BaseFloat> *transform) const;

+  void GetATransform(Matrix<BaseFloat> *transform) const {  // Copies the member A_ into *transform.
+    transform->CopyFromMat(A_);  // NOTE(review): confirm whether *transform must already have A_'s dimensions.
+  }
+
  /// Make B unit; this can be useful for combining the B part of the
  /// transform with MLLT.
  void MakeBUnit() { B_.SetUnit(); }