Skip to content
Snippets Groups Projects
Commit 59790029 authored by Dan Povey's avatar Dan Povey
Browse files

Change how model initialization works-- accept empty leaves.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@876 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent f1891a84
No related branches found
No related tags found
No related merge requests found
......@@ -31,40 +31,39 @@ namespace kaldi {
/// InitAmGmm initializes the GMM with one Gaussian per state.
void InitAmGmm(const BuildTreeStatsType &stats,
const EventMap &to_pdf_map,
AmDiagGmm *am_gmm,
bool allow_empty_leaves) {
AmDiagGmm *am_gmm) {
// Get stats split by tree-leaf ( == pdf):
std::vector<BuildTreeStatsType> split_stats;
SplitStatsByMap(stats, to_pdf_map, &split_stats);
split_stats.resize(to_pdf_map.MaxResult() + 1); // ensure that
// if the last leaf had no stats, this vector still has the right size.
// Make sure each leaf has stats.
for (size_t i = 0; i < split_stats.size(); i++) {
if (split_stats[i].empty()) {
std::ostringstream errmsg;
errmsg << "Tree has pdf-id " << i << "with no stats. "
"This probably means you have phones that were unseen in training "
"and were not shared with other phones in the roots file. "
"You should modify your roots file as necessary to fix this."
"(i.e. share that phone with a similar but seen phone on one line "
"of the roots file). Be sure to regenerate roots.int from roots.txt, "
"if using s5 scripts. To work out the phone, search for "
"pdf-id " << i << " in the output of show-transitions (for this model). ";
if (allow_empty_leaves) KALDI_WARN << errmsg.str();
else KALDI_ERR << errmsg.str()
<< "Use --allow-empty-leaves to make this a warning.";
KALDI_WARN << "Tree has pdf-id " << i << "with no stats. ";
/*
This probably means you have phones that were unseen in training
and were not shared with other phones in the roots file.
You should modify your roots file as necessary to fix this.
(i.e. share that phone with a similar but seen phone on one line
of the roots file). Be sure to regenerate roots.int from roots.txt,
if using s5 scripts. To work out the phone, search for
pdf-id i in the output of show-transitions (for this model). */
}
}
std::vector<Clusterable*> summed_stats;
SumStatsVec(split_stats, &summed_stats);
Clusterable *avg_stats = SumClusterable(summed_stats);
KALDI_ASSERT(avg_stats != NULL && "No stats available in gmm-init-model.");
for (size_t i = 0; i < summed_stats.size(); i++) {
assert(summed_stats[i] != NULL);
GaussClusterable *c =
static_cast<GaussClusterable*>(summed_stats[i] != NULL ? summed_stats[i] : avg_stats);
DiagGmm gmm;
Vector<BaseFloat> x (static_cast<GaussClusterable*>(summed_stats[i])->x_stats());
Vector<BaseFloat> x2 (static_cast<GaussClusterable*>(summed_stats[i])->x2_stats());
BaseFloat count = static_cast<GaussClusterable*>(summed_stats[i])->count();
Vector<BaseFloat> x (c->x_stats());
Vector<BaseFloat> x2 (c->x2_stats());
BaseFloat count = c->count();
gmm.Resize(1, x.Dim());
if (count < 100) {
KALDI_WARN << "Very small count for state "<< i << ": " << count;
......@@ -88,6 +87,7 @@ void InitAmGmm(const BuildTreeStatsType &stats,
am_gmm->AddPdf(gmm);
}
DeletePointers(&summed_stats);
delete avg_stats;
}
/// Get state occupation counts.
......@@ -219,13 +219,10 @@ int main(int argc, char *argv[]) {
" gmm-init-model tree treeacc topo 1.mdl prev/tree prev/30.mdl\n";
bool binary = true;
bool allow_empty_leaves = false;
std::string occs_out_filename;
ParseOptions po(usage);
po.Register("binary", &binary, "Write output in binary mode");
po.Register("allow-empty-leaves", &allow_empty_leaves, "Do not die when there are "
"leaves in the tree with no stats.");
po.Register("write-occs", &occs_out_filename, "File to write state "
"occupancies to.");
......@@ -272,7 +269,7 @@ int main(int argc, char *argv[]) {
// Now, the summed_stats will be used to initialize the GMM.
AmDiagGmm am_gmm;
if (old_tree_filename.empty())
InitAmGmm(stats, to_pdf, &am_gmm, allow_empty_leaves); // Normal case: initialize 1 Gauss/model from tree stats.
InitAmGmm(stats, to_pdf, &am_gmm); // Normal case: initialize 1 Gauss/model from tree stats.
else {
InitAmGmmFromOld(stats, to_pdf,
ctx_dep.ContextWidth(),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment