Skip to content
Snippets Groups Projects
Commit 467b91b1 authored by Daniel Povey
Browse files

Some code changes to make reported real-time factors more precise.

parent abcd890c
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@
// Copyright 2009-2012 Microsoft Corporation, Karel Vesely
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
......@@ -48,14 +48,14 @@ int main(int argc, char *argv[]) {
bool allow_partial = false;
BaseFloat acoustic_scale = 0.1;
LatticeFasterDecoderConfig config;
std::string word_syms_filename;
config.Register(&po);
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
po.Register("allow-partial", &allow_partial, "If true, produce output even if end state was not reached.");
po.Read(argc, argv);
if (po.NumArgs() < 4 || po.NumArgs() > 6) {
......@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {
lattice_wspecifier = po.GetArg(4),
words_wspecifier = po.GetOptArg(5),
alignment_wspecifier = po.GetOptArg(6);
TransitionModel trans_model;
ReadKaldiObject(model_in_filename, &trans_model);
......@@ -86,7 +86,7 @@ int main(int argc, char *argv[]) {
Int32VectorWriter alignment_writer(alignment_wspecifier);
fst::SymbolTable *word_syms = NULL;
if (word_syms_filename != "")
if (word_syms_filename != "")
if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
......@@ -99,10 +99,11 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader loglike_reader(feature_rspecifier);
// Input FST is just one FST, not a table of FSTs.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
LatticeFasterDecoder decoder(*decode_fst, config);
for (; !loglike_reader.Done(); loglike_reader.Next()) {
std::string utt = loglike_reader.Key();
Matrix<BaseFloat> loglikes (loglike_reader.Value());
......@@ -112,7 +113,7 @@ int main(int argc, char *argv[]) {
num_fail++;
continue;
}
DecodableMatrixScaledMapped decodable(trans_model, loglikes, acoustic_scale);
double like;
......@@ -130,7 +131,7 @@ int main(int argc, char *argv[]) {
delete decode_fst; // delete this only after decoder goes out of scope.
} else { // We have different FSTs for different utterances.
SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
RandomAccessBaseFloatMatrixReader loglike_reader(feature_rspecifier);
RandomAccessBaseFloatMatrixReader loglike_reader(feature_rspecifier);
for (; !fst_reader.Done(); fst_reader.Next()) {
std::string utt = fst_reader.Key();
if (!loglike_reader.HasKey(utt)) {
......@@ -158,7 +159,7 @@ int main(int argc, char *argv[]) {
} else num_fail++;
}
}
double elapsed = timer.Elapsed();
KALDI_LOG << "Time taken "<< elapsed
<< "s: real-time factor assuming 100 frames/sec is "
......
......@@ -53,7 +53,7 @@ int main(int argc, char *argv[]) {
BaseFloat log_sum_exp_prune = 0.0;
LatticeFasterDecoderConfig latgen_config;
TaskSequencerConfig sequencer_config; // has --num-threads option
std::string word_syms_filename;
latgen_config.Register(&po);
sequencer_config.Register(&po);
......@@ -66,7 +66,7 @@ int main(int argc, char *argv[]) {
"Symbol table for words [for debug output]");
po.Register("allow-partial", &allow_partial,
"If true, produce output even if end state was not reached.");
po.Read(argc, argv);
if (po.NumArgs() < 4 || po.NumArgs() > 6) {
......@@ -80,7 +80,7 @@ int main(int argc, char *argv[]) {
lattice_wspecifier = po.GetArg(4),
words_wspecifier = po.GetOptArg(5),
alignment_wspecifier = po.GetOptArg(6);
TransitionModel trans_model;
AmDiagGmm am_gmm;
{
......@@ -103,7 +103,7 @@ int main(int argc, char *argv[]) {
Int32VectorWriter alignment_writer(alignment_wspecifier);
fst::SymbolTable *word_syms = NULL;
if (word_syms_filename != "")
if (word_syms_filename != "")
if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
......@@ -113,16 +113,17 @@ int main(int argc, char *argv[]) {
int num_done = 0, num_err = 0;
VectorFst<StdArc> *decode_fst = NULL; // only used if there is a single
// decoding graph.
TaskSequencer<DecodeUtteranceLatticeFasterClass> sequencer(sequencer_config);
if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
// Input FST is just one FST, not a table of FSTs.
decode_fst = fst::ReadFstKaldi(fst_in_str);
{
timer.Reset();
{
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
Matrix<BaseFloat> *features =
......@@ -134,12 +135,12 @@ int main(int argc, char *argv[]) {
delete features;
continue;
}
LatticeFasterDecoder *decoder = new LatticeFasterDecoder(*decode_fst,
latgen_config);
// takes ownership of "features"
DecodableAmDiagGmmScaled *gmm_decodable =
new DecodableAmDiagGmmScaled(am_gmm, trans_model,
new DecodableAmDiagGmmScaled(am_gmm, trans_model,
acoustic_scale,
log_sum_exp_prune,
features);
......@@ -151,14 +152,14 @@ int main(int argc, char *argv[]) {
allow_partial, &alignment_writer, &words_writer,
&compact_lattice_writer, &lattice_writer,
&tot_like, &frame_count, &num_done, &num_err, NULL);
sequencer.Run(task); // takes ownership of "task",
// and will delete it when done.
}
}
} else { // We have different FSTs for different utterances.
SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
for (; !fst_reader.Done(); fst_reader.Next()) {
std::string utt = fst_reader.Key();
if (!feature_reader.HasKey(utt)) {
......@@ -176,11 +177,11 @@ int main(int argc, char *argv[]) {
continue;
}
// the "decoder" object takes ownership of the new FST object.
// the "decoder" object takes ownership of the new FST object.
LatticeFasterDecoder *decoder = new LatticeFasterDecoder(
latgen_config,
new VectorFst<StdArc>(fst_reader.Value()));
// The "decodable" object takes ownership of the features.
DecodableAmDiagGmmScaled *gmm_decodable =
new DecodableAmDiagGmmScaled(am_gmm, trans_model, acoustic_scale,
......@@ -200,7 +201,7 @@ int main(int argc, char *argv[]) {
sequencer.Wait();
delete decode_fst;
double elapsed = timer.Elapsed();
KALDI_LOG << "Decoded with " << sequencer_config.num_threads << " threads.";
KALDI_LOG << "Time taken "<< elapsed
......
......@@ -48,7 +48,7 @@ int main(int argc, char *argv[]) {
bool allow_partial = false;
BaseFloat acoustic_scale = 0.1;
LatticeFasterDecoderConfig config;
std::string word_syms_filename;
config.Register(&po);
po.Register("acoustic-scale", &acoustic_scale,
......@@ -57,7 +57,7 @@ int main(int argc, char *argv[]) {
"Symbol table for words [for debug output]");
po.Register("allow-partial", &allow_partial,
"If true, produce output even if end state was not reached.");
po.Read(argc, argv);
if (po.NumArgs() < 4 || po.NumArgs() > 6) {
......@@ -71,7 +71,7 @@ int main(int argc, char *argv[]) {
lattice_wspecifier = po.GetArg(4),
words_wspecifier = po.GetOptArg(5),
alignment_wspecifier = po.GetOptArg(6);
TransitionModel trans_model;
AmDiagGmm am_gmm;
{
......@@ -94,7 +94,7 @@ int main(int argc, char *argv[]) {
Int32VectorWriter alignment_writer(alignment_wspecifier);
fst::SymbolTable *word_syms = NULL;
if (word_syms_filename != "")
if (word_syms_filename != "")
if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
......@@ -107,10 +107,11 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
// Input FST is just one FST, not a table of FSTs.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
LatticeFasterDecoder decoder(*decode_fst, config);
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
Matrix<BaseFloat> features (feature_reader.Value());
......@@ -120,7 +121,7 @@ int main(int argc, char *argv[]) {
num_err++;
continue;
}
DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features,
acoustic_scale);
......@@ -139,7 +140,7 @@ int main(int argc, char *argv[]) {
delete decode_fst; // delete this only after decoder goes out of scope.
} else { // We have different FSTs for different utterances.
SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
for (; !fst_reader.Done(); fst_reader.Next()) {
std::string utt = fst_reader.Key();
if (!feature_reader.HasKey(utt)) {
......@@ -170,7 +171,7 @@ int main(int argc, char *argv[]) {
} else num_err++;
}
}
double elapsed = timer.Elapsed();
KALDI_LOG << "Time taken "<< elapsed
<< "s: real-time factor assuming 100 frames/sec is "
......
......@@ -111,6 +111,7 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
......
......@@ -48,15 +48,15 @@ int main(int argc, char *argv[]) {
bool allow_partial = false;
BaseFloat acoustic_scale = 0.1;
LatticeFasterDecoderConfig config;
std::string word_syms_filename;
config.Register(&po);
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
po.Register("allow-partial", &allow_partial, "If true, produce output even if end state was not reached.");
po.Read(argc, argv);
if (po.NumArgs() < 4 || po.NumArgs() > 6) {
po.PrintUsage();
exit(1);
......@@ -68,7 +68,7 @@ int main(int argc, char *argv[]) {
lattice_wspecifier = po.GetArg(4),
words_wspecifier = po.GetOptArg(5),
alignment_wspecifier = po.GetOptArg(6);
TransitionModel trans_model;
AmNnet am_nnet;
{
......@@ -91,7 +91,7 @@ int main(int argc, char *argv[]) {
Int32VectorWriter alignment_writer(alignment_wspecifier);
fst::SymbolTable *word_syms = NULL;
if (word_syms_filename != "")
if (word_syms_filename != "")
if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
......@@ -103,13 +103,14 @@ int main(int argc, char *argv[]) {
if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) {
SequentialBaseFloatCuMatrixReader feature_reader(feature_rspecifier);
// Input FST is just one FST, not a table of FSTs.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
LatticeFasterDecoder decoder(*decode_fst, config);
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
const CuMatrix<BaseFloat> &features (feature_reader.Value());
......@@ -139,7 +140,7 @@ int main(int argc, char *argv[]) {
delete decode_fst; // delete this only after decoder goes out of scope.
} else { // We have different FSTs for different utterances.
SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
RandomAccessBaseFloatCuMatrixReader feature_reader(feature_rspecifier);
RandomAccessBaseFloatCuMatrixReader feature_reader(feature_rspecifier);
for (; !fst_reader.Done(); fst_reader.Next()) {
std::string utt = fst_reader.Key();
if (!feature_reader.HasKey(utt)) {
......@@ -154,7 +155,7 @@ int main(int argc, char *argv[]) {
num_fail++;
continue;
}
LatticeFasterDecoder decoder(fst_reader.Value(), config);
bool pad_input = true;
......@@ -175,7 +176,7 @@ int main(int argc, char *argv[]) {
} else num_fail++;
}
}
double elapsed = timer.Elapsed();
KALDI_LOG << "Time taken "<< elapsed
<< "s: real-time factor assuming 100 frames/sec is "
......
......@@ -48,6 +48,7 @@ int main(int argc, char *argv[]) {
"Usage: nnet3-latgen-faster-parallel [options] <nnet-in> <fst-in|fsts-rspecifier> <features-rspecifier>"
" <lattice-wspecifier> [ <words-wspecifier> [<alignments-wspecifier>] ]\n";
ParseOptions po(usage);
Timer timer;
bool allow_partial = false;
TaskSequencerConfig sequencer_config; // has --num-threads option
......@@ -131,6 +132,7 @@ int main(int argc, char *argv[]) {
// Input FST is just one FST, not a table of FSTs.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
LatticeFasterDecoder decoder(*decode_fst, config);
......@@ -249,10 +251,14 @@ int main(int argc, char *argv[]) {
sequencer.Wait(); // Waits for all tasks to be done.
}
kaldi::int64 input_frame_count =
frame_count * decodable_opts.frame_subsampling_factor;
double elapsed = timer.Elapsed();
KALDI_LOG << "Time taken " << elapsed
<< "s: real-time factor assuming 100 frames/sec is "
<< (elapsed * 100.0 / frame_count);
<< "s: real-time factor assuming 100 feature frames/sec is "
<< (sequencer_config.num_threads * elapsed * 100.0 /
input_frame_count);
KALDI_LOG << "Done " << num_success << " utterances, failed for "
<< num_fail;
KALDI_LOG << "Overall log-likelihood per frame is "
......
......@@ -130,6 +130,7 @@ int main(int argc, char *argv[]) {
// Input FST is just one FST, not a table of FSTs.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset();
{
LatticeFasterDecoder decoder(*decode_fst, config);
......
......@@ -110,7 +110,7 @@ int main(int argc, char *argv[]) {
LatticeFasterDecoderConfig decoder_opts;
SgmmGselectConfig sgmm_opts;
decoder_opts.Register(&po);
decoder_opts.Register(&po);
sgmm_opts.Register(&po);
po.Register("acoustic-scale", &acoustic_scale,
......@@ -152,18 +152,18 @@ int main(int argc, char *argv[]) {
CompactLatticeWriter compact_lattice_writer;
LatticeWriter lattice_writer;
bool determinize = decoder_opts.determinize_lattice;
bool determinize = decoder_opts.determinize_lattice;
if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier)
: lattice_writer.Open(lattice_wspecifier)))
KALDI_ERR << "Could not open table for writing lattices: "
<< lattice_wspecifier;
Int32VectorWriter words_writer(words_wspecifier);
Int32VectorWriter alignment_writer(alignment_wspecifier);
fst::SymbolTable *word_syms = NULL;
if (word_syms_filename != "")
if (word_syms_filename != "")
if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
......@@ -171,14 +171,14 @@ int main(int argc, char *argv[]) {
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier,
utt2spk_rspecifier);
BaseFloat tot_like = 0.0;
kaldi::int64 frame_count = 0;
int num_success = 0, num_fail = 0;
Timer timer;
if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) { // a single FST.
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
// It's important that we initialize decode_fst after feature_reader, as it
......@@ -188,10 +188,10 @@ int main(int argc, char *argv[]) {
// lot of virtual memory.
VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
timer.Reset(); // exclude graph loading time.
{
LatticeFasterDecoder decoder(*decode_fst, decoder_opts);
const std::vector<std::vector<int32> > empty_gselect;
for (; !feature_reader.Done(); feature_reader.Next()) {
......@@ -220,7 +220,7 @@ int main(int argc, char *argv[]) {
delete decode_fst; // only safe to do this after decoder goes out of scope.
} else { // We have different FSTs for different utterances.
SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
for (; !fst_reader.Done(); fst_reader.Next()) {
std::string utt = fst_reader.Key();
if (!feature_reader.HasKey(utt)) {
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment