Skip to content
Snippets Groups Projects
Commit bbac0793 authored by Daniel Povey's avatar Daniel Povey Committed by Dan Povey
Browse files

Adding some CCTC training code

parent a0804fd8
No related branches found
No related tags found
No related merge requests found
// ctc/cctc-training.cc
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "ctc/cctc-training.h"
namespace kaldi {
namespace ctc {
// Forward pass of CCTC training, operating on the neural-net output for the
// supplied supervision; results are meant to be written to *forward_data.
//
// The only logic present so far is the start of a sanity check that the
// supervision's frame count equals the number of rows of nnet_output.
//
// NOTE(review): this body is unfinished.  The KALDI_ASSERT below breaks off
// after `supervision.` and is never closed, so this function does not compile
// as written; the second condition of the assertion and the forward
// computation itself still have to be supplied.
void CctcTraining::Forward(const CtcSupervision &supervision,
const CuMatrixBase<BaseFloat> &nnet_output,
ForwardData *forward_data) {
KALDI_ASSERT(supervision.num_frames == nnet_output.NumRows() &&
supervision.
}
} // namespace ctc
} // namespace kaldi
// ctc/cctc-training.h
// Copyright 2015 Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_CTC_CCTC_TRAINING_H_
#define KALDI_CTC_CCTC_TRAINING_H_
#include <vector>
#include <map>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fstext/fstext-lib.h"
#include "tree/context-dep.h"
#include "lat/kaldi-lattice.h"
#include "matrix/kaldi-matrix.h"
#include "ctc/language-model.h"
namespace kaldi {
namespace ctc {
// CTC stands for Connectionist Temporal Classification; see the paper by
// Graves et al.  CCTC stands for context-dependent CTC: an extension of the
// original model in which the next phone depends on the phone history
// (actually, a truncation thereof) in addition to the acoustic history.
struct CctcTrainingOptions {
BaseFloat normalizing_weight;
CctcTrainingOptions(): normalizing_weight(0.0001) { }
void Register(OptionsItf *opts) {
opts->Register("normalizing-weight", &normalizing_weight, "Weight on a "
"term in the objective function that's a squared "
"log of the denominator in the CCTC likelihood; it "
"exists to keep the network outputs in a reasonable "
"range so we can exp() them without overflow.");
}
};
// This class is not responsible for the entire process of CCTC model training;
// it is only responsible for the forward-backward from the neural net output,
// and the derivative computation.
class CctcTraining {
 public:
  // Intermediate quantities produced by Forward() and handed, read-only, to
  // Backward().
  struct ForwardData {
    // NOTE(review): presumably exp() of the neural-net output -- confirm once
    // Forward() is implemented.
    CuMatrix<BaseFloat> exp_nnet_output;
    // NOTE(review): presumably the denominators of the CCTC likelihood --
    // confirm once Forward() is implemented.
    CuMatrix<BaseFloat> normalizers;
    // The log-alpha value (forward score) for each state in the lattice.
    Vector<double> alpha;
    // NOTE(review): presumably the total likelihood from the forward pass.
    // No constructor in this struct initializes it.
    BaseFloat tot_like;
  };

  // Both arguments are stored by reference (see opts_ and trans_model_
  // below), so the objects passed in must outlive this object.
  CctcTraining(const CctcTrainingOptions &opts,
               const CctcTransitionModel &trans_model):
      opts_(opts), trans_model_(trans_model) { }

  /**
     This function does the forward computation, up to
     computing the objective.
  */
  void Forward(const CtcSupervision &supervision,
               const CuMatrixBase<BaseFloat> &nnet_output,
               ForwardData *forward_data);

  // Backward counterpart of Forward(): takes the same supervision and
  // neural-net output together with the ForwardData that Forward() filled in,
  // and writes to *nnet_deriv.
  void Backward(const CtcSupervision &supervision,
                const CuMatrixBase<BaseFloat> &nnet_output,
                const ForwardData &forward_data,
                CuMatrixBase<BaseFloat> *nnet_deriv);

 private:
  // Held as a const reference: the constructor receives the options as a
  // const reference, which cannot be bound to a non-const reference member.
  const CctcTrainingOptions &opts_;
  const CctcTransitionModel &trans_model_;
  // CUDA copy of trans_model_.GetWeights().
  // NOTE(review): the constructor does not fill this in; it still has to be
  // copied from the transition model before it is used.
  CuMatrix<BaseFloat> weights_;
};
} // namespace ctc
} // namespace kaldi
#endif // KALDI_CTC_CCTC_TRAINING_H_
......@@ -217,18 +217,17 @@ void CctcTransitionModel::ComputeWeights() {
int32 num_history_states = history_state_info_.size(),
num_output_indexes = num_output_indexes_,
num_phones = num_phones_;
Matrix<BaseFloat> weights(num_history_states,
num_output_indexes);
weights_.Resize(num_history_states,
num_output_indexes);
for (int32 h = 0; h < num_history_states; h++) {
const HistoryStateInfo &info = history_state_info_[h];
SubVector<BaseFloat> row(weights, h);
SubVector<BaseFloat> row(weights_, h);
for (int32 p = 0; p <= num_phones; p++) {
int32 output_index = info.output_index[p];
BaseFloat lm_prob = info.phone_lm_prob(p);
row(output_index) += lm_prob;
}
}
weights_.Swap(&weights);
}
CctcTransitionModelCreator::CctcTransitionModelCreator(
......
// lat/cctc-transition-model.h
// ctc/cctc-transition-model.h
// Copyright 2015 Johns Hopkins University (Author: Daniel Povey)
......@@ -30,7 +30,7 @@
#include "fstext/fstext-lib.h"
#include "tree/context-dep.h"
#include "lat/kaldi-lattice.h"
#include "cudamatrix/cu-matrix.h"
#include "matrix/kaldi-matrix.h"
#include "ctc/language-model.h"
namespace kaldi {
......@@ -76,7 +76,7 @@ class CctcTransitionModel {
// probabilities: row index is history-state index from 0 to
// NumHistoryStates() - 1, column index is neural-net output index, from 0 to
// NumOutputIndexes() - 1.
const CuMatrix<BaseFloat> &GetWeights() const { return weights_; }
const Matrix<BaseFloat> &GetWeights() const { return weights_; }
// A graph-label is a similar concept to a transition-id in HMM-based models;
// it's a one-based index that appears on the input side of a decoding graph
......@@ -195,7 +195,7 @@ class CctcTransitionModel {
// and its column dimension is the output dimension of the neural net.
// A row of this will be dotted with the output of the neural net to
// get the denominator of the probability value.
CuMatrix<BaseFloat> weights_;
Matrix<BaseFloat> weights_;
friend class CctcTransitionModelCreator;
};
......
......@@ -39,7 +39,15 @@ namespace kaldi {
"Fast and Accurate Recurrent Neural Network Acoustic Models for Speech
Recognition" by Hasim Sak, Andrew Senior et al.
Todo: finish this intro.
What we are implementing is something we are calling Context-dependent CTC (CCTC),
which is an extension of the original CTC model whereby the next symbol is dependent not only on the acoustic history, but
also on the phonetic history. And we train against a phone-level language model
in order to force the model to address only the acoustic side of things.
We also don't allow repeats of non-blank symbols. The phonetic history is not
given as an input to the neural net; rather, it determines which set of symbols
we normalize the probabilities over.
We'll have to create a more extensive introduction.
\section Context-dependent CTC
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment