Skip to content
Snippets Groups Projects
Commit cb05ff96 authored by Daniel Povey's avatar Daniel Povey
Browse files

Merge pull request #145 from david-ryan-snyder/nnet-am-compute-chunked

Adding nnet2 function NnetComputationChunked
parents 0628d2db ab57be7b
No related branches found
No related tags found
No related merge requests found
// nnet2/nnet-compute-test.cc
// Copyright 2014 Johns Hopkins University (author: Daniel Povey)
// Copyright 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -80,6 +81,41 @@ void UnitTestNnetCompute() {
delete nnet;
}
void UnitTestNnetComputeChunked() {
int32 input_dim = 10 + rand() % 40, output_dim = 100 + rand() % 500;
bool pad_input = true;
Nnet *nnet = GenRandomNnet(input_dim, output_dim);
int32 num_feats = 100 + rand() % 500;
int32 chunk_size = num_feats / (2 + rand() % 10);
CuMatrix<BaseFloat> input(num_feats, input_dim);
input.SetRandn();
KALDI_LOG << "Left context = " << nnet->LeftContext()
<< ", right context = " << nnet->RightContext()
<< ", chunk size = " << chunk_size;
KALDI_LOG << "NNet info is " << nnet->Info();
int32 num_output_rows = num_feats;
CuMatrix<BaseFloat> cu_output1(num_output_rows, output_dim);
Matrix<BaseFloat> output2(num_output_rows, output_dim);
NnetComputation(*nnet, input, pad_input, &cu_output1);
NnetComputationChunked(*nnet, Matrix<BaseFloat>(input), chunk_size,
&output2);
Matrix<BaseFloat> output1(cu_output1);
AssertEqual(output1, output2);
for (int32 i = 0; i < output1.NumRows(); i++) {
// just double-check that the frames near the end are right, in case
// the test above somehow passed despite that.
if (i < 10 || output1.NumRows() - i < 10) {
SubVector<BaseFloat> vec1(output1, i), vec2(output2, i);
AssertEqual(vec1, vec2);
}
}
KALDI_LOG << "OK";
delete nnet;
}
} // namespace nnet2
} // namespace kaldi
......@@ -92,6 +128,7 @@ int main() {
for (int32 i = 0; i < 10; i++)
UnitTestNnetCompute();
UnitTestNnetComputeChunked();
return 0;
}
// nnet2/nnet-compute.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Copyright 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -165,6 +166,45 @@ void NnetComputation(const Nnet &nnet,
output->CopyFromMat(nnet_computer.GetOutput());
}
void NnetComputationChunked(const Nnet &nnet,
const Matrix<BaseFloat> &input, // features
int32 chunk_size,
Matrix<BaseFloat> *output) {
int32 num_rows,
num_chunks = ceil((BaseFloat)input.NumRows() / chunk_size),
dim = input.NumCols(),
left_context = nnet.LeftContext(),
right_context = nnet.RightContext();
Matrix<BaseFloat> full_input;
num_rows = left_context + input.NumRows() + right_context;
full_input.Resize(num_rows, dim);
full_input.Range(left_context, input.NumRows(),
0, dim).CopyFromMat(input);
for (int32 i = 0; i < left_context; i++)
full_input.Row(i).CopyFromVec(input.Row(0));
int32 last_row = input.NumRows() - 1;
for (int32 i = 0; i < right_context; i++)
full_input.Row(num_rows - i - 1).CopyFromVec(input.Row(last_row));
for (int32 i = 0; i < num_chunks; i++) {
int32 index = i * chunk_size,
offset = std::min(num_rows - chunk_size * i,
left_context + chunk_size + right_context);
SubMatrix<BaseFloat> chunk_input(full_input, index, offset, 0, dim);
CuMatrix<BaseFloat> cu_chunk_input(chunk_input);
// Note: we have already accounted for input padding, so we pass
// pad_input==false to the NnetComputer.
NnetComputer nnet_computer(nnet, cu_chunk_input, false, NULL);
nnet_computer.Propagate();
CuMatrix<BaseFloat> cu_chunk_output(nnet_computer.GetOutput());
SubMatrix<BaseFloat> chunk_out(*output, i * chunk_size,
cu_chunk_output.NumRows(), 0,
cu_chunk_output.NumCols());
chunk_out.CopyFromMat(cu_chunk_output);
}
}
BaseFloat NnetGradientComputation(const Nnet &nnet,
const CuMatrixBase<BaseFloat> &input,
bool pad_input,
......
// nnet2/nnet-compute.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Copyright 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -45,6 +46,19 @@ void NnetComputation(const Nnet &nnet,
const CuMatrixBase<BaseFloat> &input, // features
bool pad_input,
CuMatrixBase<BaseFloat> *output); // posteriors.
/**
Does the basic neural net computation, on a sequence of data (e.g.
an utterance). This variant of NnetComputation chunks the input
according to chunk_size and does the posterior computation chunk
by chunk. This allows the computation to be performed on the GPU
when the input matrix is very large. Input is padded with enough
frames of context so that the output will be a matrix with
input.NumRows() rows.
*/
void NnetComputationChunked(const Nnet &nnet,
const Matrix<BaseFloat> &input, // features
int32 chunk_size,
Matrix<BaseFloat> *output); // posteriors.
/** Does the neural net computation and backprop, given input and labels.
Note: if pad_input==true the number of rows of input should be the
......
......@@ -2,6 +2,7 @@
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// 2015 Johns Hopkins University (author: Daniel Garcia-Romero)
// 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -44,6 +45,7 @@ int main(int argc, char *argv[]) {
bool apply_log = false;
bool pad_input = true;
std::string use_gpu = "no";
int32 chunk_size = 0;
ParseOptions po(usage);
po.Register("apply-log", &apply_log, "Apply a log to the result of the computation "
"before outputting.");
......@@ -52,6 +54,9 @@ int main(int argc, char *argv[]) {
"of output being less than those of input.");
po.Register("use-gpu", &use_gpu,
"yes|no|optional|wait, only has effect if compiled with CUDA");
po.Register("chunk-size", &chunk_size, "Process the feature matrix in chunks. "
"This is useful when processing large feature files in the GPU. "
"If chunk-size > 0, pad-input must be true.");
po.Read(argc, argv);
......@@ -59,6 +64,9 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
exit(1);
}
// If chunk_size is greater than 0, pad_input needs to be true.
// NOTE(review): the assert below tests 'chunk_size < 0', which also requires
// pad_input when chunk_size == 0; it likely should be 'chunk_size <= 0'.
KALDI_ASSERT(chunk_size < 0 || pad_input);
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
......@@ -79,12 +87,12 @@ int main(int argc, char *argv[]) {
Nnet &nnet = am_nnet.GetNnet();
int64 num_done = 0, num_frames = 0;
SequentialBaseFloatCuMatrixReader feature_reader(features_rspecifier);
BaseFloatCuMatrixWriter writer(features_or_loglikes_wspecifier);
SequentialBaseFloatMatrixReader feature_reader(features_rspecifier);
BaseFloatMatrixWriter writer(features_or_loglikes_wspecifier);
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
const CuMatrix<BaseFloat> &feats = feature_reader.Value();
const Matrix<BaseFloat> &feats = feature_reader.Value();
int32 output_frames = feats.NumRows(), output_dim = nnet.OutputDim();
if (!pad_input)
......@@ -94,8 +102,16 @@ int main(int argc, char *argv[]) {
<< "would be empty.";
continue;
}
CuMatrix<BaseFloat> output(output_frames, output_dim);
NnetComputation(nnet, feats, pad_input, &output);
Matrix<BaseFloat> output(output_frames, output_dim);
if (chunk_size > 0 && chunk_size < feats.NumRows()) {
NnetComputationChunked(nnet, feats, chunk_size, &output);
} else {
CuMatrix<BaseFloat> cu_feats(feats);
CuMatrix<BaseFloat> cu_output(output);
NnetComputation(nnet, cu_feats, pad_input, &cu_output);
output.CopyFromMat(cu_output);
}
if (apply_log) {
output.ApplyFloor(1.0e-20);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment