Skip to content
Snippets Groups Projects
Commit 7d4bbace authored by Karel Vesely's avatar Karel Vesely
Browse files

remove the compile-time dependency on the libcuda.so, use dynamic loading instead


git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1758 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 56f5d5cc
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,7 @@
#include <cuda.h>
#include <vector>
#include <dlfcn.h>
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-device.h"
......@@ -48,7 +49,7 @@ CuDevice::CuDevice()
|| gpu_prop.computeMode == cudaComputeModeExclusiveProcess) {
cudaDeviceSynchronize();
char gpu_name[128];
cuDeviceGetName(gpu_name, 128, gpu_id);
DeviceGetName(gpu_name, 128, gpu_id);
std::string mem_stats = GetFreeMemory(NULL, NULL);
KALDI_LOG << "CUDA setup operating under Compute Exclusive Mode.\n"
<< " Using device " << gpu_id << ": " << gpu_name << "\t" << mem_stats;
......@@ -70,7 +71,7 @@ CuDevice::CuDevice()
cudaThreadSynchronize(); //deprecated, but for legacy reason...
//get GPU name
char name[128];
cuDeviceGetName(name,128,n);
DeviceGetName(name,128,n);
//get GPU memory stats
int64 free, total;
std::string mem_stats;
......@@ -202,13 +203,41 @@ void CuDevice::PrintProfile() {
std::string CuDevice::GetFreeMemory(int64* free, int64* total) {
// WARNING! the CUDA API is inconsistent across versions!
#if (CUDA_VERSION >= 3020)
//define the function signature type
size_t mem_free, mem_total;
#else
unsigned int mem_free, mem_total;
#endif
// get the free memory stats
cuMemGetInfo(&mem_free, &mem_total);
// post them outside
{
//we will load the cuMemGetInfo dynamically from libcuda.so
//cuMemGetInfo(&mem_free, &mem_total);
//pre-fill ``safe'' values that will not cause problems
mem_free = 1; mem_total = 1;
//open libcuda.so
void* libcuda = dlopen("libcuda.so",RTLD_LAZY);
if(NULL == libcuda) {
KALDI_WARN << "cannot open libcuda.so";
} else {
//define the function signature type
//and get the symbol
#if (CUDA_VERSION >= 3020)
typedef CUresult (*cu_fun_ptr)(size_t*, size_t*);
cu_fun_ptr dl_cuMemGetInfo = (cu_fun_ptr)dlsym(libcuda,"cuMemGetInfo_v2");
#else
typedef CUresult (*cu_fun_ptr)(int*, int*);
cu_fun_ptr dl_cuMemGetInfo = (cu_fun_ptr)dlsym(libcuda,"cuMemGetInfo");
#endif
if(NULL == dl_cuMemGetInfo) {
KALDI_WARN << "cannot load cuMemGetInfo from libcuda.so";
} else {
//call the function
dl_cuMemGetInfo(&mem_free, &mem_total);
}
//close the library
dlclose(libcuda);
}
}
// copy the output values outside
if(NULL != free) *free = mem_free;
if(NULL != total) *total = mem_total;
// prepare the text output
......@@ -221,6 +250,29 @@ std::string CuDevice::GetFreeMemory(int64* free, int64* total) {
}
void CuDevice::DeviceGetName(char* name, int32 len, int32 dev) {
  // Write a human-readable name of GPU device `dev` into `name`
  // (at most `len` bytes, always NUL-terminated).
  //
  // The driver-API symbol cuDeviceGetName is resolved at runtime via
  // dlopen/dlsym, so the binary carries no link-time dependency on
  // libcuda.so (hosts without the NVIDIA driver can still run CPU paths).
  if (NULL == name || len <= 0) return;  // nothing we can safely write into
  // prefill with something reasonable;
  // NOTE: strncpy does not NUL-terminate when the source string is longer
  // than `len`, so terminate explicitly to avoid an unterminated buffer.
  strncpy(name, "Unknown GPU", len);
  name[len-1] = '\0';
  // open libcuda.so
  void* libcuda = dlopen("libcuda.so", RTLD_LAZY);
  if (NULL == libcuda) {
    KALDI_WARN << "cannot open libcuda.so";
  } else {
    // define the function signature type
    typedef CUresult (*cu_fun_ptr)(char*, int, CUdevice);
    // get the symbol
    cu_fun_ptr cuDeviceGetName_ptr = (cu_fun_ptr)dlsym(libcuda, "cuDeviceGetName");
    if (NULL == cuDeviceGetName_ptr) {
      KALDI_WARN << "cannot load cuDeviceGetName from libcuda.so";
    } else {
      // call the function; CUDA_SUCCESS == 0, so on any non-zero status we
      // restore the "Unknown GPU" fallback rather than expose garbage.
      if (0 != cuDeviceGetName_ptr(name, len, dev)) {
        strncpy(name, "Unknown GPU", len);
      }
      // the driver API does not promise termination at the length limit
      name[len-1] = '\0';
    }
    // close the library
    dlclose(libcuda);
  }
}
////////////////////////////////////////////////
// The instance of the static singleton
......
......@@ -79,6 +79,8 @@ class CuDevice {
/// Get the actual GPU memory use stats
std::string GetFreeMemory(int64* free = NULL, int64* total = NULL);
/// Get the name of the GPU
void DeviceGetName(char* name, int32 len, int32 dev);
private:
......
......@@ -4,5 +4,5 @@ CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 32 -DHAVE_CUDA
CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include
LDFLAGS += -L$(CUDATKDIR)/lib -Wl,-rpath=$(CUDATKDIR)/lib
LDFLAGS += -lcublas -lcudart -lcuda
LDFLAGS += -lcublas -lcudart
......@@ -4,5 +4,5 @@ CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA
CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include
LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64
LDFLAGS += -lcublas -lcudart -lcuda
LDFLAGS += -lcublas -lcudart
// gmmbin/transf-to-nnet.cc
// gmmbin/cmvn-to-nnet.cc
// Copyright 2012 Brno University of Technology
......@@ -33,9 +33,11 @@ int main(int argc, char *argv[]) {
bool binary_write = false;
bool tied_normalzation = false;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("tied-normalization", &tied_normalzation, "The normalization is tied accross all the input dimensions");
po.Read(argc, argv);
......@@ -68,8 +70,19 @@ int main(int argc, char *argv[]) {
for(int32 d=0; d<cmvn_stats.NumCols()-1; d++) {
BaseFloat mean = cmvn_stats(0,d)/count;
BaseFloat var = cmvn_stats(1,d)/count - mean*mean;
shift(d) = -mean;
scale(d) = 1.0 / sqrt(var);
shift(d) = -mean * scale(d);
}
if(tied_normalzation) {
//just average the variances
BaseFloat sum_var = 0.0;
for(int32 i=0; i<scale.Dim(); i++) {
sum_var += 1.0 / (scale(i)*scale(i));
}
BaseFloat mean_var = sum_var / scale.Dim();
BaseFloat tied_scale = 1.0 / sqrt(mean_var);
scale.Set(tied_scale);
}
//we will put the shift and scale to the nnet
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment