Sparse data support: SparseBlob and InnerProduct with sparse support
alemagnani committed Aug 24, 2014
1 parent 76fd372 commit 8b827d9
Showing 28 changed files with 2,438 additions and 160 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -156,7 +156,7 @@ INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
ifneq ($(CPU_ONLY), 1)
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas curand
LIBRARIES := cudart cublas cusparse curand
endif
LIBRARIES += \
glog gflags pthread protobuf leveldb snappy \
41 changes: 21 additions & 20 deletions include/caffe/blob.hpp
@@ -16,7 +16,7 @@ class Blob {
count_(0) {}
explicit Blob(const int num, const int channels, const int height,
const int width);
void Reshape(const int num, const int channels, const int height,
virtual void Reshape(const int num, const int channels, const int height,
const int width);
void ReshapeLike(const Blob& other);
inline int num() const { return num_; }
@@ -38,41 +38,42 @@
}
// Copy from source. If copy_diff is false, we copy the data; if copy_diff
// is true, we copy the diff.
void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
virtual void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
bool reshape = false);

inline Dtype data_at(const int n, const int c, const int h,
virtual inline Dtype data_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_data() + offset(n, c, h, w));
}

inline Dtype diff_at(const int n, const int c, const int h,
virtual inline Dtype diff_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_diff() + offset(n, c, h, w));
}

inline const shared_ptr<SyncedMemory>& data() const {
virtual inline const shared_ptr<SyncedMemory>& data() const {
CHECK(data_);
return data_;
}

inline const shared_ptr<SyncedMemory>& diff() const {
virtual inline const shared_ptr<SyncedMemory>& diff() const {
CHECK(diff_);
return diff_;
}

const Dtype* cpu_data() const;
void set_cpu_data(Dtype* data);
const Dtype* gpu_data() const;
const Dtype* cpu_diff() const;
const Dtype* gpu_diff() const;
Dtype* mutable_cpu_data();
Dtype* mutable_gpu_data();
Dtype* mutable_cpu_diff();
Dtype* mutable_gpu_diff();
void Update();
void FromProto(const BlobProto& proto);
void ToProto(BlobProto* proto, bool write_diff = false) const;
virtual const Dtype* cpu_data() const;
virtual void set_cpu_data(Dtype* data);
virtual void set_gpu_data(Dtype* data);
virtual const Dtype* gpu_data() const;
virtual const Dtype* cpu_diff() const;
virtual const Dtype* gpu_diff() const;
virtual Dtype* mutable_cpu_data();
virtual Dtype* mutable_gpu_data();
virtual Dtype* mutable_cpu_diff();
virtual Dtype* mutable_gpu_diff();
virtual void Update();
virtual void FromProto(const BlobProto& proto);
virtual void ToProto(BlobProto* proto, bool write_diff = false) const;

// Compute the sum of absolute values (L1 norm) of the data or diff.
Dtype asum_data() const;
@@ -83,8 +84,8 @@
// in their forward or backward pass.
// This deallocates the SyncedMemory holding this blob's data/diff, as
// shared_ptr calls its destructor when reset with the = operator.
void ShareData(const Blob& other);
void ShareDiff(const Blob& other);
virtual void ShareData(const Blob& other);
virtual void ShareDiff(const Blob& other);

protected:
shared_ptr<SyncedMemory> data_;
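
Making these accessors virtual is what lets the rest of the codebase keep passing Blob<Dtype>* around while the actual object may be a SparseBlob<Dtype> (defined in the new sparse_blob.hpp further down). A minimal sketch of that dispatch pattern; the helper function is illustrative and not part of this commit:

#include "caffe/blob.hpp"
#include "caffe/sparse_blob.hpp"

namespace caffe {

// Illustrative only: choose a sparse or dense code path at runtime.
// dynamic_cast works because Blob now has virtual members.
template <typename Dtype>
void ProcessAnyBlob(const Blob<Dtype>* bottom) {
  const SparseBlob<Dtype>* sparse =
      dynamic_cast<const SparseBlob<Dtype>*>(bottom);
  if (sparse) {
    // sparse path: the CSR arrays are available here
    const Dtype* values = sparse->cpu_data();
    const int* indices  = sparse->cpu_indices();
    const int* row_ptr  = sparse->cpu_ptr();
    (void)values; (void)indices; (void)row_ptr;
  } else {
    // dense path: contiguous num x channels x height x width data
    const Dtype* data = bottom->cpu_data();
    (void)data;
  }
}

}  // namespace caffe
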
18 changes: 18 additions & 0 deletions include/caffe/common.hpp
@@ -4,6 +4,7 @@
#include <boost/shared_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>

#include <cmath>
#include <map>
@@ -53,6 +54,14 @@ private:\
<< caffe::cublasGetErrorString(status); \
} while (0)

#define CUSPARSE_CHECK(condition) \
do { \
cusparseStatus_t status = condition; \
CHECK_EQ(status, CUSPARSE_STATUS_SUCCESS) << " " \
<< caffe::cusparseGetErrorString(status); \
} while (0)


#define CURAND_CHECK(condition) \
do { \
curandStatus_t status = condition; \
@@ -132,6 +141,12 @@ class Caffe {
}
#ifndef CPU_ONLY
inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; }
inline static cusparseHandle_t cusparse_handle() {
return Get().cusparse_handle_;
}
inline static cusparseMatDescr_t cusparse_mat_descr() {
return Get().cusparse_mat_descr_;
}
inline static curandGenerator_t curand_generator() {
return Get().curand_generator_;
}
@@ -160,6 +175,8 @@ class Caffe {
protected:
#ifndef CPU_ONLY
cublasHandle_t cublas_handle_;
cusparseHandle_t cusparse_handle_;
cusparseMatDescr_t cusparse_mat_descr_;
curandGenerator_t curand_generator_;
#endif
shared_ptr<RNG> random_generator_;
@@ -179,6 +196,7 @@ class Caffe {

// NVIDIA_CUDA-5.5_Samples/common/inc/helper_cuda.h
const char* cublasGetErrorString(cublasStatus_t error);
const char* cusparseGetErrorString(cusparseStatus_t error);
const char* curandGetErrorString(curandStatus_t error);

// CUDA: thread number configuration.
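
For reference, CUSPARSE_CHECK and the new cusparse_handle()/cusparse_mat_descr() accessors are meant to be used together the same way the existing CUBLAS_CHECK/cublas_handle pair is. A hedged sketch using the legacy cuSPARSE v2 csrmm call that was current when this commit was made; the wrapper name is hypothetical, and all matrix pointers are device pointers:

#include <cusparse_v2.h>

#include "caffe/common.hpp"

namespace caffe {

// Hypothetical helper (not part of this commit): C = A * B, where A is an
// m x k CSR matrix with nnz non-zeros and B, C are dense column-major
// matrices of size k x n and m x n living in GPU memory.
void caffe_gpu_csrmm(int m, int n, int k, int nnz,
                     const float* csr_val, const int* csr_row_ptr,
                     const int* csr_col_ind, const float* B, float* C) {
  const float alpha = 1.0f;
  const float beta = 0.0f;
  CUSPARSE_CHECK(cusparseScsrmm(Caffe::cusparse_handle(),
      CUSPARSE_OPERATION_NON_TRANSPOSE, m, n, k, nnz, &alpha,
      Caffe::cusparse_mat_descr(), csr_val, csr_row_ptr, csr_col_ind,
      B, k,         // ldb: leading dimension of B
      &beta, C, m   // ldc: leading dimension of C
      ));
}

}  // namespace caffe
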
12 changes: 12 additions & 0 deletions include/caffe/common_layers.hpp
@@ -172,6 +172,18 @@ class InnerProductLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

// used to support the sparse operation
void Forward_sparse_cpu(const SparseBlob<Dtype>* bottom,
vector<Blob<Dtype>*>* top);
void Forward_sparse_gpu(const SparseBlob<Dtype>* bottom,
vector<Blob<Dtype>*>* top);
void Backward_sparse_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
const SparseBlob<Dtype>* bottom);
void Backward_sparse_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
const SparseBlob<Dtype>* bottom);

int M_;
int K_;
int N_;
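
M_, K_ and N_ keep their usual meaning here: batch size, input dimension and output dimension. A rough CPU sketch of the product the sparse forward path has to compute, with the bottom in CSR form and the weights stored N_ x K_ row-major as in the dense layer (illustrative only, not the commit's kernel):

// Sketch: top (M x N) = bottom (M x K, CSR) * weight^T, weight stored N x K.
template <typename Dtype>
void csr_inner_product_forward(int M, int N, int K,
                               const Dtype* csr_val, const int* csr_indices,
                               const int* csr_ptr, const Dtype* weight,
                               Dtype* top) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      Dtype sum = 0;
      // only the non-zero columns of row m contribute
      for (int p = csr_ptr[m]; p < csr_ptr[m + 1]; ++p) {
        const int k = csr_indices[p];
        sum += csr_val[p] * weight[n * K + k];
      }
      top[m * N + n] = sum;
    }
  }
}
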
50 changes: 50 additions & 0 deletions include/caffe/data_layers.hpp
@@ -17,6 +17,7 @@
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/sparse_blob.hpp"

namespace caffe {

@@ -80,6 +81,55 @@ class DataLayer : public Layer<Dtype>, public InternalThread {
Caffe::Phase phase_;
};

template<typename Dtype>
void* DataLayerSparseInputPrefetch(void* layer_pointer);

template<typename Dtype>
class DataLayerSparseInput : public Layer<Dtype> {
// The function used to perform prefetching.
friend void* DataLayerSparseInputPrefetch<Dtype>(void* layer_pointer);

public:
explicit DataLayerSparseInput(const LayerParameter& param)
: Layer<Dtype>(param) {
}
virtual ~DataLayerSparseInput();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
vector<Blob<Dtype>*>* bottom) {
return;
}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
vector<Blob<Dtype>*>* bottom) {
return;
}

virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();

shared_ptr<leveldb::DB> db_;
shared_ptr<leveldb::Iterator> iter_;
int datum_size_;

pthread_t thread_;
shared_ptr<SparseBlob<Dtype> > prefetch_data_;
shared_ptr<SparseBlob<Dtype> > prefetch_data_copy_;
shared_ptr<Blob<Dtype> > prefetch_label_;
shared_ptr<Blob<Dtype> > prefetch_label_copy_;

bool output_labels_;
Caffe::Phase phase_;
};

template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
public:
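
In DataLayerSparseInput above, the paired prefetch_*_ / prefetch_*_copy_ members suggest double buffering: the prefetch thread fills one pair while the network consumes the other. A plausible sketch of the hand-off in the forward pass (not the commit's actual implementation):

template <typename Dtype>
void DataLayerSparseInput<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  JoinPrefetchThread();                        // wait for the batch in flight
  prefetch_data_.swap(prefetch_data_copy_);    // hand it over to the copy pair
  prefetch_label_.swap(prefetch_label_copy_);
  CreatePrefetchThread();                      // start fetching the next batch
  // (*top)[0] and (*top)[1] would then be exposed from prefetch_data_copy_
  // and prefetch_label_copy_; those details live in the commit's .cpp.
}
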
3 changes: 3 additions & 0 deletions include/caffe/layer.hpp
@@ -294,6 +294,9 @@ void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
template <typename Dtype>
Layer<Dtype>* GetLayer(const LayerParameter& param);

template <typename Dtype>
Blob<Dtype>* GetTopBlob(const shared_ptr<LayerParameter>& param, int pos);

} // namespace caffe

#endif // CAFFE_LAYER_H_
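
GetTopBlob looks like the hook the net uses to decide, per layer and top index, whether to allocate a dense Blob or a SparseBlob. A sketch of that idea; produces_sparse_top is a hypothetical stand-in for whatever check on LayerParameter the commit actually performs, and the sketch deliberately uses a different function name:

// Hypothetical predicate: placeholder for the commit's real check
// (e.g. on the layer type stored in LayerParameter).
inline bool produces_sparse_top(const LayerParameter& param, int pos) {
  (void)param; (void)pos;
  return false;
}

template <typename Dtype>
Blob<Dtype>* GetTopBlobSketch(const shared_ptr<LayerParameter>& param,
                              int pos) {
  if (produces_sparse_top(*param, pos)) {
    return new SparseBlob<Dtype>();   // sparse top for sparse-producing layers
  }
  return new Blob<Dtype>();           // dense top otherwise
}
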
117 changes: 117 additions & 0 deletions include/caffe/sparse_blob.hpp
@@ -0,0 +1,117 @@
#ifndef CAFFE_SPARSE_BLOB_HPP_
#define CAFFE_SPARSE_BLOB_HPP_

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"

namespace caffe {

template<typename Dtype>
class SparseBlob : public Blob<Dtype> {
public:
SparseBlob()
: Blob<Dtype>(),
indices_(),
ptr_(),
nzz_(0) {
}

explicit SparseBlob(const int num, const int channels, const int nzz);

virtual void Reshape(const int num, const int channels, const int height,
const int width);

void Reshape(const int num, const int channels, const int nzz);

virtual void ReshapeLike(const Blob<Dtype>& other);

virtual inline int height() const {
return 1;
}
virtual inline int width() const {
return 1;
}
inline int nzz() const {
return nzz_;
}

virtual inline int offset(const int n, const int c = 0, const int h = 0,
const int w = 0) const {
LOG(FATAL)<< "Offset not supported in sparse blob.";
return 0;
}

virtual inline Dtype data_at(const int n, const int c, const int h,
const int w) const {
LOG(FATAL) << "data_at not implemented yet.";
return (Dtype)0;
}

virtual inline Dtype diff_at(const int n, const int c, const int h,
const int w) const {
LOG(FATAL) << "Diff data is not supported in sparse blob.";
return (Dtype)0;
}

inline const shared_ptr<SyncedMemory>& indices() const {
CHECK(indices_);
return indices_;
}

inline const shared_ptr<SyncedMemory>& ptr() const {
CHECK(ptr_);
return ptr_;
}

const int* cpu_indices() const;
const int* cpu_ptr() const;

const int* gpu_indices() const;
const int* gpu_ptr() const;

int* mutable_cpu_indices();
int* mutable_cpu_ptr();

int* mutable_gpu_indices();
int* mutable_gpu_ptr();

virtual void set_cpu_data(Dtype* data);
virtual void set_gpu_data(Dtype* data);

// num and channels are assumed to stay the same, but
// nzz might change, which is why it is an argument.
// The actual size of data and indices might also exceed nzz
// to allow for easy slicing.
// If total_size is -1, it is assumed to be equal to nzz.
void set_cpu_data(Dtype* data, int* indices, int* ptr, int nzz,
int total_size=-1);
void set_gpu_data(Dtype* data, int* indices, int* ptr, int nzz,
int total_size=-1);

virtual const Dtype* cpu_diff() const;
virtual const Dtype* gpu_diff() const;
virtual Dtype* mutable_cpu_diff();
virtual Dtype* mutable_gpu_diff();

virtual void ShareData(const Blob<Dtype>& other);
virtual void ShareDiff(const Blob<Dtype>& other);
virtual void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
bool reshape = false);

virtual void Update();
virtual void FromProto(const BlobProto& proto);
virtual void ToProto(BlobProto* proto, bool write_diff = false) const;

protected:
shared_ptr<SyncedMemory> indices_;
shared_ptr<SyncedMemory> ptr_;
int nzz_;

DISABLE_COPY_AND_ASSIGN(SparseBlob);
}; // class SparseBlob

} // namespace caffe

#endif // CAFFE_SPARSE_BLOB_HPP_
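
The three buffers follow the compressed sparse row (CSR) layout that cuSPARSE expects: data holds the nzz non-zero values, indices_ the column (channel) of each value, and ptr_ has num + 1 entries so that row i occupies the range [ptr[i], ptr[i+1]) of data and indices. A small sketch of wrapping an existing CSR matrix in a SparseBlob; it assumes, consistent with SyncedMemory::set_cpu_data, that the blob does not take ownership of the caller's buffers:

#include "caffe/sparse_blob.hpp"

// Sketch: wrap the 2 x 4 matrix
//   [ 0 5 0 7 ]
//   [ 3 0 0 0 ]
// which has nzz = 3 non-zeros, in a SparseBlob without copying.
void sparse_blob_example() {
  static float data[]    = {5.f, 7.f, 3.f};  // non-zero values, row by row
  static int   indices[] = {1, 3, 0};        // column of each value
  static int   ptr[]     = {0, 2, 3};        // row i spans [ptr[i], ptr[i+1])

  caffe::SparseBlob<float> blob(2 /* num */, 4 /* channels */, 3 /* nzz */);
  blob.set_cpu_data(data, indices, ptr, 3 /* nzz */);
}
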
11 changes: 8 additions & 3 deletions include/caffe/syncedmem.hpp
@@ -35,13 +35,16 @@ class SyncedMemory {
public:
SyncedMemory()
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
own_cpu_data_(false) {}
own_cpu_data_(false), own_gpu_data_(false) {}
explicit SyncedMemory(size_t size)
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
own_cpu_data_(false) {}
own_cpu_data_(false), own_gpu_data_(false) {}
~SyncedMemory();
const void* cpu_data();
void set_cpu_data(void* data);

// if size is -1, the size is not changed
void set_cpu_data(void* data, int size=-1);
void set_gpu_data(void* data, int size=-1);
const void* gpu_data();
void* mutable_cpu_data();
void* mutable_gpu_data();
@@ -52,11 +55,13 @@
private:
void to_cpu();
void to_gpu();
void clear_data();
void* cpu_ptr_;
void* gpu_ptr_;
size_t size_;
SyncedHead head_;
bool own_cpu_data_;
bool own_gpu_data_;

DISABLE_COPY_AND_ASSIGN(SyncedMemory);
}; // class SyncedMemory
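
The new overloads let a SyncedMemory be pointed at caller-owned storage of a known size, which is what the sparse blob needs when it adopts externally built CSR arrays; own_gpu_data_ presumably mirrors the existing own_cpu_data_ flag so that clear_data() frees only memory the object allocated itself. A short usage sketch, assuming a GPU build:

#include "caffe/syncedmem.hpp"

// Sketch: point a SyncedMemory at an existing host buffer without copying.
// The object does not take ownership, so the caller keeps the buffer alive.
void synced_memory_example() {
  static float values[256];
  caffe::SyncedMemory mem;                     // starts UNINITIALIZED, size 0
  mem.set_cpu_data(values, sizeof(values));    // new overload also records the size
  const void* host = mem.cpu_data();           // same pointer, no copy
  void* device = mem.mutable_gpu_data();       // allocates on GPU and copies up
  (void)host; (void)device;
}
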
1 change: 1 addition & 0 deletions include/caffe/util/device_alternate.hpp
@@ -35,6 +35,7 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
#include <cuda.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <cusparse_v2.h>
#include <driver_types.h> // cuda driver types

#endif // CPU_ONLY