Sparse data support: SparseBlob and InnerProduct with sparse support
alemagnani committed Aug 24, 2014
1 parent 76fd372 commit 8b827d9
Showing 28 changed files with 2,438 additions and 160 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -156,7 +156,7 @@ INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
ifneq ($(CPU_ONLY), 1)
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas curand
LIBRARIES := cudart cublas cusparse curand
endif
LIBRARIES += \
glog gflags pthread protobuf leveldb snappy \
41 changes: 21 additions & 20 deletions include/caffe/blob.hpp
@@ -16,7 +16,7 @@ class Blob {
count_(0) {}
explicit Blob(const int num, const int channels, const int height,
const int width);
void Reshape(const int num, const int channels, const int height,
virtual void Reshape(const int num, const int channels, const int height,
const int width);
void ReshapeLike(const Blob& other);
inline int num() const { return num_; }
@@ -38,41 +38,42 @@
}
// Copy from source. If copy_diff is false, we copy the data; if copy_diff
// is true, we copy the diff.
void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
virtual void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
bool reshape = false);

inline Dtype data_at(const int n, const int c, const int h,
virtual inline Dtype data_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_data() + offset(n, c, h, w));
}

inline Dtype diff_at(const int n, const int c, const int h,
virtual inline Dtype diff_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_diff() + offset(n, c, h, w));
}

inline const shared_ptr<SyncedMemory>& data() const {
virtual inline const shared_ptr<SyncedMemory>& data() const {
CHECK(data_);
return data_;
}

inline const shared_ptr<SyncedMemory>& diff() const {
virtual inline const shared_ptr<SyncedMemory>& diff() const {
CHECK(diff_);
return diff_;
}

const Dtype* cpu_data() const;
void set_cpu_data(Dtype* data);
const Dtype* gpu_data() const;
const Dtype* cpu_diff() const;
const Dtype* gpu_diff() const;
Dtype* mutable_cpu_data();
Dtype* mutable_gpu_data();
Dtype* mutable_cpu_diff();
Dtype* mutable_gpu_diff();
void Update();
void FromProto(const BlobProto& proto);
void ToProto(BlobProto* proto, bool write_diff = false) const;
virtual const Dtype* cpu_data() const;
virtual void set_cpu_data(Dtype* data);
virtual void set_gpu_data(Dtype* data);
virtual const Dtype* gpu_data() const;
virtual const Dtype* cpu_diff() const;
virtual const Dtype* gpu_diff() const;
virtual Dtype* mutable_cpu_data();
virtual Dtype* mutable_gpu_data();
virtual Dtype* mutable_cpu_diff();
virtual Dtype* mutable_gpu_diff();
virtual void Update();
virtual void FromProto(const BlobProto& proto);
virtual void ToProto(BlobProto* proto, bool write_diff = false) const;

// Compute the sum of absolute values (L1 norm) of the data or diff.
Dtype asum_data() const;
@@ -83,8 +84,8 @@
// in their forward or backward pass.
// This deallocates the SyncedMemory holding this blob's data/diff, as
// shared_ptr calls its destructor when reset with the = operator.
void ShareData(const Blob& other);
void ShareDiff(const Blob& other);
virtual void ShareData(const Blob& other);
virtual void ShareDiff(const Blob& other);

protected:
shared_ptr<SyncedMemory> data_;
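
Making these accessors virtual is what lets the rest of the codebase keep passing Blob<Dtype>* around while the actual object may be a SparseBlob<Dtype> (defined in the new sparse_blob.hpp further down). A minimal sketch of that dispatch pattern; the helper function is illustrative and not part of this commit:

#include "caffe/blob.hpp"
#include "caffe/sparse_blob.hpp"

namespace caffe {

// Illustrative only: choose a sparse or dense code path at runtime.
// dynamic_cast works because Blob now has virtual members.
template <typename Dtype>
void ProcessAnyBlob(const Blob<Dtype>* bottom) {
  const SparseBlob<Dtype>* sparse =
      dynamic_cast<const SparseBlob<Dtype>*>(bottom);
  if (sparse) {
    // sparse path: the CSR arrays are available here
    const Dtype* values = sparse->cpu_data();
    const int* indices  = sparse->cpu_indices();
    const int* row_ptr  = sparse->cpu_ptr();
    (void)values; (void)indices; (void)row_ptr;
  } else {
    // dense path: contiguous num x channels x height x width data
    const Dtype* data = bottom->cpu_data();
    (void)data;
  }
}

}  // namespace caffe
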
18 changes: 18 additions & 0 deletions include/caffe/common.hpp
@@ -4,6 +4,7 @@
#include <boost/shared_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>

#include <cmath>
#include <map>
@@ -53,6 +54,14 @@ private:\
<< caffe::cublasGetErrorString(status); \
} while (0)

#define CUSPARSE_CHECK(condition) \
do { \
cusparseStatus_t status = condition; \
CHECK_EQ(status, CUSPARSE_STATUS_SUCCESS) << " " \
<< caffe::cusparseGetErrorString(status); \
} while (0)


#define CURAND_CHECK(condition) \
do { \
curandStatus_t status = condition; \
@@ -132,6 +141,12 @@ class Caffe {
}
#ifndef CPU_ONLY
inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; }
inline static cusparseHandle_t cusparse_handle() {
return Get().cusparse_handle_;
}
inline static cusparseMatDescr_t cusparse_mat_descr() {
return Get().cusparse_mat_descr_;
}
inline static curandGenerator_t curand_generator() {
return Get().curand_generator_;
}
@@ -160,6 +175,8 @@ class Caffe {
protected:
#ifndef CPU_ONLY
cublasHandle_t cublas_handle_;
cusparseHandle_t cusparse_handle_;
cusparseMatDescr_t cusparse_mat_descr_;
curandGenerator_t curand_generator_;
#endif
shared_ptr<RNG> random_generator_;
@@ -179,6 +196,7 @@ class Caffe {

// NVIDIA_CUDA-5.5_Samples/common/inc/helper_cuda.h
const char* cublasGetErrorString(cublasStatus_t error);
const char* cusparseGetErrorString(cusparseStatus_t error);
const char* curandGetErrorString(curandStatus_t error);

// CUDA: thread number configuration.
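
For reference, CUSPARSE_CHECK and the new cusparse_handle()/cusparse_mat_descr() accessors are meant to be used together the same way the existing CUBLAS_CHECK/cublas_handle pair is. A hedged sketch using the legacy cuSPARSE v2 csrmm call that was current when this commit was made; the wrapper name is hypothetical, and all matrix pointers are device pointers:

#include <cusparse_v2.h>

#include "caffe/common.hpp"

namespace caffe {

// Hypothetical helper (not part of this commit): C = A * B, where A is an
// m x k CSR matrix with nnz non-zeros and B, C are dense column-major
// matrices of size k x n and m x n living in GPU memory.
void caffe_gpu_csrmm(int m, int n, int k, int nnz,
                     const float* csr_val, const int* csr_row_ptr,
                     const int* csr_col_ind, const float* B, float* C) {
  const float alpha = 1.0f;
  const float beta = 0.0f;
  CUSPARSE_CHECK(cusparseScsrmm(Caffe::cusparse_handle(),
      CUSPARSE_OPERATION_NON_TRANSPOSE, m, n, k, nnz, &alpha,
      Caffe::cusparse_mat_descr(), csr_val, csr_row_ptr, csr_col_ind,
      B, k,         // ldb: leading dimension of B
      &beta, C, m   // ldc: leading dimension of C
      ));
}

}  // namespace caffe
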
12 changes: 12 additions & 0 deletions include/caffe/common_layers.hpp
@@ -172,6 +172,18 @@ class InnerProductLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

// used to support the sparse operation
void Forward_sparse_cpu(const SparseBlob<Dtype>* bottom,
vector<Blob<Dtype>*>* top);
void Forward_sparse_gpu(const SparseBlob<Dtype>* bottom,
vector<Blob<Dtype>*>* top);
void Backward_sparse_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
const SparseBlob<Dtype>* bottom);
void Backward_sparse_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
const SparseBlob<Dtype>* bottom);

int M_;
int K_;
int N_;
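
M_, K_ and N_ keep their usual meaning here: batch size, input dimension and output dimension. A rough CPU sketch of the product the sparse forward path has to compute, with the bottom in CSR form and the weights stored N_ x K_ row-major as in the dense layer (illustrative only, not the commit's kernel):

// Sketch: top (M x N) = bottom (M x K, CSR) * weight^T, weight stored N x K.
template <typename Dtype>
void csr_inner_product_forward(int M, int N, int K,
                               const Dtype* csr_val, const int* csr_indices,
                               const int* csr_ptr, const Dtype* weight,
                               Dtype* top) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      Dtype sum = 0;
      // only the non-zero columns of row m contribute
      for (int p = csr_ptr[m]; p < csr_ptr[m + 1]; ++p) {
        const int k = csr_indices[p];
        sum += csr_val[p] * weight[n * K + k];
      }
      top[m * N + n] = sum;
    }
  }
}
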
50 changes: 50 additions & 0 deletions include/caffe/data_layers.hpp
@@ -17,6 +17,7 @@
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/sparse_blob.hpp"

namespace caffe {

@@ -80,6 +81,55 @@ class DataLayer : public Layer<Dtype>, public InternalThread {
Caffe::Phase phase_;
};

template<typename Dtype>
void* DataLayerSparseInputPrefetch(void* layer_pointer);

template<typename Dtype>
class DataLayerSparseInput : public Layer<Dtype> {
// The function used to perform prefetching.
friend void* DataLayerSparseInputPrefetch<Dtype>(void* layer_pointer);

public:
explicit DataLayerSparseInput(const LayerParameter& param)
: Layer<Dtype>(param) {
}
virtual ~DataLayerSparseInput();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
vector<Blob<Dtype>*>* bottom) {
return;
}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
vector<Blob<Dtype>*>* bottom) {
return;
}

virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();

shared_ptr<leveldb::DB> db_;
shared_ptr<leveldb::Iterator> iter_;
int datum_size_;

pthread_t thread_;
shared_ptr<SparseBlob<Dtype> > prefetch_data_;
shared_ptr<SparseBlob<Dtype> > prefetch_data_copy_;
shared_ptr<Blob<Dtype> > prefetch_label_;
shared_ptr<Blob<Dtype> > prefetch_label_copy_;

bool output_labels_;
Caffe::Phase phase_;
};

template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
public:
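
In DataLayerSparseInput above, the paired prefetch_*_ / prefetch_*_copy_ members suggest double buffering: the prefetch thread fills one pair while the network consumes the other. A plausible sketch of the hand-off in the forward pass (not the commit's actual implementation):

template <typename Dtype>
void DataLayerSparseInput<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  JoinPrefetchThread();                        // wait for the batch in flight
  prefetch_data_.swap(prefetch_data_copy_);    // hand it over to the copy pair
  prefetch_label_.swap(prefetch_label_copy_);
  CreatePrefetchThread();                      // start fetching the next batch
  // (*top)[0] and (*top)[1] would then be exposed from prefetch_data_copy_
  // and prefetch_label_copy_; those details live in the commit's .cpp.
}
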
3 changes: 3 additions & 0 deletions include/caffe/layer.hpp
@@ -294,6 +294,9 @@ void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
template <typename Dtype>
Layer<Dtype>* GetLayer(const LayerParameter& param);

template <typename Dtype>
Blob<Dtype>* GetTopBlob(const shared_ptr<LayerParameter>& param, int pos);

} // namespace caffe

#endif // CAFFE_LAYER_H_
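
GetTopBlob looks like the hook the net uses to decide, per layer and top index, whether to allocate a dense Blob or a SparseBlob. A sketch of that idea; produces_sparse_top is a hypothetical stand-in for whatever check on LayerParameter the commit actually performs, and the sketch deliberately uses a different function name:

// Hypothetical predicate: placeholder for the commit's real check
// (e.g. on the layer type stored in LayerParameter).
inline bool produces_sparse_top(const LayerParameter& param, int pos) {
  (void)param; (void)pos;
  return false;
}

template <typename Dtype>
Blob<Dtype>* GetTopBlobSketch(const shared_ptr<LayerParameter>& param,
                              int pos) {
  if (produces_sparse_top(*param, pos)) {
    return new SparseBlob<Dtype>();   // sparse top for sparse-producing layers
  }
  return new Blob<Dtype>();           // dense top otherwise
}
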
117 changes: 117 additions & 0 deletions include/caffe/sparse_blob.hpp
@@ -0,0 +1,117 @@
#ifndef CAFFE_SPARSE_BLOB_HPP_
#define CAFFE_SPARSE_BLOB_HPP_

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"

namespace caffe {

template<typename Dtype>
class SparseBlob : public Blob<Dtype> {
public:
SparseBlob()
: Blob<Dtype>(),
indices_(),
ptr_(),
nzz_(0) {
}

explicit SparseBlob(const int num, const int channels, const int nzz);

virtual void Reshape(const int num, const int channels, const int height,
const int width);

void Reshape(const int num, const int channels, const int nzz);

virtual void ReshapeLike(const Blob<Dtype>& other);

virtual inline int height() const {
return 1;
}
virtual inline int width() const {
return 1;
}
inline int nzz() const {
return nzz_;
}

virtual inline int offset(const int n, const int c = 0, const int h = 0,
const int w = 0) const {
LOG(FATAL)<< "Offset not supported in sparse blob.";
return 0;
}

virtual inline Dtype data_at(const int n, const int c, const int h,
const int w) const {
LOG(FATAL) << "data_at not implemented yet.";
return (Dtype)0;
}

virtual inline Dtype diff_at(const int n, const int c, const int h,
const int w) const {
LOG(FATAL) << "Diff data is not supported in sparse blob.";
return (Dtype)0;
}

inline const shared_ptr<SyncedMemory>& indices() const {
CHECK(indices_);
return indices_;
}

inline const shared_ptr<SyncedMemory>& ptr() const {
CHECK(ptr_);
return ptr_;
}

const int* cpu_indices() const;
const int* cpu_ptr() const;

const int* gpu_indices() const;
const int* gpu_ptr() const;

int* mutable_cpu_indices();
int* mutable_cpu_ptr();

int* mutable_gpu_indices();
int* mutable_gpu_ptr();

virtual void set_cpu_data(Dtype* data);
virtual void set_gpu_data(Dtype* data);

// num and channels are assumed to stay the same, but
// nzz might change, which is why it is an argument.
// The actual size of data and indices might also exceed nzz
// to allow for easy slicing.
// If total_size is -1, it is assumed to be equal to nzz.
void set_cpu_data(Dtype* data, int* indices, int* ptr, int nzz,
int total_size=-1);
void set_gpu_data(Dtype* data, int* indices, int* ptr, int nzz,
int total_size=-1);

virtual const Dtype* cpu_diff() const;
virtual const Dtype* gpu_diff() const;
virtual Dtype* mutable_cpu_diff();
virtual Dtype* mutable_gpu_diff();

virtual void ShareData(const Blob<Dtype>& other);
virtual void ShareDiff(const Blob<Dtype>& other);
virtual void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
bool reshape = false);

virtual void Update();
virtual void FromProto(const BlobProto& proto);
virtual void ToProto(BlobProto* proto, bool write_diff = false) const;

protected:
shared_ptr<SyncedMemory> indices_;
shared_ptr<SyncedMemory> ptr_;
int nzz_;

DISABLE_COPY_AND_ASSIGN(SparseBlob);
}; // class SparseBlob

} // namespace caffe

#endif // CAFFE_SPARSE_BLOB_HPP_
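
The three buffers follow the compressed sparse row (CSR) layout that cuSPARSE expects: data holds the nzz non-zero values, indices_ the column (channel) of each value, and ptr_ has num + 1 entries so that row i occupies the range [ptr[i], ptr[i+1]) of data and indices. A small sketch of wrapping an existing CSR matrix in a SparseBlob; it assumes, consistent with SyncedMemory::set_cpu_data, that the blob does not take ownership of the caller's buffers:

#include "caffe/sparse_blob.hpp"

// Sketch: wrap the 2 x 4 matrix
//   [ 0 5 0 7 ]
//   [ 3 0 0 0 ]
// which has nzz = 3 non-zeros, in a SparseBlob without copying.
void sparse_blob_example() {
  static float data[]    = {5.f, 7.f, 3.f};  // non-zero values, row by row
  static int   indices[] = {1, 3, 0};        // column of each value
  static int   ptr[]     = {0, 2, 3};        // row i spans [ptr[i], ptr[i+1])

  caffe::SparseBlob<float> blob(2 /* num */, 4 /* channels */, 3 /* nzz */);
  blob.set_cpu_data(data, indices, ptr, 3 /* nzz */);
}
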
11 changes: 8 additions & 3 deletions include/caffe/syncedmem.hpp
@@ -35,13 +35,16 @@ class SyncedMemory {
public:
SyncedMemory()
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
own_cpu_data_(false) {}
own_cpu_data_(false), own_gpu_data_(false) {}
explicit SyncedMemory(size_t size)
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
own_cpu_data_(false) {}
own_cpu_data_(false), own_gpu_data_(false) {}
~SyncedMemory();
const void* cpu_data();
void set_cpu_data(void* data);

// if size is -1, the size is not changed
void set_cpu_data(void* data, int size=-1);
void set_gpu_data(void* data, int size=-1);
const void* gpu_data();
void* mutable_cpu_data();
void* mutable_gpu_data();
@@ -52,11 +55,13 @@
private:
void to_cpu();
void to_gpu();
void clear_data();
void* cpu_ptr_;
void* gpu_ptr_;
size_t size_;
SyncedHead head_;
bool own_cpu_data_;
bool own_gpu_data_;

DISABLE_COPY_AND_ASSIGN(SyncedMemory);
}; // class SyncedMemory
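
The new overloads let a SyncedMemory be pointed at caller-owned storage of a known size, which is what the sparse blob needs when it adopts externally built CSR arrays; own_gpu_data_ presumably mirrors the existing own_cpu_data_ flag so that clear_data() frees only memory the object allocated itself. A short usage sketch, assuming a GPU build:

#include "caffe/syncedmem.hpp"

// Sketch: point a SyncedMemory at an existing host buffer without copying.
// The object does not take ownership, so the caller keeps the buffer alive.
void synced_memory_example() {
  static float values[256];
  caffe::SyncedMemory mem;                     // starts UNINITIALIZED, size 0
  mem.set_cpu_data(values, sizeof(values));    // new overload also records the size
  const void* host = mem.cpu_data();           // same pointer, no copy
  void* device = mem.mutable_gpu_data();       // allocates on GPU and copies up
  (void)host; (void)device;
}
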
1 change: 1 addition & 0 deletions include/caffe/util/device_alternate.hpp
@@ -35,6 +35,7 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
#include <cuda.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <cusparse_v2.h>
#include <driver_types.h> // cuda driver types

#endif // CPU_ONLY