Skip to content

Commit

Permalink
Adding cusparse support in SpMV
Browse files Browse the repository at this point in the history
Supporting both cuda 9 interface and cuda 10.2 interface
Support for float_int_int and double_int_int
Could potentially support int64_t with cuda 10.2 interface.
Modifying the spmv_struct_tunning test to make it compile appropriately.
  • Loading branch information
lucbv committed Mar 5, 2020
1 parent a5a7337 commit 1cd23df
Show file tree
Hide file tree
Showing 5 changed files with 305 additions and 21 deletions.
61 changes: 49 additions & 12 deletions perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,48 @@ void print_help() {
printf(" --print-lp : Print launch parameters to screen.\n");
}

template<typename graph_type>
struct copy_crs_data {
using execution_space = typename graph_type::device_type::execution_space;
using cusparse_int_type = typename Kokkos::View<int*,
typename graph_type::entries_type::array_layout,
typename graph_type::device_type>;

// Dispatch tags
struct rowPtrTag{};
struct colIndTag{};

typename graph_type::row_map_type::const_type Arowptr;
typename graph_type::entries_type::const_type Acolind;
cusparse_int_type cusparse_Arowptr, cusparse_Acolind;

copy_crs_data(typename graph_type::row_map_type::const_type Arowptr_,
typename graph_type::entries_type::const_type Acolind_,
cusparse_int_type cusparse_Arowptr_,
cusparse_int_type cusparse_Acolind_) :
Arowptr(Arowptr_), Acolind(Acolind_),
cusparse_Arowptr(cusparse_Arowptr_),
cusparse_Acolind(cusparse_Acolind_) {};

void doCopy() {
Kokkos::RangePolicy<execution_space, rowPtrTag> rowPtrPolicy(0, Arowptr.extent(0));
Kokkos::parallel_for("copy rowPtr to cusparse", rowPtrPolicy, *this);

Kokkos::RangePolicy<execution_space, colIndTag> colIndPolicy(0, Acolind.extent(0));
Kokkos::parallel_for("copy colInd to cusparse", colIndPolicy, *this);
}

KOKKOS_INLINE_FUNCTION
void operator() (const rowPtrTag&, const size_t idx) const {
cusparse_Arowptr(idx) = int(Arowptr(idx));
}

KOKKOS_INLINE_FUNCTION
void operator() (const colIndTag&, const size_t idx) const {
cusparse_Acolind(idx) = int(Acolind(idx));
}
};

template<class AMatrix,
class XVector,
class YVector>
Expand All @@ -101,7 +143,7 @@ void struct_matvec(const int stencil_type,
const AMatrix& A,
const XVector& x,
typename YVector::const_value_type& beta,
const YVector& y,
YVector& y,
int team_size_int,
int vector_length,
int64_t rows_per_thread_int,
Expand Down Expand Up @@ -183,7 +225,7 @@ void matvec(typename YVector::const_value_type& alpha,
const AMatrix& A,
const XVector& x,
typename YVector::const_value_type& beta,
const YVector& y,
YVector& y,
int team_size,
int vector_length,
int64_t rows_per_thread,
Expand Down Expand Up @@ -444,20 +486,15 @@ int main(int argc, char **argv)
using graph_type = typename matrix_type::StaticCrsGraphType;
using cusparse_int_type = typename Kokkos::View<int*,
typename graph_type::entries_type::array_layout,
typename matrix_type::device_type>;
typename graph_type::device_type>;

typename graph_type::row_map_type::const_type Arowptr = A.graph.row_map;
typename graph_type::entries_type::const_type Acolind = A.graph.entries;
typename matrix_type::values_type::non_const_type Avals = A.values;
cusparse_int_type Arowptr_cusparse, Acolind_cusparse;
Arowptr_cusparse = cusparse_int_type("Arowptr", Arowptr.extent(0));
Acolind_cusparse = cusparse_int_type("Acolind", Acolind.extent(0));
Kokkos::parallel_for(Arowptr.extent(0), KOKKOS_LAMBDA(const size_t idx) {
Arowptr_cusparse[idx] = Arowptr[idx];
});
Kokkos::parallel_for(Acolind.extent(0), KOKKOS_LAMBDA(const size_t idx) {
Acolind_cusparse[idx] = Acolind[idx];
});
cusparse_int_type Arowptr_cusparse("Arowptr", Arowptr.extent(0));
cusparse_int_type Acolind_cusparse("Acolind", Acolind.extent(0));
copy_crs_data<graph_type> myCopyFunctor(Arowptr, Acolind, Arowptr_cusparse, Acolind_cusparse);
myCopyFunctor.doCopy();

int* rows = reinterpret_cast<int*>(Arowptr_cusparse.data());
int* cols = reinterpret_cast<int*>(Acolind_cusparse.data());
Expand Down
41 changes: 41 additions & 0 deletions src/impl/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,47 @@ struct spmv_tpl_spec_avail {
enum : bool { value = false };
};

// cuSPARSE
#if defined (KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (9000 <= CUDA_VERSION)

#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, XL, YL, MEMSPACE) \
template <> \
struct spmv_tpl_spec_avail<const SCALAR, const int, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, Kokkos::MemoryTraits<Kokkos::Unmanaged>, const int, \
const SCALAR*, XL, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess>, \
SCALAR*, YL, Kokkos::Device<Kokkos::Cuda, MEMSPACE>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > { \
enum : bool { value = true }; \
};

#if defined (KOKKOSKERNELS_INST_FLOAT) \
&& defined (KOKKOSKERNELS_INST_LAYOUTLEFT) \
&& defined (KOKKOSKERNELS_INST_ORDINAL_INT) \
&& defined (KOKKOSKERNELS_INST_OFFSET_INT)
KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif

#if defined (KOKKOSKERNELS_INST_DOUBLE) \
&& defined (KOKKOSKERNELS_INST_LAYOUTLEFT) \
&& defined (KOKKOSKERNELS_INST_ORDINAL_INT) \
&& defined (KOKKOSKERNELS_INST_OFFSET_INT)
KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif

#if defined (KOKKOSKERNELS_INST_FLOAT) \
&& defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) \
&& defined (KOKKOSKERNELS_INST_ORDINAL_INT) \
&& defined (KOKKOSKERNELS_INST_OFFSET_INT)
KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace)
#endif

#if defined (KOKKOSKERNELS_INST_DOUBLE) \
&& defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) \
&& defined (KOKKOSKERNELS_INST_ORDINAL_INT) \
&& defined (KOKKOSKERNELS_INST_OFFSET_INT)
KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace)
#endif

#endif

// Specialization struct which defines whether a specialization exists
template<class AT, class AO, class AD, class AM, class AS,
class XT, class XL, class XD, class XM,
Expand Down
211 changes: 208 additions & 3 deletions src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// PROFIS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Expand All @@ -44,9 +44,214 @@
#ifndef KOKKOSPARSE_SPMV_TPL_SPEC_DECL_HPP_
#define KOKKOSPARSE_SPMV_TPL_SPEC_DECL_HPP_

// cuSPARSE
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
#include "cusparse.h"

namespace KokkosSparse {
namespace Impl {
}
}

template <class AMatrix, class XVector, class YVector>
void spmv_cusparse(const char mode[],
typename YVector::non_const_value_type const & alpha,
const AMatrix& A,
const XVector& x,
typename YVector::non_const_value_type const & beta,
const YVector& y) {
using offset_type = typename AMatrix::non_const_size_type;
// using ordinal_type = typename AMatrix::non_const_ordinal_type;
using value_type = typename AMatrix::non_const_value_type;

#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)

cudaError_t cuError;
cusparseStatus_t status;
cusparseHandle_t handle=0;

cusparseIndexType_t myCusparseIndexType;
if(std::is_same<offset_type, int>::value) {myCusparseIndexType = CUSPARSE_INDEX_32I;}
if(std::is_same<offset_type, int64_t>::value) {myCusparseIndexType = CUSPARSE_INDEX_64I;}
cudaDataType myCudaDataType;
if(std::is_same<value_type, float>::value) {myCudaDataType = CUDA_R_32F;}
if(std::is_same<value_type, double>::value) {myCudaDataType = CUDA_R_64F;}

/* initialize cusparse library */
status = cusparseCreate(&handle);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse was not initialized correctly");
}

/* create matrix */
cusparseSpMatDescr_t A_cusparse;
status = cusparseCreateCsr(&A_cusparse, A.numRows(), A.numCols(), A.nnz(),
const_cast<offset_type*>(A.graph.row_map.data()),
const_cast<offset_type*>(A.graph.entries.data()),
const_cast<value_type*>(A.values.data()),
myCusparseIndexType,
myCusparseIndexType,
CUSPARSE_INDEX_BASE_ZERO,
myCudaDataType);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse matrix was not created correctly");
}

/* create lhs and rhs */
cusparseDnVecDescr_t vecX, vecY;
status = cusparseCreateDnVec(&vecX, x.extent_int(0), const_cast<value_type*>(x.data()), myCudaDataType);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecX was not created correctly");
}
status = cusparseCreateDnVec(&vecY, y.extent_int(0), const_cast<value_type*>(y.data()), myCudaDataType);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecY was not created correctly");
}

size_t bufferSize = 0;
void* dBuffer = NULL;
status = cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType,
CUSPARSE_CSRMV_ALG1, &bufferSize);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse bufferSize computation failed");
}
cuError = cudaMalloc(&dBuffer, bufferSize);
if (cuError != cudaSuccess) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cuda buffer allocation failed");
}

/* perform SpMV */
status = cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType,
CUSPARSE_CSRMV_ALG1, dBuffer);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparseSpMV() failed");
}

cuError = cudaFree(dBuffer);
if (cuError != cudaSuccess) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cuda buffer deallocation failed");
}
status = cusparseDestroyDnVec(vecX);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecX was not destroyed correctly");
}
status = cusparseDestroyDnVec(vecY);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecY was not destroyed correctly");
}
status = cusparseDestroySpMat(A_cusparse);
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse matrix was not destroyed correctly");
}
status = cusparseDestroy(handle);
handle = 0;
if (status != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse handle was not desctroyed correctly");
}

#else

/* Initialize cusparse */
cusparseStatus_t cusparseStatus;
cusparseHandle_t cusparseHandle=0;
cusparseStatus = cusparseCreate(&cusparseHandle);
if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cannot initialize cusparse handle");
}

/* create and set the matrix descriptor */
cusparseMatDescr_t descrA = 0;
cusparseStatus = cusparseCreateMatDescr(&descrA);
if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error creating the matrix descriptor");
}
cusparseStatus = cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL);
if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error setting the matrix type");
}
cusparseStatus = cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO);
if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error setting the matrix index base");
}

/* perform the actual SpMV operation */
if(std::is_same<int, offset_type>::value) {
if (std::is_same<value_type,float>::value) {
cusparseStatus = cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
A.numRows(), A.numCols(), A.nnz(),
(const float *) &alpha, descrA,
(const float *) A.values.data(), A.graph.row_map.data(), A.graph.entries.data(),
(const float *) x.data(),
(const float *) &beta,
(float *) y.data());

} else if (std::is_same<value_type,double>::value) {
cusparseStatus = cusparseDcsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
A.numRows(), A.numCols(), A.nnz(),
(double const *) &alpha, descrA,
(double const *) A.values.data(), A.graph.row_map.data(), A.graph.entries.data(),
(double const *) x.data(),
(double const *) &beta,
(double *) y.data());
} else {
throw std::logic_error("Trying to call cusparse SpMV with a scalar type that is not float or double!");
}
} else {
throw std::logic_error("Trying to call cusparse SpMV with an offset type that is not int!");
}

cusparseStatus = cusparseDestroyMatDescr(descrA);
if (cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw("KokkosSparse::spmv[TPL_CUSPARSE,double]: matrix descriptor was not desctroyed correctly");
}
cusparseStatus = cusparseDestroy(cusparseHandle);
cusparseHandle = 0;
if (cusparseStatus != CUSPARSE_STATUS_SUCCESS) {
throw("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse handle was not desctroyed correctly");
}

#endif // CUSPARSE_VERSION
}

#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, OFFSET, LAYOUT, COMPILE_LIBRARY) \
template<> \
struct SPMV<SCALAR const, OFFSET const, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<Kokkos::Unmanaged>, OFFSET const, \
SCALAR const*, LAYOUT, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess>, \
SCALAR*, LAYOUT, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, Kokkos::MemoryTraits<Kokkos::Unmanaged>, \
true, COMPILE_LIBRARY> { \
using device_type = Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>; \
using memory_trait_type = Kokkos::MemoryTraits<Kokkos::Unmanaged>; \
using AMatrix = CrsMatrix<SCALAR const, OFFSET const, device_type, memory_trait_type, OFFSET const>; \
using XVector = Kokkos::View<SCALAR const*, LAYOUT,device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess>>; \
using YVector = Kokkos::View<SCALAR*, LAYOUT, device_type, memory_trait_type>; \
\
using coefficient_type = typename YVector::non_const_value_type; \
\
static void spmv (const char mode[], \
const coefficient_type& alpha, \
const AMatrix& A, \
const XVector& x, \
const coefficient_type& beta, \
const YVector& y) { \
Kokkos::Profiling::pushRegion("KokkosSparse::spmv[TPL_CUSPARSE,double]"); \
spmv_cusparse(mode, alpha, A, x, beta, y); \
Kokkos::Profiling::popRegion(); \
} \
};

KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, true)
KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, false)
KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, true)
KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, false)
KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, true)
KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, false)
KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, true)
KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, false)

#undef KOKKOSSPARSE_SPMV_CUSPARSE

} // namespace Impl
} // namespace KokkosSparse
#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE

#endif // KOKKOSPARSE_SPMV_TPL_SPEC_DECL_HPP_
10 changes: 5 additions & 5 deletions src/sparse/impl/KokkosSparse_spmv_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ namespace Impl {
/// AO: ordinal type (type of column indices) of the sparse matrix
/// AS: offset type (type of row offsets) of the sparse matrix
///
/// The next 5 template parameters (that start with X) correspond to
/// the input Kokkos::View. The last 5 template parameters (that start
/// The next 4 template parameters (that start with X) correspond to
/// the input Kokkos::View. The last 4 template parameters (that start
/// with Y) correspond to the output Kokkos::View.
///
/// For the implementation of KokkosSparse::spmv for multivectors (2-D
Expand Down Expand Up @@ -191,8 +191,8 @@ struct SPMV{
/// AO: ordinal type (type of column indices) of the sparse matrix
/// AS: offset type (type of row offsets) of the sparse matrix
///
/// The next 5 template parameters (that start with X) correspond to
/// the input Kokkos::View. The 5 template parameters after that
/// The next 4 template parameters (that start with X) correspond to
/// the input Kokkos::View. The 4 template parameters after that
/// (that start with lower-case b) are the template parameters of the
/// input 1-D View of coefficients 'beta'. Next, the 5 template
/// parameters that start with Y correspond to the output
Expand Down Expand Up @@ -359,7 +359,7 @@ struct SPMV_MV<AT, AO, AD, AM, AS,

//
// Macro for declaration of full specialization of
// KokkosSparse::Impl::Dot for rank == 2. This is NOT for users!!! All
// KokkosSparse::Impl::SpMV. This is NOT for users!!! All
// the declarations of full specializations go in this header file.
// We may spread out definitions (see _DEF macro below) across one or
// more .cpp files.
Expand Down
Loading

0 comments on commit 1cd23df

Please sign in to comment.