diff --git a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp index 9ab3d2b682..150afb7e53 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp @@ -92,6 +92,48 @@ void print_help() { printf(" --print-lp : Print launch parameters to screen.\n"); } +template +struct copy_crs_data { + using execution_space = typename graph_type::device_type::execution_space; + using cusparse_int_type = typename Kokkos::View; + + // Dispatch tags + struct rowPtrTag{}; + struct colIndTag{}; + + typename graph_type::row_map_type::const_type Arowptr; + typename graph_type::entries_type::const_type Acolind; + cusparse_int_type cusparse_Arowptr, cusparse_Acolind; + + copy_crs_data(typename graph_type::row_map_type::const_type Arowptr_, + typename graph_type::entries_type::const_type Acolind_, + cusparse_int_type cusparse_Arowptr_, + cusparse_int_type cusparse_Acolind_) : + Arowptr(Arowptr_), Acolind(Acolind_), + cusparse_Arowptr(cusparse_Arowptr_), + cusparse_Acolind(cusparse_Acolind_) {}; + + void doCopy() { + Kokkos::RangePolicy rowPtrPolicy(0, Arowptr.extent(0)); + Kokkos::parallel_for("copy rowPtr to cusparse", rowPtrPolicy, *this); + + Kokkos::RangePolicy colIndPolicy(0, Acolind.extent(0)); + Kokkos::parallel_for("copy colInd to cusparse", colIndPolicy, *this); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const rowPtrTag&, const size_t idx) const { + cusparse_Arowptr(idx) = int(Arowptr(idx)); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const colIndTag&, const size_t idx) const { + cusparse_Acolind(idx) = int(Acolind(idx)); + } +}; + template @@ -101,7 +143,7 @@ void struct_matvec(const int stencil_type, const AMatrix& A, const XVector& x, typename YVector::const_value_type& beta, - const YVector& y, + YVector& y, int team_size_int, int vector_length, int64_t rows_per_thread_int, @@ -183,7 +225,7 @@ void matvec(typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, typename YVector::const_value_type& beta, - const YVector& y, + YVector& y, int team_size, int vector_length, int64_t rows_per_thread, @@ -444,20 +486,15 @@ int main(int argc, char **argv) using graph_type = typename matrix_type::StaticCrsGraphType; using cusparse_int_type = typename Kokkos::View; + typename graph_type::device_type>; typename graph_type::row_map_type::const_type Arowptr = A.graph.row_map; typename graph_type::entries_type::const_type Acolind = A.graph.entries; typename matrix_type::values_type::non_const_type Avals = A.values; - cusparse_int_type Arowptr_cusparse, Acolind_cusparse; - Arowptr_cusparse = cusparse_int_type("Arowptr", Arowptr.extent(0)); - Acolind_cusparse = cusparse_int_type("Acolind", Acolind.extent(0)); - Kokkos::parallel_for(Arowptr.extent(0), KOKKOS_LAMBDA(const size_t idx) { - Arowptr_cusparse[idx] = Arowptr[idx]; - }); - Kokkos::parallel_for(Acolind.extent(0), KOKKOS_LAMBDA(const size_t idx) { - Acolind_cusparse[idx] = Acolind[idx]; - }); + cusparse_int_type Arowptr_cusparse("Arowptr", Arowptr.extent(0)); + cusparse_int_type Acolind_cusparse("Acolind", Acolind.extent(0)); + copy_crs_data myCopyFunctor(Arowptr, Acolind, Arowptr_cusparse, Acolind_cusparse); + myCopyFunctor.doCopy(); int* rows = reinterpret_cast(Arowptr_cusparse.data()); int* cols = reinterpret_cast(Acolind_cusparse.data()); diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp index 3659eb4ef7..0e75a5ce9c 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp @@ -54,6 +54,47 @@ struct spmv_tpl_spec_avail { enum : bool { value = false }; }; +// cuSPARSE +#if defined (KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (9000 <= CUDA_VERSION) + +#define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, XL, YL, MEMSPACE) \ +template <> \ +struct spmv_tpl_spec_avail, Kokkos::MemoryTraits, const int, \ + const SCALAR*, XL, Kokkos::Device, Kokkos::MemoryTraits, \ + SCALAR*, YL, Kokkos::Device, Kokkos::MemoryTraits > { \ + enum : bool { value = true }; \ +}; + +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) \ + && defined (KOKKOSKERNELS_INST_ORDINAL_INT) \ + && defined (KOKKOSKERNELS_INST_OFFSET_INT) + KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) \ + && defined (KOKKOSKERNELS_INST_ORDINAL_INT) \ + && defined (KOKKOSKERNELS_INST_OFFSET_INT) + KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif + +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) \ + && defined (KOKKOSKERNELS_INST_ORDINAL_INT) \ + && defined (KOKKOSKERNELS_INST_OFFSET_INT) + KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) \ + && defined (KOKKOSKERNELS_INST_ORDINAL_INT) \ + && defined (KOKKOSKERNELS_INST_OFFSET_INT) + KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif + +#endif + // Specialization struct which defines whether a specialization exists template + void spmv_cusparse(const char mode[], + typename YVector::non_const_value_type const & alpha, + const AMatrix& A, + const XVector& x, + typename YVector::non_const_value_type const & beta, + const YVector& y) { + using offset_type = typename AMatrix::non_const_size_type; + // using ordinal_type = typename AMatrix::non_const_ordinal_type; + using value_type = typename AMatrix::non_const_value_type; + +#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) + + cudaError_t cuError; + cusparseStatus_t status; + cusparseHandle_t handle=0; + + cusparseIndexType_t myCusparseIndexType; + if(std::is_same::value) {myCusparseIndexType = CUSPARSE_INDEX_32I;} + if(std::is_same::value) {myCusparseIndexType = CUSPARSE_INDEX_64I;} + cudaDataType myCudaDataType; + if(std::is_same::value) {myCudaDataType = CUDA_R_32F;} + if(std::is_same::value) {myCudaDataType = CUDA_R_64F;} + + /* initialize cusparse library */ + status = cusparseCreate(&handle); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse was not initialized correctly"); + } + + /* create matrix */ + cusparseSpMatDescr_t A_cusparse; + status = cusparseCreateCsr(&A_cusparse, A.numRows(), A.numCols(), A.nnz(), + const_cast(A.graph.row_map.data()), + const_cast(A.graph.entries.data()), + const_cast(A.values.data()), + myCusparseIndexType, + myCusparseIndexType, + CUSPARSE_INDEX_BASE_ZERO, + myCudaDataType); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse matrix was not created correctly"); + } + + /* create lhs and rhs */ + cusparseDnVecDescr_t vecX, vecY; + status = cusparseCreateDnVec(&vecX, x.extent_int(0), const_cast(x.data()), myCudaDataType); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecX was not created correctly"); + } + status = cusparseCreateDnVec(&vecY, y.extent_int(0), const_cast(y.data()), myCudaDataType); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecY was not created correctly"); + } + + size_t bufferSize = 0; + void* dBuffer = NULL; + status = cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, + CUSPARSE_CSRMV_ALG1, &bufferSize); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse bufferSize computation failed"); + } + cuError = cudaMalloc(&dBuffer, bufferSize); + if (cuError != cudaSuccess) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cuda buffer allocation failed"); + } + + /* perform SpMV */ + status = cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, + CUSPARSE_CSRMV_ALG1, dBuffer); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparseSpMV() failed"); + } + + cuError = cudaFree(dBuffer); + if (cuError != cudaSuccess) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cuda buffer deallocation failed"); + } + status = cusparseDestroyDnVec(vecX); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecX was not destroyed correctly"); + } + status = cusparseDestroyDnVec(vecY); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse vecY was not destroyed correctly"); + } + status = cusparseDestroySpMat(A_cusparse); + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse matrix was not destroyed correctly"); + } + status = cusparseDestroy(handle); + handle = 0; + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse handle was not desctroyed correctly"); + } + +#else + + /* Initialize cusparse */ + cusparseStatus_t cusparseStatus; + cusparseHandle_t cusparseHandle=0; + cusparseStatus = cusparseCreate(&cusparseHandle); + if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: cannot initialize cusparse handle"); + } + + /* create and set the matrix descriptor */ + cusparseMatDescr_t descrA = 0; + cusparseStatus = cusparseCreateMatDescr(&descrA); + if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error creating the matrix descriptor"); + } + cusparseStatus = cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); + if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error setting the matrix type"); + } + cusparseStatus = cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO); + if(cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("KokkosSparse::spmv[TPL_CUSPARSE,double]: error setting the matrix index base"); + } + + /* perform the actual SpMV operation */ + if(std::is_same::value) { + if (std::is_same::value) { + cusparseStatus = cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, + A.numRows(), A.numCols(), A.nnz(), + (const float *) &alpha, descrA, + (const float *) A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), + (const float *) x.data(), + (const float *) &beta, + (float *) y.data()); + + } else if (std::is_same::value) { + cusparseStatus = cusparseDcsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, + A.numRows(), A.numCols(), A.nnz(), + (double const *) &alpha, descrA, + (double const *) A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), + (double const *) x.data(), + (double const *) &beta, + (double *) y.data()); + } else { + throw std::logic_error("Trying to call cusparse SpMV with a scalar type that is not float or double!"); + } + } else { + throw std::logic_error("Trying to call cusparse SpMV with an offset type that is not int!"); + } + + cusparseStatus = cusparseDestroyMatDescr(descrA); + if (cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw("KokkosSparse::spmv[TPL_CUSPARSE,double]: matrix descriptor was not desctroyed correctly"); + } + cusparseStatus = cusparseDestroy(cusparseHandle); + cusparseHandle = 0; + if (cusparseStatus != CUSPARSE_STATUS_SUCCESS) { + throw("KokkosSparse::spmv[TPL_CUSPARSE,double]: cusparse handle was not desctroyed correctly"); + } + +#endif // CUSPARSE_VERSION + } + +#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, OFFSET, LAYOUT, COMPILE_LIBRARY) \ + template<> \ + struct SPMV, Kokkos::MemoryTraits, OFFSET const, \ + SCALAR const*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ + SCALAR*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ + true, COMPILE_LIBRARY> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using AMatrix = CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv (const char mode[], \ + const coefficient_type& alpha, \ + const AMatrix& A, \ + const XVector& x, \ + const coefficient_type& beta, \ + const YVector& y) { \ + Kokkos::Profiling::pushRegion("KokkosSparse::spmv[TPL_CUSPARSE,double]"); \ + spmv_cusparse(mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, true) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, true) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, true) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, true) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, false) + +#undef KOKKOSSPARSE_SPMV_CUSPARSE + +} // namespace Impl +} // namespace KokkosSparse +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #endif // KOKKOSPARSE_SPMV_TPL_SPEC_DECL_HPP_ diff --git a/src/sparse/impl/KokkosSparse_spmv_spec.hpp b/src/sparse/impl/KokkosSparse_spmv_spec.hpp index c47c8367c3..d2baf7c627 100644 --- a/src/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -130,8 +130,8 @@ namespace Impl { /// AO: ordinal type (type of column indices) of the sparse matrix /// AS: offset type (type of row offsets) of the sparse matrix /// -/// The next 5 template parameters (that start with X) correspond to -/// the input Kokkos::View. The last 5 template parameters (that start +/// The next 4 template parameters (that start with X) correspond to +/// the input Kokkos::View. The last 4 template parameters (that start /// with Y) correspond to the output Kokkos::View. /// /// For the implementation of KokkosSparse::spmv for multivectors (2-D @@ -191,8 +191,8 @@ struct SPMV{ /// AO: ordinal type (type of column indices) of the sparse matrix /// AS: offset type (type of row offsets) of the sparse matrix /// -/// The next 5 template parameters (that start with X) correspond to -/// the input Kokkos::View. The 5 template parameters after that +/// The next 4 template parameters (that start with X) correspond to +/// the input Kokkos::View. The 4 template parameters after that /// (that start with lower-case b) are the template parameters of the /// input 1-D View of coefficients 'beta'. Next, the 5 template /// parameters that start with Y correspond to the output @@ -359,7 +359,7 @@ struct SPMV_MV (); \ @@ -689,7 +690,7 @@ TEST_F( TestCategory,sparse ## _ ## spmv_mv_struct ## _ ## SCALAR ## _ ## ORDINA && defined (KOKKOSKERNELS_INST_OFFSET_INT) ) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) EXECUTE_TEST(double, int, int, TestExecSpace) EXECUTE_TEST_STRUCT(double, int, int, TestExecSpace) - #endif +#endif #if (defined (KOKKOSKERNELS_INST_DOUBLE) \ && defined (KOKKOSKERNELS_INST_ORDINAL_INT64_T) \