-
Notifications
You must be signed in to change notification settings - Fork 578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tacho - cuSolver interface #6695
Merged
trilinos-autotester
merged 10 commits into
trilinos:develop
from
kyungjoo-kim:tacho-develop
Feb 3, 2020
+2,798
−2,083
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
c797c77
Tacho - add crs matrix permutation
kyungjoo-kim bd3146b
Tacho - use device type instead of exec space
kyungjoo-kim c1e3e1c
Tacho - cusolver example interface
kyungjoo-kim 0c8cce1
Merge remote-tracking branch 'upstream/develop' into tacho-develop
kyungjoo-kim 49f96db
Tacho - cuSolver interface can accept size_type for rowptr
kyungjoo-kim 25bcc4a
Tacho - avoid unnecessary initialization of kokkos views
kyungjoo-kim 5020b2d
Tacho - fix crs matrix permute test
kyungjoo-kim 9b780da
Tacho - change cuda space back to cuda uvm space
kyungjoo-kim b48e9ac
Merge remote-tracking branch 'upstream/develop' into tacho-develop
kyungjoo-kim 3196351
Tacho - fix for warning as error
kyungjoo-kim File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
173 changes: 173 additions & 0 deletions
173
packages/shylu/shylu_node/tacho/example/Tacho_ExampleCholCuSolver.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
#include "ShyLU_NodeTacho_config.h" | ||
|
||
#include <Kokkos_Core.hpp> | ||
#include <impl/Kokkos_Timer.hpp> | ||
|
||
#include "Tacho.hpp" | ||
#include "Tacho_CommandLineParser.hpp" | ||
|
||
#if defined (KOKKOS_ENABLE_CUDA) | ||
#include "Tacho_CuSolver.hpp" | ||
#endif | ||
|
||
using namespace Tacho; | ||
|
||
#if defined(KOKKOS_ENABLE_CUDA)
/// Run the cuSolver sparse Cholesky example on the Cuda device.
///
/// Steps: read a MatrixMarket matrix on host, compute a fill-reducing ordering,
/// copy and permute the matrix on device, analyze + factorize with cuSolver,
/// solve for a random right hand side, and print the relative residual.
///
/// Kokkos must already be initialized by the caller. Every Kokkos/Tacho object
/// created here is local to this function, so all of them are destroyed before
/// the caller reaches Kokkos::finalize(), including on the early error return.
///
/// \param file     path of the input MatrixMarket file
/// \param nrhs     number of right hand side vectors
/// \param verbose  flag forwarded to the Tacho/cuSolver calls and used to gate timing output
/// \return 0 on completion, -1 if the input file cannot be opened
static int runCholCuSolverExample(std::string file, int nrhs, bool verbose) {
  const bool detail = false;

  typedef double value_type;

  typedef UseThisDevice<Kokkos::Cuda>::device_type device_type;
  typedef UseThisDevice<Kokkos::DefaultHostExecutionSpace>::device_type host_device_type;

  Tacho::printExecSpaceConfiguration<typename device_type::execution_space>("DeviceSpace", detail);
  Tacho::printExecSpaceConfiguration<typename host_device_type::execution_space>("HostSpace", detail);

  Kokkos::Impl::Timer timer;

  ///
  /// read from crs matrix
  ///
  typedef Tacho::CrsMatrixBase<value_type,host_device_type> CrsMatrixBaseTypeHost;
  typedef Tacho::CrsMatrixBase<value_type,device_type> CrsMatrixBaseType;
  typedef Kokkos::View<value_type**,Kokkos::LayoutLeft,device_type> DenseMultiVectorType;

  /// read a spd matrix of matrix market format
  CrsMatrixBaseTypeHost h_A;
  {
    /// probe the file first so a missing input produces a readable message
    std::ifstream in;
    in.open(file);
    if (!in.good()) {
      std::cout << "Failed in open the file: " << file << std::endl;
      /// returning from this helper (not from main) lets the caller finalize Kokkos
      return -1;
    }
    Tacho::MatrixMarket<value_type>::read(file, h_A, verbose);
  }

  ///
  /// cuSolver
  ///
  CuSolver cusolver;
  cusolver.setVerbose(verbose);

  ///
  /// reorder matrix
  ///
#if defined(TACHO_HAVE_METIS)
  typedef GraphTools_Metis graph_tools_type;
#else
  /// not recommend to use CAMD
  typedef GraphTools_CAMD graph_tools_type;
#endif
  Graph graph(h_A.NumRows(), h_A.NumNonZeros(), h_A.RowPtr(), h_A.Cols());
  graph_tools_type G(graph);
  G.reorder(verbose);

  const auto h_perm = G.PermVector();
  const auto h_peri = G.InvPermVector();

  /// mirror the permutation vectors into the device memory space
  const auto perm = Kokkos::create_mirror_view(typename device_type::memory_space(), h_perm);
  Kokkos::deep_copy(perm, h_perm);
  const auto peri = Kokkos::create_mirror_view(typename device_type::memory_space(), h_peri);
  Kokkos::deep_copy(peri, h_peri);

  /// device copy of the host matrix
  CrsMatrixBaseType A;
  A.createConfTo(h_A);
  A.copy(h_A);

  /// permute ondevice
  CrsMatrixBaseType Ap;
  {
    timer.reset();
    Ap.createConfTo(A);
    Tacho::applyPermutationToCrsMatrixLower(Ap, A, perm, peri);
    Kokkos::fence(); /// wait for device work before reading the timer
    const double t_permute_A = timer.seconds();

    if (verbose) {
      printf("ExampleCuSolver: Construction of permuted matrix A\n");
      printf("  Time\n");
      printf("             time for permutation of A: %10.6f s\n", t_permute_A);
      printf("\n");
    }
  }

  ///
  /// analyze
  ///
  {
    cusolver.analyze(Ap.NumRows(), Ap.RowPtr(), Ap.Cols());
  }

  ///
  /// factorize
  ///
  {
    cusolver.factorize(Ap.Values());
  }

  ///
  /// random right hand side
  ///
  DenseMultiVectorType
    b("b", A.NumRows(), nrhs),   // rhs multivector
    x("x", A.NumRows(), nrhs),   // solution multivector
    bb("bb", A.NumRows(), nrhs), // temp workspace (store permuted rhs)
    xx("t", A.NumRows(), nrhs);  // temp workspace (store solve output before inverse permutation)

  {
    /// fixed seed so the right hand side is reproducible between runs
    Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> random(13718);
    Kokkos::fill_random(b, random, value_type(1));
  }

  ///
  /// solve
  ///
  {
    timer.reset();
    applyRowPermutationToDenseMatrix(bb, b, perm);
    cusolver.solve(xx, bb);
    applyRowPermutationToDenseMatrix(x, xx, peri);
    Kokkos::fence(); /// wait for device work before reading the timer
    const double t_solve = timer.seconds();
    if (verbose) {
      printf("ExampleCuSolver: P b, solve, and P^{-1} x\n");
      printf("  Time\n");
      printf("             time for permute and solve: %10.6f s\n", t_solve);
      printf("\n");
    }
  }

  ///
  /// compute residual to check solutions
  ///
  const double res = computeRelativeResidual(A, x, b);

  std::cout << "cuSolver: residual = " << res << "\n\n";

  return 0;
}
#endif

/// Example driver: parses command line options, initializes Kokkos, runs the
/// cuSolver example when CUDA is enabled, and finalizes Kokkos.
///
/// Options: "verbose" (bool), "file" (MatrixMarket SPD matrix), "nrhs" (int).
///
/// \return 0 when help was printed or the example completed;
///         -1 if the input file cannot be opened or CUDA is not enabled
int main (int argc, char *argv[]) {
  CommandLineParser opts("This example program measure the performance of cuSolver on Kokkos::Cuda");

  bool verbose = true;
  std::string file = "test.mtx";
  int nrhs = 1;

  opts.set_option<bool>("verbose", "Flag for verbose printing", &verbose);
  opts.set_option<std::string>("file", "Input file (MatrixMarket SPD matrix)", &file);
  opts.set_option<int>("nrhs", "Number of RHS vectors", &nrhs);

  const bool r_parse = opts.parse(argc, argv);
  if (r_parse) return 0; // print help return

  Kokkos::initialize(argc, argv);

  /// declared outside the preprocessor guard so both branches can set it
  int r_val = 0;

#if defined(KOKKOS_ENABLE_CUDA)
  r_val = runCholCuSolverExample(file, nrhs, verbose);
#else
  r_val = -1;
  std::cout << "CUDA is NOT configured in Trilinos" << std::endl;
#endif

  Kokkos::finalize();

  return r_val;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should ShyLU instead use a TPL macro? Trilinos already has an MKL TPL.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think that makes a difference. INTEL_MKL is defined in mkl.h and mkl.h is guarded by "TACHO_HAVE_MKL" which is determined from TPL_ENABLE_MKL.