Skip to content

Commit

Permalink
Amesos2: Fix SuperLU TRSV for Cuda
Browse files Browse the repository at this point in the history
Also fix build for TRSV off.
  • Loading branch information
MicheldeMessieres committed May 4, 2020
1 parent e57e942 commit 1b295a6
Showing 1 changed file with 76 additions and 43 deletions.
119 changes: 76 additions & 43 deletions packages/amesos2/src/Amesos2_Superlu_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,19 +391,34 @@ Superlu<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >
Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
#endif

Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
host_solve_array_t>::do_get(B, host_bValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);

// In general we may want to write directly to the x space without a copy.
// So we 'get' x which may be a direct view assignment to the MV.
Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
host_solve_array_t>::do_get(X, host_xValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
if(use_triangular_solves_) { // to device
#ifdef HAVE_AMESOS2_TRIANGULAR_SOLVES
Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
device_solve_array_t>::do_get(X, device_xValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
device_solve_array_t>::do_get(B, device_bValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
#endif
}
else { // to host
Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
host_solve_array_t>::do_get(X, host_xValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
host_solve_array_t>::do_get(B, host_bValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
}
}

// If equilibration was applied at numeric, then gssvx and gsisx are going to
Expand All @@ -413,36 +428,28 @@ Superlu<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >
// skip this. Generally need an API which tells us what happened internally
// in above get_1d_copy_helper_kokkos_view - whether is was copy or assign.
if(data_.equed != 'N') {
host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
host_bValues_.extent(0), host_bValues_.extent(1));
Kokkos::deep_copy(copyB, host_bValues_);
host_bValues_ = copyB;
if(use_triangular_solves_) { // to device
#ifdef HAVE_AMESOS2_TRIANGULAR_SOLVES
device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
device_bValues_.extent(0), device_bValues_.extent(1));
Kokkos::deep_copy(copyB, device_bValues_);
device_bValues_ = copyB;
#endif
}
else {
host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
host_bValues_.extent(0), host_bValues_.extent(1));
Kokkos::deep_copy(copyB, host_bValues_);
host_bValues_ = copyB;
}
}

int ierr = 0; // returned error code

magnitude_type rpg, rcond;
if ( this->root_ ) {
Kokkos::resize(data_.ferr, nrhs);
Kokkos::resize(data_.berr, nrhs);

{
#ifdef HAVE_AMESOS2_TIMERS
Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
#endif
SLU::Dtype_t dtype = type_map::dtype;
int i_ld_rhs = as<int>(ld_rhs);
function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
host_bValues_.data(), i_ld_rhs,
SLU::SLU_DN, dtype, SLU::SLU_GE);
function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
host_xValues_.data(), i_ld_rhs,
SLU::SLU_DN, dtype, SLU::SLU_GE);
}

// Note: the values of B and X (after solution) are adjusted
// appropriately within gssvx for row and column scalings.

{ // Do solve!
#ifdef HAVE_AMESOS2_TIMERS
Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
Expand All @@ -452,6 +459,23 @@ Superlu<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >
triangular_solve();
}
else {
Kokkos::resize(data_.ferr, nrhs);
Kokkos::resize(data_.berr, nrhs);

{
#ifdef HAVE_AMESOS2_TIMERS
Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
#endif
SLU::Dtype_t dtype = type_map::dtype;
int i_ld_rhs = as<int>(ld_rhs);
function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
host_bValues_.data(), i_ld_rhs,
SLU::SLU_DN, dtype, SLU::SLU_GE);
function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
host_xValues_.data(), i_ld_rhs,
SLU::SLU_DN, dtype, SLU::SLU_GE);
}

if(ILU_Flag_==false) {
function_map::gssvx(&(data_.options), &(data_.A),
data_.perm_c.data(), data_.perm_r.data(),
Expand All @@ -475,16 +499,16 @@ Superlu<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >
#endif
&(data_.mem_usage), &(data_.stat), &ierr);
}
}

// Cleanup X and B stores
SLU::Destroy_SuperMatrix_Store( &(data_.X) );
SLU::Destroy_SuperMatrix_Store( &(data_.B) );
data_.X.Store = NULL;
data_.B.Store = NULL;
}

// Cleanup X and B stores
SLU::Destroy_SuperMatrix_Store( &(data_.X) );
SLU::Destroy_SuperMatrix_Store( &(data_.B) );
data_.X.Store = NULL;
data_.B.Store = NULL;
}
} // do solve
} // if ( this->root_ )

/* All processes should have the same error code */
Teuchos::broadcast(*(this->getComm()), 0, &ierr);
Expand All @@ -507,13 +531,25 @@ Superlu<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> >
Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
#endif

if(use_triangular_solves_) { // to device
#ifdef HAVE_AMESOS2_TRIANGULAR_SOLVES
Util::put_1d_data_helper_kokkos_view<
MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
#endif
}
else {
Util::put_1d_data_helper_kokkos_view<
MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
as<size_t>(ld_rhs),
(is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
this->rowIndexBase_);
}

}


return(ierr);
}
Expand Down Expand Up @@ -901,9 +937,6 @@ void
Superlu<Matrix,Vector>::triangular_solve() const
{
#ifdef HAVE_AMESOS2_TRIANGULAR_SOLVES
deep_copy_or_assign_view(device_xValues_, host_xValues_);
deep_copy_or_assign_view(device_bValues_, host_bValues_);

size_t ld_rhs = device_xValues_.extent(0);
size_t nrhs = device_xValues_.extent(1);

Expand Down

0 comments on commit 1b295a6

Please sign in to comment.