Skip to content

Commit

Permalink
Merge pull request #10417 from iyamazaki/shylubasker-fix
Browse files Browse the repository at this point in the history
ShyLU Basker :
  • Loading branch information
iyamazaki authored Apr 20, 2022
2 parents 171b3a2 + b2c2e0c commit 69cc9be
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 149 deletions.
139 changes: 1 addition & 138 deletions packages/shylu/shylu_node/basker/src/shylubasker_order.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -819,10 +819,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
//currently finds ND and permute BTF_A
//Would like to change so finds permuation,
//and move into 2D-Structure
#if 0 // FIX: Do we need to sort?
sort_matrix_store_valperms(BTF_A, vals_order_ndbtfa_array);
#endif

/*printf(" in apply_scotch_partition\n" );
printf(" T = [\n" );
for(Int j = 0; j < BTF_A.ncol; j++) {
Expand Down Expand Up @@ -953,22 +949,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
{
permute_row(BTF_B, part_tree.permtab);
}
//needed because moving into 2D-Structure,
//assumes sorted columns
//sort_matrix(BTF_A);
#if 0 // FIX: do we need to sort (eg for cAMD)?
sort_matrix_store_valperms(BTF_A, vals_order_ndbtfa_array); // BTF_A( perm(i) ) <- BTF_A( i )
if(btf_nblks > 1)
{
//new for sfactor_copy2 replacement
if ( BTF_B.nnz > 0 ) {
sort_matrix_store_valperms(BTF_B, vals_order_ndbtfb_array);
}
if ( BTF_C.nnz > 0 ) {
sort_matrix_store_valperms(BTF_C, vals_order_ndbtfc_array);
}
}
#endif

//--------------------------------------------------------------
//4. Init tree structure
Expand Down Expand Up @@ -1007,7 +987,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
#ifdef BASKER_TIMER
scotch_amd_timer.reset();
#endif
#if 1
#if 0
Int nleaves = 1;
printf( " * debug nleaves = 1 *\n" );
Expand All @@ -1018,41 +997,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
tempp, temp_col, temp_row, order_csym_array, Options.verbose);
Kokkos::parallel_for("BLK_AMD on A", Kokkos::RangePolicy<Exe_Space>(0, nleaves), amd_functor);
Kokkos::fence();
#else
for(Int b = 0; b < tree.nblks; ++b) {
Int frow = tree.col_tabs(b);
Int erow = tree.col_tabs(b+1);
Int fnnz = AAT.col_ptr(frow);
Int nnz = 0;
temp_col(frow+b) = 0;
for(Int k = frow; k < erow; k++) {
for(Int i = AAT.col_ptr(k); i < AAT.col_ptr(k+1); i++) {
if(AAT.row_idx(i) >= frow && AAT.row_idx(i) < erow) {
temp_row(fnnz + nnz) = AAT.row_idx(i) - frow;
nnz++;
}
}
temp_col(b+k+1) = nnz;
}
Int blk_size = erow - frow;
#ifdef BASKER_SORT_MATRIX_FOR_AMD
sort_matrix(nnz, blk_size, &(temp_col(frow+b)), &(temp_row(fnnz)), &(temp_val(fnnz)));
#endif
double l_nnz = 0;
double lu_work = 0;
BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(frow+b)), &(temp_row(fnnz)),
&(tempp(frow)), l_nnz, lu_work);
for(Int k = 0; k < blk_size; k++)
{
order_csym_array(frow+tempp(frow + k)) = frow+k;
}
#ifdef BASKER_TIMER
std::cout << " AMD ( " << b << " ) : " << scotch_amd_timer.seconds() << " seconds " << std::endl;
#endif
}
#endif
if(Options.verbose == BASKER_TRUE) {
std::cout << " ++ Basker AMD_functor on A ++ " << std::endl << std::endl;
}
Expand Down Expand Up @@ -1163,19 +1107,13 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
permute_row(BTF_B, order_csym_array);
//new for sfactor_copy2 replacement
if ( BTF_B.nnz > 0 ) {
#if 0 // no need ??
sort_matrix_store_valperms(BTF_B, vals_order_ndbtfb_array);
#endif
#ifdef BASKER_TIMER
double sortB_time = scotch_timer.seconds();
std::cout << " ++ Basker apply_scotch : sort(B) time : " << sortB_time << std::endl;
scotch_timer.reset();
#endif
}
if ( BTF_C.nnz > 0 ) {
#if 0 // no need ??
sort_matrix_store_valperms(BTF_C, vals_order_ndbtfc_array);
#endif
#ifdef BASKER_TIMER
double sortC_time = scotch_timer.seconds();
std::cout << " ++ Basker apply_scotch : sort(C) time : " << sortC_time << std::endl;
Expand Down Expand Up @@ -2035,43 +1973,22 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
MALLOC_ENTRY_1DARRAY(temp_v, nnz);

//Determine column ptr of output matrix
#if 0
for(Int j = 0; j < n; j++)
{
Int i = col (j);
temp_p (i+1) = M.col_ptr (j+1) - M.col_ptr (j);
}
#else
Kokkos::parallel_for(
"permute_col", n,
KOKKOS_LAMBDA(const int j) {
Int i = col (j);
temp_p (i+1) = M.col_ptr (j+1) - M.col_ptr (j);
});
Kokkos::fence();
#endif

//Get ptrs from lengths
temp_p (0) = 0;
for(Int j = 0; j < n; j++)
{
temp_p (j+1) = temp_p (j+1) + temp_p (j);
}
//copy idxs
#if 0
for(Int ii = 0; ii < n; ii++)
{
Int ko = temp_p (col (ii) );
for(Int k = M.col_ptr (ii); k < M.col_ptr (ii+1); k++)
{
temp_i (ko) = M.row_idx (k); // preserves order of indices in row_idx (they may not be ordered, but won't be reshuffled)
temp_v (ko) = M.val (k); // and corresponding order with vals

vals_order_perm(k) = ko; //this works for first perm or individual; how to best compose subsequent perms? track separately and compose after?
ko++;
}
}
#else
//copy idxs
Kokkos::parallel_for(
"permute_col", n,
KOKKOS_LAMBDA(const int ii) {
Expand All @@ -2095,20 +2012,8 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
vals_order_perm(k) = k;
}
});
#endif

//copy back into A
#if 0
for(Int ii=0; ii < n+1; ii++)
{
M.col_ptr (ii) = temp_p (ii);
}
for(Int ii=0; ii < nnz; ii++)
{
M.row_idx (ii) = temp_i (ii);
M.val (ii) = temp_v (ii);
}
#else
Kokkos::parallel_for(
"permute_col", n+1,
KOKKOS_LAMBDA(const int ii) {
Expand All @@ -2121,7 +2026,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
M.val (ii) = temp_v (ii);
});
Kokkos::fence();
#endif

FREE_INT_1DARRAY(temp_p);
FREE_INT_1DARRAY(temp_i);
Expand Down Expand Up @@ -2184,18 +2088,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
}

//copy idxs
#if 0
for(Int ii = frow; ii < frow+n; ii++)
{
Int ko = temp_p (col (ii-frow));
for(Int k = M.col_ptr (ii); k < M.col_ptr (ii+1); k++)
{
temp_i (ko) = M.row_idx (k); // preserves order of indices in row_idx (they may not be ordered, but won't be reshuffled)
temp_v (ko) = M.val (k); // and corresponding order with vals
ko++;
}
}
#else
Kokkos::parallel_for(
"permute_col", Kokkos::RangePolicy<Exe_Space> (frow, frow+n),
KOKKOS_LAMBDA(const int ii) {
Expand All @@ -2208,20 +2100,8 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
}
});
Kokkos::fence();
#endif

//copy back into A
#if 0
for(Int ii = frow; ii < frow+n; ii++)
{
M.col_ptr (ii+1) = M.col_ptr (frow) + temp_p (ii-frow+1);
}
for(Int ii = 0; ii < nnz; ii++)
{
M.row_idx (M.col_ptr (frow) + ii) = temp_i (ii);
M.val (M.col_ptr (frow) + ii) = temp_v (ii);
}
#else
Kokkos::parallel_for(
"permute_col", Kokkos::RangePolicy<Exe_Space> (frow, frow+n),
KOKKOS_LAMBDA(const int ii) {
Expand All @@ -2236,7 +2116,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
M.val (M.col_ptr (frow) + ii) = temp_v (ii);
});
Kokkos::fence();
#endif

FREE_INT_1DARRAY(temp_p);
FREE_INT_1DARRAY(temp_i);
Expand All @@ -2260,19 +2139,12 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
{ return 0; }

//permute
#if 0
for(Int k = 0; k < nnz; k++)
{
row_idx[k] = row[row_idx[k]];
}
#else
Kokkos::parallel_for(
"permute_row", nnz,
KOKKOS_LAMBDA(const int &k) {
row_idx[k] = row[row_idx[k]];
});
Kokkos::fence();
#endif

return 0;
}//end permute_row(matrix,int)
Expand Down Expand Up @@ -2614,14 +2486,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
//for(Int k = 0; k <= nblks; k++) printf( "tree[%d] = %d\n",k,tree.row_tabs[k] );

Int nids = num_threads;
#if 0
for(Int k = 0; k < nblks; k++)
{
for (Int i = tree.row_tabs[k]; i < tree.row_tabs[k+1]; i++) {
nd_map(i) = k;
}
}
#else
Kokkos::parallel_for(
"ndsort_matrix_store_valperms", nids,
KOKKOS_LAMBDA(const int id) {
Expand All @@ -2632,7 +2496,6 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
}
});
Kokkos::fence();
#endif

#ifdef BASKER_TIMER_AMD
std::cout << std::endl << " > sort malloc time : " << timer_order.seconds() << std::endl;
Expand Down
23 changes: 16 additions & 7 deletions packages/shylu/shylu_node/basker/src/shylubasker_order_btf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,13 +700,21 @@ namespace BaskerNS
printf("Basker: break_into_parts2 - short circuit for single block case\n");
#endif
BTF_A = A;
//Options.btf = BASKER_FALSE; // NDE: how is this handled???
//if (A.nrow < Options.btf_large) {
// btf_tabs_offset = 0;
//} else {
// btf_tabs_offset = 1;
//}
btf_tabs_offset = 1;
#if !defined (HAVE_SHYLU_NODEBASKER_METIS) & !defined(HAVE_SHYLU_NODEBASKER_SCOTCH)
if (Options.run_nd_on_leaves == BASKER_TRUE) {
if(Options.verbose == BASKER_TRUE) {
printf("Basker: turning off ND-on-leaves option since no METIS nor SCOTCH (hence sequential)\n");
}
Options.run_nd_on_leaves = BASKER_FALSE;
}
#endif
if (Options.replace_tiny_pivot == BASKER_TRUE) {
if(Options.verbose == BASKER_TRUE) {
printf("Basker: turning off replace-tiny-pivot option since one block (to identify singular matrix)\n");
}
Options.replace_tiny_pivot = BASKER_FALSE;
}
return 0;
}

Expand All @@ -725,7 +733,8 @@ namespace BaskerNS
Int scol_top = 0; // starting column of the BTF_A bloc
Int scol = M.ncol; // starting column of the BTF_C blocks (end of BTF_A block)
Int blk_idx = nblks; // start at lower right corner block, move left and up the diagonal
#if 0 // use Metis on a large block even with one thread
#if !defined (HAVE_SHYLU_NODEBASKER_METIS) & !defined(HAVE_SHYLU_NODEBASKER_SCOTCH)
// use Metis on a large block even with one thread
if (num_threads == 1) {
// Short circuit for single thread = no big block A
scol = 0;
Expand Down
4 changes: 3 additions & 1 deletion packages/shylu/shylu_node/basker/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ IF(Kokkos_ENABLE_OPENMP)
SOURCES simple_test_check.cpp
)

TRIBITS_ADD_EXECUTABLE(
TRIBITS_ADD_EXECUTABLE_AND_TEST(
singular_matrix_check
NOEXEPREFIX
SOURCES singular_matrix_check.cpp
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
)

TRIBITS_ADD_EXECUTABLE(
Expand Down
15 changes: 12 additions & 3 deletions packages/shylu/shylu_node/basker/test/singular_matrix_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ int main(int argc, char* argv[])
Int m = 2;
Int n = 2;
Int nnz = 4;
int error = 0;

Kokkos::InitArguments init_args;
const Int nthreads = 1;
Expand Down Expand Up @@ -74,14 +75,22 @@ int main(int argc, char* argv[])
std::cout << "Setting Threads:" << nthreads << std::endl;

double stime = myTime();
int error = mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
error = mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
std::cout << "Done with Symbolic"
<< "\nError code: " << error
<< "\nTime: "
<< totalTime(stime, myTime()) << std::endl;

double ftime = myTime();
error = mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
try
{
error = mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
}
catch (std::runtime_error& e)
{
std::cout << " ** Factor threw exception **" << std::endl;
error = 1;
}
std::cout << "Done with Factor"
<< "\nError code: " << error
<< "\nTime: "
Expand Down Expand Up @@ -132,5 +141,5 @@ int main(int argc, char* argv[])
}
Kokkos::finalize();


return (error == 0 ? 1 : 0);
} //end main

0 comments on commit 69cc9be

Please sign in to comment.