Skip to content

Commit

Permalink
batched/serial: add trtri unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
e10harvey committed Apr 28, 2020
1 parent a1ba5c6 commit 1257bd9
Show file tree
Hide file tree
Showing 12 changed files with 529 additions and 54 deletions.
2 changes: 1 addition & 1 deletion src/batched/KokkosBatched_Trtri_Serial_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ namespace KokkosBatched {
invoke(const AViewType &A) {
return SerialTrtriInternalLower<Algo::Trtri::Unblocked>::invoke(ArgDiag::use_unit_diag,
A.extent(0), A.extent(1),
A.data(), A.stride(0), A.stride(1));
A.data(), A.stride_0(), A.stride_1());
}
};
template<typename ArgDiag>
Expand Down
108 changes: 57 additions & 51 deletions src/batched/KokkosBatched_Trtri_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,36 +89,38 @@ namespace KokkosBatched {
return i+1;
}

A[(am-1)*as0 + (am-1)*as1] = one / A[(am-1)*as0 + (am-1)*as1];

#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
for (int i = am - 2; i >= 0; --i) {
for (int i = am - 1; i >= 0; --i) {
A[i*as0 + i*as1] = one / A[i*as0 + i*as1];
if (use_unit_diag)
A_ii = -one;
else
A_ii = -A[i*as0 + i*as1];

ValueType *__restrict__ A_subblock = &A[(i+1)*as0 + (i+1)*as1];
int A_subblock_m = am - i - 1,
A_subblock_n = am - i - 1;
ValueType *__restrict__ A_col_vec = &A[(i+1)*as0 + i*as1];
int A_col_vec_m = am - i - 1,
A_col_vec_n = 1;
// TRMV/TRMM -- x=Ax
// A((j+1):n,j) = A((j+1):n,(j+1):n) * A((j+1):n,j) ;
SerialTrmmInternalLeftLower<Algo::Trmm::Unblocked>::invoke(use_unit_diag,
false,
A_subblock_m, A_subblock_n,
A_col_vec_m, A_col_vec_n,
one,
A_subblock, as0, as1,
A_col_vec, as0, as1);
// SCAL -- x=ax
// A((j+1):n,j) = A_ii * A((j+1):n,j)
SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1);
if (i < am - 1) {
if (use_unit_diag)
A_ii = -one;
else
A_ii = -A[i*as0 + i*as1];

ValueType *__restrict__ A_subblock = &A[(i+1)*as0 + (i+1)*as1];
int A_subblock_m = am - i - 1,
A_subblock_n = am - i - 1;
ValueType *__restrict__ A_col_vec = &A[(i+1)*as0 + i*as1];
int A_col_vec_m = am - i - 1,
A_col_vec_n = 1;
// TRMV/TRMM -- x=Ax
// A((j+1):n,j) = A((j+1):n,(j+1):n) * A((j+1):n,j) ;
SerialTrmmInternalLeftLower<Algo::Trmm::Unblocked>::invoke(use_unit_diag,
false,
A_subblock_m, A_subblock_n,
A_col_vec_m, A_col_vec_n,
one,
A_subblock, as0, as1,
A_col_vec, as0, as1);

// SCAL -- x=ax
// A((j+1):n,j) = A_ii * A((j+1):n,j)
SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1);
}
}
return 0;
}
Expand All @@ -132,6 +134,8 @@ namespace KokkosBatched {
const int am, const int an,
ValueType *__restrict__ A, const int as0, const int as1) {
ValueType one(1.0), zero(0.0), A_ii;


if (!use_unit_diag) {
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
Expand All @@ -142,37 +146,39 @@ namespace KokkosBatched {
return i+1;
}

A[0*as0 + 0*as1] = one / A[0*as0 + 0*as1];

#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
for (int i = 1; i < am; ++i) {
for (int i = 0; i < am; ++i) {
A[i*as0 + i*as1] = one / A[i*as0 + i*as1];
if (use_unit_diag)
A_ii = -one;
else
A_ii = -A[i*as0 + i*as1];

ValueType *__restrict__ A_subblock = &A[0*as0 + 0*as1];
int A_subblock_m = i,
A_subblock_n = i;
ValueType *__restrict__ A_col_vec = &A[0*as0 + i*as1];
int A_col_vec_m = i,
A_col_vec_n = 1;
// TRMV/TRMM -- x=Ax
// A(1:(j-1),j) = A(1:(j-1),1:(j-1)) * A(1:(j-1),j) ;
//SerialTrmm<Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::NoUnit,Algo::Trmm::Unblocked>
SerialTrmmInternalLeftUpper<Algo::Trmm::Unblocked>::invoke(use_unit_diag,
false,
A_subblock_m, A_subblock_n,
A_col_vec_m, A_col_vec_n,
one,
A_subblock, as0, as1,
A_col_vec, as0, as1);
// SCAL -- x=ax
// A(1:(j-1),j) = A_ii * A(1:(j-1),j)
SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1);
if (i > 0) {
if (use_unit_diag)
A_ii = -one;
else
A_ii = -A[i*as0 + i*as1];

ValueType *__restrict__ A_subblock = &A[0*as0 + 0*as1];
int A_subblock_m = i,
A_subblock_n = i;
ValueType *__restrict__ A_col_vec = &A[0*as0 + i*as1];
int A_col_vec_m = i,
A_col_vec_n = 1;
// TRMV/TRMM -- x=Ax
// A(1:(j-1),j) = A(1:(j-1),1:(j-1)) * A(1:(j-1),j) ;
//SerialTrmm<Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::NoUnit,Algo::Trmm::Unblocked>
SerialTrmmInternalLeftUpper<Algo::Trmm::Unblocked>::invoke(use_unit_diag,
false,
A_subblock_m, A_subblock_n,
A_col_vec_m, A_col_vec_n,
one,
A_subblock, as0, as1,
A_col_vec, as0, as1);

// SCAL -- x=ax
// A(1:(j-1),j) = A_ii * A(1:(j-1),j)
SerialScaleInternal::invoke(A_col_vec_m, A_col_vec_n, A_ii, A_col_vec, as0, as1);
}
}
return 0;
}
Expand Down
4 changes: 2 additions & 2 deletions unit_test/batched/Test_Batched_SerialTrmm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ namespace Test {
std::is_same<value_type,Kokkos::complex<double> >::value ? "::ComplexDouble" : "::UnknownValueType" );
std::string name = name_region + name_value_type;
Kokkos::Profiling::pushRegion( name.c_str() );
Kokkos::RangePolicy<DeviceType,ParamTagType> policy(0, _b.extent(0));
Kokkos::RangePolicy<DeviceType,ParamTagType> policy(0, _a.extent(0));
Kokkos::parallel_for(name.c_str(), policy, *this);
Kokkos::Profiling::popRegion();
}
Expand Down Expand Up @@ -298,7 +298,7 @@ int test_batched_trmm() {
for (int i=0;i<10;++i) {
//printf("Testing: LayoutRight, Blksize %d\n", i);
Test::impl_test_batched_trmm<DeviceType,ViewType,ScalarType,ParamTagType,AlgoTagType>(1024, i, 4, &trans);
Test::impl_test_batched_trmm<DeviceType,ViewType,ScalarType,ParamTagType,AlgoTagType>(1024, i, 1), &trans;
Test::impl_test_batched_trmm<DeviceType,ViewType,ScalarType,ParamTagType,AlgoTagType>(1024, i, 1, &trans);
}
}
#endif
Expand Down
Loading

0 comments on commit 1257bd9

Please sign in to comment.