Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kokkos + KokkosKernels Release to 3.5.00 #9836

Merged
merged 22 commits into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
76e1cd9
tpetra: move sort_crs_matrix out of Impl namespace
ndellingwood Jun 10, 2021
da10abe
amesos2: move sort_crs_matrix out of Impl namespace
ndellingwood Jul 8, 2021
f8c911d
zoltan2: modify "Vector" alias in Test_Sphynx
ndellingwood Jul 12, 2021
544eeda
sacado, stokhos: replace KOKKOS_IMPL_CUDA_* macros with Cuda functions
ndellingwood Jul 22, 2021
60e27ce
tpetra: move sort_crs_matrix out of Impl namespace
ndellingwood Jun 10, 2021
c0e1471
amesos2: move sort_crs_matrix out of Impl namespace
ndellingwood Jul 8, 2021
a891ab2
zoltan2: modify "Vector" alias in Test_Sphynx
ndellingwood Jul 12, 2021
f86c91a
sacado, stokhos: replace KOKKOS_IMPL_CUDA_* macros with Cuda functions
ndellingwood Jul 22, 2021
d72eb85
atdm/contributed/weaver: update modules
ndellingwood Oct 1, 2021
e6aee98
intrepid2: workaround intel internal compiler error in Intrepid2_Data
ndellingwood Oct 1, 2021
08f1f07
Merge branch 'kokkos-promotion' of https://github.com/trilinos/Trilin…
ndellingwood Oct 1, 2021
536a384
tpetra: move sort_crs_* out of Impl namespace
ndellingwood Oct 14, 2021
bed59f2
Merge branch 'develop' into kokkos-promotion
ndellingwood Oct 14, 2021
fa115c1
ifpack2,sacado: rename CUDA_SAFE_CALL -> KOKKOS_IMPL_CUDA_SAFE_CALL
ndellingwood Oct 14, 2021
7b63df4
Update packages to move Kokkos::Timer out of impl namespace
ndellingwood Oct 15, 2021
448daeb
intrepid2: remove deprecation warnings
ndellingwood Oct 22, 2021
eb03da3
amesos2: resolve unused warning in superlu interface
ndellingwood Oct 22, 2021
2ac5617
stokhos: resolve -Werror
ndellingwood Oct 22, 2021
4275f6b
Snapshot of kokkos.git from commit 8dc4a906d43ae8eacc951cc5d7e95ad2df…
ndellingwood Oct 28, 2021
cd7a9c7
Snapshot of kokkos-kernels.git from commit 14d29f0a04f9fc959c7c96d98e…
ndellingwood Oct 28, 2021
6909d93
Intrepid2 - deep copy range match
kyungjoo-kim Oct 28, 2021
d63e635
intrepid2: resolve signed-unsigned warning
ndellingwood Oct 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion cmake/std/atdm/contributed/weaver/environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ elif [[ "$ATDM_CONFIG_COMPILER" == "CUDA"* ]] ; then
fi

# Ninja
module load ninja/1.7.2
#module load ninja/1.7.2

# CMake
#module swap cmake/3.6.2 cmake/3.12.3
Expand Down
3 changes: 2 additions & 1 deletion packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#define TESTING_AMESOS2_WITH_TPETRA_REMOVE_UVM
#if defined(TESTING_AMESOS2_WITH_TPETRA_REMOVE_UVM)
#include "KokkosKernels_SparseUtils.hpp"
#include "KokkosKernels_Sorting.hpp"
#endif

namespace Amesos2 {
Expand Down Expand Up @@ -608,7 +609,7 @@ namespace Amesos2 {
// sort
if( ordering == SORTED_INDICES ) {
using execution_space = typename KV_GS::execution_space;
KokkosKernels::Impl::sort_crs_matrix <execution_space, KV_GS, KV_GO, KV_S>
KokkosKernels::sort_crs_matrix <execution_space, KV_GS, KV_GO, KV_S>
(rowptr, colind, nzval);
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion packages/amesos2/src/Amesos2_Superlu_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,6 @@ Superlu<Matrix,Vector>::triangular_solve_factor()
if (data_.options.ConditionNumber == SLU::YES) {
using STM = Teuchos::ScalarTraits<magnitude_type>;
const magnitude_type eps = STM::eps ();
int n = data_.perm_r.extent(0);

SCformat *Lstore = (SCformat*)(data_.L.Store);
int nsuper = 1 + Lstore->nsuper;
Expand All @@ -1077,6 +1076,7 @@ Superlu<Matrix,Vector>::triangular_solve_factor()
condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);

#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
int n = data_.perm_r.extent(0);
std::cout << this->getComm()->getRank()
<< " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
<< ", num super cols = " << nsuper << ", max super cols = " << max_cols
Expand Down
8 changes: 4 additions & 4 deletions packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,10 @@ namespace Ifpack2 {

#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKTRIDICONTAINER_ENABLE_PROFILE)
#define IFPACK2_BLOCKTRIDICONTAINER_PROFILER_REGION_BEGIN \
CUDA_SAFE_CALL(cudaProfilerStart());
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());

#define IFPACK2_BLOCKTRIDICONTAINER_PROFILER_REGION_END \
{ CUDA_SAFE_CALL( cudaProfilerStop() ); }
{ KOKKOS_IMPL_CUDA_SAFE_CALL( cudaProfilerStop() ); }
#else
/// later put vtune profiler region
#define IFPACK2_BLOCKTRIDICONTAINER_PROFILER_REGION_BEGIN
Expand Down Expand Up @@ -676,7 +676,7 @@ namespace Ifpack2 {
exec_instances.clear();
exec_instances.resize(num_streams);
for (local_ordinal_type i=0;i<num_streams;++i) {
CUDA_SAFE_CALL(cudaStreamCreateWithFlags(&stream[i], cudaStreamNonBlocking));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreateWithFlags(&stream[i], cudaStreamNonBlocking));
ExecutionSpaceFactory<execution_space>::createInstance(stream[i], exec_instances[i]);
}
}
Expand All @@ -688,7 +688,7 @@ namespace Ifpack2 {
{
const local_ordinal_type num_streams = stream.size();
for (local_ordinal_type i=0;i<num_streams;++i)
CUDA_SAFE_CALL(cudaStreamDestroy(stream[i]));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(stream[i]));
}
stream.clear();
exec_instances.clear();
Expand Down
6 changes: 3 additions & 3 deletions packages/ifpack2/src/Ifpack2_Details_FastILU_Base_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
#define __IFPACK2_FASTILU_BASE_DEF_HPP__

#include <Ifpack2_Details_CrsArrays.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <stdexcept>
#include "Teuchos_TimeMonitor.hpp"

Expand Down Expand Up @@ -164,7 +164,7 @@ initialize()
{
throw std::runtime_error(std::string("Called ") + getName() + "::initialize() but matrix was null (call setMatrix() with a non-null matrix first)");
}
Kokkos::Impl::Timer copyTimer;
Kokkos::Timer copyTimer;
CrsArrayReader<Scalar, LocalOrdinal, GlobalOrdinal, Node>::getStructure(mat_.get(), localRowPtrsHost_, localRowPtrs_, localColInds_);
crsCopyTime_ = copyTimer.seconds();
initLocalPrec(); //note: initLocalPrec updates initTime
Expand Down Expand Up @@ -196,7 +196,7 @@ compute()


//get copy of values array from matrix
Kokkos::Impl::Timer copyTimer;
Kokkos::Timer copyTimer;
CrsArrayReader<Scalar, LocalOrdinal, GlobalOrdinal, Node>::getValues(mat_.get(), localValues_, localRowPtrsHost_);
crsCopyTime_ += copyTimer.seconds(); //add to the time spent getting rowptrs/colinds
computeLocalPrec(); //this updates computeTime_
Expand Down
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_Details_Fic_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#include "Ifpack2_Details_Fic_decl.hpp"
#include "Ifpack2_Details_CrsArrays.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <shylu_fastic.hpp>

namespace Ifpack2
Expand Down
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_Details_Fildl_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#include "Ifpack2_Details_Fildl_decl.hpp"
#include "Ifpack2_Details_CrsArrays.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <shylu_fastildl.hpp>

namespace Ifpack2
Expand Down
2 changes: 1 addition & 1 deletion packages/ifpack2/src/Ifpack2_Details_Filu_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#include "Ifpack2_Details_Filu_decl.hpp"
#include "Ifpack2_Details_CrsArrays.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <shylu_fastilu.hpp>

namespace Ifpack2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
#include <iomanip>

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
#include <iomanip>

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
#include <iomanip>

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
6 changes: 3 additions & 3 deletions packages/intrepid2/perf-test/ComputeBasis/test_hgrad.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ namespace Intrepid2 {
constexpr size_t LLC_CAPACITY = 32*1024*1024;
Intrepid2::Test::Flush<LLC_CAPACITY,DeviceSpaceType> flush;

Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
double t_horizontal = 0, t_vertical = 0;
int errorFlag = 0;

Expand Down Expand Up @@ -210,8 +210,8 @@ namespace Intrepid2 {

typedef F_hgrad_eval<ValueType,ValueType,DeviceSpaceType> FunctorType;

using range_policy_type = Kokkos::Experimental::MDRangePolicy
< DeviceSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >;
using range_policy_type = Kokkos::MDRangePolicy
< DeviceSpaceType, Kokkos::Rank<2>, Kokkos::IndexType<ordinal_type> >;
range_policy_type policy( { 0, 0 },
{ numCells, numPoints } );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace Intrepid2 {
constexpr size_t LLC_CAPACITY = 32*1024*1024;
Intrepid2::Test::Flush<LLC_CAPACITY,DeviceSpaceType> flush;

Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
double t_vectorize = 0;
int errorFlag = 0;

Expand Down Expand Up @@ -173,8 +173,8 @@ namespace Intrepid2 {

typedef F_hgrad_eval<VectorType,ValueType,DeviceSpaceType> FunctorType;

using range_policy_type = Kokkos::Experimental::MDRangePolicy
< DeviceSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >;
using range_policy_type = Kokkos::MDRangePolicy
< DeviceSpaceType, Kokkos::Rank<2>, Kokkos::IndexType<ordinal_type> >;
range_policy_type policy( { 0, 0 },
{ numCellsAdjusted, numPoints } );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*/

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*/

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*/

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*/

#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

#include "Teuchos_CommandLineProcessor.hpp"

Expand Down
2 changes: 1 addition & 1 deletion packages/intrepid2/perf-test/DynRankView/test_01.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ namespace Intrepid2 {
<< " Test Array Structure (C,P,D) = " << C << ", " << P << ", " << D << "\n"
<< "===============================================================================\n";

Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
double t_dynrankview[20] = {}, t_view[20] = {};
int errorFlag = 0, itest = 0;

Expand Down
2 changes: 1 addition & 1 deletion packages/intrepid2/perf-test/DynRankView/test_02.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ namespace Intrepid2 {
<< " Test Array Structure (C,P,D) = " << C << ", " << P << ", " << D << "\n"
<< "===============================================================================\n";

Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
double t_without_subview[20] = {}, t_with_subview[20] = {};
int errorFlag = 0, itest = 0;

Expand Down
2 changes: 1 addition & 1 deletion packages/intrepid2/perf-test/test_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
\author Created by Kyungjoo Kim.
*/
#include "Kokkos_Core.hpp"
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>

namespace Intrepid2 {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -805,8 +805,8 @@ namespace Intrepid2 {
// resolve the -1 default argument for endCell into the true end cell index
int endCellResolved = (endCell == -1) ? worksetCell.extent_int(0) : endCell;

using range_policy_type = Kokkos::Experimental::MDRangePolicy
< ExecSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >;
using range_policy_type = Kokkos::MDRangePolicy
< ExecSpaceType, Kokkos::Rank<2>, Kokkos::IndexType<ordinal_type> >;
range_policy_type policy( { 0, 0 },
{ jacobian.extent(0), jacobian.extent(1) } );
Kokkos::parallel_for( policy, FunctorType(jacobian, worksetCell, gradients, startCell, endCellResolved) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ namespace Intrepid2 {
// F = outputVals.extent(1),
// P = outputVals.extent(2);

// using range_policy_type = Kokkos::Experimental::MDRangePolicy
// < DeviceType, Kokkos::Experimental::Rank<3>, Kokkos::IndexType<ordinal_type> >;
// using range_policy_type = Kokkos::MDRangePolicy
// < DeviceType, Kokkos::Rank<3>, Kokkos::IndexType<ordinal_type> >;
// range_policy_type policy( { 0, 0, 0 },
// { C, F, P } );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -793,14 +793,14 @@ namespace Intrepid2 {
typedef FunctorArrayTools::F_matvecProduct<OutputViewType, leftInputViewType, rightInputViewType> FunctorType;

if (hasField) {
using range_policy_type = Kokkos::Experimental::MDRangePolicy
< ExecSpaceType, Kokkos::Experimental::Rank<3>, Kokkos::IndexType<ordinal_type> >;
using range_policy_type = Kokkos::MDRangePolicy
< ExecSpaceType, Kokkos::Rank<3>, Kokkos::IndexType<ordinal_type> >;
range_policy_type policy( { 0, 0, 0 },
{ output.extent(0), output.extent(1), output.extent(2) } );
Kokkos::parallel_for( policy, FunctorType(output, leftInput, rightInput, isTranspose) );
} else {
using range_policy_type = Kokkos::Experimental::MDRangePolicy
< ExecSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >;
using range_policy_type = Kokkos::MDRangePolicy
< ExecSpaceType, Kokkos::Rank<2>, Kokkos::IndexType<ordinal_type> >;
range_policy_type policy( { 0, 0 },
{ output.extent(0), output.extent(1) } );
Kokkos::parallel_for( policy, FunctorType(output, leftInput, rightInput, isTranspose) );
Expand Down
16 changes: 16 additions & 0 deletions packages/intrepid2/src/Shared/Intrepid2_Data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1655,6 +1655,21 @@ class ZeroView {
//! Copies 0.0 to the underlying View.
void clear() const
{
#ifdef KOKKOS_COMPILER_INTEL
// Workaround intel internal compiler errors
DataScalar zero = DataScalar(0);
switch (dataRank_)
{
case 1: {Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, data1_.extent_int(0)), KOKKOS_LAMBDA(int i) {data1_(i) = zero;}); break; }
case 2: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<2>, execution_space>({0,0},{data2_.extent_int(0),data2_.extent_int(1)}), KOKKOS_LAMBDA(int i0, int i1) {data2_(i0, i1) = zero;}); break; }
case 3: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<3>, execution_space>({0,0,0},{data3_.extent_int(0),data3_.extent_int(1),data3_.extent_int(2)}), KOKKOS_LAMBDA(int i0, int i1, int i2) {data3_(i0, i1, i2) = zero;}); break; }
case 4: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<4>, execution_space>({0,0,0,0},{data4_.extent_int(0),data4_.extent_int(1),data4_.extent_int(2),data4_.extent_int(3)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {data4_(i0, i1, i2, i3) = zero;}); break; }
case 5: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<5>, execution_space>({0,0,0,0,0},{data5_.extent_int(0),data5_.extent_int(1),data5_.extent_int(2),data5_.extent_int(3),data5_.extent_int(4)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {data5_(i0, i1, i2, i3, i4) = zero;}); break; }
case 6: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data6_.extent_int(0),data6_.extent_int(1),data6_.extent_int(2),data6_.extent_int(3),data6_.extent_int(4),data6_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {data6_(i0, i1, i2, i3, i4, i5) = zero;}); break; }
case 7: {Kokkos::parallel_for(Kokkos::MDRangePolicy<Kokkos::Rank<6>, execution_space>({0,0,0,0,0,0},{data7_.extent_int(0),data7_.extent_int(1),data7_.extent_int(2),data7_.extent_int(3),data7_.extent_int(4),data7_.extent_int(5)}), KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5 ) {for (int i6 = 0; i6 < data7_.extent_int(6); ++i6) data7_(i0, i1, i2, i3, i4, i5, i6) = zero;}); break; }
default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
}
#else
switch (dataRank_)
{
case 1: Kokkos::deep_copy(data1_, 0.0); break;
Expand All @@ -1666,6 +1681,7 @@ class ZeroView {
case 7: Kokkos::deep_copy(data7_, 0.0); break;
default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
}
#endif
}

//! Copies from the provided DynRankView into the underlying Kokkos::View container storing the unique data.
Expand Down
4 changes: 3 additions & 1 deletion packages/intrepid2/src/Shared/Intrepid2_PointToolsDef.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,9 @@ getWarpBlendLatticeLine( Kokkos::DynRankView<pointValueType,pointPropertie
// this should be fixed after view and dynrankview is interoperatable
auto z = Kokkos::DynRankView<pointValueType,Kokkos::HostSpace>(zHost.data() + offset, np-offset);

Kokkos::deep_copy(pts, z);
const auto common_range = range_type(0, std::min(pts.extent(0), z.extent(0)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kyungjoo-kim It seems to me that a cleaner way to fix this is to declare z as
auto z = Kokkos::DynRankView<pointValueType,Kokkos::HostSpace>(zHost.data() + offset, s);
No reason to create it larger than s = np-2*offset and then do a subview of it. This would avoid two subviews.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. It will be fixed in the next commit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were wrong. The common range is necessary. I printed both dimensions, and in some cases pts is larger while in others z is larger. I did not dig any further, as the current solution still resolves the deep copy mismatch issue.

Kokkos::deep_copy(Kokkos::subview(pts, common_range),
Kokkos::subview(z, common_range));
}
}

Expand Down
8 changes: 4 additions & 4 deletions packages/kokkos-kernels/.jenkins/nightly.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ pipeline {
agent none

stages {
stage('HIP-ROCm-3.10-C++14') {
stage('HIP-ROCm-4.2-C++14') {
agent {
dockerfile {
filename 'Dockerfile.hip'
dir 'scripts/docker'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:3.10'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:4.2'
label 'rocm-docker && vega'
args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
}
Expand All @@ -18,18 +18,18 @@ pipeline {
mkdir build && cd build && \
cmake \
-DCMAKE_CXX_COMPILER=hipcc \
-DCMAKE_CXX_EXTENSIONS=OFF \
-DKokkos_ENABLE_HIP=ON \
-DKokkos_ARCH_VEGA906=ON \
.. && \
make -j8 && make install && \
cd ../.. && rm -rf kokkos'''
sh '''rm -rf build && mkdir -p build && cd build && \
cmake \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_CXX_COMPILER=hipcc \
-DCMAKE_CXX_EXTENSIONS=OFF \
-DKokkosKernels_ENABLE_TESTS=ON \
-DKokkosKernels_ENABLE_EXAMPLES=ON \
-DKokkos_ENABLE_HIP=ON \
-DKokkosKernels_INST_DOUBLE=ON \
-DKokkosKernels_INST_ORDINAL_INT=ON \
-DKokkosKernels_INST_OFFSET_INT=ON \
Expand Down
3 changes: 3 additions & 0 deletions packages/kokkos-kernels/BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ endif()
* KokkosKernels_ENABLE_TESTS: BOOL
* Whether to build tests.
* Default: OFF
* KokkosKernels_ENABLE_DOCS: BOOL
* Whether to build docs.
* Default: OFF
* KokkosKernels_ENABLE_TPL_BLAS: BOOL
* Whether to enable BLAS
* Default: OFF
Expand Down
Loading