Skip to content

Commit

Permalink
Merge pull request #5964 from PhilMiller/cuda-lambda-default
Browse files Browse the repository at this point in the history
Enable Cuda nvcc -extended-lambda unconditionally
  • Loading branch information
crtrott authored May 4, 2023
2 parents a45cc1e + d251954 commit 945281a
Show file tree
Hide file tree
Showing 21 changed files with 31 additions and 82 deletions.
4 changes: 0 additions & 4 deletions .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ pipeline {
-DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_OPENMP=ON \
.. && \
make -j8 && ctest --verbose'''
Expand Down Expand Up @@ -313,7 +312,6 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_TUNING=ON \
-DKokkos_ARCH_VOLTA70=ON \
.. && \
Expand Down Expand Up @@ -386,7 +384,6 @@ pipeline {
-DKokkos_ENABLE_COMPILER_WARNINGS=ON \
-DKokkos_ENABLE_OPENMP=OFF \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=OFF \
-DKokkos_ENABLE_CUDA_UVM=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
-DKokkos_ENABLE_DEPRECATED_CODE_3=ON \
Expand Down Expand Up @@ -453,7 +450,6 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_LIBDL=OFF \
.. && \
make -j8 && ctest --verbose && \
Expand Down
14 changes: 6 additions & 8 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -665,15 +665,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif

ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
endif

ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
Expand Down
1 change: 0 additions & 1 deletion cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_CUDA_UVM
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
Expand Down
17 changes: 14 additions & 3 deletions cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,21 @@ ENDIF()
#clear anything that might be in the cache
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
# Construct the Makefile options
IF (KOKKOS_ENABLE_CUDA_LAMBDA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
# Extended lambda support was stabilized in nvcc 12
IF(KOKKOS_COMPILER_VERSION_MAJOR EQUAL 11)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-extended-lambda")
ENDIF()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
ENDIF()

IF(DEFINED Kokkos_ENABLE_CUDA_LAMBDA)
IF(Kokkos_ENABLE_CUDA_LAMBDA)
MESSAGE(DEPRECATION "CUDA extended lambda support is now always enabled. The option Kokkos_ENABLE_CUDA_LAMBDA will be removed")
ELSE()
MESSAGE(FATAL_ERROR "Support for disabling CUDA extended lambdas has been removed. Please unset Kokkos_ENABLE_CUDA_LAMBDA, or see #5964 if this is necessary for your application")
ENDIF()
ENDIF()

Expand Down
10 changes: 1 addition & 9 deletions cmake/kokkos_enable_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,6 @@ mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
mark_as_advanced(Kokkos_ENABLE_MDSPAN_EXTERNAL)
mark_as_advanced(Kokkos_ENABLE_IMPL_SKIP_COMPILER_MDSPAN)

IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(CUDA_LAMBDA_DEFAULT ON)
ELSEIF (KOKKOS_ENABLE_CUDA)
SET(CUDA_LAMBDA_DEFAULT ON)
ELSE()
SET(CUDA_LAMBDA_DEFAULT OFF)
ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to activate experimental lambda features")
IF (Trilinos_ENABLE_Kokkos)
SET(COMPLEX_ALIGN_DEFAULT OFF)
ELSE()
Expand Down Expand Up @@ -123,7 +115,7 @@ FUNCTION(check_device_specific_options)
ENDIF()
ENDFUNCTION()

CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)

Expand Down
2 changes: 0 additions & 2 deletions containers/unit_tests/TestErrorReporter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
}
};

#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)
template <typename DeviceType>
struct ErrorReporterDriverUseLambda
: public ErrorReporterDriverBase<DeviceType> {
Expand Down Expand Up @@ -178,7 +177,6 @@ struct ErrorReporterDriverUseLambda
driver_base::check_expectations(reporter_capacity, test_size);
}
};
#endif

#ifdef KOKKOS_ENABLE_OPENMP
struct ErrorReporterDriverNativeOpenMP
Expand Down
14 changes: 0 additions & 14 deletions containers/unit_tests/TestOffsetView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ void test_offsetview_construction() {
ASSERT_EQ(ov.extent(0), 5u);
ASSERT_EQ(ov.extent(1), 5u);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
{
Kokkos::Experimental::OffsetView<Scalar*, Device> offsetV1("OneDOffsetView",
range0);
Expand Down Expand Up @@ -149,7 +148,6 @@ void test_offsetview_construction() {
}

ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView";
#endif

{
offset_view_type ovCopy(ov);
Expand Down Expand Up @@ -184,7 +182,6 @@ void test_offsetview_construction() {
range3_type rangePolicy3DZero(point3_type{{0, 0, 0}},
point3_type{{extent0, extent1, extent2}});

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int view3DSum = 0;
Kokkos::parallel_reduce(
rangePolicy3DZero,
Expand All @@ -207,7 +204,6 @@ void test_offsetview_construction() {

ASSERT_EQ(view3DSum, offsetView3DSum)
<< "construction of OffsetView from View and begins array broken.";
#endif
}
view_type viewFromOV = ov.view();

Expand All @@ -232,7 +228,6 @@ void test_offsetview_construction() {
view_type aView("aView", ov.extent(0), ov.extent(1));
Kokkos::deep_copy(aView, ov);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -242,7 +237,6 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken.";
#endif
}

{ // test view to offsetview deep copy
Expand All @@ -251,7 +245,6 @@ void test_offsetview_construction() {
Kokkos::deep_copy(aView, 99);
Kokkos::deep_copy(ov, aView);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -261,7 +254,6 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken.";
#endif
}
}

Expand Down Expand Up @@ -429,7 +421,6 @@ void test_offsetview_subview() {
ASSERT_EQ(offsetSubview.begin(1), 0);
ASSERT_EQ(offsetSubview.end(1), 9);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
using range_type = Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>,
Kokkos::IndexType<int> >;
using point_type = typename range_type::point_type;
Expand All @@ -455,7 +446,6 @@ void test_offsetview_subview() {
sum);

ASSERT_EQ(sum, 6 * (e0 - b0) * (e1 - b1));
#endif
}

// slice 2
Expand Down Expand Up @@ -552,7 +542,6 @@ void test_offsetview_subview() {
}
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
template <class InputIt, class T, class BinaryOperation>
KOKKOS_INLINE_FUNCTION T std_accumulate(InputIt first, InputIt last, T init,
BinaryOperation op) {
Expand Down Expand Up @@ -655,7 +644,6 @@ void test_offsetview_offsets_rank3() {

ASSERT_EQ(0, errors);
}
#endif

TEST(TEST_CATEGORY, offsetview_construction) {
test_offsetview_construction<int, TEST_EXECSPACE>();
Expand All @@ -669,7 +657,6 @@ TEST(TEST_CATEGORY, offsetview_subview) {
test_offsetview_subview<int, TEST_EXECSPACE>();
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
TEST(TEST_CATEGORY, offsetview_offsets_rank1) {
test_offsetview_offsets_rank1<TEST_EXECSPACE>();
}
Expand All @@ -681,7 +668,6 @@ TEST(TEST_CATEGORY, offsetview_offsets_rank2) {
TEST(TEST_CATEGORY, offsetview_offsets_rank3) {
test_offsetview_offsets_rank3<TEST_EXECSPACE>();
}
#endif

} // namespace Test

Expand Down
2 changes: 0 additions & 2 deletions core/perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,10 @@ KOKKOS_ADD_BENCHMARK(
SOURCES ${BENCHMARK_SOURCES}
)

IF(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA)
KOKKOS_ADD_BENCHMARK(
Benchmark_Atomic_MinMax
SOURCES test_atomic_minmax_simple.cpp
)
ENDIF()

# FIXME_NVHPC
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
Expand Down
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewAllocate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ BENCHMARK(ViewAllocate_Rank8<Kokkos::LayoutRight>)
->Arg(N)
->UseManualTime();

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -227,6 +226,5 @@ BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewCopy_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutLeft, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
Expand All @@ -38,6 +37,5 @@ BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutRight, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
->UseManualTime();
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewFill_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewFill_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -28,6 +27,5 @@ BENCHMARK(ViewFill_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewResize_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -30,6 +29,5 @@ BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutRight>)
->Arg(N)
->UseManualTime()
->Iterations(R);
#endif

} // namespace Test
2 changes: 2 additions & 0 deletions core/src/Kokkos_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,8 @@ static constexpr bool kokkos_omp_on_host() { return false; }

#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
// This was previously defined from the configuration option which was removed
#define KOKKOS_ENABLE_CUDA_LAMBDA
#endif

#define KOKKOS_INVALID_INDEX (~std::size_t(0))
Expand Down
6 changes: 0 additions & 6 deletions core/src/setup/Kokkos_Setup_Cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,9 @@
#error "Cuda device capability >= 3.0 is required."
#endif

#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
#define KOKKOS_LAMBDA [=] __host__ __device__

#define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__

#else // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)

#define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#define KOKKOS_IMPL_FORCEINLINE __forceinline__
#define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline
Expand Down
6 changes: 0 additions & 6 deletions core/unit_test/TestCompilerMacros.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,9 @@
#error "Only one host compiler macro can be defined"
#endif

#if defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA shouldn't be defined"
#endif
#else
#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA should be defined"
#endif
#endif

namespace TestCompilerMacros {

Expand Down
2 changes: 0 additions & 2 deletions core/unit_test/TestMDRangeReduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ TEST(TEST_CATEGORY, mdrange_parallel_reduce_primitive_types) {
#if defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "FIXME OPENMPTARGET Tests of MDRange reduce over values "
"smaller than int would fail";
#elif defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
GTEST_SKIP() << "Skipped ENABLE_CUDA_LAMBDA";
#else
for (int bound : {0, 1, 7, 32, 65, 7000}) {
for (int k = 0; k < bound; ++k) {
Expand Down
6 changes: 0 additions & 6 deletions core/unit_test/TestTeamMDRange.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,6 @@ struct TestTeamMDParallelFor {
}
};

// If KOKKOS_ENABLE_CUDA_LAMBDA is off, extended lambdas used in parallel_for
// and parallel_reduce in these tests will not compile correctly
#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)

template <typename ExecSpace>
struct TestTeamThreadMDRangeParallelFor : public TestTeamMDParallelFor {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
Expand Down Expand Up @@ -1963,7 +1959,5 @@ TEST(TEST_CATEGORY, TeamVectorMDRangeParallelReduce) {
test_parallel_reduce_for_8D_TeamVectorMDRange<Right>(smallDims);
}

#endif

} // namespace TeamMDRange
} // namespace Test
Loading

0 comments on commit 945281a

Please sign in to comment.