Skip to content

Commit

Permalink
Port ViewAllocate tests
Browse files Browse the repository at this point in the history
  • Loading branch information
cz4rs committed Feb 6, 2023
1 parent 66e53a9 commit 7c9f640
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 101 deletions.
13 changes: 2 additions & 11 deletions core/perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,6 @@ IF(KOKKOS_ENABLE_TESTS)
PerfTestHexGrad.cpp
PerfTest_CustomReduction.cpp
PerfTest_ExecSpacePartitioning.cpp
PerfTest_ViewAllocate.cpp
PerfTest_ViewFill_123.cpp
PerfTest_ViewFill_45.cpp
PerfTest_ViewFill_6.cpp
PerfTest_ViewFill_7.cpp
PerfTest_ViewFill_8.cpp
PerfTest_ViewResize_123.cpp
PerfTest_ViewResize_45.cpp
PerfTest_ViewResize_6.cpp
PerfTest_ViewResize_7.cpp
PerfTest_ViewResize_8.cpp
)

IF(Kokkos_ENABLE_OPENMPTARGET)
Expand Down Expand Up @@ -179,6 +168,8 @@ ENDFUNCTION()
SET(
BENCHMARK_SOURCES
BenchmarkMain.cpp
Benchmark_Context.cpp
PerfTest_ViewAllocate.cpp
PerfTest_ViewCopy_a123.cpp
PerfTest_ViewCopy_b123.cpp
PerfTest_ViewCopy_c123.cpp
Expand Down
284 changes: 194 additions & 90 deletions core/perf_test/PerfTest_ViewAllocate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,119 +15,223 @@
//@HEADER

#include <Kokkos_Core.hpp>
#include <gtest/gtest.h>
#include <cstdio>
#include <PerfTest_Category.hpp>
#include <benchmark/benchmark.h>
#include <Benchmark_Context.hpp>

namespace Test {

// Base extent handed to every registered benchmark through ->Arg(N).
// Each benchmark body picks its per-dimension extents so that their product
// is N^8 elements.
static constexpr int N = 10;

// Records one manually timed iteration and publishes size/throughput counters.
//
// state: benchmark state; its first range argument is read as the base extent.
// time:  elapsed seconds supplied by the caller for the current iteration.
//
// The payload is computed as range(0)^8 elements of 8 bytes each, expressed in
// MB (1024-based). It is stored under the "MB" counter; the same payload
// divided by 1024 and by `time` is stored under the key returned by
// KokkosBenchmark::benchmark_fom("GB/s").
void report_results_allocate(benchmark::State& state, double time) {
  state.SetIterationTime(time);

  const auto element_count = std::pow(state.range(0), 8);
  const auto megabytes     = 1.0 * element_count * 8 / 1024 / 1024;
  const auto gb_per_second = megabytes / 1024 / time;

  // Both counters share the same flags and the same 1024-based unit scaling.
  const auto as_binary_counter = [](double value) {
    return benchmark::Counter(value, benchmark::Counter::kDefaults,
                              benchmark::Counter::OneK::kIs1024);
  };

  state.counters["MB"] = as_binary_counter(megabytes);
  state.counters[KokkosBenchmark::benchmark_fom("GB/s")] =
      as_binary_counter(gb_per_second);
}

// NOTE(review): this span appears to interleave two versions of the code (it
// looks like a rendered commit diff without +/- markers): the header and
// locals of the former run_allocateview_tests(N, R) driver and the ported
// ViewAllocate_Rank1 benchmark share the single template line below.
template <class Layout>
// Former driver header: derives N1..N8 as powers of N and declares one timing
// slot per rank; only time_raw receives an initial value.
void run_allocateview_tests(int N, int R) {
const int N1 = N;
const int N2 = N * N;
const int N3 = N2 * N;
const int N4 = N2 * N2;
const int N8 = N4 * N4;

double time1, time2, time3, time4, time5, time6, time7, time8,
time_raw = 100000.0;
{
// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-1
// View<double*, Layout> labelled "A1" with N8 elements, and passes the elapsed
// seconds to report_results_allocate while the view is still in scope.
static void ViewAllocate_Rank1(benchmark::State& state) {
// std::pow yields a floating-point value that is converted to int here.
// NOTE(review): assumes the result is exact for the registered argument.
const int N8 = std::pow(state.range(0), 8);

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time1, which are not declared
// in this function; they look like leftovers of the former driver.
for (int r = 0; r < R; r++) {
Kokkos::View<double*, Layout> a("A1", N8);
}
time1 = timer.seconds() / R;
Kokkos::View<double*, Layout> a("A1", N8);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-2
// View<double**, Layout> labelled "A2" with extents N4 x N4, and passes the
// elapsed seconds to report_results_allocate while the view is still in scope.
template <class Layout>
static void ViewAllocate_Rank2(benchmark::State& state) {
// std::pow yields a floating-point value that is converted to int here.
// NOTE(review): assumes the result is exact for the registered argument.
const int N4 = std::pow(state.range(0), 4);

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time2, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double**, Layout> a("A2", N4, N4);
}
time2 = timer.seconds() / R;
Kokkos::View<double**, Layout> a("A2", N4, N4);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-3
// View<double***, Layout> labelled "A3" with extents N3 x N3 x N2, and passes
// the elapsed seconds to report_results_allocate while the view is in scope.
template <class Layout>
static void ViewAllocate_Rank3(benchmark::State& state) {
// std::pow yields floating-point values that are converted to int here.
// NOTE(review): assumes the results are exact for the registered argument.
const int N2 = std::pow(state.range(0), 2);
const int N3 = std::pow(state.range(0), 3);

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time3, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double***, Layout> a("A3", N3, N3, N2);
}
time3 = timer.seconds() / R;
Kokkos::View<double***, Layout> a("A3", N3, N3, N2);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-4
// View<double****, Layout> labelled "A4" with all four extents equal to N2,
// and passes the elapsed seconds to report_results_allocate while the view is
// still in scope.
template <class Layout>
static void ViewAllocate_Rank4(benchmark::State& state) {
// std::pow yields a floating-point value that is converted to int here.
// NOTE(review): assumes the result is exact for the registered argument.
const int N2 = std::pow(state.range(0), 2);

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time4, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double****, Layout> a("A4", N2, N2, N2, N2);
}
time4 = timer.seconds() / R;
Kokkos::View<double****, Layout> a("A4", N2, N2, N2, N2);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-5
// View<double*****, Layout> labelled "A5" with extents N2, N2, N1, N1, N2, and
// passes the elapsed seconds to report_results_allocate while the view is
// still in scope.
template <class Layout>
static void ViewAllocate_Rank5(benchmark::State& state) {
// N1 is the benchmark argument narrowed to int; N2 is its square.
const int N1 = state.range(0);
const int N2 = N1 * N1;

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time5, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double*****, Layout> a("A5", N2, N2, N1, N1, N2);
}
time5 = timer.seconds() / R;
Kokkos::View<double*****, Layout> a("A5", N2, N2, N1, N1, N2);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-6
// View<double******, Layout> labelled "A6" with extents N2, N1, N1, N1, N1, N2,
// and passes the elapsed seconds to report_results_allocate while the view is
// still in scope.
template <class Layout>
static void ViewAllocate_Rank6(benchmark::State& state) {
// N1 is the benchmark argument narrowed to int; N2 is its square.
const int N1 = state.range(0);
const int N2 = N1 * N1;

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time6, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double******, Layout> a("A6", N2, N1, N1, N1, N1, N2);
}
time6 = timer.seconds() / R;
Kokkos::View<double******, Layout> a("A6", N2, N1, N1, N1, N1, N2);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-7
// View<double*******, Layout> labelled "A7" whose first extent is N2 and whose
// remaining six extents are N1, and passes the elapsed seconds to
// report_results_allocate while the view is still in scope.
template <class Layout>
static void ViewAllocate_Rank7(benchmark::State& state) {
// N1 is the benchmark argument narrowed to int; N2 is its square.
const int N1 = state.range(0);
const int N2 = N1 * N1;

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next four lines use R and time7, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double*******, Layout> a("A7", N2, N1, N1, N1, N1, N1, N1);
}
time7 = timer.seconds() / R;
Kokkos::View<double*******, Layout> a("A7", N2, N1, N1, N1, N1, N1, N1);
report_results_allocate(state, timer.seconds());
}
{
}

// Benchmark: every iteration starts a Kokkos::Timer, constructs a rank-8
// View<double********, Layout> labelled "A8" with all eight extents equal to
// N1, and passes the elapsed seconds to report_results_allocate while the view
// is still in scope.
template <class Layout>
static void ViewAllocate_Rank8(benchmark::State& state) {
// N1 is the benchmark argument narrowed to int.
const int N1 = state.range(0);

for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the next five lines use R and time8, which are not declared
// in this function; they look like leftovers of an earlier, loop-based version.
for (int r = 0; r < R; r++) {
Kokkos::View<double********, Layout> a("A8", N1, N1, N1, N1, N1, N1, N1,
N1);
}
time8 = timer.seconds() / R;
Kokkos::View<double********, Layout> a("A8", N1, N1, N1, N1, N1, N1, N1,
N1);
report_results_allocate(state, timer.seconds());
}
// NOTE(review): this preprocessor guard and the empty braces after it do not
// belong to the benchmark above; they appear to be leftovers of the earlier
// version, which guarded its raw-allocation section the same way.
#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
{
}

// Benchmark for a raw allocation of N8 doubles: every iteration starts a
// Kokkos::Timer, allocates sizeof(double) * N8 bytes with Kokkos::kokkos_malloc,
// writes 0.0 to each element in a parallel_for, fences, frees the buffer, and
// then passes the elapsed seconds to report_results_allocate. The free happens
// before the elapsed time is read, so it is part of the measured interval.
// The Layout template parameter is not referenced in the body.
template <class Layout>
static void ViewAllocate_Raw(benchmark::State& state) {
// std::pow yields a floating-point value that is converted to int here.
// NOTE(review): assumes the result is exact for the registered argument.
const int N8 = std::pow(state.range(0), 8);
for (auto _ : state) {
Kokkos::Timer timer;
// NOTE(review): the loop below uses R and time_raw, which are not declared in
// this function; it looks like a leftover of an earlier, loop-based version
// that performs the same malloc / fill / fence / free sequence R times.
for (int r = 0; r < R; r++) {
double* a_ptr =
static_cast<double*>(Kokkos::kokkos_malloc("A", sizeof(double) * N8));
Kokkos::parallel_for(
N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 0.0; });
Kokkos::fence();
Kokkos::kokkos_free(a_ptr);
}
time_raw = timer.seconds() / R;
double* a_ptr =
static_cast<double*>(Kokkos::kokkos_malloc("A", sizeof(double) * N8));
Kokkos::parallel_for(
N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 0.0; });
Kokkos::fence();
Kokkos::kokkos_free(a_ptr);
report_results_allocate(state, timer.seconds());
}
#endif
// NOTE(review): everything from here to the closing brace appears to be the
// reporting tail of the earlier version: it computes the payload in MB from
// N8 and prints seconds, MB and GB/s for the raw case and for ranks 1-8 using
// the time_raw / time1..time8 variables.
double size = 1.0 * N8 * 8 / 1024 / 1024;
printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
size / 1024 / time_raw);
printf(" Rank1: %lf s %lf MB %lf GB/s\n", time1, size,
size / 1024 / time1);
printf(" Rank2: %lf s %lf MB %lf GB/s\n", time2, size,
size / 1024 / time2);
printf(" Rank3: %lf s %lf MB %lf GB/s\n", time3, size,
size / 1024 / time3);
printf(" Rank4: %lf s %lf MB %lf GB/s\n", time4, size,
size / 1024 / time4);
printf(" Rank5: %lf s %lf MB %lf GB/s\n", time5, size,
size / 1024 / time5);
printf(" Rank6: %lf s %lf MB %lf GB/s\n", time6, size,
size / 1024 / time6);
printf(" Rank7: %lf s %lf MB %lf GB/s\n", time7, size,
size / 1024 / time7);
printf(" Rank8: %lf s %lf MB %lf GB/s\n", time8, size,
size / 1024 / time8);
}

// gtest entry point: prints a header and calls run_allocateview_tests with
// N = 10 and R = 1, first for Kokkos::LayoutLeft and then for
// Kokkos::LayoutRight.
// NOTE(review): the BENCHMARK registrations that follow cover the same two
// layouts; confirm whether this gtest case is still meant to be built.
TEST(default_exec, ViewCreate) {
printf("Create View Performance for LayoutLeft:\n");
run_allocateview_tests<Kokkos::LayoutLeft>(10, 1);
printf("Create View Performance for LayoutRight:\n");
run_allocateview_tests<Kokkos::LayoutRight>(10, 1);
}
// Benchmark registrations: each rank is registered once for LayoutLeft and
// once for LayoutRight. Every registration labels its single argument "N",
// passes the namespace-level constant N, and selects manual timing, which
// pairs with the state.SetIterationTime() call in report_results_allocate.
BENCHMARK(ViewAllocate_Rank1<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank1<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank2<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank2<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank3<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank3<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank4<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank4<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank5<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank5<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank6<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank6<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank7<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank7<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank8<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Rank8<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();

// The raw-allocation benchmark body uses KOKKOS_LAMBDA, so it is registered
// only when CUDA lambdas are enabled or CUDA is not enabled at all.
#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
->UseManualTime();

BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test

0 comments on commit 7c9f640

Please sign in to comment.