Skip to content

Commit

Permalink
[TEST] Adds performance benchmarks for simd functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
rrahn committed Jul 15, 2019
1 parent b6ab638 commit 44d82c9
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 1 deletion.
3 changes: 2 additions & 1 deletion include/seqan3/core/simd/view_to_simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ class view_to_simd : public std::ranges::view_interface<view_to_simd<urng_t, sim
static constexpr bool fast_load = std::ranges::ContiguousRange<inner_range_t> &&
std::SizedSentinel<std::ranges::iterator_t<inner_range_t>,
std::ranges::sentinel_t<inner_range_t>> &&
sizeof(alphabet_rank_t<value_type_t<inner_range_t>>) == 1;
sizeof(alphabet_rank_t<value_type_t<inner_range_t>>) == 1 &&
sizeof(scalar_type) <= 2; // Micro benchmarks suggest that int8_t or int16_t scalar types perform best.

//!\brief The size of one chunk. Equals the number of elements in the vector.
static constexpr int8_t chunk_size = simd_traits<simd_t>::length;
Expand Down
2 changes: 2 additions & 0 deletions test/performance/core/simd/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
seqan3_benchmark(simd_algorithm_benchmark.cpp)
seqan3_benchmark(view_to_simd_chunk_benchmark.cpp)
107 changes: 107 additions & 0 deletions test/performance/core/simd/simd_algorithm_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

#include <array>
#include <cstdlib>

#include <benchmark/benchmark.h>

#include <seqan3/core/simd/simd_algorithm.hpp>
#include <seqan3/core/simd/simd_traits.hpp>
#include <seqan3/core/simd/simd.hpp>

using namespace seqan3;

// ----------------------------------------------------------------------------
// Helper functions
// ----------------------------------------------------------------------------

inline auto make_matrix()
{
using simd_t = simd_type_t<int8_t>;

std::array<simd_t, simd_traits<simd_t>::length> matrix;
for (size_t i = 0; i < matrix.size(); ++i)
for (size_t j = 0; j < matrix.size(); ++j)
matrix[i][j] = std::rand() % 10;

return matrix;
}

//!\brief Adds up all lanes of the given simd vector and returns the total as a size_t.
template <typename simd_t>
inline auto reduce(simd_t const & vec)
{
    constexpr size_t lane_count = simd_traits<simd_t>::length;

    size_t total = 0;
    size_t lane = 0;
    while (lane < lane_count)
    {
        total += vec[lane];
        ++lane;
    }

    return total;
}

// ----------------------------------------------------------------------------
// Benchmark transpose
// ----------------------------------------------------------------------------

/*!\brief Benchmarks simd::transpose on a square int8_t simd matrix.
 * \param state The Google Benchmark state object driving the measurement loop.
 *
 * \details
 *
 * Every benchmark iteration transposes the same matrix 100 times. The matrix is created once, before the
 * timed loop starts, so that no state.PauseTiming()/state.ResumeTiming() calls are needed inside the
 * measured region: those calls are comparatively heavyweight and would otherwise dominate the time spent in
 * the transposition itself. benchmark::DoNotOptimize makes the matrix observable after every call so that
 * the compiler can neither elide nor merge consecutive transpositions.
 */
static void transpose(benchmark::State& state)
{
    // Input generation happens before the first iteration and is therefore never timed.
    auto matrix = make_matrix();

    for (auto _ : state)
    {
        for (size_t i = 0; i < 100; ++i)
        {
            simd::transpose(matrix);
            benchmark::DoNotOptimize(matrix);
        }
    }

    // Report a checksum over one randomly chosen row; computed after the timed loop has finished.
    state.counters["checksum"] = reduce(matrix[std::rand() % matrix.size()]);
}

BENCHMARK(transpose);

/*!\brief Benchmarks simd::upcast from source_t to target_t.
 * \tparam source_t The simd vector type to convert from.
 * \tparam target_t The simd vector type to convert to.
 * \param state The Google Benchmark state object driving the measurement loop.
 *
 * \details
 *
 * Every benchmark iteration performs 1000 conversions. No state.PauseTiming()/state.ResumeTiming() calls are
 * placed inside the measured region, because their overhead is far larger than a single conversion.
 * Instead, benchmark::DoNotOptimize is applied to the source (so the loop-invariant conversion cannot be
 * hoisted out of the loop) and to the target (so the result cannot be discarded).
 */
template <typename source_t, typename target_t>
static void upcast(benchmark::State& state)
{
    source_t src = simd::iota<source_t>(std::rand() % 100);
    target_t target{};

    for (auto _ : state)
    {
        for (size_t i = 0; i < 1'000; ++i)
        {
            // Treat src as potentially modified so every iteration really performs the conversion.
            benchmark::DoNotOptimize(src);
            target = simd::upcast<target_t>(src);
            benchmark::DoNotOptimize(target);
        }
    }

    // Report a checksum of the last converted vector; computed after the timed loop has finished.
    state.counters["checksum"] = reduce(target);
}

// ----------------------------------------------------------------------------
// Benchmark upcast
// ----------------------------------------------------------------------------

// Each registration instantiates upcast<source_t, target_t>; in every pair the target scalar type is wider
// than the source scalar type (int8_t -> int16_t/int32_t/int64_t, int16_t -> int32_t/int64_t,
// int32_t -> int64_t).
BENCHMARK_TEMPLATE(upcast, simd_type_t<int8_t>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(upcast, simd_type_t<int8_t>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(upcast, simd_type_t<int8_t>, simd_type_t<int64_t>);
BENCHMARK_TEMPLATE(upcast, simd_type_t<int16_t>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(upcast, simd_type_t<int16_t>, simd_type_t<int64_t>);
BENCHMARK_TEMPLATE(upcast, simd_type_t<int32_t>, simd_type_t<int64_t>);

// Expands to the main() function that runs all benchmarks registered in this file.
BENCHMARK_MAIN();
120 changes: 120 additions & 0 deletions test/performance/core/simd/view_to_simd_chunk_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

#include <deque>
#include <iterator>
#include <list>
#include <vector>

#include <benchmark/benchmark.h>

#include <seqan3/alphabet/concept.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/simd/concept.hpp>
#include <seqan3/core/simd/simd_traits.hpp>
#include <seqan3/core/simd/simd.hpp>
#include <seqan3/core/simd/view_to_simd.hpp>
#include <seqan3/range/container/aligned_allocator.hpp>
#include <seqan3/std/ranges>
#include <seqan3/test/performance/sequence_generator.hpp>

using namespace seqan3;

// ============================================================================
// naive implementation
// ============================================================================

/*!\brief Baseline benchmark: converts a set of dna4 sequences into simd vectors with plain nested loops.
 * \tparam container_t The container type used to store each sequence.
 * \tparam simd_t      The simd vector type to fill.
 * \param state The Google Benchmark state object driving the measurement loop.
 *
 * \details
 *
 * One sequence is generated per simd lane. Inside the timed loop, position `pos` of the sequence in lane
 * `lane` is written as its rank to lane `lane` of the `pos`-th simd vector; lanes whose sequence is shorter
 * than `pos` are set to 0. The first lane of every vector is accumulated into the reported "value" counter.
 */
template <typename container_t, typename simd_t>
void to_simd_naive(benchmark::State& state)
{
    constexpr size_t lane_count = simd_traits<simd_t>::length;

    // Preparing the sequences: one container per simd lane.
    std::vector<container_t> data(lane_count);
    for (container_t & sequence : data)
        std::ranges::copy(test::generate_sequence<dna4>(500, 10), std::back_inserter(sequence));

    // The longest sequence determines how many simd vectors are needed.
    size_t longest = 0;
    for (container_t const & sequence : data)
    {
        if (sequence.size() > longest)
            longest = sequence.size();
    }

    std::vector<simd_t, aligned_allocator<simd_t, sizeof(simd_t)>> columns(longest);

    size_t value = 0;
    for (auto _ : state)
    {
        for (size_t pos = 0; pos < longest; ++pos)
        {
            for (size_t lane = 0; lane < lane_count; ++lane)
            {
                if (pos < data[lane].size())
                    columns[pos][lane] = seqan3::to_rank(data[lane][pos]);
                else
                    columns[pos][lane] = 0;
            }
        }

        for (simd_t & column : columns)
            value += column[0];
    }

    state.counters["value"] = value;
}

// runs with ContiguousRange (std::vector stores its elements contiguously)
BENCHMARK_TEMPLATE(to_simd_naive, std::vector<dna4>, simd_type_t<int8_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::vector<dna4>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::vector<dna4>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::vector<dna4>, simd_type_t<int64_t>);

// runs with a random access but non-contiguous range (std::deque)
BENCHMARK_TEMPLATE(to_simd_naive, std::deque<dna4>, simd_type_t<int8_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::deque<dna4>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::deque<dna4>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(to_simd_naive, std::deque<dna4>, simd_type_t<int64_t>);

// ============================================================================
// view implementation
// ============================================================================

/*!\brief Benchmarks the conversion of a set of dna4 sequences into simd vectors via view::to_simd.
 * \tparam container_t The container type used to store each sequence.
 * \tparam simd_t      The simd vector type produced by the view.
 * \param state The Google Benchmark state object driving the measurement loop.
 *
 * \details
 *
 * One sequence is generated per simd lane. Inside the timed loop the view is created and iterated chunk by
 * chunk; the first lane of every simd vector in every chunk is accumulated into the reported "value" counter.
 */
template <typename container_t, typename simd_t>
void to_simd(benchmark::State& state)
{
    constexpr size_t lane_count = simd_traits<simd_t>::length;

    // Preparing the sequences: one container per simd lane.
    std::vector<container_t> data(lane_count);
    for (container_t & sequence : data)
        std::ranges::copy(test::generate_sequence<dna4>(500, 10), std::back_inserter(sequence));

    size_t value = 0;
    for (auto _ : state)
    {
        // The view is constructed inside the timed loop, so its setup is part of the measurement.
        for (auto & chunk : data | view::to_simd<simd_t>)
        {
            for (simd_t const & vec : chunk)
                value += vec[0];
        }
    }

    state.counters["value"] = value;
}

// runs with ContiguousRange (std::vector stores its elements contiguously)
BENCHMARK_TEMPLATE(to_simd, std::vector<dna4>, simd_type_t<int8_t>);
BENCHMARK_TEMPLATE(to_simd, std::vector<dna4>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(to_simd, std::vector<dna4>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(to_simd, std::vector<dna4>, simd_type_t<int64_t>);

// runs with a random access but non-contiguous range (std::deque)
BENCHMARK_TEMPLATE(to_simd, std::deque<dna4>, simd_type_t<int8_t>);
BENCHMARK_TEMPLATE(to_simd, std::deque<dna4>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(to_simd, std::deque<dna4>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(to_simd, std::deque<dna4>, simd_type_t<int64_t>);

// runs without ContiguousRange (std::list is a node-based, bidirectional-only container)
BENCHMARK_TEMPLATE(to_simd, std::list<dna4>, simd_type_t<int8_t>);
BENCHMARK_TEMPLATE(to_simd, std::list<dna4>, simd_type_t<int16_t>);
BENCHMARK_TEMPLATE(to_simd, std::list<dna4>, simd_type_t<int32_t>);
BENCHMARK_TEMPLATE(to_simd, std::list<dna4>, simd_type_t<int64_t>);

// ============================================================================
// run
// ============================================================================

// Expands to the main() function that runs all benchmarks registered in this file.
BENCHMARK_MAIN();

0 comments on commit 44d82c9

Please sign in to comment.