forked from LizardByte/Sunshine
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions (Liz…
…ardByte#2828) * Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions * Update nanors to fix AVX-512 memory corruption
- Loading branch information
Showing
8 changed files
with
236 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
/** | ||
* @file src/rswrapper.c | ||
* @brief Wrappers for nanors vectorization with different ISA options | ||
*/ | ||
|
||
// _FORTIY_SOURCE can cause some versions of GCC to try to inline | ||
// memset() with incompatible target options when compiling rs.c | ||
#ifdef _FORTIFY_SOURCE | ||
#undef _FORTIFY_SOURCE | ||
#endif | ||
|
||
// The assert() function is decorated with __cold on macOS which | ||
// is incompatible with Clang's target multiversioning feature | ||
#ifndef NDEBUG | ||
#define NDEBUG | ||
#endif | ||
|
||
#define DECORATE_FUNC_I(a, b) a##b | ||
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b) | ||
|
||
// Append an ISA suffix to the public RS API | ||
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX) | ||
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX) | ||
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX) | ||
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX) | ||
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX) | ||
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX) | ||
|
||
// Append an ISA suffix to internal functions to prevent multiple definition errors | ||
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX) | ||
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX) | ||
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX) | ||
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX) | ||
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX) | ||
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX) | ||
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX) | ||
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX) | ||
#define scal DECORATE_FUNC(scal, ISA_SUFFIX) | ||
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX) | ||
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX) | ||
|
||
#if defined(__x86_64__) || defined(__i386__) | ||
|
||
// Compile a variant for SSSE3 | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("ssse3") | ||
#endif | ||
#define ISA_SUFFIX _ssse3 | ||
#define OBLAS_SSE3 | ||
#include "../third-party/nanors/rs.c" | ||
#undef OBLAS_SSE3 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
// Compile a variant for AVX2 | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("avx2") | ||
#endif | ||
#define ISA_SUFFIX _avx2 | ||
#define OBLAS_AVX2 | ||
#include "../third-party/nanors/rs.c" | ||
#undef OBLAS_AVX2 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
// Compile a variant for AVX512BW | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("avx512f,avx512bw") | ||
#endif | ||
#define ISA_SUFFIX _avx512 | ||
#define OBLAS_AVX512 | ||
#include "../third-party/nanors/rs.c" | ||
#undef OBLAS_AVX512 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
#endif | ||
|
||
// Compile a default variant | ||
#define ISA_SUFFIX _def | ||
#include "../third-party/nanors/deps/obl/autoshim.h" | ||
#include "../third-party/nanors/rs.c" | ||
#undef ISA_SUFFIX | ||
|
||
#undef reed_solomon_init | ||
#undef reed_solomon_new | ||
#undef reed_solomon_new_static | ||
#undef reed_solomon_release | ||
#undef reed_solomon_decode | ||
#undef reed_solomon_encode | ||
|
||
#include "rswrapper.h" | ||
|
||
reed_solomon_new_t reed_solomon_new_fn; | ||
reed_solomon_release_t reed_solomon_release_fn; | ||
reed_solomon_encode_t reed_solomon_encode_fn; | ||
reed_solomon_decode_t reed_solomon_decode_fn; | ||
|
||
/** | ||
* @brief This initializes the RS function pointers to the best vectorized version available. | ||
* @details The streaming code will directly invoke these function pointers during encoding. | ||
*/ | ||
void | ||
reed_solomon_init(void) { | ||
#if defined(__x86_64__) || defined(__i386__) | ||
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) { | ||
reed_solomon_new_fn = reed_solomon_new_avx512; | ||
reed_solomon_release_fn = reed_solomon_release_avx512; | ||
reed_solomon_encode_fn = reed_solomon_encode_avx512; | ||
reed_solomon_decode_fn = reed_solomon_decode_avx512; | ||
reed_solomon_init_avx512(); | ||
} | ||
else if (__builtin_cpu_supports("avx2")) { | ||
reed_solomon_new_fn = reed_solomon_new_avx2; | ||
reed_solomon_release_fn = reed_solomon_release_avx2; | ||
reed_solomon_encode_fn = reed_solomon_encode_avx2; | ||
reed_solomon_decode_fn = reed_solomon_decode_avx2; | ||
reed_solomon_init_avx2(); | ||
} | ||
else if (__builtin_cpu_supports("ssse3")) { | ||
reed_solomon_new_fn = reed_solomon_new_ssse3; | ||
reed_solomon_release_fn = reed_solomon_release_ssse3; | ||
reed_solomon_encode_fn = reed_solomon_encode_ssse3; | ||
reed_solomon_decode_fn = reed_solomon_decode_ssse3; | ||
reed_solomon_init_ssse3(); | ||
} | ||
else | ||
#endif | ||
{ | ||
reed_solomon_new_fn = reed_solomon_new_def; | ||
reed_solomon_release_fn = reed_solomon_release_def; | ||
reed_solomon_encode_fn = reed_solomon_encode_def; | ||
reed_solomon_decode_fn = reed_solomon_decode_def; | ||
reed_solomon_init_def(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/** | ||
* @file src/rswrapper.h | ||
* @brief Wrappers for nanors vectorization | ||
* @details This is a drop-in replacement for nanors rs.h | ||
*/ | ||
#pragma once | ||
|
||
#include <stdint.h> | ||
|
||
typedef struct _reed_solomon reed_solomon; | ||
|
||
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards); | ||
typedef void (*reed_solomon_release_t)(reed_solomon *rs); | ||
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs); | ||
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs); | ||
|
||
extern reed_solomon_new_t reed_solomon_new_fn; | ||
extern reed_solomon_release_t reed_solomon_release_fn; | ||
extern reed_solomon_encode_t reed_solomon_encode_fn; | ||
extern reed_solomon_decode_t reed_solomon_decode_fn; | ||
|
||
#define reed_solomon_new reed_solomon_new_fn | ||
#define reed_solomon_release reed_solomon_release_fn | ||
#define reed_solomon_encode reed_solomon_encode_fn | ||
#define reed_solomon_decode reed_solomon_decode_fn | ||
|
||
/** | ||
* @brief This initializes the RS function pointers to the best vectorized version available. | ||
* @details The streaming code will directly invoke these function pointers during encoding. | ||
*/ | ||
void | ||
reed_solomon_init(void); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/** | ||
* @file tests/unit/test_rswrapper.cpp | ||
* @brief Test src/rswrapper.* | ||
*/ | ||
|
||
extern "C" { | ||
#include <src/rswrapper.h> | ||
} | ||
|
||
#include <tests/conftest.cpp> | ||
|
||
TEST(ReedSolomonWrapperTests, InitTest) { | ||
reed_solomon_init(); | ||
|
||
// Ensure all function pointers were populated | ||
ASSERT_NE(reed_solomon_new, nullptr); | ||
ASSERT_NE(reed_solomon_release, nullptr); | ||
ASSERT_NE(reed_solomon_encode, nullptr); | ||
ASSERT_NE(reed_solomon_decode, nullptr); | ||
} | ||
|
||
TEST(ReedSolomonWrapperTests, EncodeTest) { | ||
reed_solomon_init(); | ||
|
||
auto rs = reed_solomon_new(1, 1); | ||
ASSERT_NE(rs, nullptr); | ||
|
||
uint8_t dataShard[16] = {}; | ||
uint8_t fecShard[16] = {}; | ||
|
||
// If we picked the incorrect ISA in our wrapper, we should crash here | ||
uint8_t *shardPtrs[2] = { dataShard, fecShard }; | ||
auto ret = reed_solomon_encode(rs, shardPtrs, 2, sizeof(dataShard)); | ||
ASSERT_EQ(ret, 0); | ||
|
||
reed_solomon_release(rs); | ||
} |