Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions #2828

Merged
merged 2 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmake/compile_definitions/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ configure_file("${CMAKE_SOURCE_DIR}/src/version.h.in" version.h @ONLY)
include_directories("${CMAKE_CURRENT_BINARY_DIR}") # required for importing version.h

set(SUNSHINE_TARGET_FILES
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Input.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Rtsp.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/RtspParser.c"
Expand Down Expand Up @@ -108,6 +106,8 @@ set(SUNSHINE_TARGET_FILES
"${CMAKE_SOURCE_DIR}/src/round_robin.h"
"${CMAKE_SOURCE_DIR}/src/stat_trackers.h"
"${CMAKE_SOURCE_DIR}/src/stat_trackers.cpp"
"${CMAKE_SOURCE_DIR}/src/rswrapper.h"
"${CMAKE_SOURCE_DIR}/src/rswrapper.c"
ReenigneArcher marked this conversation as resolved.
Show resolved Hide resolved
${PLATFORM_TARGET_FILES})

if(NOT SUNSHINE_ASSETS_DIR_DEF)
Expand Down
4 changes: 2 additions & 2 deletions cmake/targets/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ set_source_files_properties("${CMAKE_SOURCE_DIR}/src/upnp.cpp"
PROPERTIES COMPILE_FLAGS -Wno-pedantic)

# third-party/nanors
set_source_files_properties("${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
set_source_files_properties("${CMAKE_SOURCE_DIR}/src/rswrapper.c"
DIRECTORY "${CMAKE_SOURCE_DIR}" "${TEST_DIR}"
PROPERTIES COMPILE_FLAGS "-include deps/obl/autoshim.h -ftree-vectorize")
PROPERTIES COMPILE_FLAGS "-ftree-vectorize -funroll-loops")

# third-party/ViGEmClient
set(VIGEM_COMPILE_FLAGS "")
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "video.h"

extern "C" {
#include <rs.h>
#include "rswrapper.h"
}

using namespace std::literals;
Expand Down
157 changes: 157 additions & 0 deletions src/rswrapper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/**
* @file src/rswrapper.c
* @brief Wrappers for nanors vectorization with different ISA options
*/

// _FORTIY_SOURCE can cause some versions of GCC to try to inline
// memset() with incompatible target options when compiling rs.c
#ifdef _FORTIFY_SOURCE
#undef _FORTIFY_SOURCE
#endif

// The assert() function is decorated with __cold on macOS which
// is incompatible with Clang's target multiversioning feature
#ifndef NDEBUG
#define NDEBUG
#endif

#define DECORATE_FUNC_I(a, b) a##b
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b)

// Append an ISA suffix to the public RS API
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX)
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX)
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX)
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX)
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX)
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX)

// Append an ISA suffix to internal functions to prevent multiple definition errors
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX)
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX)
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX)
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX)
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX)
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX)
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX)
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX)
#define scal DECORATE_FUNC(scal, ISA_SUFFIX)
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX)
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX)

#if defined(__x86_64__) || defined(__i386__)

// Compile a variant for SSSE3
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("ssse3")
#endif
#define ISA_SUFFIX _ssse3
#define OBLAS_SSE3
#include "../third-party/nanors/rs.c"
#undef OBLAS_SSE3
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

// Compile a variant for AVX2
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("avx2")
#endif
#define ISA_SUFFIX _avx2
#define OBLAS_AVX2
#include "../third-party/nanors/rs.c"
#undef OBLAS_AVX2
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

// Compile a variant for AVX512BW
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("avx512f,avx512bw")
#endif
#define ISA_SUFFIX _avx512
#define OBLAS_AVX512
#include "../third-party/nanors/rs.c"
#undef OBLAS_AVX512
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

#endif

// Compile a default variant
#define ISA_SUFFIX _def
#include "../third-party/nanors/deps/obl/autoshim.h"
#include "../third-party/nanors/rs.c"
#undef ISA_SUFFIX

#undef reed_solomon_init
#undef reed_solomon_new
#undef reed_solomon_new_static
#undef reed_solomon_release
#undef reed_solomon_decode
#undef reed_solomon_encode

#include "rswrapper.h"

reed_solomon_new_t reed_solomon_new_fn;
reed_solomon_release_t reed_solomon_release_fn;
reed_solomon_encode_t reed_solomon_encode_fn;
reed_solomon_decode_t reed_solomon_decode_fn;

/**
* @brief This initializes the RS function pointers to the best vectorized version available.
* @details The streaming code will directly invoke these function pointers during encoding.
*/
void
reed_solomon_init(void) {
#if defined(__x86_64__) || defined(__i386__)
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) {
reed_solomon_new_fn = reed_solomon_new_avx512;
reed_solomon_release_fn = reed_solomon_release_avx512;
reed_solomon_encode_fn = reed_solomon_encode_avx512;
reed_solomon_decode_fn = reed_solomon_decode_avx512;
reed_solomon_init_avx512();

Check warning on line 132 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L128-L132

Added lines #L128 - L132 were not covered by tests
}
else if (__builtin_cpu_supports("avx2")) {
reed_solomon_new_fn = reed_solomon_new_avx2;
reed_solomon_release_fn = reed_solomon_release_avx2;
reed_solomon_encode_fn = reed_solomon_encode_avx2;
reed_solomon_decode_fn = reed_solomon_decode_avx2;
reed_solomon_init_avx2();
}
else if (__builtin_cpu_supports("ssse3")) {
reed_solomon_new_fn = reed_solomon_new_ssse3;
reed_solomon_release_fn = reed_solomon_release_ssse3;
reed_solomon_encode_fn = reed_solomon_encode_ssse3;
reed_solomon_decode_fn = reed_solomon_decode_ssse3;
reed_solomon_init_ssse3();

Check warning on line 146 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L142-L146

Added lines #L142 - L146 were not covered by tests
}
else
#endif
{
reed_solomon_new_fn = reed_solomon_new_def;
reed_solomon_release_fn = reed_solomon_release_def;
reed_solomon_encode_fn = reed_solomon_encode_def;
reed_solomon_decode_fn = reed_solomon_decode_def;
reed_solomon_init_def();
}
}
32 changes: 32 additions & 0 deletions src/rswrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* @file src/rswrapper.h
* @brief Wrappers for nanors vectorization
* @details This is a drop-in replacement for nanors rs.h
*/
#pragma once

#include <stdint.h>

typedef struct _reed_solomon reed_solomon;

typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards);
typedef void (*reed_solomon_release_t)(reed_solomon *rs);
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);

extern reed_solomon_new_t reed_solomon_new_fn;
extern reed_solomon_release_t reed_solomon_release_fn;
extern reed_solomon_encode_t reed_solomon_encode_fn;
extern reed_solomon_decode_t reed_solomon_decode_fn;

#define reed_solomon_new reed_solomon_new_fn
#define reed_solomon_release reed_solomon_release_fn
#define reed_solomon_encode reed_solomon_encode_fn
#define reed_solomon_decode reed_solomon_decode_fn

/**
* @brief This initializes the RS function pointers to the best vectorized version available.
* @details The streaming code will directly invoke these function pointers during encoding.
*/
void
reed_solomon_init(void);
7 changes: 4 additions & 3 deletions src/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#include <boost/endian/arithmetic.hpp>

extern "C" {
// clang-format off
#include <moonlight-common-c/src/Limelight-internal.h>
#include <rs.h>
#include "rswrapper.h"
// clang-format on
}

#include "config.h"
Expand Down Expand Up @@ -236,7 +238,6 @@ namespace stream {
}
constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400;

using rh_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
using video_packet_t = util::c_ptr<video_packet_raw_t>;
using audio_packet_t = util::c_ptr<audio_packet_raw_t>;
using audio_fec_packet_t = util::c_ptr<audio_fec_packet_raw_t>;
Expand Down Expand Up @@ -621,7 +622,7 @@ namespace stream {
}

namespace fec {
using rs_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
using rs_t = util::safe_ptr<reed_solomon, [](reed_solomon *rs) { reed_solomon_release(rs); }>;

struct fec_t {
size_t data_shards;
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/test_rswrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* @file tests/unit/test_rswrapper.cpp
* @brief Test src/rswrapper.*
*/

extern "C" {
#include <src/rswrapper.h>
}

#include <tests/conftest.cpp>

TEST(ReedSolomonWrapperTests, InitTest) {
reed_solomon_init();

// Ensure all function pointers were populated
ASSERT_NE(reed_solomon_new, nullptr);
ASSERT_NE(reed_solomon_release, nullptr);
ASSERT_NE(reed_solomon_encode, nullptr);
ASSERT_NE(reed_solomon_decode, nullptr);
}

TEST(ReedSolomonWrapperTests, EncodeTest) {
reed_solomon_init();

auto rs = reed_solomon_new(1, 1);
ASSERT_NE(rs, nullptr);

uint8_t dataShard[16] = {};
uint8_t fecShard[16] = {};

// If we picked the incorrect ISA in our wrapper, we should crash here
uint8_t *shardPtrs[2] = { dataShard, fecShard };
auto ret = reed_solomon_encode(rs, shardPtrs, 2, sizeof(dataShard));
ASSERT_EQ(ret, 0);

reed_solomon_release(rs);
}
2 changes: 1 addition & 1 deletion third-party/nanors
Submodule nanors updated 1 files
+1 −1 deps/obl/oblas_lite.c
Loading