Skip to content

Commit

Permalink
Provide SSSE3, AVX2, and AVX512 optimized Reed-Solomon functions
Browse files Browse the repository at this point in the history
  • Loading branch information
cgutman committed Jul 10, 2024
1 parent 29410c6 commit 66aa3a6
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 7 deletions.
4 changes: 2 additions & 2 deletions cmake/compile_definitions/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ configure_file("${CMAKE_SOURCE_DIR}/src/version.h.in" version.h @ONLY)
include_directories("${CMAKE_CURRENT_BINARY_DIR}") # required for importing version.h

set(SUNSHINE_TARGET_FILES
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
"${CMAKE_SOURCE_DIR}/third-party/nanors/rs.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Input.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/Rtsp.h"
"${CMAKE_SOURCE_DIR}/third-party/moonlight-common-c/src/RtspParser.c"
Expand Down Expand Up @@ -108,6 +106,8 @@ set(SUNSHINE_TARGET_FILES
"${CMAKE_SOURCE_DIR}/src/round_robin.h"
"${CMAKE_SOURCE_DIR}/src/stat_trackers.h"
"${CMAKE_SOURCE_DIR}/src/stat_trackers.cpp"
"${CMAKE_SOURCE_DIR}/src/rswrapper.h"
"${CMAKE_SOURCE_DIR}/src/rswrapper.c"
${PLATFORM_TARGET_FILES})

if(NOT SUNSHINE_ASSETS_DIR_DEF)
Expand Down
4 changes: 2 additions & 2 deletions cmake/targets/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ set_source_files_properties("${CMAKE_SOURCE_DIR}/src/upnp.cpp"
PROPERTIES COMPILE_FLAGS -Wno-pedantic)

# third-party/nanors
set_source_files_properties("${CMAKE_SOURCE_DIR}/third-party/nanors/rs.c"
set_source_files_properties("${CMAKE_SOURCE_DIR}/src/rswrapper.c"
DIRECTORY "${CMAKE_SOURCE_DIR}" "${TEST_DIR}"
PROPERTIES COMPILE_FLAGS "-include deps/obl/autoshim.h -ftree-vectorize")
PROPERTIES COMPILE_FLAGS "-ftree-vectorize -funroll-loops")

# third-party/ViGEmClient
set(VIGEM_COMPILE_FLAGS "")
Expand Down
121 changes: 121 additions & 0 deletions src/rswrapper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/**
* @file src/rswrapper.c
* @brief Wrappers for nanors vectorization with different ISA options
*/

// Two-level token pasting. DECORATE_FUNC lets the preprocessor expand its
// arguments first (so ISA_SUFFIX is replaced by whatever it is currently
// defined as), and only then does DECORATE_FUNC_I glue the two results
// together with ##. Pasting directly in a single macro would produce the
// literal token "<name>ISA_SUFFIX" instead.
#define DECORATE_FUNC_I(a, b) a##b
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b)

// Append an ISA suffix to the public RS API.
// ISA_SUFFIX is intentionally not defined here: it is looked up every time one
// of these names is expanded, so each inclusion of rs.c further down emits its
// own copy of the API (e.g. reed_solomon_new_ssse3, reed_solomon_new_avx2, ...).
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX)
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX)
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX)
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX)
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX)
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX)

// Append an ISA suffix to internal functions to prevent multiple definition errors.
// rs.c is included into this single translation unit several times, so any
// helper it defines would otherwise be defined once per inclusion.
// NOTE(review): this list presumably has to track the helpers defined by
// rs.c and the sources it pulls in - confirm when updating nanors.
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX)
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX)
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX)
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX)
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX)
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX)
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX)
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX)
#define scal DECORATE_FUNC(scal, ISA_SUFFIX)
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX)
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX)

// The ISA-specific variants below are only built when targeting x86 (32- or 64-bit).
// Each variant follows the same recipe:
//   1. push_options + target(...) enables the instruction set for the code that follows,
//   2. ISA_SUFFIX selects the symbol suffix applied by the renaming macros,
//   3. an OBLAS_* macro is defined around the inclusion
//      (NOTE(review): presumably selects the matching code path inside nanors - confirm against its sources),
//   4. rs.c is textually included, producing a full suffixed copy of the library,
//   5. the macros are undefined and pop_options restores the previous target options.
#if defined(__x86_64__) || defined(__i386__)

// Compile a variant for SSSE3 (symbols end in _ssse3)
#pragma GCC push_options
#pragma GCC target("ssse3")
#define ISA_SUFFIX _ssse3
#define OBLAS_SSE3
#include "../third-party/nanors/rs.c"
#undef OBLAS_SSE3
#undef ISA_SUFFIX
#pragma GCC pop_options

// Compile a variant for AVX2 (symbols end in _avx2)
#pragma GCC push_options
#pragma GCC target("avx2")
#define ISA_SUFFIX _avx2
#define OBLAS_AVX2
#include "../third-party/nanors/rs.c"
#undef OBLAS_AVX2
#undef ISA_SUFFIX
#pragma GCC pop_options

// Compile a variant for AVX512BW (symbols end in _avx512)
// Both avx512f and avx512bw are enabled here; reed_solomon_init() checks for
// both at runtime before selecting this variant.
#pragma GCC push_options
#pragma GCC target("avx512f,avx512bw")
#define ISA_SUFFIX _avx512
#define OBLAS_AVX512
#include "../third-party/nanors/rs.c"
#undef OBLAS_AVX512
#undef ISA_SUFFIX
#pragma GCC pop_options

#endif

// Compile a default variant (symbols end in _def).
// This copy is built on every architecture and without any target pragma, so it
// uses whatever instruction set the translation unit itself is compiled for.
// No OBLAS_* macro is defined here; autoshim.h is included instead.
// NOTE(review): autoshim.h presumably picks the OBLAS backend from the
// compiler's predefined macros - confirm against nanors.
#define ISA_SUFFIX _def
#include "../third-party/nanors/deps/obl/autoshim.h"
#include "../third-party/nanors/rs.c"
#undef ISA_SUFFIX

// Drop the renaming macros for the public API. From here on the plain names
// are free again: reed_solomon_init is defined as a real function below, and
// rswrapper.h redefines new/release/encode/decode as aliases of the dispatch pointers.
#undef reed_solomon_init
#undef reed_solomon_new
#undef reed_solomon_new_static
#undef reed_solomon_release
#undef reed_solomon_decode
#undef reed_solomon_encode

#include "rswrapper.h"

// Dispatch pointers declared extern in rswrapper.h.
// They have static storage duration and no initializer, so they are null
// until reed_solomon_init() assigns one of the compiled variants to them.
reed_solomon_new_t reed_solomon_new_fn;
reed_solomon_release_t reed_solomon_release_fn;
reed_solomon_encode_t reed_solomon_encode_fn;
reed_solomon_decode_t reed_solomon_decode_fn;

/**
* @brief This initializes the RS function pointers to the best vectorized version available.
* @details The streaming code will directly invoke these function pointers during encoding.
*/
void
reed_solomon_init(void) {

Check warning on line 89 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L89

Added line #L89 was not covered by tests
#if defined(__x86_64__) || defined(__i386__)
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) {
reed_solomon_new_fn = reed_solomon_new_avx512;
reed_solomon_release_fn = reed_solomon_release_avx512;
reed_solomon_encode_fn = reed_solomon_encode_avx512;
reed_solomon_decode_fn = reed_solomon_decode_avx512;
reed_solomon_init_avx512();

Check warning on line 96 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L92-L96

Added lines #L92 - L96 were not covered by tests
}
else if (__builtin_cpu_supports("avx2")) {
reed_solomon_new_fn = reed_solomon_new_avx2;
reed_solomon_release_fn = reed_solomon_release_avx2;
reed_solomon_encode_fn = reed_solomon_encode_avx2;
reed_solomon_decode_fn = reed_solomon_decode_avx2;
reed_solomon_init_avx2();

Check warning on line 103 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L99-L103

Added lines #L99 - L103 were not covered by tests
}
else if (__builtin_cpu_supports("ssse3")) {
reed_solomon_new_fn = reed_solomon_new_ssse3;
reed_solomon_release_fn = reed_solomon_release_ssse3;
reed_solomon_encode_fn = reed_solomon_encode_ssse3;
reed_solomon_decode_fn = reed_solomon_decode_ssse3;
reed_solomon_init_ssse3();

Check warning on line 110 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L106-L110

Added lines #L106 - L110 were not covered by tests
}
else
#endif
{
reed_solomon_new_fn = reed_solomon_new_def;
reed_solomon_release_fn = reed_solomon_release_def;
reed_solomon_encode_fn = reed_solomon_encode_def;
reed_solomon_decode_fn = reed_solomon_decode_def;
reed_solomon_init_def();

Check warning on line 119 in src/rswrapper.c

View check run for this annotation

Codecov / codecov/patch

src/rswrapper.c#L115-L119

Added lines #L115 - L119 were not covered by tests
}
}
24 changes: 24 additions & 0 deletions src/rswrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * @file src/rswrapper.h
 * @brief Wrappers for nanors vectorization
 * @details Exposes the Reed-Solomon API as function pointers that are bound at
 * runtime by reed_solomon_init(). The macros at the bottom of this file alias
 * the plain reed_solomon_* names to those pointers, so existing call sites keep
 * their spelling.
 *
 * Because of those macros, any header that declares functions with the same
 * plain names must be included before this one, never after.
 */
#pragma once

#include <stdint.h>

// Allow inclusion from C++ without requiring the includer to wrap this header
// in extern "C" itself (wrapping it anyway remains valid).
#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle; the structure is defined by the nanors sources.
typedef struct _reed_solomon reed_solomon;

// Signatures of the entry points that the dispatch pointers refer to.
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards);
typedef void (*reed_solomon_release_t)(reed_solomon *rs);
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);

// Dispatch pointers, defined in src/rswrapper.c.
// They are null until reed_solomon_init() has been called.
extern reed_solomon_new_t reed_solomon_new_fn;
extern reed_solomon_release_t reed_solomon_release_fn;
extern reed_solomon_encode_t reed_solomon_encode_fn;
extern reed_solomon_decode_t reed_solomon_decode_fn;

/**
 * @brief Bind the dispatch pointers above to one of the compiled variants.
 * @details Defined in src/rswrapper.c. Must be called before any of the
 * reed_solomon_* aliases below is used.
 */
void
reed_solomon_init(void);

// Route the plain API names through the dispatch pointers.
#define reed_solomon_new reed_solomon_new_fn
#define reed_solomon_release reed_solomon_release_fn
#define reed_solomon_encode reed_solomon_encode_fn
#define reed_solomon_decode reed_solomon_decode_fn

#ifdef __cplusplus
}
#endif
7 changes: 4 additions & 3 deletions src/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#include <boost/endian/arithmetic.hpp>

extern "C" {
// clang-format off
#include <moonlight-common-c/src/Limelight-internal.h>
#include <rs.h>
#include "rswrapper.h"
// clang-format on
}

#include "config.h"
Expand Down Expand Up @@ -234,7 +236,6 @@ namespace stream {
}
constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400;

using rh_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
using video_packet_t = util::c_ptr<video_packet_raw_t>;
using audio_packet_t = util::c_ptr<audio_packet_raw_t>;
using audio_fec_packet_t = util::c_ptr<audio_fec_packet_raw_t>;
Expand Down Expand Up @@ -619,7 +620,7 @@ namespace stream {
}

namespace fec {
using rs_t = util::safe_ptr<reed_solomon, reed_solomon_release>;
using rs_t = util::safe_ptr<reed_solomon, [](reed_solomon *rs) { reed_solomon_release(rs); }>;

struct fec_t {
size_t data_shards;
Expand Down

0 comments on commit 66aa3a6

Please sign in to comment.