Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve sha256 performance on ppc64 by 4.5x #394

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ endif()
# User-facing feature switches. Each one defaults to OFF, i.e. the
# corresponding feature stays enabled unless the user turns the switch ON.
option(DISABLE_ASM "Disable ASM" OFF)
option(DISABLE_SSSE3 "Disable SSSE3" OFF)
option(DISABLE_AESNI "Disable AES-NI" OFF)
# Opt out of the Altivec-based SHA-2 code path on PPC64.
option(DISABLE_ALTIVEC "Disable PPC64's Altivec registers for SHA-2" OFF)
option(DISABLE_NATIVE_ARCH "Disable the addition of -march=native" OFF)
# Path cache entry for the test data directory; empty by default.
set(CRYPTOPP_DATA_DIR "" CACHE PATH "Crypto++ test data directory")

Expand Down Expand Up @@ -97,6 +98,9 @@ endif()
if(DISABLE_AESNI)
add_definitions(-DCRYPTOPP_DISABLE_AESNI)
endif()
# Forward the DISABLE_ALTIVEC option to the sources as the
# CRYPTOPP_DISABLE_ALTIVEC preprocessor definition.
if(DISABLE_ALTIVEC)
add_definitions(-DCRYPTOPP_DISABLE_ALTIVEC)
endif()
if(NOT CRYPTOPP_DATA_DIR STREQUAL "")
add_definitions(-DCRYPTOPP_DATA_DIR="${CRYPTOPP_DATA_DIR}")
endif()
Expand Down Expand Up @@ -242,6 +246,15 @@ if(MSVC AND NOT DISABLE_ASM)
endif()
endif()

# Build the assembly SHA-256 compression routine on ppc64le unless the user
# opted out through DISABLE_ALTIVEC.
#
# UNAME_MACHINE is expanded inside quotes so that an empty or unset value gives
# a false comparison instead of a malformed if() expression, and
# DISABLE_ALTIVEC is tested by name, which is how if() evaluates booleans.
if("${UNAME_MACHINE}" MATCHES "ppc64le" AND NOT DISABLE_ALTIVEC)
  message(STATUS "Enabling SHA-2 acceleration for ppc64le.")
  # The ASM language has to be enabled for the .s source to be assembled.
  enable_language(ASM)
  set(cryptopp_SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/sha256_compress_ppc.s
    ${cryptopp_SOURCES}
  )
elseif(NOT DISABLE_ALTIVEC)
  # The assembly routine is not part of this build, so make sure the C++
  # sources do not reference it: without this definition a compiler targeting
  # POWER8 would still select the Altivec code path and the link would fail
  # on the missing sha256_compress_ppc symbol.
  add_definitions(-DCRYPTOPP_DISABLE_ALTIVEC)
endif()

#============================================================================
# Compile targets
#============================================================================
Expand Down
18 changes: 18 additions & 0 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ IS_X32 ?= 0
# Set to 1 if you used NASM to build rdrand-{x86|x32|x64}
USE_NASM ?= 0

# Set to 0 to disable the accelerated SHA implementation on PPC64le (defaults to the value of IS_PPC)
USE_ALTIVEC ?= $(IS_PPC)

# Fixup for X32
ifeq ($(IS_X32),1)
IS_X86 = 0
Expand Down Expand Up @@ -480,6 +483,10 @@ OBJS += rdrand-x86.o
endif
endif # Nasm

# Add the assembly SHA-256 compression object to OBJS when USE_ALTIVEC is 1.
ifeq ($(USE_ALTIVEC),1)
OBJS += sha256_compress_ppc.o
endif

# List test.cpp first to tame C++ static initialization problems.
TESTSRCS := adhoc.cpp test.cpp bench1.cpp bench2.cpp validat0.cpp validat1.cpp validat2.cpp validat3.cpp datatest.cpp regtest.cpp fipsalgt.cpp dlltest.cpp
TESTOBJS := $(TESTSRCS:.cpp=.o)
Expand Down Expand Up @@ -774,6 +781,17 @@ rdrand-x32.o: ;
rdrand-x64.o: ;
endif

# PowerPC-only rules for the SHA-256 assembly routine.
ifeq ($(IS_PPC),1)
ifneq ($(USE_ALTIVEC),1)
# PPC without Altivec acceleration: compile sha.cpp with
# CRYPTOPP_DISABLE_ALTIVEC defined so it falls back to the C++ implementation.
sha.o : sha.cpp
	$(CXX) $(strip $(CXXFLAGS)) -c $< -DCRYPTOPP_DISABLE_ALTIVEC
else
# Altivec acceleration requested: assemble the compression routine through
# the C++ compiler driver, using the regular CXXFLAGS.
sha256_compress_ppc.o: sha256_compress_ppc.s
	$(CXX) $(strip $(CXXFLAGS)) -c $<
endif
endif

# Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS
ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),)
ifneq ($(strip $(CRYPTOPP_DATA_DIR)),)
Expand Down
13 changes: 13 additions & 0 deletions config.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,12 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 0
#endif

// CRYPTOPP_BOOL_ALTIVEC_AVAILABLE selects the assembly SHA-256 routine for POWER8.
// It is set to 1 automatically when the compiler targets 64-bit PowerPC with
// POWER8 vector support, unless the user pre-defined the macro or defined
// CRYPTOPP_DISABLE_ALTIVEC. In every other case it is defined to 0, like the
// other CRYPTOPP_BOOL_* feature macros, so that it can always be used in #if
// without relying on the undefined-identifier-is-zero rule.
#if !defined(CRYPTOPP_BOOL_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ALTIVEC)
# if defined(__powerpc64__) && defined(__POWER8_VECTOR__) && __POWER8_VECTOR__ == 1
#  define CRYPTOPP_BOOL_ALTIVEC_AVAILABLE 1
# endif
#endif
#if !defined(CRYPTOPP_BOOL_ALTIVEC_AVAILABLE)
# define CRYPTOPP_BOOL_ALTIVEC_AVAILABLE 0
#endif

// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM)
Expand Down Expand Up @@ -597,6 +603,13 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_ARM64 0
#endif

// CRYPTOPP_BOOL_PPC64LE is 1 only when compiling for a little-endian 64-bit
// PowerPC target with POWER8 vector support enabled, and 0 otherwise.
// The byte-order test is required because __powerpc64__ and __POWER8_VECTOR__
// are also defined when targeting big-endian POWER8.
#if defined(__powerpc64__) && defined(__POWER8_VECTOR__) && __POWER8_VECTOR__ == 1 && \
	(defined(__LITTLE_ENDIAN__) || \
	 (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define CRYPTOPP_BOOL_PPC64LE 1
#else
#define CRYPTOPP_BOOL_PPC64LE 0
#endif

#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)
#if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1))
#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
Expand Down
31 changes: 30 additions & 1 deletion sha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
#endif

NAMESPACE_BEGIN(CryptoPP)
Expand Down Expand Up @@ -518,7 +519,7 @@ void SHA256::InitState(HashWordType *state)
memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
Expand Down Expand Up @@ -878,6 +879,8 @@ void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data,
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
#elif CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_Altivec_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
#endif

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
Expand All @@ -895,6 +898,10 @@ pfnSHAHashBlocks InitializeSHA256HashBlocks()
else
#endif

#if CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
return &SHA256_Altivec_SHA_HashBlocks;
#endif

return &X86_SHA256_HashBlocks;
}

Expand Down Expand Up @@ -1065,6 +1072,13 @@ static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
}
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE

#if CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
// Single-block adapter around SHA256_Altivec_SHA_HashBlocks: forwards the
// state and data pointers with a length of exactly one SHA256::BLOCKSIZE.
static void SHA256_Altivec_SHA_Transform(word32 *state, const word32 *data)
{
	SHA256_Altivec_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
}
#endif // CRYPTOPP_BOOL_ALTIVEC_AVAILABLE

///////////////////////////////////
// start of Walton/Gulley's code //
///////////////////////////////////
Expand Down Expand Up @@ -1438,6 +1452,18 @@ static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const wor
// end of Walton/Schneiders/O'Rourke/Hovsmith's code //
///////////////////////////////////////////////////////

#if CRYPTOPP_BOOL_ALTIVEC_AVAILABLE

// Compression routine implemented in sha256_compress_ppc.s. It is called once
// per block with the hash state, a pointer to the block and the round constants.
extern "C" void sha256_compress_ppc(word32 *STATE, const word32 *input, const uint32_t *k);

// Runs the assembly compression routine over consecutive blocks of input.
//   state  - SHA-256 hash state, passed to the assembly routine for every block
//   data   - start of the input, addressed as 32-bit words
//   length - amount of input, counted in the same unit as SHA256::BLOCKSIZE
//            (assumed to be bytes -- confirm against the callers)
// One block is processed for every BLOCKSIZE step below length, so callers are
// expected to pass whole blocks only.
static void CRYPTOPP_FASTCALL SHA256_Altivec_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
{
	// data is a word32 pointer, so it must advance by words per block, not by
	// the block size itself; adding BLOCKSIZE directly would skip four blocks
	// at a time and read past the end of the input.
	const size_t wordsPerBlock = SHA256::BLOCKSIZE / sizeof(word32);

	for (size_t consumed = 0; consumed < length; consumed += SHA256::BLOCKSIZE)
	{
		sha256_compress_ppc(state, data, SHA256_K);
		data += wordsPerBlock;
	}
}
#endif

pfnSHATransform InitializeSHA256Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
Expand All @@ -1455,6 +1481,9 @@ pfnSHATransform InitializeSHA256Transform()
return &SHA256_ARM_SHA_Transform;
else
#endif
#if CRYPTOPP_BOOL_ALTIVEC_AVAILABLE
return &SHA256_Altivec_SHA_Transform;
#endif

return &SHA256_CXX_Transform;
}
Expand Down
Loading