From 7cdc682ab0e6c1c5be857cd4b8b173bdbe71defd Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Tue, 16 Oct 2018 14:55:55 -0400 Subject: [PATCH] Don't install signal handler when checking cpuid As part of its CPU feature detection, CryptoPP installs a SIGILL signal handler before issuing the cpuid instruction. The intent is to gracefully degrade on CPUs that don't support the cpuid instruction. The problem is that it is impossible to safely overwrite a signal handler installed by the Go runtime in go1.10 on macOS (golang/go#22805). This causes CockroachDB 2.0 to crash on macOS Mojave: cockroachdb/cockroach#31380. The situation has improved on the Go front, as go1.11 makes it possible to safely save and restore signal handlers installed by the Go runtime on macOS. Still, we can do better and support go1.10. There is no need to bother installing a SIGILL handler, as the cpuid instruction is supported by every x86-64 CPU. We can instead use conditional compilation to make sure that we never execute a cpuid instruction on a non x86-64 CPU. Note that CPU feature detection is performed at executable load time (see the attribute(constructor) on DetectX86Features); therefore any reference to function which calls DetectX86Features (notably HasAESNI) corrupts the signal handler. It's not entirely clear why this corruption later leads to the SIGTRAP seen in cockroachdb/cockroach#31380--is something in macOS or the Go runtime generating a SIGILL and trying to handle it gracefully?--but regardless, not mucking with the signal handler fixes the issue. --- cpu.cpp | 1493 +++++++++++++++++++++++++++---------------------------- 1 file changed, 735 insertions(+), 758 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index 42831c15e..9cd184d56 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -1,758 +1,735 @@ -// cpu.cpp - written and placed in the public domain by Wei Dai - -#include "pch.h" -#include "config.h" - -#ifndef EXCEPTION_EXECUTE_HANDLER -# define EXCEPTION_EXECUTE_HANDLER 1 -#endif - -#ifndef CRYPTOPP_IMPORTS - -#include "cpu.h" -#include "misc.h" -#include - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -#include -#include -#endif - -#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE -#include -#endif - -NAMESPACE_BEGIN(CryptoPP) - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" { - typedef void (*SigHandler)(int); -}; -#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - -#ifdef CRYPTOPP_CPUID_AVAILABLE - -#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 - -bool CpuId(word32 input, word32 output[4]) -{ - __cpuid((int *)output, input); - return true; -} - -#else - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" -{ - static jmp_buf s_jmpNoCPUID; - static void SigIllHandlerCPUID(int) - { - longjmp(s_jmpNoCPUID, 1); - } - - static jmp_buf s_jmpNoSSE2; - static void SigIllHandlerSSE2(int) - { - longjmp(s_jmpNoSSE2, 1); - } -} -#endif - -bool CpuId(word32 input, word32 output[4]) -{ -#if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - __try - { - __asm - { - mov eax, input - mov ecx, 0 - cpuid - mov edi, output - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - } - } - // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - - // function 0 returns the highest basic function understood in EAX - if(input == 0) - return !!output[0]; - - return true; -#else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); - if (oldHandler == SIG_ERR) - return false; - -# ifndef __MINGW32__ - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; -# endif - - if (setjmp(s_jmpNoCPUID)) - result = false; - else - { - asm volatile - ( - // save ebx in case -fPIC is being used - // TODO: this might need an early clobber on EDI. -# if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 - "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" -# else - "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" -# endif - : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) - : "a" (input), "c" (0) - ); - } - -# ifndef __MINGW32__ - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); -# endif - - signal(SIGILL, oldHandler); - return result; -#endif -} - -#endif - -static bool TrySSE2() -{ -#if CRYPTOPP_BOOL_X64 - return true; -#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - __try - { -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - AS2(por xmm0, xmm0) // executing SSE2 instruction -#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE - __m128i x = _mm_setzero_si128(); - return _mm_cvtsi128_si32(x) == 0; -#endif - } - // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return true; -#else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); - if (oldHandler == SIG_ERR) - return false; - -# ifndef __MINGW32__ - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; -# endif - - if (setjmp(s_jmpNoSSE2)) - result = false; - else - { -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - __asm __volatile ("por %xmm0, %xmm0"); -#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE - __m128i x = _mm_setzero_si128(); - result = _mm_cvtsi128_si32(x) == 0; -#endif - } - -# ifndef __MINGW32__ - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); -# endif - - signal(SIGILL, oldHandler); - return result; -#endif -} - -bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false; -bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; -bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false; -bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false; -bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false; -bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false; -word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; - -static inline bool IsIntel(const word32 output[4]) -{ - // This is the "GenuineIntel" string - return (output[1] /*EBX*/ == 0x756e6547) && - (output[2] /*ECX*/ == 0x6c65746e) && - (output[3] /*EDX*/ == 0x49656e69); -} - -static inline bool IsAMD(const word32 output[4]) -{ - // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!" - return (output[1] /*EBX*/ == 0x68747541) && - (output[2] /*ECX*/ == 0x444D4163) && - (output[3] /*EDX*/ == 0x69746E65); -} - -static inline bool IsVIA(const word32 output[4]) -{ - // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA " - return (output[1] /*EBX*/ == 0x746e6543) && - (output[2] /*ECX*/ == 0x736c7561) && - (output[3] /*EDX*/ == 0x48727561); -} - -#if HAVE_GCC_CONSTRUCTOR1 -void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features() -#elif HAVE_GCC_CONSTRUCTOR0 -void __attribute__ ((constructor)) DetectX86Features() -#else -void DetectX86Features() -#endif -{ - word32 cpuid[4], cpuid1[4]; - if (!CpuId(0, cpuid)) - return; - if (!CpuId(1, cpuid1)) - return; - - g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; - if ((cpuid1[3] & (1 << 26)) != 0) - g_hasSSE2 = TrySSE2(); - g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); - g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20))); - g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); - g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); - - if ((cpuid1[3] & (1 << 25)) != 0) - g_hasISSE = true; - else - { - word32 cpuid2[4]; - CpuId(0x080000000, cpuid2); - if (cpuid2[0] >= 0x080000001) - { - CpuId(0x080000001, cpuid2); - g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; - } - } - - if (IsIntel(cpuid)) - { - static const unsigned int RDRAND_FLAG = (1 << 30); - static const unsigned int RDSEED_FLAG = (1 << 18); - - g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; - g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); - g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG); - - if (cpuid[0] /*EAX*/ >= 7) - { - word32 cpuid3[4]; - if (CpuId(7, cpuid3)) - g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG); - } - } - else if (IsAMD(cpuid)) - { - static const unsigned int RDRAND_FLAG = (1 << 30); - - CpuId(0x01, cpuid); - g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG); - - CpuId(0x80000005, cpuid); - g_cacheLineSize = GETBYTE(cpuid[2], 0); - } - else if (IsVIA(cpuid)) - { - static const unsigned int RNG_FLAGS = (0x3 << 2); - static const unsigned int ACE_FLAGS = (0x3 << 6); - static const unsigned int ACE2_FLAGS = (0x3 << 8); - static const unsigned int PHE_FLAGS = (0x3 << 10); - static const unsigned int PMM_FLAGS = (0x3 << 12); - - CpuId(0xC0000000, cpuid); - if (cpuid[0] >= 0xC0000001) - { - // Extended features available - CpuId(0xC0000001, cpuid); - g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS); - g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS); - g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS); - g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS); - g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS); - } - } - - if (!g_cacheLineSize) - g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; - - *((volatile bool*)&g_x86DetectionDone) = true; -} - -#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) - -// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. -// Attempting to run the code results in a SIGILL and termination. -// -// #if defined(__arm64__) || defined(__aarch64__) -// word64 caps = 0; // Read ID_AA64ISAR0_EL1 -// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); -// #elif defined(__arm__) || defined(__aarch32__) -// word32 caps = 0; // Read ID_ISAR5_EL1 -// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); -// #endif -// -// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64 -// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu -// -bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; -bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false; -bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false; -word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" -{ - static jmp_buf s_jmpNoNEON; - static void SigIllHandlerNEON(int) - { - longjmp(s_jmpNoNEON, 1); - } - - static jmp_buf s_jmpNoPMULL; - static void SigIllHandlerPMULL(int) - { - longjmp(s_jmpNoPMULL, 1); - } - - static jmp_buf s_jmpNoCRC32; - static void SigIllHandlerCRC32(int) - { - longjmp(s_jmpNoCRC32, 1); - } - - static jmp_buf s_jmpNoAES; - static void SigIllHandlerAES(int) - { - longjmp(s_jmpNoAES, 1); - } - - static jmp_buf s_jmpNoSHA1; - static void SigIllHandlerSHA1(int) - { - longjmp(s_jmpNoSHA1, 1); - } - - static jmp_buf s_jmpNoSHA2; - static void SigIllHandlerSHA2(int) - { - longjmp(s_jmpNoSHA2, 1); - } -}; -#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - -static bool TryNEON() -{ -#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - uint32_t v1[4] = {1,1,1,1}; - uint32x4_t x1 = vld1q_u32(v1); - uint64_t v2[2] = {1,1}; - uint64x2_t x2 = vld1q_u64(v2); - - uint32x4_t x3 = vdupq_n_u32(2); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(2); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); - - result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoNEON)) - result = false; - else - { - uint32_t v1[4] = {1,1,1,1}; - uint32x4_t x1 = vld1q_u32(v1); - uint64_t v2[2] = {1,1}; - uint64x2_t x2 = vld1q_u64(v2); - - uint32x4_t x3 = {0,0,0,0}; - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = {0,0}; - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); - - // Hack... GCC optimizes away the code and returns true - result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -} - -static bool TryPMULL() -{ -#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - const poly64_t a1={2}, b1={3}; - const poly64x2_t a2={4,5}, b2={6,7}; - const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; - - const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64(a2, b2); - const poly128_t r3 = vmull_high_p64(a3, b3); - - // Also see https://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = vreinterpretq_u64_p128(r1); // {6,0} - const uint64x2_t& t2 = vreinterpretq_u64_p128(r2); // {24,0} - const uint64x2_t& t3 = vreinterpretq_u64_p128(r3); // {bignum,bignum} - - result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && - vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoPMULL)) - result = false; - else - { - const poly64_t a1={2}, b1={3}; - const poly64x2_t a2={4,5}, b2={6,7}; - const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; - - const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64(a2, b2); - const poly128_t r3 = vmull_high_p64(a3, b3); - - // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = (uint64x2_t)(r1); // {6,0} - const uint64x2_t& t2 = (uint64x2_t)(r2); // {24,0} - const uint64x2_t& t3 = (uint64x2_t)(r3); // {bignum,bignum} - - result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && - vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -} - -static bool TryCRC32() -{ -#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - word32 w=0, x=1; word16 y=2; byte z=3; - w = __crc32cw(w,x); - w = __crc32ch(w,y); - w = __crc32cb(w,z); - - result = !!w; - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoCRC32)) - result = false; - else - { - word32 w=0, x=1; word16 y=2; byte z=3; - w = __crc32cw(w,x); - w = __crc32ch(w,y); - w = __crc32cb(w,z); - - // Hack... GCC optimizes away the code and returns true - result = !!w; - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE -} - -static bool TryAES() -{ -#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - // AES encrypt and decrypt - uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); - uint8x16_t r1 = vaeseq_u8(data, key); - uint8x16_t r2 = vaesdq_u8(data, key); - - result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoAES)) - result = false; - else - { - uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); - uint8x16_t r1 = vaeseq_u8(data, key); - uint8x16_t r2 = vaesdq_u8(data, key); - - // Hack... GCC optimizes away the code and returns true - result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -} - -static bool TrySHA1() -{ -#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - - uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); - uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); - uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); - uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); - uint32x4_t r5 = vsha1su1q_u32 (data1, data2); - - result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoSHA1)) - result = false; - else - { - uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - - uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); - uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); - uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); - uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); - uint32x4_t r5 = vsha1su1q_u32 (data1, data2); - - // Hack... GCC optimizes away the code and returns true - result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -} - -static bool TrySHA2() -{ -#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - - uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); - uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); - uint32x4_t r3 = vsha256su0q_u32 (data1, data2); - uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); - - result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpNoSHA2)) - result = false; - else - { - uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - - uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); - uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); - uint32x4_t r3 = vsha256su0q_u32 (data1, data2); - uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); - - // Hack... GCC optimizes away the code and returns true - result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -} - -#if HAVE_GCC_CONSTRUCTOR1 -void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() -#elif HAVE_GCC_CONSTRUCTOR0 -void __attribute__ ((constructor)) DetectArmFeatures() -#else -void DetectArmFeatures() -#endif -{ - g_hasNEON = TryNEON(); - g_hasPMULL = TryPMULL(); - g_hasCRC32 = TryCRC32(); - g_hasAES = TryAES(); - g_hasSHA1 = TrySHA1(); - g_hasSHA2 = TrySHA2(); - - *((volatile bool*)&g_ArmDetectionDone) = true; -} - -#endif - -NAMESPACE_END - -#endif +// cpu.cpp - written and placed in the public domain by Wei Dai + +#include "pch.h" +#include "config.h" + +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + +#ifndef CRYPTOPP_IMPORTS + +#include "cpu.h" +#include "misc.h" +#include + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +#include +#include +#endif + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include +#endif + +NAMESPACE_BEGIN(CryptoPP) + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +extern "C" { + typedef void (*SigHandler)(int); +}; +#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +#ifdef CRYPTOPP_CPUID_AVAILABLE + +#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 + +bool CpuId(word32 input, word32 output[4]) +{ + __cpuid((int *)output, input); + return true; +} + +#else + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +extern "C" +{ + static jmp_buf s_jmpNoCPUID; + static void SigIllHandlerCPUID(int) + { + longjmp(s_jmpNoCPUID, 1); + } + + static jmp_buf s_jmpNoSSE2; + static void SigIllHandlerSSE2(int) + { + longjmp(s_jmpNoSSE2, 1); + } +} +#endif + +bool CpuId(word32 input, word32 output[4]) +{ +#if !defined(__x86_64__) && !defined(__i386__) + return false; +#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { + __asm + { + mov eax, input + mov ecx, 0 + cpuid + mov edi, output + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + } + } + // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + + // function 0 returns the highest basic function understood in EAX + if(input == 0) + return !!output[0]; + + return true; +#else + asm volatile + ( + // save ebx in case -fPIC is being used + // TODO: this might need an early clobber on EDI. +# if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" +# else + "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" +# endif + : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) + : "a" (input), "c" (0) + ); + return true; +#endif +} + +#endif + +static bool TrySSE2() +{ +#if CRYPTOPP_BOOL_X64 + return true; +#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + AS2(por xmm0, xmm0) // executing SSE2 instruction +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + return _mm_cvtsi128_si32(x) == 0; +#endif + } + // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return true; +#else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); + if (oldHandler == SIG_ERR) + return false; + +# ifndef __MINGW32__ + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; +# endif + + if (setjmp(s_jmpNoSSE2)) + result = false; + else + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + __asm __volatile ("por %xmm0, %xmm0"); +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + result = _mm_cvtsi128_si32(x) == 0; +#endif + } + +# ifndef __MINGW32__ + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); +# endif + + signal(SIGILL, oldHandler); + return result; +#endif +} + +bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false; +bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; +bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false; +bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false; +bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false; +bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false; +word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +static inline bool IsIntel(const word32 output[4]) +{ + // This is the "GenuineIntel" string + return (output[1] /*EBX*/ == 0x756e6547) && + (output[2] /*ECX*/ == 0x6c65746e) && + (output[3] /*EDX*/ == 0x49656e69); +} + +static inline bool IsAMD(const word32 output[4]) +{ + // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!" + return (output[1] /*EBX*/ == 0x68747541) && + (output[2] /*ECX*/ == 0x444D4163) && + (output[3] /*EDX*/ == 0x69746E65); +} + +static inline bool IsVIA(const word32 output[4]) +{ + // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA " + return (output[1] /*EBX*/ == 0x746e6543) && + (output[2] /*ECX*/ == 0x736c7561) && + (output[3] /*EDX*/ == 0x48727561); +} + +#if HAVE_GCC_CONSTRUCTOR1 +void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features() +#elif HAVE_GCC_CONSTRUCTOR0 +void __attribute__ ((constructor)) DetectX86Features() +#else +void DetectX86Features() +#endif +{ + word32 cpuid[4], cpuid1[4]; + if (!CpuId(0, cpuid)) + return; + if (!CpuId(1, cpuid1)) + return; + + g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; + if ((cpuid1[3] & (1 << 26)) != 0) + g_hasSSE2 = TrySSE2(); + g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); + g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20))); + g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); + g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); + + if ((cpuid1[3] & (1 << 25)) != 0) + g_hasISSE = true; + else + { + word32 cpuid2[4]; + CpuId(0x080000000, cpuid2); + if (cpuid2[0] >= 0x080000001) + { + CpuId(0x080000001, cpuid2); + g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; + } + } + + if (IsIntel(cpuid)) + { + static const unsigned int RDRAND_FLAG = (1 << 30); + static const unsigned int RDSEED_FLAG = (1 << 18); + + g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; + g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); + g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG); + + if (cpuid[0] /*EAX*/ >= 7) + { + word32 cpuid3[4]; + if (CpuId(7, cpuid3)) + g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG); + } + } + else if (IsAMD(cpuid)) + { + static const unsigned int RDRAND_FLAG = (1 << 30); + + CpuId(0x01, cpuid); + g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG); + + CpuId(0x80000005, cpuid); + g_cacheLineSize = GETBYTE(cpuid[2], 0); + } + else if (IsVIA(cpuid)) + { + static const unsigned int RNG_FLAGS = (0x3 << 2); + static const unsigned int ACE_FLAGS = (0x3 << 6); + static const unsigned int ACE2_FLAGS = (0x3 << 8); + static const unsigned int PHE_FLAGS = (0x3 << 10); + static const unsigned int PMM_FLAGS = (0x3 << 12); + + CpuId(0xC0000000, cpuid); + if (cpuid[0] >= 0xC0000001) + { + // Extended features available + CpuId(0xC0000001, cpuid); + g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS); + g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS); + g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS); + g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS); + g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS); + } + } + + if (!g_cacheLineSize) + g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + + *((volatile bool*)&g_x86DetectionDone) = true; +} + +#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) + +// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. +// Attempting to run the code results in a SIGILL and termination. +// +// #if defined(__arm64__) || defined(__aarch64__) +// word64 caps = 0; // Read ID_AA64ISAR0_EL1 +// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); +// #elif defined(__arm__) || defined(__aarch32__) +// word32 caps = 0; // Read ID_ISAR5_EL1 +// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); +// #endif +// +// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64 +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +// +bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; +bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false; +bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false; +word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +extern "C" +{ + static jmp_buf s_jmpNoNEON; + static void SigIllHandlerNEON(int) + { + longjmp(s_jmpNoNEON, 1); + } + + static jmp_buf s_jmpNoPMULL; + static void SigIllHandlerPMULL(int) + { + longjmp(s_jmpNoPMULL, 1); + } + + static jmp_buf s_jmpNoCRC32; + static void SigIllHandlerCRC32(int) + { + longjmp(s_jmpNoCRC32, 1); + } + + static jmp_buf s_jmpNoAES; + static void SigIllHandlerAES(int) + { + longjmp(s_jmpNoAES, 1); + } + + static jmp_buf s_jmpNoSHA1; + static void SigIllHandlerSHA1(int) + { + longjmp(s_jmpNoSHA1, 1); + } + + static jmp_buf s_jmpNoSHA2; + static void SigIllHandlerSHA2(int) + { + longjmp(s_jmpNoSHA2, 1); + } +}; +#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +static bool TryNEON() +{ +#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + uint32_t v1[4] = {1,1,1,1}; + uint32x4_t x1 = vld1q_u32(v1); + uint64_t v2[2] = {1,1}; + uint64x2_t x2 = vld1q_u64(v2); + + uint32x4_t x3 = vdupq_n_u32(2); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); + uint64x2_t x4 = vdupq_n_u64(2); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + + result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoNEON)) + result = false; + else + { + uint32_t v1[4] = {1,1,1,1}; + uint32x4_t x1 = vld1q_u32(v1); + uint64_t v2[2] = {1,1}; + uint64x2_t x2 = vld1q_u64(v2); + + uint32x4_t x3 = {0,0,0,0}; + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); + uint64x2_t x4 = {0,0}; + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +} + +static bool TryPMULL() +{ +#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + const poly64_t a1={2}, b1={3}; + const poly64x2_t a2={4,5}, b2={6,7}; + const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; + + const poly128_t r1 = vmull_p64(a1, b1); + const poly128_t r2 = vmull_high_p64(a2, b2); + const poly128_t r3 = vmull_high_p64(a3, b3); + + // Also see https://github.com/weidai11/cryptopp/issues/233. + const uint64x2_t& t1 = vreinterpretq_u64_p128(r1); // {6,0} + const uint64x2_t& t2 = vreinterpretq_u64_p128(r2); // {24,0} + const uint64x2_t& t3 = vreinterpretq_u64_p128(r3); // {bignum,bignum} + + result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && + vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoPMULL)) + result = false; + else + { + const poly64_t a1={2}, b1={3}; + const poly64x2_t a2={4,5}, b2={6,7}; + const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; + + const poly128_t r1 = vmull_p64(a1, b1); + const poly128_t r2 = vmull_high_p64(a2, b2); + const poly128_t r3 = vmull_high_p64(a3, b3); + + // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233. + const uint64x2_t& t1 = (uint64x2_t)(r1); // {6,0} + const uint64x2_t& t2 = (uint64x2_t)(r2); // {24,0} + const uint64x2_t& t3 = (uint64x2_t)(r3); // {bignum,bignum} + + result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && + vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE +} + +static bool TryCRC32() +{ +#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + word32 w=0, x=1; word16 y=2; byte z=3; + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); + + result = !!w; + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoCRC32)) + result = false; + else + { + word32 w=0, x=1; word16 y=2; byte z=3; + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); + + // Hack... GCC optimizes away the code and returns true + result = !!w; + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE +} + +static bool TryAES() +{ +#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + // AES encrypt and decrypt + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t r1 = vaeseq_u8(data, key); + uint8x16_t r2 = vaesdq_u8(data, key); + + result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoAES)) + result = false; + else + { + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t r1 = vaeseq_u8(data, key); + uint8x16_t r2 = vaesdq_u8(data, key); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE +} + +static bool TrySHA1() +{ +#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; + + uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); + uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); + uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); + uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); + uint32x4_t r5 = vsha1su1q_u32 (data1, data2); + + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoSHA1)) + result = false; + else + { + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; + + uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); + uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); + uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); + uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); + uint32x4_t r5 = vsha1su1q_u32 (data1, data2); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE +} + +static bool TrySHA2() +{ +#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; + + uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); + uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); + uint32x4_t r3 = vsha256su0q_u32 (data1, data2); + uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); + + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpNoSHA2)) + result = false; + else + { + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; + + uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); + uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); + uint32x4_t r3 = vsha256su0q_u32 (data1, data2); + uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE +} + +#if HAVE_GCC_CONSTRUCTOR1 +void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() +#elif HAVE_GCC_CONSTRUCTOR0 +void __attribute__ ((constructor)) DetectArmFeatures() +#else +void DetectArmFeatures() +#endif +{ + g_hasNEON = TryNEON(); + g_hasPMULL = TryPMULL(); + g_hasCRC32 = TryCRC32(); + g_hasAES = TryAES(); + g_hasSHA1 = TrySHA1(); + g_hasSHA2 = TrySHA2(); + + *((volatile bool*)&g_ArmDetectionDone) = true; +} + +#endif + +NAMESPACE_END + +#endif