Detect NEON at run time via whichever probe function the platform provides.

configure.ac:
  * Accept "arm64" as well as "aarch64" in the host_cpu case branch that
    enables NEON unconditionally.
  * In the branch that sets NEON_CXXFLAGS, flag that follow-up checks are
    needed (check_for_neon=1).
  * When flagged, probe for getauxval, elf_aux_info and android_getCpuFamily
    with AC_CHECK_FUNCS (which defines the matching HAVE_* macros) and warn
    if none of them is available.

src/arch/simddetect.cpp:
  * Choose the headers and the run-time NEON query from the HAVE_* macros
    instead of "ANDROID, otherwise assume Linux", and add an elf_aux_info
    variant.

diff --git a/configure.ac b/configure.ac
index fc375303b3..9b7d5739ac 100644
--- a/configure.ac
+++ b/configure.ac
@@ -163,7 +163,7 @@ case "${host_cpu}" in

     ;;

-  aarch64)
+  aarch64|arm64)

     # ARMv8 always has NEON and does not need special compiler flags.
     AM_CONDITIONAL([HAVE_NEON], true)
@@ -178,6 +178,7 @@ case "${host_cpu}" in
       AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
       NEON_CXXFLAGS="-mfpu=neon"
       AC_SUBST([NEON_CXXFLAGS])
+      check_for_neon=1
     fi

     ;;
@@ -191,6 +192,16 @@ esac
 # check whether feenableexcept is supported. some C libraries (e.g. uclibc) don't.
 AC_CHECK_FUNCS([feenableexcept])

+# additional checks for NEON targets
+if test "x$check_for_neon" = x1; then
+  AC_MSG_NOTICE([checking how to detect NEON availability])
+  AC_CHECK_FUNCS([getauxval elf_aux_info android_getCpuFamily])
+
+  if test "x$ac_cv_func_getauxval" = xno && test "x$ac_cv_func_elf_aux_info" = xno && test "x$ac_cv_func_android_getCpuFamily" = xno; then
+    AC_MSG_WARN([NEON is available, but we don't know how to check for it. Will not be able to use NEON.])
+  fi
+fi
+
 AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
 AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)

diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
index bb0efcf5d4..f169bccf0c 100644
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@@ -53,12 +53,14 @@
 #endif

 #if defined(HAVE_NEON) && !defined(__aarch64__)
-#  ifdef ANDROID
+#  if defined(HAVE_ANDROID_GETCPUFAMILY)
 #    include <cpu-features.h>
-#  else
-/* Assume linux */
+#  elif defined(HAVE_GETAUXVAL)
 #    include <asm/hwcap.h>
 #    include <sys/auxv.h>
+#  elif defined(HAVE_ELF_AUX_INFO)
+#    include <sys/auxv.h>
+#    include <sys/elf.h>
 #  endif
 #endif

@@ -210,15 +212,18 @@ SIMDDetect::SIMDDetect() {
 #endif

 #if defined(HAVE_NEON) && !defined(__aarch64__)
-#  ifdef ANDROID
+#  if defined(HAVE_ANDROID_GETCPUFAMILY)
   {
     AndroidCpuFamily family = android_getCpuFamily();
     if (family == ANDROID_CPU_FAMILY_ARM)
       neon_available_ = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON);
   }
-#  else
-  /* Assume linux */
+#  elif defined(HAVE_GETAUXVAL)
   neon_available_ = getauxval(AT_HWCAP) & HWCAP_NEON;
+#  elif defined(HAVE_ELF_AUX_INFO)
+  unsigned long hwcap = 0;
+  elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+  neon_available_ = hwcap & HWCAP_NEON;
 #  endif
 #endif
