13
0

Adding NEON detection during runtime

This commit is contained in:
Ayan Shafqat 2020-08-23 14:27:45 -04:00 committed by Robin Gareus
parent 773a1a0725
commit 1f878636c8
Signed by: rgareus
GPG Key ID: A090BCE02CF57F04
3 changed files with 18 additions and 3 deletions

View File

@ -219,7 +219,7 @@ setup_hardware_optimization (bool try_optimization)
#elif defined ARM_NEON_SUPPORT
/* Use NEON routines */
do {
if (fpu->has_neon ()) {
info << "Using ARM NEON optimized routines" << endmsg;
compute_peak = arm_neon_compute_peak;
@ -230,7 +230,7 @@ setup_hardware_optimization (bool try_optimization)
copy_vector = arm_neon_copy_vector;
generic_mix_functions = false;
} while (0);
}
#elif defined(__APPLE__) && defined(BUILD_VECLIB_OPTIMIZATIONS)

View File

@ -31,6 +31,12 @@
#include <intrin.h>
#endif
#ifdef ARM_NEON_SUPPORT
/* Needed for ARM NEON detection */
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
#include "pbd/compose.h"
#include "pbd/fpu.h"
#include "pbd/error.h"
@ -155,6 +161,13 @@ FPU::FPU ()
return;
}
#ifdef ARM_NEON_SUPPORT
	/* Runtime NEON detection: read the hardware-capability word from the
	 * auxiliary vector and record the result in _flags, which is what
	 * has_neon() tests.
	 *
	 * The bit has to be OR-ed in. AND-ing _flags with HasNEON can only
	 * clear bits: HasNEON would never become set, and every flag that was
	 * already recorded would be dropped.
	 *
	 * NOTE(review): HWCAP_NEON is the 32-bit ARM capability bit; confirm
	 * this also builds on AArch64, where the kernel headers may expose
	 * HWCAP_ASIMD instead.
	 */
	if (getauxval(AT_HWCAP) & HWCAP_NEON) {
		_flags = Flags(_flags | HasNEON);
	}
#endif
#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
/* Non-Intel architecture, nothing to do here */
return;

View File

@ -31,7 +31,8 @@ class LIBPBD_API FPU {
HasDenormalsAreZero = 0x2,
HasSSE = 0x4,
HasSSE2 = 0x8,
HasAVX = 0x10
HasAVX = 0x10,
HasNEON = 0x20,
};
public:
@ -45,6 +46,7 @@ class LIBPBD_API FPU {
/** @return true when the HasSSE bit is set in _flags. */
bool has_sse () const
{
	return (_flags & HasSSE) != 0;
}
/** @return true when the HasSSE2 bit is set in _flags. */
bool has_sse2 () const
{
	return (_flags & HasSSE2) != 0;
}
/** @return true when the HasAVX bit is set in _flags. */
bool has_avx () const
{
	return (_flags & HasAVX) != 0;
}
/** @return true when the HasNEON bit is set in _flags. */
bool has_neon () const
{
	return (_flags & HasNEON) != 0;
}
private:
Flags _flags;