13
0
livetrax/libs/pbd/fpu.cc

300 lines
7.3 KiB
C++

/*
Copyright (C) 2012 Paul Davis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "libpbd-config.h"
#define _XOPEN_SOURCE 600
#include <cstring> // for memset
#include <cstdlib>
#include <stdint.h>
#include <assert.h>
#ifdef PLATFORM_WINDOWS
#include <intrin.h>
#endif
#include "pbd/compose.h"
#include "pbd/fpu.h"
#include "pbd/error.h"
#include "i18n.h"
using namespace PBD;
using namespace std;
FPU* FPU::_instance (0);
#if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86
#ifndef PLATFORM_WINDOWS
/* use __cpuid() as the name to match the MSVC/mingw intrinsic */
static void
__cpuid(int regs[4], int cpuid_leaf)
{
int eax, ebx, ecx, edx;
asm volatile (
#if defined(__i386__)
"pushl %%ebx;\n\t"
#endif
"movl %4, %%eax;\n\t"
"cpuid;\n\t"
"movl %%eax, %0;\n\t"
"movl %%ebx, %1;\n\t"
"movl %%ecx, %2;\n\t"
"movl %%edx, %3;\n\t"
#if defined(__i386__)
"popl %%ebx;\n\t"
#endif
:"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx)
:"r" (cpuid_leaf)
:"%eax",
#if !defined(__i386__)
"%ebx",
#endif
"%ecx", "%edx");
regs[0] = eax;
regs[1] = ebx;
regs[2] = ecx;
regs[3] = edx;
}
#endif /* !PLATFORM_WINDOWS */
#ifndef COMPILER_MSVC
static uint64_t
_xgetbv (uint32_t xcr)
{
#ifdef __APPLE__
/* it would be nice to make this work on OS X but as long we use veclib,
we don't really need to know about SSE/AVX on that platform.
*/
return 0;
#else
uint32_t eax, edx;
__asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
return (static_cast<uint64_t>(edx) << 32) | eax;
#endif
}
#elif _MSC_VER < 1600
// '_xgetbv()' was only available from VC10 onwards
__declspec(noinline) static uint64_t
_xgetbv (uint32_t xcr)
{
return 0;
// N.B. The following would probably work for a pre-VC10 build,
// although it might suffer from optimization issues. We'd need
// to place this function into its own (unoptimized) source file.
__asm {
mov ecx, [xcr]
__asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/
}
}
#endif /* !COMPILER_MSVC */
#endif /* ARCH_X86 */
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif
FPU*
FPU::instance()
{
if (!_instance) {
_instance = new FPU;
}
return _instance;
}
void
FPU::destroy ()
{
delete _instance;
_instance = 0;
}
FPU::FPU ()
: _flags ((Flags) 0)
{
if (_instance) {
error << _("FPU object instantiated more than once") << endmsg;
}
if (getenv("ARDOUR_FPU_FLAGS")) {
_flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS")));
return;
}
#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
/* Non-Intel architecture, nothing to do here */
return;
#else
/* Get the CPU vendor just for kicks */
// __cpuid with an InfoType argument of 0 returns the number of
// valid Ids in CPUInfo[0] and the CPU identification string in
// the other three array elements. The CPU identification string is
// not in linear order. The code below arranges the information
// in a human readable form. The human readable order is CPUInfo[1] |
// CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
// before using memcpy to copy these three array elements to cpu_string.
int cpu_info[4];
char cpu_string[48];
string cpu_vendor;
__cpuid (cpu_info, 0);
int num_ids = cpu_info[0];
std::swap(cpu_info[2], cpu_info[3]);
memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
cpu_vendor.assign(cpu_string, 3 * sizeof(cpu_info[1]));
info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
if (num_ids > 0) {
/* Now get CPU/FPU flags */
__cpuid (cpu_info, 1);
if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
(cpu_info[2] & (1<<28) /* AVX */) &&
((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */
info << _("AVX-capable processor") << endmsg;
_flags = Flags (_flags | (HasAVX) );
}
if (cpu_info[3] & (1<<25)) {
_flags = Flags (_flags | (HasSSE|HasFlushToZero));
}
if (cpu_info[3] & (1<<26)) {
_flags = Flags (_flags | HasSSE2);
}
/* Figure out CPU/FPU denormal handling capabilities */
if (cpu_info[3] & (1 << 24)) {
char** fxbuf = 0;
/* DAZ wasn't available in the first version of SSE. Since
setting a reserved bit in MXCSR causes a general protection
fault, we need to be able to check the availability of this
feature without causing problems. To do this, one needs to
set up a 512-byte area of memory to save the SSE state to,
using fxsave, and then one needs to inspect bytes 28 through
31 for the MXCSR_MASK value. If bit 6 is set, DAZ is
supported, otherwise, it isn't.
*/
#ifndef HAVE_POSIX_MEMALIGN
# ifdef PLATFORM_WINDOWS
fxbuf = (char **) _aligned_malloc (sizeof (char *), 16);
assert (fxbuf);
*fxbuf = (char *) _aligned_malloc (512, 16);
assert (*fxbuf);
# else
# warning using default malloc for aligned memory
fxbuf = (char **) malloc (sizeof (char *));
assert (fxbuf);
*fxbuf = (char *) malloc (512);
assert (*fxbuf);
# endif
#else
(void) posix_memalign ((void **) &fxbuf, 16, sizeof (char *));
assert (fxbuf);
(void) posix_memalign ((void **) fxbuf, 16, 512);
assert (*fxbuf);
#endif
memset (*fxbuf, 0, 512);
#ifdef COMPILER_MSVC
char *buf = *fxbuf;
__asm {
mov eax, buf
fxsave [eax]
};
#else
asm volatile (
"fxsave (%0)"
:
: "r" (*fxbuf)
: "memory"
);
#endif
uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
/* if the mask is zero, set its default value (from intel specs) */
if (mxcsr_mask == 0) {
mxcsr_mask = 0xffbf;
}
if (mxcsr_mask & (1<<6)) {
_flags = Flags (_flags | HasDenormalsAreZero);
}
#if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
_aligned_free (*fxbuf);
_aligned_free (fxbuf);
#else
free (*fxbuf);
free (fxbuf);
#endif
}
/* finally get the CPU brand */
__cpuid (cpu_info, 0x80000000);
const int parameter_end = 0x80000004;
string cpu_brand;
if (cpu_info[0] >= parameter_end) {
char* cpu_string_ptr = cpu_string;
for (int parameter = 0x80000002; parameter <= parameter_end &&
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
__cpuid(cpu_info, parameter);
memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
cpu_string_ptr += sizeof(cpu_info);
}
cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
}
}
#endif /* !ARCH_X86 */
}
FPU::~FPU ()
{
}