Build the x86 FMA mix routine only when the compiler supports FMA intrinsics.

* wscript: configure probes the compiler with an _mm_fmadd_ss fragment
  (i386/i686/x86_64) or defines FPU_AVX_FMA_SUPPORT directly (mingw).
* libs/ardour/wscript: x86_functions_fma.cc moves out of the AVX static
  library into its own 'sse_fma_functions' library, compiled with the
  'fma' flag, and is built only when FPU_AVX_FMA_SUPPORT is defined.
* mix.h / globals.cc: the FMA declaration and the has_fma() dispatch
  branch are guarded by FPU_AVX_FMA_SUPPORT.

diff --git a/libs/ardour/ardour/mix.h b/libs/ardour/ardour/mix.h
index d472873f30..4b84b9e7ce 100644
--- a/libs/ardour/ardour/mix.h
+++ b/libs/ardour/ardour/mix.h
@@ -46,7 +46,9 @@ extern "C" {
 }
 
 /* FMA functions */
+#ifdef FPU_AVX_FMA_SUPPORT
 LIBARDOUR_API void x86_fma_mix_buffers_with_gain (float * dst, const float * src, uint32_t nframes, float gain);
+#endif
 
 LIBARDOUR_API void x86_sse_find_peaks (const float * buf, uint32_t nsamples, float *min, float *max);
 #ifdef PLATFORM_WINDOWS
diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc
index 6e18156731..4a46fdb643 100644
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@@ -186,8 +186,9 @@ setup_hardware_optimization (bool try_optimization)
 	FPU* fpu = FPU::instance ();
 
 #if defined(ARCH_X86) && defined(BUILD_SSE_OPTIMIZATIONS)
-
 	/* We have AVX-optimized code for Windows and Linux */
+
+#ifdef FPU_AVX_FMA_SUPPORT
 	if (fpu->has_fma ()) {
 		info << "Using AVX and FMA optimized routines" << endmsg;
 
@@ -201,7 +202,9 @@ setup_hardware_optimization (bool try_optimization)
 
 		generic_mix_functions = false;
 
-	} else if (fpu->has_avx ()) {
+	} else
+#endif
+	if (fpu->has_avx ()) {
 		info << "Using AVX optimized routines" << endmsg;
 
 		// AVX SET
diff --git a/libs/ardour/wscript b/libs/ardour/wscript
index 206652a2b2..931388df00 100644
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@@ -471,14 +471,17 @@ def build(bld):
         obj.source += [ 'audio_unit.cc' ]
 
     avx_sources = []
+    fma_sources = []
 
     if Options.options.fpu_optimization:
         if (bld.env['build_target'] == 'i386' or bld.env['build_target'] == 'i686'):
             obj.source += [ 'sse_functions_xmm.cc', 'sse_functions.s', ]
-            avx_sources = [ 'sse_functions_avx_linux.cc', 'x86_functions_fma.cc' ]
+            avx_sources = [ 'sse_functions_avx_linux.cc' ]
+            fma_sources = [ 'x86_functions_fma.cc' ]
         elif bld.env['build_target'] == 'x86_64':
             obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit.s', ]
-            avx_sources = [ 'sse_functions_avx_linux.cc', 'x86_functions_fma.cc' ]
+            avx_sources = [ 'sse_functions_avx_linux.cc' ]
+            fma_sources = [ 'x86_functions_fma.cc' ]
         elif bld.env['build_target'] == 'mingw':
             # usability of the 64 bit windows assembler depends on the compiler target,
             # not the build host, which in turn can only be inferred from the name
@@ -486,14 +489,14 @@ def build(bld):
             if re.search ('x86_64-w64', str(bld.env['CC'])):
                 obj.source += [ 'sse_functions_xmm.cc' ]
                 obj.source += [ 'sse_functions_64bit_win.s', 'sse_avx_functions_64bit_win.s' ]
-                avx_sources = [ 'sse_functions_avx.cc', 'x86_functions_fma.cc' ]
+                avx_sources = [ 'sse_functions_avx.cc' ]
+                fma_sources = [ 'x86_functions_fma.cc' ]
         elif bld.env['build_target'] == 'aarch64':
             obj.source += ['arm_neon_functions.cc']
             obj.defines += [ 'ARM_NEON_SUPPORT' ]
 
         elif bld.env['build_target'] == 'armhf':
             # 32bit ARM needs -mfpu=neon
-            obj.defines += [ 'ARM_NEON_SUPPORT' ]
             arm_neon_cxxflags = list(bld.env['CXXFLAGS'])
             arm_neon_cxxflags.append (bld.env['compiler_flags_dict']['neon'])
             bld(features = 'cxx cxxstlib asm',
@@ -506,13 +509,13 @@ def build(bld):
                 target = 'arm_neon_functions')
 
             obj.use += ['arm_neon_functions' ]
+            obj.defines += [ 'ARM_NEON_SUPPORT' ]
 
     if avx_sources:
         # as long as we want to use AVX intrinsics in this file,
         # compile it with -mavx flag - append avx flag to the existing
         avx_cxxflags = list(bld.env['CXXFLAGS'])
         avx_cxxflags.append (bld.env['compiler_flags_dict']['avx'])
-        avx_cxxflags.append (bld.env['compiler_flags_dict']['fma'])
         avx_cxxflags.append (bld.env['compiler_flags_dict']['pic'])
         bld(features = 'cxx cxxstlib asm',
             source = avx_sources,
@@ -524,6 +527,23 @@ def build(bld):
 
         obj.use += ['sse_avx_functions' ]
 
+    if bld.is_defined('FPU_AVX_FMA_SUPPORT') and fma_sources:
+        fma_cxxflags = list(bld.env['CXXFLAGS'])
+        fma_cxxflags.append (bld.env['compiler_flags_dict']['avx'])
+        fma_cxxflags.append (bld.env['compiler_flags_dict']['pic'])
+        fma_cxxflags.append (bld.env['compiler_flags_dict']['fma'])
+
+        bld(features = 'cxx cxxstlib asm',
+            source = fma_sources,
+            cxxflags = fma_cxxflags,
+            includes = [ '.' ],
+            use = [ 'libtemporal', 'libpbd', 'libevoral', 'liblua' ],
+            uselib = [ 'GLIBMM', 'XML' ],
+            target = 'sse_fma_functions')
+
+        obj.use += ['sse_fma_functions' ]
+        obj.defines += [ 'FPU_AVX_FMA_SUPPORT' ]
+
     # i18n
     if bld.is_defined('ENABLE_NLS'):
         mo_files = bld.path.ant_glob('po/*.mo')
diff --git a/wscript b/wscript
index 80d9aa00b8..ecb020813d 100644
--- a/wscript
+++ b/wscript
@@ -493,6 +493,18 @@ int main() { return 0; }''',
     if opt.fpu_optimization:
         if conf.env['build_target'] == 'armhf' or conf.env['build_target'] == 'aarch64':
             conf.define('ARM_NEON_SUPPORT', 1)
+        elif conf.env['build_target'] == 'mingw':
+            conf.define ('FPU_AVX_FMA_SUPPORT', 1)
+        elif conf.env['build_target'] == 'i386' or conf.env['build_target'] == 'i686' or conf.env['build_target'] == 'x86_64':
+            conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m128 a; _mm_fmadd_ss(a, a, a); return 0; }\n",
+                           features = ['cxx'],
+                           cxxflags = [ conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
+                           mandatory = False,
+                           execute = False,
+                           msg = 'Checking compiler for AVX/FMA intrinsics',
+                           okmsg = 'Found',
+                           errmsg = 'Not supported',
+                           define_name = 'FPU_AVX_FMA_SUPPORT')
 
     if opt.use_libcpp or conf.env['build_host'] in [ 'el_capitan', 'sierra', 'high_sierra', 'mojave', 'catalina' ]:
         cxx_flags.append('--stdlib=libc++')
@@ -1445,6 +1457,7 @@ const char* const ardour_config_info = "\\n\\
     write_config_text('Dr. Mingw', conf.is_defined('HAVE_DRMINGW'))
     write_config_text('FLAC', conf.is_defined('HAVE_FLAC'))
     write_config_text('FPU optimization', opts.fpu_optimization)
+    write_config_text('FPU AVX/FMA support', conf.is_defined('FPU_AVX_FMA_SUPPORT'))
     write_config_text('Freedesktop files', opts.freedesktop)
     write_config_text('Libjack linking', conf.env['libjack_link'])
     write_config_text('Libjack metadata', conf.is_defined ('HAVE_JACK_METADATA'))