Integrate AVX512F support into Ardour

The current implementation is just a stub of AVX and is not utilizing AVX512F.
2023-02-02 12:13:27 -05:00 · 2023-02-02 12:13:27 -05:00 · 6b766e41f4
parent bf8fced073
commit 6b766e41f4
4 changed files with 80 additions and 1 deletions
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@ -191,7 +191,24 @@ setup_hardware_optimization (bool try_optimization)
 		FPU* fpu = FPU::instance ();

 #if defined(ARCH_X86) && defined(BUILD_SSE_OPTIMIZATIONS)
-		/* We have AVX-optimized code for Windows and Linux */
+		/* Utilize different optimization routines for various x86 extensions */
+
+#ifdef FPU_AVX512F_SUPPORT
+		if (fpu->has_avx512f ()) {
+			info << "Using AVX512F optimized routines" << endmsg;
+
+			// AVX512F SET
+			compute_peak          = x86_avx512f_compute_peak;
+			find_peaks            = x86_avx512f_find_peaks;
+			apply_gain_to_buffer  = x86_avx512f_apply_gain_to_buffer;
+			mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
+			mix_buffers_no_gain   = x86_avx512f_mix_buffers_no_gain;
+			copy_vector           = x86_avx512f_copy_vector;
+
+			generic_mix_functions = false;
+
+		} else
+#endif

 #ifdef FPU_AVX_FMA_SUPPORT
 		if (fpu->has_fma ()) {
--- a/libs/ardour/test/fpu_test.cc
+++ b/libs/ardour/test/fpu_test.cc
@ -153,6 +153,33 @@ FPUTest::avxTest ()
 	run (align_max);
 }

+void
+FPUTest::avx512fTest () /* Test case: drive the shared run() checks through the x86_avx512f_* routines. */
+{
+	PBD::FPU* fpu = PBD::FPU::instance ();
+	if (!fpu->has_avx512f ()) { /* CPU reports no AVX512F: print a notice and return without asserting anything */
+		printf ("AVX512F is not available at run-time\n");
+		return;
+	}
+
+#if ( defined(__x86_64__) || defined(_M_X64) )
+	size_t align_max = 64; /* 64 bytes == the width of one 512-bit vector */
+#else
+	size_t align_max = 16; /* other targets: only 16-byte alignment is asserted */
+#endif
+	CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test1) % align_max) == 0); /* _test1 must start on an align_max boundary */
+	CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test2) % align_max) == 0); /* same requirement for _test2 */
+
+	compute_peak          = x86_avx512f_compute_peak; /* repoint the DSP function pointers at the AVX512F variants ... */
+	find_peaks            = x86_avx512f_find_peaks;
+	apply_gain_to_buffer  = x86_avx512f_apply_gain_to_buffer;
+	mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
+	mix_buffers_no_gain   = x86_avx512f_mix_buffers_no_gain;
+	copy_vector           = x86_avx512f_copy_vector; /* ... presumably so that run() below exercises them -- confirm against run() */
+
+	run (align_max, FLT_EPSILON); /* NOTE(review): FLT_EPSILON looks like the comparison tolerance run() applies -- confirm */
+}
+
 void
 FPUTest::sseTest ()
 {
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@ -465,16 +465,19 @@ def build(bld):

    avx_sources = []
    fma_sources = []
+    avx512f_sources = []

    if Options.options.fpu_optimization:
        if (bld.env['build_target'] == 'i386' or bld.env['build_target'] == 'i686'):
            obj.source += [ 'sse_functions_xmm.cc', 'sse_functions.s', ]
            avx_sources = [ 'sse_functions_avx_linux.cc' ]
            fma_sources = [ 'x86_functions_fma.cc' ]
+            avx512f_sources = [ 'x86_functions_avx512f.cc' ]
        elif bld.env['build_target'] == 'x86_64':
            obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit.s', ]
            avx_sources = [ 'sse_functions_avx_linux.cc' ]
            fma_sources = [ 'x86_functions_fma.cc' ]
+            avx512f_sources = [ 'x86_functions_avx512f.cc' ]
        elif bld.env['build_target'] == 'mingw':
            # usability of the 64 bit windows assembler depends on the compiler target,
            # not the build host, which in turn can only be inferred from the name
@ -484,6 +487,7 @@ def build(bld):
                obj.source += [ 'sse_functions_64bit_win.s',  'sse_avx_functions_64bit_win.s' ]
                avx_sources = [ 'sse_functions_avx.cc' ]
                fma_sources = [ 'x86_functions_fma.cc' ]
+                avx512f_sources = [ 'x86_functions_avx512f.cc' ]
        elif bld.env['build_target'] == 'aarch64':
            obj.source += ['arm_neon_functions.cc']
            obj.defines += [ 'ARM_NEON_SUPPORT' ]
@ -537,6 +541,24 @@ def build(bld):
            obj.use += ['sse_fma_functions' ]
            obj.defines += [ 'FPU_AVX_FMA_SUPPORT' ]

+        if bld.is_defined('FPU_AVX512F_SUPPORT') and avx512f_sources:
+            avx512f_cxxflags = list(bld.env['CXXFLAGS'])
+            avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx512f'])
+            avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx'])
+            avx512f_cxxflags.append (bld.env['compiler_flags_dict']['pic'])
+            avx512f_cxxflags.append (bld.env['compiler_flags_dict']['fma'])
+
+            bld(features = 'cxx cxxstlib asm',
+                source   = avx512f_sources,
+                cxxflags = avx512f_cxxflags,
+                includes = [ '.' ],
+                use = [ 'libtemporal', 'libpbd', 'libevoral', 'liblua' ],
+                uselib = [ 'GLIBMM', 'XML' ],
+                target   = 'avx512f_functions')
+
+            obj.use += ['avx512f_functions' ]
+            obj.defines += [ 'FPU_AVX512F_SUPPORT' ]
+
    # i18n
    if bld.is_defined('ENABLE_NLS'):
        mo_files = bld.path.ant_glob('po/*.mo')
--- a/wscript
+++ b/wscript
@ -89,6 +89,8 @@ compiler_flags_dictionaries= {
        'attasm': '-masm=att',
        # Flags to make AVX instructions/intrinsics available
        'avx': '-mavx',
+        # Flags to make AVX512F instructions/intrinsics available
+        'avx512f': '-mavx512f',
        # Flags to make FMA instructions/intrinsics available
        'fma': '-mfma',
        # Flags to make ARM/NEON instructions/intrinsics available
@ -519,6 +521,16 @@ int main() { return 0; }''',
            if re.search ('x86_64-w64', str(conf.env['CC'])) is not None:
                conf.define ('FPU_AVX_FMA_SUPPORT', 1)
        elif conf.env['build_target'] == 'i386' or conf.env['build_target'] == 'i686' or conf.env['build_target'] == 'x86_64':
+            conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m512 a = _mm512_setzero_ps(); a = _mm512_fmadd_ps(a, a, a); (void) a; return 0; }\n",  # probe a 512-bit type and intrinsics; a 128-bit FMA fragment says nothing about AVX512F support
+                           features  = ['cxx'],
+                           cxxflags  = [ conf.env['compiler_flags_dict']['avx512f'], conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],  # same ISA flags the avx512f_functions library is compiled with
+                           mandatory = False,  # optional feature: configure carries on without AVX512F when the probe fails
+                           execute   = False,  # compile-only: the build host need not have an AVX512F-capable CPU
+                           msg       = 'Checking compiler for AVX512F intrinsics',
+                           okmsg     = 'Found',
+                           errmsg    = 'Not supported',
+                           define_name = 'FPU_AVX512F_SUPPORT')
+
            conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m128 a; _mm_fmadd_ss(a, a, a); return 0; }\n",
                           features  = ['cxx'],
                           cxxflags  = [ conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
@ -1528,6 +1540,7 @@ const char* const ardour_config_info = "\\n\\
    write_config_text('Dr. Mingw',             conf.is_defined('HAVE_DRMINGW'))
    write_config_text('FLAC',                  conf.is_defined('HAVE_FLAC'))
    write_config_text('FPU optimization',      opts.fpu_optimization)
+    write_config_text('FPU AVX512F support',   conf.is_defined('FPU_AVX512F_SUPPORT'))
    write_config_text('FPU AVX/FMA support',   conf.is_defined('FPU_AVX_FMA_SUPPORT'))
    write_config_text('Futex Semaphore',       conf.is_defined('USE_FUTEX_SEMAPHORE'))
    write_config_text('Freedesktop files',     opts.freedesktop)