Parallelize Disk I/O and RegionFx processing

This commit is contained in:
Robin Gareus 2024-04-30 03:46:39 +02:00
parent 4b0da72bc2
commit 5b9e4fff63
Signed by: rgareus
GPG Key ID: A090BCE02CF57F04
15 changed files with 189 additions and 20 deletions

View File

@ -58,6 +58,7 @@ namespace PBD {
LIBARDOUR_API extern DebugBits FaderPort;
LIBARDOUR_API extern DebugBits GenericMidi;
LIBARDOUR_API extern DebugBits Graph;
LIBARDOUR_API extern DebugBits IOTaskList;
LIBARDOUR_API extern DebugBits LTC;
LIBARDOUR_API extern DebugBits LV2;
LIBARDOUR_API extern DebugBits LV2Automate;

View File

@ -229,9 +229,9 @@ private:
int channel,
bool reversed);
static Sample* _sum_buffer;
static Sample* _mixdown_buffer;
static gain_t* _gain_buffer;
static thread_local Sample* _sum_buffer;
static thread_local Sample* _mixdown_buffer;
static thread_local gain_t* _gain_buffer;
int refill (Sample* sum_buffer, Sample* mixdown_buffer, float* gain_buffer, samplecnt_t fill_level, bool reversed);
int refill_audio (Sample* sum_buffer, Sample* mixdown_buffer, float* gain_buffer, samplecnt_t fill_level, bool reversed);

View File

@ -19,8 +19,12 @@
#ifndef _ardour_io_tasklist_h_
#define _ardour_io_tasklist_h_
#include <atomic>
#include <boost/function.hpp>
#include <vector>
#include <glibmm/threads.h>
#include "pbd/semutils.h"
#include "ardour/libardour_visibility.h"
@ -30,7 +34,7 @@ namespace ARDOUR
class LIBARDOUR_API IOTaskList
{
public:
IOTaskList ();
IOTaskList (uint32_t);
~IOTaskList ();
/** process tasks in list in parallel, wait for them to complete */
@ -38,9 +42,19 @@ public:
void push_back (boost::function<void ()> fn);
private:
static void* _worker_thread (void*);
void io_thread ();
std::vector<boost::function<void ()>> _tasks;
size_t _n_threads;
uint32_t _n_threads;
std::atomic<uint32_t> _n_workers;
std::vector<pthread_t> _workers;
std::atomic <bool> _terminate;
PBD::Semaphore _exec_sem;
PBD::Semaphore _idle_sem;
Glib::Threads::Mutex _tasks_mutex;
};
} // namespace ARDOUR

View File

@ -221,6 +221,7 @@ CONFIG_VARIABLE (std::string, sample_lib_path, "sample-lib-path", "") /* custom
CONFIG_VARIABLE (bool, allow_special_bus_removal, "allow-special-bus-removal", false)
CONFIG_VARIABLE (int32_t, processor_usage, "processor-usage", -1)
CONFIG_VARIABLE (int32_t, cpu_dma_latency, "cpu-dma-latency", -1) /* >=0 to enable */
CONFIG_VARIABLE (int32_t, io_thread_count, "io-thread-count", -2)
CONFIG_VARIABLE (gain_t, max_gain, "max-gain", 2.0) /* +6.0dB */
CONFIG_VARIABLE (uint32_t, max_recent_sessions, "max-recent-sessions", 10)
CONFIG_VARIABLE (uint32_t, max_recent_templates, "max-recent-templates", 10)

View File

@ -109,6 +109,7 @@ LIBARDOUR_API const char* native_header_format_extension (ARDOUR::HeaderFormat,
LIBARDOUR_API bool matching_unsuffixed_filename_exists_in (const std::string& dir, const std::string& name);
LIBARDOUR_API uint32_t how_many_dsp_threads ();
LIBARDOUR_API uint32_t how_many_io_threads ();
LIBARDOUR_API std::string compute_sha1_of_file (std::string path);

View File

@ -172,9 +172,14 @@ Butler::_thread_work (void* arg)
{
SessionEvent::create_per_thread_pool ("butler events", 4096);
pthread_set_name (X_("butler"));
/* get thread buffers for RegionFx */
ARDOUR::ProcessThread* pt = new ProcessThread ();
pt->get_buffers ();
DiskReader::allocate_working_buffers ();
void* rv = ((Butler*)arg)->thread_work ();
DiskReader::free_working_buffers ();
pt->drop_buffers ();
delete pt;
return rv;

View File

@ -53,6 +53,7 @@ PBD::DebugBits PBD::DEBUG::FaderPort = PBD::new_debug_bit ("faderport");
PBD::DebugBits PBD::DEBUG::FaderPort8 = PBD::new_debug_bit ("faderport8");
PBD::DebugBits PBD::DEBUG::GenericMidi = PBD::new_debug_bit ("genericmidi");
PBD::DebugBits PBD::DEBUG::Graph = PBD::new_debug_bit ("graph");
PBD::DebugBits PBD::DEBUG::IOTaskList = PBD::new_debug_bit ("iotasklist");
PBD::DebugBits PBD::DEBUG::LTC = PBD::new_debug_bit ("ltc");
PBD::DebugBits PBD::DEBUG::LV2 = PBD::new_debug_bit ("lv2");
PBD::DebugBits PBD::DEBUG::LV2Automate = PBD::new_debug_bit ("lv2automate");

View File

@ -49,9 +49,9 @@ using namespace std;
ARDOUR::samplecnt_t DiskReader::_chunk_samples = default_chunk_samples ();
PBD::Signal0<void> DiskReader::Underrun;
Sample* DiskReader::_sum_buffer = 0;
Sample* DiskReader::_mixdown_buffer = 0;
gain_t* DiskReader::_gain_buffer = 0;
thread_local Sample* DiskReader::_sum_buffer = 0;
thread_local Sample* DiskReader::_mixdown_buffer = 0;
thread_local gain_t* DiskReader::_gain_buffer = 0;
std::atomic<int> DiskReader::_no_disk_output (0);
DiskReader::Declicker DiskReader::loop_declick_in;
DiskReader::Declicker DiskReader::loop_declick_out;

View File

@ -734,11 +734,14 @@ ARDOUR::init (bool try_optimization, const char* localedir, bool with_gui)
* each cycle). Session Export uses one, and the GUI requires
* buffers (for plugin-analysis, auditioner updates) but not
* concurrently.
* Last but not least, the butler needs one for RegionFX.
*
* In theory (hw + 4) should be sufficient, let's add one for luck.
* Last but not least, the butler needs one for RegionFX for
* each I/O thread (up to hardware_concurrency) and one for itself
* (butler's main thread).
*
* In theory (2 * hw + 4) should be sufficient, let's add one for luck.
*/
BufferManager::init (hardware_concurrency () + 5);
BufferManager::init (hardware_concurrency () * 2 + 5);
PannerManager::instance ().discover_panners ();

View File

@ -16,19 +16,72 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifdef HAVE_IOPRIO
#include <sys/syscall.h>
#endif
#include "pbd/compose.h"
#include "pbd/cpus.h"
#include "pbd/debug.h"
#include "pbd/failed_constructor.h"
#include "pbd/pthread_utils.h"
#include "temporal/tempo.h"
#include "ardour/debug.h"
#include "ardour/disk_reader.h"
#include "ardour/io_tasklist.h"
#include "ardour/process_thread.h"
#include "ardour/session_event.h"
using namespace ARDOUR;
IOTaskList::IOTaskList ()
: _n_threads (0)
IOTaskList::IOTaskList (uint32_t n_threads)
: _n_threads (n_threads)
, _terminate (false)
, _exec_sem ("io thread exec", 0)
, _idle_sem ("io thread idle", 0)
{
assert (n_threads <= hardware_concurrency ());
if (n_threads < 2) {
return;
}
pthread_attr_t attr;
struct sched_param parm;
parm.sched_priority = pbd_absolute_rt_priority (SCHED_RR, pbd_pthread_priority (THREAD_IO));
pthread_attr_init (&attr);
#ifdef PLATFORM_WINDOWS
pthread_attr_setschedpolicy (&attr, SCHED_OTHER);
#else
pthread_attr_setschedpolicy (&attr, SCHED_RR);
#endif
pthread_attr_setschedparam (&attr, &parm);
pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
DEBUG_TRACE (PBD::DEBUG::IOTaskList, string_compose ("IOTaskList starting %1 threads with priority = %2\n", _n_threads, parm.sched_priority));
_workers.resize (_n_threads);
for (uint32_t i = 0; i < _n_threads; ++i) {
if (pthread_create (&_workers[i], &attr, &_worker_thread, this)) {
throw failed_constructor ();
}
}
pthread_attr_destroy (&attr);
}
IOTaskList::~IOTaskList ()
{
_terminate.store (true);
for (size_t i = 0; i < _workers.size (); ++i) {
_exec_sem.signal ();
}
for (auto const& t : _workers) {
pthread_join (t, NULL);
}
}
void
@ -41,13 +94,78 @@ void
IOTaskList::process ()
{
assert (strcmp (pthread_name (), "butler") == 0);
//std::cout << "IOTaskList::process " << pthread_name () << " " << _tasks.size () << "\n";
if (_n_threads > 1 && _tasks.size () > 2) {
// TODO
uint32_t wakeup = std::min<uint32_t> (_n_threads, _tasks.size ());
DEBUG_TRACE (PBD::DEBUG::IOTaskList, string_compose ("IOTaskList process wakeup %1 thread for %2 tasks.\n", wakeup, _tasks.size ()))
for (uint32_t i = 0; i < wakeup; ++i) {
_exec_sem.signal ();
}
for (uint32_t i = 0; i < wakeup; ++i) {
_idle_sem.wait ();
}
} else {
DEBUG_TRACE (PBD::DEBUG::IOTaskList, string_compose ("IOTaskList process %1 task(s) in main thread.\n", _tasks.size ()))
for (auto const& fn : _tasks) {
fn ();
}
}
_tasks.clear ();
}
void*
IOTaskList::_worker_thread (void* me)
{
IOTaskList* self = static_cast<IOTaskList*> (me);
uint32_t id = self->_n_workers.fetch_add (1);
char name[64];
snprintf (name, 64, "IO-%u-%p", id, (void*)DEBUG_THREAD_SELF);
pthread_set_name (name);
SessionEvent::create_per_thread_pool (name, 64);
PBD::notify_event_loops_about_thread_creation (pthread_self (), name, 64);
DiskReader::allocate_working_buffers ();
ARDOUR::ProcessThread* pt = new ProcessThread ();
pt->get_buffers ();
#ifdef HAVE_IOPRIO
/* compare to Butler::_thread_work */
// ioprio_set (IOPRIO_WHO_PROCESS, 0 /*calling thread*/, IOPRIO_PRIO_VALUE (IOPRIO_CLASS_RT, 4))
syscall (SYS_ioprio_set, 1, 0, (1 << 13) | 4);
#endif
self->io_thread ();
DiskReader::free_working_buffers ();
pt->drop_buffers ();
delete pt;
return 0;
}
void
IOTaskList::io_thread ()
{
while (1) {
_exec_sem.wait ();
if (_terminate.load ()) {
break;
}
Temporal::TempoMap::fetch ();
while (1) {
boost::function<void()> fn;
Glib::Threads::Mutex::Lock lm (_tasks_mutex);
if (_tasks.empty ()) {
break;
}
fn = _tasks.back ();
_tasks.pop_back ();
lm.release ();
fn ();
}
_idle_sem.signal ();
}
}

View File

@ -590,7 +590,8 @@ Session::immediately_post_engine ()
_process_graph.reset (new Graph (*this));
_rt_tasklist.reset (new RTTaskList (_process_graph));
_io_tasklist.reset (new IOTaskList ());
_io_tasklist.reset (new IOTaskList (how_many_io_threads ()));
/* every time we reconnect, recompute worst case output latencies */
@ -775,8 +776,6 @@ Session::destroy ()
_bundles.flush ();
_io_plugins.flush ();
DiskReader::free_working_buffers();
/* tell everyone who is still standing that we're about to die */
drop_references ();

View File

@ -272,7 +272,6 @@ Session::post_engine_init ()
_engine.GraphReordered.connect_same_thread (*this, boost::bind (&Session::graph_reordered, this, true));
_engine.MidiSelectionPortsChanged.connect_same_thread (*this, boost::bind (&Session::rewire_midi_selection_ports, this));
DiskReader::allocate_working_buffers();
refresh_disk_space ();
/* we're finally ready to call set_state() ... all objects have

View File

@ -696,6 +696,25 @@ ARDOUR::how_many_dsp_threads ()
return num_threads;
}
uint32_t
ARDOUR::how_many_io_threads ()
{
int num_cpu = hardware_concurrency();
int pu = Config->get_io_thread_count ();
uint32_t num_threads = max (num_cpu - 2, 2);
if (pu < 0) {
if (-pu < num_cpu) {
num_threads = num_cpu + pu;
}
} else if (pu == 0) {
num_threads = num_cpu;
} else {
num_threads = min (num_cpu, pu);
}
return num_threads;
}
double
ARDOUR::gain_to_slider_position_with_max (double g, double max_gain)
{

View File

@ -68,7 +68,8 @@ LIBPBD_API void pthread_set_name (const char* name);
enum PBDThreadClass {
THREAD_MAIN, // main audio I/O thread
THREAD_MIDI, // MIDI I/O threads
THREAD_PROC // realtime worker
THREAD_PROC, // realtime worker
THREAD_IO // non-realtime I/O
};
LIBPBD_API int pbd_pthread_priority (PBDThreadClass);

View File

@ -280,6 +280,11 @@ pbd_pthread_priority (PBDThreadClass which)
default:
case THREAD_PROC:
return -2;
case THREAD_IO:
/* https://github.com/mingw-w64/mingw-w64/blob/master/mingw-w64-libraries/winpthreads/src/sched.c
* -> THREAD_PRIORITY_HIGHEST
*/
return -13;
}
#else
int base = -20;
@ -299,6 +304,8 @@ pbd_pthread_priority (PBDThreadClass which)
default:
case THREAD_PROC:
return base - 2;
case THREAD_IO:
return base - 10;
}
#endif
}