diff --git a/libs/ardour/ardour/minibpm.h b/libs/ardour/ardour/minibpm.h
new file mode 100644
index 0000000000..d7424c23e9
--- /dev/null
+++ b/libs/ardour/ardour/minibpm.h
@@ -0,0 +1,114 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+  MiniBPM
+  A fixed-tempo BPM estimator for music audio
+  Copyright 2012 Particular Programs Ltd.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version. See the file
+  COPYING included with this distribution for more information.
+
+  Alternatively, if you have a valid commercial licence for MiniBPM
+  obtained by agreement with the copyright holders, you may
+  redistribute and/or modify it under the terms described in that
+  licence.
+
+  If you wish to distribute code using MiniBPM under terms other
+  than those of the GNU General Public License, you must obtain a
+  valid commercial licence before doing so.
+*/
+
+#ifndef _BQ_MINI_BPM_H_
+#define _BQ_MINI_BPM_H_
+
+#include <vector>
+
+namespace breakfastquay {
+
+/**
+ * A fixed-tempo BPM estimator, self-contained and implemented in a
+ * single C++ file.
+ *
+ * This may be used in two ways: either call estimateTempoOfSamples()
+ * with a single in-memory buffer of all audio samples, or (if the
+ * input data is streamed or cannot fit in memory) call process()
+ * repeatedly with consecutive sample blocks of any size, followed by
+ * estimateTempo() when all samples have been supplied.
+ *
+ * A single channel of audio only may be supplied (multi-channel is
+ * not supported). To process multi-channel audio, average the
+ * channels first.
+ */
+class MiniBPM
+{
+public:
+    MiniBPM(float sampleRate);
+    ~MiniBPM();
+
+    /**
+     * Set the range of valid tempi. The default is 55-190bpm.
+     */
+    void setBPMRange(double min, double max);
+    void getBPMRange(double &min, double &max) const;
+
+    /**
+     * Set the number of beats per bar, if known. If unknown, leave at
+     * the default (which is 4).
+     */
+    void setBeatsPerBar(int bpb);
+    int getBeatsPerBar() const;
+
+    /**
+     * Return the estimated tempo in bpm of the music audio in the
+     * given sequence of samples. nsamples contains the number of
+     * samples. If the tempo cannot be estimated because the clip is
+     * too short, return 0.
+     *
+     * You should use either this function, or a series of process()
+     * calls followed by an estimateTempo() call. Do not call both
+     * process() and estimateTempoOfSamples() on the same estimator
+     * object.
+     */
+    double estimateTempoOfSamples(const float *samples, int nsamples);
+
+    /**
+     * Supply a single block of audio for processing. The block may be
+     * of any length. Blocks are assumed to be contiguous
+     * (i.e. without overlap).
+     */
+    void process(const float *samples, int nsamples);
+
+    /**
+     * Return the estimated tempo in bpm of the music audio in the
+     * sequence of samples previously supplied through a series of
+     * calls to process(). If the tempo cannot be estimated because
+     * the clip is too short, return 0.
+     */
+    double estimateTempo();
+
+    /**
+     * Return all candidate tempi for the last tempo estimation that
+     * was carried out, in order of likelihood (best first). The value
+     * returned from estimateTempo or estimateTempoOfSamples will
+     * therefore be the first in the returned sequence.
+     */
+    std::vector<double> getTempoCandidates() const;
+
+    /**
+     * Prepare the object to carry out another tempo estimation on a
+     * new audio clip. You can either call this between uses, or
+     * simply destroy this object and construct a new one.
+     */
+    void reset();
+
+private:
+    class D;
+    D *m_d;
+};
+
+}
+
+#endif
diff --git a/libs/ardour/minibpm.cc b/libs/ardour/minibpm.cc
new file mode 100644
index 0000000000..750de68e78
--- /dev/null
+++ b/libs/ardour/minibpm.cc
@@ -0,0 +1,668 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+  MiniBPM
+  A fixed-tempo BPM estimator for music audio
+  Copyright 2012 Particular Programs Ltd.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version. See the file
+  COPYING included with this distribution for more information.
+
+  Alternatively, if you have a valid commercial licence for MiniBPM
+  obtained by agreement with the copyright holders, you may
+  redistribute and/or modify it under the terms described in that
+  licence.
+
+  If you wish to distribute code using MiniBPM under terms other
+  than those of the GNU General Public License, you must obtain a
+  valid commercial licence before doing so.
+*/
+
+/*
+ * Method:
+ *
+ * - Take the audio as a sequence of overlapping time-domain
+ *   frames. The frame size is chosen so that, following a Fourier
+ *   transform, the frequency range up to about an octave above
+ *   middle-C would take about half a dozen bins. This is a relatively
+ *   short frame giving quite good time resolution.
+ *
+ * - For each frame, extract the low-frequency range into the
+ *   frequency domain (up to a cutoff around 400-500 Hz) using a small
+ *   filterbank. Also extract a single bin from a high frequency range
+ *   (around 9K) for broadband noise, and calculate the overall RMS of
+ *   the frame. (The low-frequency feature is the main contributor to
+ *   tempo estimation, the other two are used as fallbacks if there is
+ *   not enough low-frequency information.) Accumulate sequences of
+ *   framewise spectral difference sums for the frequency domain
+ *   information, and a sequence of the RMS values, across the
+ *   duration of the audio.
+ *
+ * - When all audio has been processed, calculate an autocorrelation
+ *   of each of the three features normalised to unity maximum, and
+ *   calculate a weighted sum of the autocorrelations (discarding any
+ *   phase difference between the three signals) with the
+ *   low-frequency feature given the most weight.
+ *
+ * - Drag a comb filter across the subset of the summed
+ *   autocorrelation sequence that corresponds to the plausible tempo
+ *   range. Allocate to each lag a weighted sum of its value and those
+ *   of elements around beats-per-bar multiples of its lag.
+ *
+ * - Apply a simplistic perceptual weighting filter to prefer tempi
+ *   around 120-130bpm.
+ *
+ * - Find the peak of the resulting filtered autocorrelation and
+ *   return its corresponding tempo.
+ */
+
+#include "ardour/minibpm.h"
+
+#include <vector>
+#include <map>
+#include <cstdlib>
+#include <cmath>
+
+#ifdef __MSVC__
+#define R__ __restrict
+#else
+#ifdef __GNUC__
+#define R__ __restrict__
+#else
+#define R__
+#endif
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+using std::vector;
+
+#include <algorithm>
+
+namespace breakfastquay {
+
+class Autocorrelation
+{
+public:
+    Autocorrelation(int n, int m) : m_n(n), m_m(m) { }
+
+    template <typename T>
+    void acf(const T *R__ in, T *R__ out) const {
+        for (int i = 0; i < m_m; ++i) {
+            out[i] = 0.0;
+            for (int j = i; j < m_n; ++j) {
+                out[i] += in[j] * in[j - i];
+            }
+        }
+    }
+
+    template <typename T>
+    void acfUnityNormalised(const T *R__ in, T *R__ out) const {
+
+        acf(in, out);
+
+        double max = 0.0;
+        for (int i = 0; i < m_m; ++i) {
+            out[i] /= m_n - i;
+            if (out[i] > max) max = out[i];
+        }
+        if (max > 0.0) {
+            for (int i = 0; i < m_m; ++i) {
+                out[i] /= max;
+            }
+        }
+    }
+
+    static int bpmToLag(double bpm, double hopsPerSec) {
+        return int((60.0 / bpm) * hopsPerSec + 0.5);
+    }
+    static double lagToBpm(double lag, double hopsPerSec) {
+        return (60.0 * hopsPerSec) / lag;
+    }
+
+private:
+    int m_n;
+    int m_m;
+};
+
+class FourierFilterbank
+{
+public:
+    FourierFilterbank(int n, double fs, double minFreq, double maxFreq,
+                      bool windowed) :
+        m_n(n), m_fs(fs), m_fmin(minFreq), m_fmax(maxFreq),
+        m_windowed(windowed)
+    {
+        m_binmin = int(floor(n * m_fmin) / fs);
+        m_binmax = int(ceil(n * m_fmax) / fs);
+        m_bins = m_binmax - m_binmin + 1;
+        initFilters();
+    }
+
+    ~FourierFilterbank() {
+        for (int i = 0; i < m_bins; ++i) {
+            delete[] m_sin[i];
+            delete[] m_cos[i];
+        }
+        delete[] m_sin;
+        delete[] m_cos;
+    }
+
+    int getOutputSize() const {
+        return m_bins;
+    }
+
+    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) const {
+        for (int i = 0; i < m_bins; ++i) {
+            const double *R__ sin = m_sin[i];
+            const double *R__ cos = m_cos[i];
+            double real = 0.0, imag = 0.0;
+            for (int j = 0; j < m_n; ++j) real += realIn[j] * cos[j];
+            for (int j = 0; j < m_n; ++j) imag += realIn[j] * sin[j];
+            magOut[i] = sqrt(real*real + imag*imag);
+        }
+    }
+
+private:
+    int m_n;
+    double m_fs;
+    double m_fmin;
+    double m_fmax;
+    bool m_windowed;
+    int m_binmin;
+    int m_binmax;
+    int m_bins;
+
+    double **m_sin;
+    double **m_cos;
+
+    void initFilters() {
+        m_sin = new double*[m_bins];
+        m_cos = new double*[m_bins];
+        double twopi = M_PI * 2.0;
+        double win = 1.0;
+        for (int i = 0; i < m_bins; ++i) {
+            m_sin[i] = new double[m_n];
+            m_cos[i] = new double[m_n];
+            int bin = i + m_binmin;
+            double delta = (twopi * bin) / m_n;
+            for (int j = 0; j < m_n; ++j) {
+                double angle = j * delta;
+                if (m_windowed) win = 0.5 - 0.5 * cos(twopi * j / m_n);
+                m_sin[i][j] = sin(angle) * win;
+                m_cos[i][j] = cos(angle) * win;
+            }
+        }
+    }
+};
+
+class ACFCombFilter
+{
+public:
+    ACFCombFilter(int beatsPerBar, int minlag, int maxlag, double hopsPerSec) :
+        m_beatsPerBar(beatsPerBar),
+        m_min(minlag), m_max(maxlag),
+        m_hopsPerSec(hopsPerSec) { }
+    ~ACFCombFilter() { }
+
+    int getFilteredLength() const {
+        return m_max - m_min + 1;
+    }
+
+    static void getContributingRange(int lag, int multiple,
+                                     int &base, int &count) {
+        if (multiple == 1) {
+            base = lag;
+            count = 1;
+        } else {
+            // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...
+            // 0 1 2 4 4 4 8 8 8 8 8 8 16 16 16 16 16 16 16 ...
+            base = (lag * multiple) - (multiple / 4);
+            count = (multiple / 4) + (multiple / 2);
+        }
+    }
+
+    void filter(const double *acf, int acfLength, double *filtered) {
+
+        int flen = getFilteredLength();
+
+        for (int i = 0; i < flen; ++i) {
+
+            filtered[i] = 0.0;
+
+            int lag = m_min + i;
+            int multiple = 1;
+            int n = 0;
+
+            while (1) {
+
+                int base, count;
+                getContributingRange(lag, multiple, base, count);
+                if (base + count > acfLength) break;
+
+                double peak = 0.0;
+                for (int j = base; j < base + count; ++j) {
+                    if (j == base || acf[j] > peak) {
+                        peak = acf[j];
+                    }
+                }
+                filtered[i] += peak;
+                ++n;
+
+                if (multiple == 1) multiple = m_beatsPerBar;
+                else multiple = multiple * 2;
+            }
+
+            filtered[i] /= n;
+        }
+    }
+
+    double refine(int lag, const double *acf, int acfLength) {
+
+        int multiple = 1;
+        double interpolated = lag;
+
+        double total = 0.0;
+        int n = 0;
+
+        while (1) {
+
+            int base, count;
+            getContributingRange(lag, multiple, base, count);
+
+            if (base + count > acfLength) break;
+
+            double peak = 0.0;
+            int peakidx = 0;
+            for (int j = base; j < base + count; ++j) {
+                if (acf[j] > peak) {
+                    peak = acf[j];
+                    peakidx = j;
+                }
+            }
+
+            if (peak > 0.0) {
+                double scaled = double(peakidx) / multiple;
+                total += scaled;
+                ++n;
+            }
+
+            if (multiple == 1) multiple = m_beatsPerBar;
+            else multiple = multiple * 2;
+        }
+
+        if (n > 0) {
+            interpolated = total / n;
+        }
+
+        double bpm = Autocorrelation::lagToBpm(interpolated, m_hopsPerSec);
+        return bpm;
+    }
+
+private:
+    int m_beatsPerBar;
+    int m_min;
+    int m_max;
+    double m_hopsPerSec;
+};
+
+class MiniBPM::D
+{
+public:
+    double m_minbpm;
+    double m_maxbpm;
+    int m_beatsPerBar;
+
+    template <typename T, typename S>
+    void copy(T *R__ t, const S *R__ s, const int n) {
+        for (int i = 0; i < n; ++i) t[i] = s[i];
+    }
+    template <typename T>
+    void zero(T *R__ t, const int n) {
+        for (int i = 0; i < n; ++i) t[i] = T(0);
+    }
+    template <typename T>
+    void unityNormalise(T *R__ t, const int n) {
+        double max = 0.0, min = 0.0;
+        for (int i = 0; i < n; ++i) {
+            if (i == 0 || t[i] > max) max = t[i];
+            if (i == 0 || t[i] < min) min = t[i];
+        }
+        if (max > min) {
+            for (int i = 0; i < n; ++i) {
+                t[i] = (t[i] - min) / (max - min);
+            }
+        }
+    }
+
+    D(float sampleRate) :
+        m_minbpm(55),
+        m_maxbpm(190),
+        m_beatsPerBar(4),
+        m_inputSampleRate(sampleRate),
+        m_lfmin(0),
+        m_lfmax(550),
+        m_hfmin(9000),
+        m_hfmax(9001),
+        m_input(0),
+        m_partial(0),
+        m_partialFill(0),
+        m_frame(0),
+        m_lfprev(0),
+        m_hfprev(0)
+    {
+        int lfbinmax = 6;
+        m_blockSize = (m_inputSampleRate * lfbinmax) / m_lfmax;
+        m_stepSize = m_blockSize / 2;
+
+        m_lf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
+                                     m_lfmin, m_lfmax, true);
+
+        m_hf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
+                                     m_hfmin, m_hfmax, true);
+
+        int lfsize = m_lf->getOutputSize();
+        int hfsize = m_hf->getOutputSize();
+
+        m_lfprev = new double[lfsize];
+        for (int i = 0; i < lfsize; ++i) m_lfprev[i] = 0.0;
+
+        m_hfprev = new double[hfsize];
+        for (int i = 0; i < hfsize; ++i) m_hfprev[i] = 0.0;
+
+        m_input = new double[m_blockSize];
+        m_partial = new double[m_stepSize];
+
+        int frameSize = std::max(lfsize, hfsize);
+        m_frame = new double[frameSize];
+
+        zero(m_input, m_blockSize);
+        zero(m_partial, m_stepSize);
+        zero(m_frame, frameSize);
+    }
+
+    ~D()
+    {
+        delete m_lf;
+        delete m_hf;
+        delete[] m_lfprev;
+        delete[] m_hfprev;
+        delete[] m_input;
+        delete[] m_partial;
+        delete[] m_frame;
+    }
+
+    double
+    specdiff(const double *a, const double *b, int n)
+    {
+        double tot = 0.0;
+        for (int i = 0; i < n; ++i) {
+            tot += sqrt(fabs(a[i]*a[i] -
+                             b[i]*b[i]));
+        }
+        return tot;
+    }
+
+    double estimateTempoOfSamples(const float *samples, int nsamples)
+    {
+        int i = 0;
+        while (i + m_blockSize < nsamples) {
+            copy(m_input, samples + i, m_blockSize);
+            processInputBlock();
+            i += m_stepSize;
+        }
+        return finish();
+    }
+
+    void process(const float *samples, int nsamples)
+    {
+        int n = 0;
+        while (n < nsamples) {
+            int hole = m_blockSize - m_stepSize;
+            int remaining = nsamples - n;
+            if (m_partialFill + remaining < m_stepSize) {
+                copy(m_partial + m_partialFill, samples + n, remaining);
+                m_partialFill += remaining;
+                break;
+            }
+            copy(m_input + hole, m_partial, m_partialFill);
+            int toConsume = m_stepSize - m_partialFill;
+            copy(m_input + hole + m_partialFill, samples + n, toConsume);
+            n += toConsume;
+            m_partialFill = 0;
+            processInputBlock();
+            copy(m_input, m_input + m_stepSize, hole);
+        }
+    }
+
+    double estimateTempo()
+    {
+        if (m_partialFill > 0) {
+            int hole = m_blockSize - m_stepSize;
+            copy(m_input + hole, m_partial, m_partialFill);
+            zero(m_input + hole + m_partialFill, m_stepSize - m_partialFill);
+            m_partialFill = 0;
+            processInputBlock();
+        }
+        return finish();
+    }
+
+    std::vector<double> getTempoCandidates() const
+    {
+        return m_candidates;
+    }
+
+    void reset()
+    {
+        m_lfdf.clear();
+        m_hfdf.clear();
+        m_rms.clear();
+        m_partialFill = 0;
+    }
+
+    void processInputBlock()
+    {
+        double rms = 0.0;
+
+        for (int i = 0; i < m_blockSize; ++i) {
+            rms += m_input[i] * m_input[i];
+        }
+
+        rms = sqrt(rms / m_blockSize);
+        m_rms.push_back(rms);
+
+        int lfsize = m_lf->getOutputSize();
+        int hfsize = m_hf->getOutputSize();
+
+        m_lf->forwardMagnitude(m_input, m_frame);
+        m_lfdf.push_back(specdiff(m_frame, m_lfprev, lfsize));
+        copy(m_lfprev, m_frame, lfsize);
+
+        m_hf->forwardMagnitude(m_input, m_frame);
+        m_hfdf.push_back(specdiff(m_frame, m_hfprev, hfsize));
+        copy(m_hfprev, m_frame, hfsize);
+    }
+
+    double finish()
+    {
+        m_candidates.clear();
+
+        double hopsPerSec = m_inputSampleRate / m_stepSize;
+        int dfLength = m_lfdf.size();
+
+        // We have no use for any lag beyond 4 bars at minimum bpm
+        double barPM = m_minbpm / (4 * m_beatsPerBar);
+        int acfLength = Autocorrelation::bpmToLag(barPM, hopsPerSec);
+        while (acfLength > dfLength) acfLength /= 2;
+
+        Autocorrelation acfcalc(dfLength, acfLength);
+
+        double *acf = new double[acfLength];
+        double *temp = new double[acfLength];
+
+        zero(acf, acfLength);
+
+        acfcalc.acfUnityNormalised(m_lfdf.data(), temp);
+        for (int i = 0; i < acfLength; ++i) acf[i] += temp[i];
+
+        acfcalc.acfUnityNormalised(m_hfdf.data(), temp);
+        for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.5;
+
+        acfcalc.acfUnityNormalised(m_rms.data(), temp);
+        for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.1;
+
+        int minlag = Autocorrelation::bpmToLag(m_maxbpm, hopsPerSec);
+        int maxlag = Autocorrelation::bpmToLag(m_minbpm, hopsPerSec);
+
+        if (acfLength < maxlag) {
+            // Not enough data
+            return 0.0;
+        }
+
+        ACFCombFilter filter(m_beatsPerBar, minlag, maxlag, hopsPerSec);
+        int cflen = filter.getFilteredLength();
+        double *cf = new double[cflen];
+        filter.filter(acf, acfLength, cf);
+        unityNormalise(cf, cflen);
+
+        for (int i = 0; i < cflen; ++i) {
+            // perceptual weighting: prefer middling values
+            double bpm = Autocorrelation::lagToBpm(minlag + i, hopsPerSec);
+            double weight;
+            double centre = 130.0;
+            if (bpm < centre) {
+                weight = 1.0 - pow(fabs(centre - bpm) / 100.0, 2.4);
+            } else {
+                weight = 1.0 - pow(fabs(centre - bpm) / 80.0, 2.4);
+            }
+            if (weight < 0.0) weight = 0.0;
+            cf[i] *= weight;
+        }
+
+        std::multimap<double, int> candidateMap;
+        for (int i = 1; i + 1 < cflen; ++i) {
+            if (cf[i] > cf[i-1] && cf[i] > cf[i+1]) {
+                candidateMap.insert(std::pair<double, int>(cf[i], i));
+            }
+        }
+
+        if (candidateMap.empty()) {
+            return 0.0;
+        }
+
+        std::multimap<double, int>::const_iterator ci(candidateMap.end());
+        while (ci != candidateMap.begin()) {
+            --ci;
+            int lag = ci->second + minlag;
+            double bpm = filter.refine(lag, acf, acfLength);
+            m_candidates.push_back(bpm);
+        }
+
+        delete[] cf;
+        delete[] acf;
+        delete[] temp;
+
+        return m_candidates[0];
+    }
+
+
+private:
+    float m_inputSampleRate;
+    int m_blockSize;
+    int m_stepSize;
+    int m_lfmin;
+    int m_lfmax;
+    int m_hfmin;
+    int m_hfmax;
+
+    std::vector<double> m_lfdf;
+    std::vector<double> m_hfdf;
+    std::vector<double> m_rms;
+
+    std::vector<double> m_candidates;
+
+    FourierFilterbank *m_lf;
+    FourierFilterbank *m_hf;
+
+    double *m_input;
+    double *m_partial;
+    int m_partialFill;
+
+    double *m_frame;
+    double *m_lfprev;
+    double *m_hfprev;
+};
+
+MiniBPM::MiniBPM(float sampleRate) :
+    m_d(new D(sampleRate))
+{
+}
+
+MiniBPM::~MiniBPM()
+{
+    delete m_d;
+}
+
+void
+MiniBPM::setBPMRange(double min, double max)
+{
+    m_d->m_minbpm = min;
+    m_d->m_maxbpm = max;
+}
+
+void
+MiniBPM::getBPMRange(double &min, double &max) const
+{
+    min = m_d->m_minbpm;
+    max = m_d->m_maxbpm;
+}
+
+void
+MiniBPM::setBeatsPerBar(int bpb)
+{
+    m_d->m_beatsPerBar = bpb;
+}
+
+int
+MiniBPM::getBeatsPerBar() const
+{
+    return m_d->m_beatsPerBar;
+}
+
+double
+MiniBPM::estimateTempoOfSamples(const float *samples, int nsamples)
+{
+    return m_d->estimateTempoOfSamples(samples, nsamples);
+}
+
+void
+MiniBPM::process(const float *samples, int nsamples)
+{
+    m_d->process(samples, nsamples);
+}
+
+double
+MiniBPM::estimateTempo()
+{
+    return m_d->estimateTempo();
+}
+
+std::vector<double>
+MiniBPM::getTempoCandidates() const
+{
+    return m_d->getTempoCandidates();
+}
+
+void
+MiniBPM::reset()
+{
+    m_d->reset();
+}
+
+}
+
+
diff --git a/libs/ardour/triggerbox.cc b/libs/ardour/triggerbox.cc
index 44762d0b20..9dea409201 100644
--- a/libs/ardour/triggerbox.cc
+++ b/libs/ardour/triggerbox.cc
@@ -16,6 +16,7 @@
 #include "ardour/audio_buffer.h"
 #include "ardour/debug.h"
 #include "ardour/midi_buffer.h"
+#include "ardour/minibpm.h"
 #include "ardour/region_factory.h"
 #include "ardour/session.h"
 #include "ardour/session_object.h"
@@ -485,6 +486,7 @@ AudioTrigger::set_start (timepos_t const & s)
 
 void
 AudioTrigger::set_end (timepos_t const & e)
 {
+	assert (!data.empty());
 	set_length (timecnt_t (e.samples() - _start_offset, timepos_t (_start_offset)));
 }
@@ -518,10 +520,6 @@ AudioTrigger::set_length (timecnt_t const & newlen)
 	boost::shared_ptr<AudioRegion> ar (boost::dynamic_pointer_cast<AudioRegion> (_region));
 
-	/* load raw data */
-
-	load_data (ar);
-
 	if (newlen == _region->length()) {
 		/* no stretch required */
 		return;
 	}
@@ -540,7 +538,7 @@ AudioTrigger::set_length (timecnt_t const & newlen)
 
 	if (newlen.time_domain() == AudioTime) {
 		_stretch = (double) newlen.samples() / data_length;
-		cerr << "gonna stretch, ratio is " << _stretch << endl;
+		cerr << "gonna stretch, ratio is " << _stretch << " from " << newlen.samples() << " vs. " << data_length << endl;
 	} else {
 		/* XXX what to use for position ??? */
 		timecnt_t l (newlen, timepos_t (AudioTime));
@@ -696,6 +694,10 @@ AudioTrigger::set_region (boost::shared_ptr<Region> r)
 
 	set_region_internal (r);
 
+	/* load raw data */
+
+	load_data (ar);
+
 	/* now potentially stretch it to match our tempo.
 	 *
 	 * We do not handle tempo changes at present, and should probably issue
@@ -703,9 +705,25 @@ AudioTrigger::set_region (boost::shared_ptr<Region> r)
 	 */
 
 	TempoMap::SharedPtr tm (TempoMap::use());
-	timecnt_t two_bars = tm->bbt_duration_at (timepos_t (AudioTime), TriggerBox::assumed_trigger_duration());
+	TempoMetric const & metric (tm->metric_at (timepos_t (AudioTime)));
 
-	set_length (two_bars);
+	double barcnt;
+
+	{
+		breakfastquay::MiniBPM mbpm (_box.session().sample_rate());
+
+		mbpm.setBPMRange (metric.tempo().quarter_notes_per_minute () * 0.75, metric.tempo().quarter_notes_per_minute() * 1.5);
+		double bpm = mbpm.estimateTempoOfSamples (data[0], data_length);
+		const double seconds = (double) data_length / _box.session().sample_rate();
+		const double quarters = (seconds / 60.) * bpm;
+		barcnt = quarters / metric.meter().divisions_per_bar();
+	}
+
+	/* use initial tempo in map (assumed for now to be the only one) */
+
+	samplecnt_t one_bar = tm->bbt_duration_at (timepos_t (AudioTime), BBT_Offset (1, 0, 0)).samples();
+
+	set_length (timecnt_t ((samplecnt_t) round (barcnt) * one_bar));
 
 	PropertyChanged (ARDOUR::Properties::name);
 
diff --git a/libs/ardour/wscript b/libs/ardour/wscript
index 0bfcc31b57..4be9caae82 100644
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@@ -147,7 +147,8 @@ libardour_sources = [
         'midi_ui.cc',
         'mididm.cc',
        'midiport_manager.cc',
-        'mix.cc',
+        'minibpm.cc',
+        'mix.cc',
         'mode.cc',
         'monitor_control.cc',
         'monitor_port.cc',
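Note: the estimator added above is self-contained and has no Ardour dependencies, so it can be exercised on its own. A minimal usage sketch against the API declared in minibpm.h follows; the helper name and its buffer/sample-rate arguments are illustrative assumptions, not part of this patch.

    #include "ardour/minibpm.h"

    #include <vector>

    /* Hypothetical helper, not part of the patch: estimate the tempo of a
     * mono clip held entirely in memory. Returns 0 if the clip is too
     * short for an estimate, as documented in the header above.
     */
    static double
    estimate_clip_bpm (const std::vector<float>& mono, float sample_rate)
    {
        breakfastquay::MiniBPM mbpm (sample_rate);

        mbpm.setBPMRange (55.0, 190.0); /* the documented default range */
        mbpm.setBeatsPerBar (4);

        /* one-shot path; for streamed input call process() per block,
         * then estimateTempo() once all blocks have been supplied
         */
        return mbpm.estimateTempoOfSamples (mono.data(), (int) mono.size());
    }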