13
0

triggerbox: add Chris Cannam's minibpm and use to determine tempo and thus stretch

This commit is contained in:
Paul Davis 2021-10-06 15:20:52 -06:00
parent 1c4e1d01a7
commit d4d4298320
4 changed files with 809 additions and 8 deletions

View File

@ -0,0 +1,114 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
MiniBPM
A fixed-tempo BPM estimator for music audio
Copyright 2012 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for MiniBPM
obtained by agreement with the copyright holders, you may
redistribute and/or modify it under the terms described in that
licence.
If you wish to distribute code using MiniBPM under terms other
than those of the GNU General Public License, you must obtain a
valid commercial licence before doing so.
*/
#ifndef _BQ_MINI_BPM_H_
#define _BQ_MINI_BPM_H_
#include <vector>
namespace breakfastquay {
/**
* A fixed-tempo BPM estimator, self-contained and implemented in a
* single C++ file.
*
* This may be used in two ways: either call estimateTempoOfSamples()
* with a single in-memory buffer of all audio samples, or (if the
* input data is streamed or cannot fit in memory) call process()
* repeatedly with consecutive sample blocks of any size, followed by
* estimateTempo() when all samples have been supplied.
*
* A single channel of audio only may be supplied (multi-channel is
* not supported). To process multi-channel audio, average the
* channels first.
*/
class MiniBPM
{
public:
MiniBPM(float sampleRate);
~MiniBPM();
/**
* Set the range of valid tempi. The default is 55-190bpm.
*/
void setBPMRange(double min, double max);
void getBPMRange(double &min, double &max) const;
/**
* Set the number of beats per bar, if known. If unknown, leave at
* the default (which is 4).
*/
void setBeatsPerBar(int bpb);
int getBeatsPerBar() const;
/**
* Return the estimated tempo in bpm of the music audio in the
* given sequence of samples. nsamples contains the number of
* samples. If the tempo cannot be estimated because the clip is
* too short, return 0.
*
* You should use either this function, or a series of process()
* calls followed by an estimateTempo() call. Do not call both
* process() and estimateTempoOfSamples() on the same estimator
* object.
*/
double estimateTempoOfSamples(const float *samples, int nsamples);
/**
* Supply a single block of audio for processing. The block may be
* of any length. Blocks are assumed to be contiguous
* (i.e. without overlap).
*/
void process(const float *samples, int nsamples);
/**
* Return the estimated tempo in bpm of the music audio in the
* sequence of samples previously supplied through a series of
* calls to process(). If the tempo cannot be estimated because
* the clip is too short, return 0.
*/
double estimateTempo();
/**
* Return all candidate tempi for the last tempo estimation that
* was carried out, in order of likelihood (best first). The value
* returned from estimateTempo or estimateTempoOfSamples will
* therefore be the first in the returned sequence.
*/
std::vector<double> getTempoCandidates() const;
/**
* Prepare the object to carry out another tempo estimation on a
* new audio clip. You can either call this between uses, or
* simply destroy this object and construct a new one.
*/
void reset();
private:
class D;
D *m_d;
};
}
#endif

668
libs/ardour/minibpm.cc Normal file
View File

@ -0,0 +1,668 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
MiniBPM
A fixed-tempo BPM estimator for music audio
Copyright 2012 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for MiniBPM
obtained by agreement with the copyright holders, you may
redistribute and/or modify it under the terms described in that
licence.
If you wish to distribute code using MiniBPM under terms other
than those of the GNU General Public License, you must obtain a
valid commercial licence before doing so.
*/
/*
* Method:
*
* - Take the audio as a sequence of overlapping time-domain
* frames. The frame size is chosen so that, following a Fourier
* transform, the frequency range up to about an octave above
* middle-C would take about half a dozen bins. This is a relatively
* short frame giving quite good time resolution.
*
* - For each frame, extract the low-frequency range into the
* frequency domain (up to a cutoff around 400-500 Hz) using a small
* filterbank. Also extract a single bin from a high frequency range
* (around 9K) for broadband noise, and calculate the overall RMS of
* the frame. (The low-frequency feature is the main contributor to
* tempo estimation, the other two are used as fallbacks if there is
* not enough low-frequency information.) Accumulate sequences of
* framewise spectral difference sums for the frequency domain
* information, and a sequence of the RMS values, across the
* duration of the audio.
*
* - When all audio has been processed, calculate an autocorrelation
* of each of the three features normalised to unity maximum, and
* calculate a weighted sum of the autocorrelations (discarding any
* phase difference between the three signals) with the
* low-frequency feature given the most weight.
*
* - Drag a comb filter across the subset of the summed
* autocorrelation sequence that corresponds to the plausible tempo
* range. Allocate to each lag a weighted sum of its value and those
* of elements around beats-per-bar multiples of its lag.
*
* - Apply a simplistic perceptual weighting filter to prefer tempi
* around 120-130bpm.
*
* - Find the peak of the resulting filtered autocorrelation and
* return its corresponding tempo.
*/
#include "ardour/minibpm.h"
#include <vector>
#include <map>
#include <utility>
#include <cmath>
#ifdef __MSVC__
#define R__ __restrict
#else
#ifdef __GNUC__
#define R__ __restrict__
#else
#define R__
#endif
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
using std::vector;
#include <iostream>
namespace breakfastquay {
class Autocorrelation
{
public:
Autocorrelation(int n, int m) : m_n(n), m_m(m) { }
template <typename T>
void acf(const T *R__ in, T *R__ out) const {
for (int i = 0; i < m_m; ++i) {
out[i] = 0.0;
for (int j = i; j < m_n; ++j) {
out[i] += in[j] * in[j - i];
}
}
}
template <typename T>
void acfUnityNormalised(const T *R__ in, T *R__ out) const {
acf(in, out);
double max = 0.0;
for (int i = 0; i < m_m; ++i) {
out[i] /= m_n - i;
if (out[i] > max) max = out[i];
}
if (max > 0.0) {
for (int i = 0; i < m_m; ++i) {
out[i] /= max;
}
}
}
static int bpmToLag(double bpm, double hopsPerSec) {
return int((60.0 / bpm) * hopsPerSec + 0.5);
}
static double lagToBpm(double lag, double hopsPerSec) {
return (60.0 * hopsPerSec) / lag;
}
private:
int m_n;
int m_m;
};
class FourierFilterbank
{
public:
FourierFilterbank(int n, double fs, double minFreq, double maxFreq,
bool windowed) :
m_n(n), m_fs(fs), m_fmin(minFreq), m_fmax(maxFreq),
m_windowed(windowed)
{
m_binmin = int(floor(n * m_fmin) / fs);
m_binmax = int(ceil(n * m_fmax) / fs);
m_bins = m_binmax - m_binmin + 1;
initFilters();
}
~FourierFilterbank() {
for (int i = 0; i < m_bins; ++i) {
delete[] m_sin[i];
delete[] m_cos[i];
}
delete[] m_sin;
delete[] m_cos;
}
int getOutputSize() const {
return m_bins;
}
void forwardMagnitude(const double *R__ realIn, double *R__ magOut) const {
for (int i = 0; i < m_bins; ++i) {
const double *R__ sin = m_sin[i];
const double *R__ cos = m_cos[i];
double real = 0.0, imag = 0.0;
for (int j = 0; j < m_n; ++j) real += realIn[j] * cos[j];
for (int j = 0; j < m_n; ++j) imag += realIn[j] * sin[j];
magOut[i] = sqrt(real*real + imag*imag);
}
}
private:
int m_n;
double m_fs;
double m_fmin;
double m_fmax;
bool m_windowed;
int m_binmin;
int m_binmax;
int m_bins;
double **m_sin;
double **m_cos;
void initFilters() {
m_sin = new double*[m_bins];
m_cos = new double*[m_bins];
double twopi = M_PI * 2.0;
double win = 1.0;
for (int i = 0; i < m_bins; ++i) {
m_sin[i] = new double[m_n];
m_cos[i] = new double[m_n];
int bin = i + m_binmin;
double delta = (twopi * bin) / m_n;
for (int j = 0; j < m_n; ++j) {
double angle = j * delta;
if (m_windowed) win = 0.5 - 0.5 * cos(twopi * j / m_n);
m_sin[i][j] = sin(angle) * win;
m_cos[i][j] = cos(angle) * win;
}
}
}
};
class ACFCombFilter
{
public:
ACFCombFilter(int beatsPerBar, int minlag, int maxlag, double hopsPerSec) :
m_beatsPerBar(beatsPerBar),
m_min(minlag), m_max(maxlag),
m_hopsPerSec(hopsPerSec) { }
~ACFCombFilter() { }
int getFilteredLength() const {
return m_max - m_min + 1;
}
static void getContributingRange(int lag, int multiple,
int &base, int &count) {
if (multiple == 1) {
base = lag;
count = 1;
} else {
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...
// 0 1 2 4 4 4 8 8 8 8 8 8 16 16 16 16 16 16 16 ...
base = (lag * multiple) - (multiple / 4);
count = (multiple / 4) + (multiple / 2);
}
}
void filter(const double *acf, int acfLength, double *filtered) {
int flen = getFilteredLength();
for (int i = 0; i < flen; ++i) {
filtered[i] = 0.0;
int lag = m_min + i;
int multiple = 1;
int n = 0;
while (1) {
int base, count;
getContributingRange(lag, multiple, base, count);
if (base + count > acfLength) break;
double peak = 0.0;
for (int j = base; j < base + count; ++j) {
if (j == base || acf[j] > peak) {
peak = acf[j];
}
}
filtered[i] += peak;
++n;
if (multiple == 1) multiple = m_beatsPerBar;
else multiple = multiple * 2;
}
filtered[i] /= n;
}
}
double refine(int lag, const double *acf, int acfLength) {
int multiple = 1;
double interpolated = lag;
double total = 0.0;
int n = 0;
while (1) {
int base, count;
getContributingRange(lag, multiple, base, count);
if (base + count > acfLength) break;
double peak = 0.0;
int peakidx = 0;
for (int j = base; j < base + count; ++j) {
if (acf[j] > peak) {
peak = acf[j];
peakidx = j;
}
}
if (peak > 0.0) {
double scaled = double(peakidx) / multiple;
total += scaled;
++n;
}
if (multiple == 1) multiple = m_beatsPerBar;
else multiple = multiple * 2;
}
if (n > 0) {
interpolated = total / n;
}
double bpm = Autocorrelation::lagToBpm(interpolated, m_hopsPerSec);
return bpm;
}
private:
int m_beatsPerBar;
int m_min;
int m_max;
double m_hopsPerSec;
};
class MiniBPM::D
{
public:
double m_minbpm;
double m_maxbpm;
int m_beatsPerBar;
template <typename S, typename T>
void copy(T *R__ t, const S *R__ s, const int n) {
for (int i = 0; i < n; ++i) t[i] = s[i];
}
template <typename T>
void zero(T *R__ t, const int n) {
for (int i = 0; i < n; ++i) t[i] = T(0);
}
template <typename T>
void unityNormalise(T *R__ t, const int n) {
double max = 0.0, min = 0.0;
for (int i = 0; i < n; ++i) {
if (i == 0 || t[i] > max) max = t[i];
if (i == 0 || t[i] < min) min = t[i];
}
if (max > min) {
for (int i = 0; i < n; ++i) {
t[i] = (t[i] - min) / (max - min);
}
}
}
D(float sampleRate) :
m_minbpm(55),
m_maxbpm(190),
m_beatsPerBar(4),
m_inputSampleRate(sampleRate),
m_lfmin(0),
m_lfmax(550),
m_hfmin(9000),
m_hfmax(9001),
m_input(0),
m_partial(0),
m_partialFill(0),
m_frame(0),
m_lfprev(0),
m_hfprev(0)
{
int lfbinmax = 6;
m_blockSize = (m_inputSampleRate * lfbinmax) / m_lfmax;
m_stepSize = m_blockSize / 2;
m_lf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
m_lfmin, m_lfmax, true);
m_hf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
m_hfmin, m_hfmax, true);
int lfsize = m_lf->getOutputSize();
int hfsize = m_hf->getOutputSize();
m_lfprev = new double[lfsize];
for (int i = 0; i < lfsize; ++i) m_lfprev[i] = 0.0;
m_hfprev = new double[hfsize];
for (int i = 0; i < hfsize; ++i) m_hfprev[i] = 0.0;
m_input = new double[m_blockSize];
m_partial = new double[m_stepSize];
int frameSize = std::max(lfsize, hfsize);
m_frame = new double[frameSize];
zero(m_input, m_blockSize);
zero(m_partial, m_stepSize);
zero(m_frame, frameSize);
}
~D()
{
delete m_lf;
delete m_hf;
delete[] m_lfprev;
delete[] m_hfprev;
delete[] m_input;
delete[] m_partial;
delete[] m_frame;
}
double
specdiff(const double *a, const double *b, int n)
{
double tot = 0.0;
for (int i = 0; i < n; ++i) {
tot += sqrt(fabs(a[i]*a[i] - b[i]*b[i]));
}
return tot;
}
double estimateTempoOfSamples(const float *samples, int nsamples)
{
int i = 0;
while (i + m_blockSize < nsamples) {
copy(m_input, samples + i, m_blockSize);
processInputBlock();
i += m_stepSize;
}
return finish();
}
void process(const float *samples, int nsamples)
{
int n = 0;
while (n < nsamples) {
int hole = m_blockSize - m_stepSize;
int remaining = nsamples - n;
if (m_partialFill + remaining < m_stepSize) {
copy(m_partial + m_partialFill, samples + n, remaining);
m_partialFill += remaining;
break;
}
copy(m_input + hole, m_partial, m_partialFill);
int toConsume = m_stepSize - m_partialFill;
copy(m_input + hole + m_partialFill, samples + n, toConsume);
n += toConsume;
m_partialFill = 0;
processInputBlock();
copy(m_input, m_input + m_stepSize, hole);
}
}
double estimateTempo()
{
if (m_partialFill > 0) {
int hole = m_blockSize - m_stepSize;
copy(m_input + hole, m_partial, m_partialFill);
zero(m_input + hole + m_partialFill, m_stepSize - m_partialFill);
m_partialFill = 0;
processInputBlock();
}
return finish();
}
std::vector<double> getTempoCandidates() const
{
return m_candidates;
}
void reset()
{
m_lfdf.clear();
m_hfdf.clear();
m_rms.clear();
m_partialFill = 0;
}
void processInputBlock()
{
double rms = 0.0;
for (int i = 0; i < m_blockSize; ++i) {
rms += m_input[i] * m_input[i];
}
rms = sqrt(rms / m_blockSize);
m_rms.push_back(rms);
int lfsize = m_lf->getOutputSize();
int hfsize = m_hf->getOutputSize();
m_lf->forwardMagnitude(m_input, m_frame);
m_lfdf.push_back(specdiff(m_frame, m_lfprev, lfsize));
copy(m_lfprev, m_frame, lfsize);
m_hf->forwardMagnitude(m_input, m_frame);
m_hfdf.push_back(specdiff(m_frame, m_hfprev, hfsize));
copy(m_hfprev, m_frame, hfsize);
}
double finish()
{
m_candidates.clear();
double hopsPerSec = m_inputSampleRate / m_stepSize;
int dfLength = m_lfdf.size();
// We have no use for any lag beyond 4 bars at minimum bpm
double barPM = m_minbpm / (4 * m_beatsPerBar);
int acfLength = Autocorrelation::bpmToLag(barPM, hopsPerSec);
while (acfLength > dfLength) acfLength /= 2;
Autocorrelation acfcalc(dfLength, acfLength);
double *acf = new double[acfLength];
double *temp = new double[acfLength];
zero(acf, acfLength);
acfcalc.acfUnityNormalised(m_lfdf.data(), temp);
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i];
acfcalc.acfUnityNormalised(m_hfdf.data(), temp);
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.5;
acfcalc.acfUnityNormalised(m_rms.data(), temp);
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.1;
int minlag = Autocorrelation::bpmToLag(m_maxbpm, hopsPerSec);
int maxlag = Autocorrelation::bpmToLag(m_minbpm, hopsPerSec);
if (acfLength < maxlag) {
// Not enough data
return 0.0;
}
ACFCombFilter filter(m_beatsPerBar, minlag, maxlag, hopsPerSec);
int cflen = filter.getFilteredLength();
double *cf = new double[cflen];
filter.filter(acf, acfLength, cf);
unityNormalise(cf, cflen);
for (int i = 0; i < cflen; ++i) {
// perceptual weighting: prefer middling values
double bpm = Autocorrelation::lagToBpm(minlag + i, hopsPerSec);
double weight;
double centre = 130.0;
if (bpm < centre) {
weight = 1.0 - pow(fabs(centre - bpm) / 100.0, 2.4);
} else {
weight = 1.0 - pow(fabs(centre - bpm) / 80.0, 2.4);
}
if (weight < 0.0) weight = 0.0;
cf[i] *= weight;
}
std::multimap<double, int> candidateMap;
for (int i = 1; i + 1 < cflen; ++i) {
if (cf[i] > cf[i-1] && cf[i] > cf[i+1]) {
candidateMap.insert(std::pair<double, int>(cf[i], i));
}
}
if (candidateMap.empty()) {
return 0.0;
}
std::multimap<double, int>::const_iterator ci(candidateMap.end());
while (ci != candidateMap.begin()) {
--ci;
int lag = ci->second + minlag;
double bpm = filter.refine(lag, acf, acfLength);
m_candidates.push_back(bpm);
}
delete[] cf;
delete[] acf;
delete[] temp;
return m_candidates[0];
}
private:
float m_inputSampleRate;
int m_blockSize;
int m_stepSize;
int m_lfmin;
int m_lfmax;
int m_hfmin;
int m_hfmax;
std::vector<double> m_lfdf;
std::vector<double> m_hfdf;
std::vector<double> m_rms;
std::vector<double> m_candidates;
FourierFilterbank *m_lf;
FourierFilterbank *m_hf;
double *m_input;
double *m_partial;
int m_partialFill;
double *m_frame;
double *m_lfprev;
double *m_hfprev;
};
MiniBPM::MiniBPM(float sampleRate) :
m_d(new D(sampleRate))
{
}
MiniBPM::~MiniBPM()
{
delete m_d;
}
void
MiniBPM::setBPMRange(double min, double max)
{
m_d->m_minbpm = min;
m_d->m_maxbpm = max;
}
void
MiniBPM::getBPMRange(double &min, double &max) const
{
min = m_d->m_minbpm;
max = m_d->m_maxbpm;
}
void
MiniBPM::setBeatsPerBar(int bpb)
{
m_d->m_beatsPerBar = bpb;
}
int
MiniBPM::getBeatsPerBar() const
{
return m_d->m_beatsPerBar;
}
double
MiniBPM::estimateTempoOfSamples(const float *samples, int nsamples)
{
return m_d->estimateTempoOfSamples(samples, nsamples);
}
void
MiniBPM::process(const float *samples, int nsamples)
{
m_d->process(samples, nsamples);
}
double
MiniBPM::estimateTempo()
{
return m_d->estimateTempo();
}
std::vector<double>
MiniBPM::getTempoCandidates() const
{
return m_d->getTempoCandidates();
}
void
MiniBPM::reset()
{
m_d->reset();
}
}

View File

@ -16,6 +16,7 @@
#include "ardour/audio_buffer.h"
#include "ardour/debug.h"
#include "ardour/midi_buffer.h"
#include "ardour/minibpm.h"
#include "ardour/region_factory.h"
#include "ardour/session.h"
#include "ardour/session_object.h"
@ -485,6 +486,7 @@ AudioTrigger::set_start (timepos_t const & s)
void
AudioTrigger::set_end (timepos_t const & e)
{
assert (!data.empty());
set_length (timecnt_t (e.samples() - _start_offset, timepos_t (_start_offset)));
}
@ -518,10 +520,6 @@ AudioTrigger::set_length (timecnt_t const & newlen)
boost::shared_ptr<AudioRegion> ar (boost::dynamic_pointer_cast<AudioRegion> (_region));
/* load raw data */
load_data (ar);
if (newlen == _region->length()) {
/* no stretch required */
return;
@ -540,7 +538,7 @@ AudioTrigger::set_length (timecnt_t const & newlen)
if (newlen.time_domain() == AudioTime) {
_stretch = (double) newlen.samples() / data_length;
cerr << "gonna stretch, ratio is " << _stretch << endl;
cerr << "gonna stretch, ratio is " << _stretch << " from " << newlen.samples() << " vs. " << data_length << endl;
} else {
/* XXX what to use for position ??? */
timecnt_t l (newlen, timepos_t (AudioTime));
@ -696,6 +694,10 @@ AudioTrigger::set_region (boost::shared_ptr<Region> r)
set_region_internal (r);
/* load raw data */
load_data (ar);
/* now potentially stretch it to match our tempo.
*
* We do not handle tempo changes at present, and should probably issue
@ -703,9 +705,25 @@ AudioTrigger::set_region (boost::shared_ptr<Region> r)
*/
TempoMap::SharedPtr tm (TempoMap::use());
timecnt_t two_bars = tm->bbt_duration_at (timepos_t (AudioTime), TriggerBox::assumed_trigger_duration());
TempoMetric const & metric (tm->metric_at (timepos_t (AudioTime)));
set_length (two_bars);
double barcnt;
{
breakfastquay::MiniBPM mbpm (_box.session().sample_rate());
mbpm.setBPMRange (metric.tempo().quarter_notes_per_minute () * 0.75, metric.tempo().quarter_notes_per_minute() * 1.5);
double bpm = mbpm.estimateTempoOfSamples (data[0], data_length);
const double seconds = (double) data_length / _box.session().sample_rate();
const double quarters = (seconds / 60.) * bpm;
barcnt = quarters / metric.meter().divisions_per_bar();
}
/* use initial tempo in map (assumed for now to be the only one */
samplecnt_t one_bar = tm->bbt_duration_at (timepos_t (AudioTime), BBT_Offset (1, 0, 0)).samples();
set_length (timecnt_t ((samplecnt_t) round (barcnt) * one_bar));
PropertyChanged (ARDOUR::Properties::name);

View File

@ -147,7 +147,8 @@ libardour_sources = [
'midi_ui.cc',
'mididm.cc',
'midiport_manager.cc',
'mix.cc',
'minibpm.cc',
'mix.cc',
'mode.cc',
'monitor_control.cc',
'monitor_port.cc',