674 lines
16 KiB
C++
674 lines
16 KiB
C++
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
|
|
|
/*
|
|
MiniBPM
|
|
A fixed-tempo BPM estimator for music audio
|
|
Copyright 2012 Particular Programs Ltd.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version. See the file
|
|
COPYING included with this distribution for more information.
|
|
|
|
Alternatively, if you have a valid commercial licence for MiniBPM
|
|
obtained by agreement with the copyright holders, you may
|
|
redistribute and/or modify it under the terms described in that
|
|
licence.
|
|
|
|
If you wish to distribute code using MiniBPM under terms other
|
|
than those of the GNU General Public License, you must obtain a
|
|
valid commercial licence before doing so.
|
|
*/
|
|
|
|
/*
|
|
* Method:
|
|
*
|
|
* - Take the audio as a sequence of overlapping time-domain
|
|
* frames. The frame size is chosen so that, following a Fourier
|
|
* transform, the frequency range up to about an octave above
|
|
* middle-C would take about half a dozen bins. This is a relatively
|
|
* short frame giving quite good time resolution.
|
|
*
|
|
* - For each frame, extract the low-frequency range into the
|
|
* frequency domain (up to a cutoff around 400-500 Hz) using a small
|
|
* filterbank. Also extract a single bin from a high frequency range
|
|
* (around 9K) for broadband noise, and calculate the overall RMS of
|
|
* the frame. (The low-frequency feature is the main contributor to
|
|
* tempo estimation, the other two are used as fallbacks if there is
|
|
* not enough low-frequency information.) Accumulate sequences of
|
|
* framewise spectral difference sums for the frequency domain
|
|
* information, and a sequence of the RMS values, across the
|
|
* duration of the audio.
|
|
*
|
|
* - When all audio has been processed, calculate an autocorrelation
|
|
* of each of the three features normalised to unity maximum, and
|
|
* calculate a weighted sum of the autocorrelations (discarding any
|
|
* phase difference between the three signals) with the
|
|
* low-frequency feature given the most weight.
|
|
*
|
|
* - Drag a comb filter across the subset of the summed
|
|
* autocorrelation sequence that corresponds to the plausible tempo
|
|
* range. Allocate to each lag a weighted sum of its value and those
|
|
* of elements around beats-per-bar multiples of its lag.
|
|
*
|
|
* - Apply a simplistic perceptual weighting filter to prefer tempi
|
|
* around 120-130bpm.
|
|
*
|
|
* - Find the peak of the resulting filtered autocorrelation and
|
|
* return its corresponding tempo.
|
|
*/
|
|
|
|
#include "ardour/minibpm.h"
|
|
|
|
#include <vector>
|
|
#include <map>
|
|
#include <utility>
|
|
#include <cmath>
|
|
|
|
#ifdef __MSVC__
|
|
#define R__ __restrict
|
|
#else
|
|
#ifdef __GNUC__
|
|
#define R__ __restrict__
|
|
#else
|
|
#define R__
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846
|
|
#endif
|
|
|
|
using std::vector;
|
|
|
|
#include <iostream>
|
|
|
|
namespace breakfastquay {
|
|
|
|
class Autocorrelation
|
|
{
|
|
public:
|
|
Autocorrelation(int n, int m) : m_n(n), m_m(m) { }
|
|
|
|
template <typename T>
|
|
void acf(const T *R__ in, T *R__ out) const {
|
|
for (int i = 0; i < m_m; ++i) {
|
|
out[i] = 0.0;
|
|
for (int j = i; j < m_n; ++j) {
|
|
out[i] += in[j] * in[j - i];
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
void acfUnityNormalised(const T *R__ in, T *R__ out) const {
|
|
|
|
acf(in, out);
|
|
|
|
double max = 0.0;
|
|
for (int i = 0; i < m_m; ++i) {
|
|
out[i] /= m_n - i;
|
|
if (out[i] > max) max = out[i];
|
|
}
|
|
if (max > 0.0) {
|
|
for (int i = 0; i < m_m; ++i) {
|
|
out[i] /= max;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int bpmToLag(double bpm, double hopsPerSec) {
|
|
return int((60.0 / bpm) * hopsPerSec + 0.5);
|
|
}
|
|
static double lagToBpm(double lag, double hopsPerSec) {
|
|
return (60.0 * hopsPerSec) / lag;
|
|
}
|
|
|
|
private:
|
|
int m_n;
|
|
int m_m;
|
|
};
|
|
|
|
class FourierFilterbank
|
|
{
|
|
public:
|
|
FourierFilterbank(int n, double fs, double minFreq, double maxFreq,
|
|
bool windowed) :
|
|
m_n(n), m_fs(fs), m_fmin(minFreq), m_fmax(maxFreq),
|
|
m_windowed(windowed)
|
|
{
|
|
m_binmin = int(floor(n * m_fmin) / fs);
|
|
m_binmax = int(ceil(n * m_fmax) / fs);
|
|
m_bins = m_binmax - m_binmin + 1;
|
|
initFilters();
|
|
}
|
|
|
|
~FourierFilterbank() {
|
|
for (int i = 0; i < m_bins; ++i) {
|
|
delete[] m_sin[i];
|
|
delete[] m_cos[i];
|
|
}
|
|
delete[] m_sin;
|
|
delete[] m_cos;
|
|
}
|
|
|
|
int getOutputSize() const {
|
|
return m_bins;
|
|
}
|
|
|
|
void forwardMagnitude(const double *R__ realIn, double *R__ magOut) const {
|
|
for (int i = 0; i < m_bins; ++i) {
|
|
const double *R__ sin = m_sin[i];
|
|
const double *R__ cos = m_cos[i];
|
|
double real = 0.0, imag = 0.0;
|
|
for (int j = 0; j < m_n; ++j) real += realIn[j] * cos[j];
|
|
for (int j = 0; j < m_n; ++j) imag += realIn[j] * sin[j];
|
|
magOut[i] = sqrt(real*real + imag*imag);
|
|
}
|
|
}
|
|
|
|
private:
|
|
int m_n;
|
|
double m_fs;
|
|
double m_fmin;
|
|
double m_fmax;
|
|
bool m_windowed;
|
|
int m_binmin;
|
|
int m_binmax;
|
|
int m_bins;
|
|
|
|
double **m_sin;
|
|
double **m_cos;
|
|
|
|
void initFilters() {
|
|
m_sin = new double*[m_bins];
|
|
m_cos = new double*[m_bins];
|
|
double twopi = M_PI * 2.0;
|
|
double win = 1.0;
|
|
for (int i = 0; i < m_bins; ++i) {
|
|
m_sin[i] = new double[m_n];
|
|
m_cos[i] = new double[m_n];
|
|
int bin = i + m_binmin;
|
|
double delta = (twopi * bin) / m_n;
|
|
for (int j = 0; j < m_n; ++j) {
|
|
double angle = j * delta;
|
|
if (m_windowed) win = 0.5 - 0.5 * cos(twopi * j / m_n);
|
|
m_sin[i][j] = sin(angle) * win;
|
|
m_cos[i][j] = cos(angle) * win;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
class ACFCombFilter
|
|
{
|
|
public:
|
|
ACFCombFilter(int beatsPerBar, int minlag, int maxlag, double hopsPerSec) :
|
|
m_beatsPerBar(beatsPerBar),
|
|
m_min(minlag), m_max(maxlag),
|
|
m_hopsPerSec(hopsPerSec) { }
|
|
~ACFCombFilter() { }
|
|
|
|
int getFilteredLength() const {
|
|
return m_max - m_min + 1;
|
|
}
|
|
|
|
static void getContributingRange(int lag, int multiple,
|
|
int &base, int &count) {
|
|
if (multiple == 1) {
|
|
base = lag;
|
|
count = 1;
|
|
} else {
|
|
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...
|
|
// 0 1 2 4 4 4 8 8 8 8 8 8 16 16 16 16 16 16 16 ...
|
|
base = (lag * multiple) - (multiple / 4);
|
|
count = (multiple / 4) + (multiple / 2);
|
|
}
|
|
}
|
|
|
|
void filter(const double *acf, int acfLength, double *filtered) {
|
|
|
|
int flen = getFilteredLength();
|
|
|
|
for (int i = 0; i < flen; ++i) {
|
|
|
|
filtered[i] = 0.0;
|
|
|
|
int lag = m_min + i;
|
|
int multiple = 1;
|
|
int n = 0;
|
|
|
|
while (1) {
|
|
|
|
int base, count;
|
|
getContributingRange(lag, multiple, base, count);
|
|
if (base + count > acfLength) break;
|
|
|
|
double peak = 0.0;
|
|
for (int j = base; j < base + count; ++j) {
|
|
if (j == base || acf[j] > peak) {
|
|
peak = acf[j];
|
|
}
|
|
}
|
|
filtered[i] += peak;
|
|
++n;
|
|
|
|
if (multiple == 1) multiple = m_beatsPerBar;
|
|
else multiple = multiple * 2;
|
|
}
|
|
|
|
filtered[i] /= n;
|
|
}
|
|
}
|
|
|
|
double refine(int lag, const double *acf, int acfLength) {
|
|
|
|
int multiple = 1;
|
|
double interpolated = lag;
|
|
|
|
double total = 0.0;
|
|
int n = 0;
|
|
|
|
while (1) {
|
|
|
|
int base, count;
|
|
getContributingRange(lag, multiple, base, count);
|
|
|
|
if (base + count > acfLength) break;
|
|
|
|
double peak = 0.0;
|
|
int peakidx = 0;
|
|
for (int j = base; j < base + count; ++j) {
|
|
if (acf[j] > peak) {
|
|
peak = acf[j];
|
|
peakidx = j;
|
|
}
|
|
}
|
|
|
|
if (peak > 0.0) {
|
|
double scaled = double(peakidx) / multiple;
|
|
total += scaled;
|
|
++n;
|
|
}
|
|
|
|
if (multiple == 1) multiple = m_beatsPerBar;
|
|
else multiple = multiple * 2;
|
|
}
|
|
|
|
if (n > 0) {
|
|
interpolated = total / n;
|
|
}
|
|
|
|
double bpm = Autocorrelation::lagToBpm(interpolated, m_hopsPerSec);
|
|
return bpm;
|
|
}
|
|
|
|
private:
|
|
int m_beatsPerBar;
|
|
int m_min;
|
|
int m_max;
|
|
double m_hopsPerSec;
|
|
};
|
|
|
|
class MiniBPM::D
|
|
{
|
|
public:
|
|
double m_minbpm;
|
|
double m_maxbpm;
|
|
int m_beatsPerBar;
|
|
|
|
template <typename S, typename T>
|
|
void copy(T *R__ t, const S *R__ s, const int n) {
|
|
for (int i = 0; i < n; ++i) t[i] = s[i];
|
|
}
|
|
template <typename T>
|
|
void zero(T *R__ t, const int n) {
|
|
for (int i = 0; i < n; ++i) t[i] = T(0);
|
|
}
|
|
template <typename T>
|
|
void unityNormalise(T *R__ t, const int n) {
|
|
double max = 0.0, min = 0.0;
|
|
for (int i = 0; i < n; ++i) {
|
|
if (i == 0 || t[i] > max) max = t[i];
|
|
if (i == 0 || t[i] < min) min = t[i];
|
|
}
|
|
if (max > min) {
|
|
for (int i = 0; i < n; ++i) {
|
|
t[i] = (t[i] - min) / (max - min);
|
|
}
|
|
}
|
|
}
|
|
|
|
D(float sampleRate) :
|
|
m_minbpm(55),
|
|
m_maxbpm(190),
|
|
m_beatsPerBar(4),
|
|
m_inputSampleRate(sampleRate),
|
|
m_lfmin(0),
|
|
m_lfmax(550),
|
|
m_hfmin(9000),
|
|
m_hfmax(9001),
|
|
m_input(0),
|
|
m_partial(0),
|
|
m_partialFill(0),
|
|
m_frame(0),
|
|
m_lfprev(0),
|
|
m_hfprev(0)
|
|
{
|
|
int lfbinmax = 6;
|
|
m_blockSize = (m_inputSampleRate * lfbinmax) / m_lfmax;
|
|
m_stepSize = m_blockSize / 2;
|
|
|
|
m_lf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
|
|
m_lfmin, m_lfmax, true);
|
|
|
|
m_hf = new FourierFilterbank(m_blockSize, m_inputSampleRate,
|
|
m_hfmin, m_hfmax, true);
|
|
|
|
int lfsize = m_lf->getOutputSize();
|
|
int hfsize = m_hf->getOutputSize();
|
|
|
|
m_lfprev = new double[lfsize];
|
|
for (int i = 0; i < lfsize; ++i) m_lfprev[i] = 0.0;
|
|
|
|
m_hfprev = new double[hfsize];
|
|
for (int i = 0; i < hfsize; ++i) m_hfprev[i] = 0.0;
|
|
|
|
m_input = new double[m_blockSize];
|
|
m_partial = new double[m_stepSize];
|
|
|
|
int frameSize = std::max(lfsize, hfsize);
|
|
m_frame = new double[frameSize];
|
|
|
|
zero(m_input, m_blockSize);
|
|
zero(m_partial, m_stepSize);
|
|
zero(m_frame, frameSize);
|
|
}
|
|
|
|
~D()
|
|
{
|
|
delete m_lf;
|
|
delete m_hf;
|
|
delete[] m_lfprev;
|
|
delete[] m_hfprev;
|
|
delete[] m_input;
|
|
delete[] m_partial;
|
|
delete[] m_frame;
|
|
}
|
|
|
|
double
|
|
specdiff(const double *a, const double *b, int n)
|
|
{
|
|
double tot = 0.0;
|
|
for (int i = 0; i < n; ++i) {
|
|
tot += sqrt(fabs(a[i]*a[i] - b[i]*b[i]));
|
|
}
|
|
return tot;
|
|
}
|
|
|
|
double estimateTempoOfSamples(const float *samples, int nsamples)
|
|
{
|
|
int i = 0;
|
|
while (i + m_blockSize < nsamples) {
|
|
copy(m_input, samples + i, m_blockSize);
|
|
processInputBlock();
|
|
i += m_stepSize;
|
|
}
|
|
return finish();
|
|
}
|
|
|
|
void process(const float *samples, int nsamples)
|
|
{
|
|
int n = 0;
|
|
while (n < nsamples) {
|
|
int hole = m_blockSize - m_stepSize;
|
|
int remaining = nsamples - n;
|
|
if (m_partialFill + remaining < m_stepSize) {
|
|
copy(m_partial + m_partialFill, samples + n, remaining);
|
|
m_partialFill += remaining;
|
|
break;
|
|
}
|
|
copy(m_input + hole, m_partial, m_partialFill);
|
|
int toConsume = m_stepSize - m_partialFill;
|
|
copy(m_input + hole + m_partialFill, samples + n, toConsume);
|
|
n += toConsume;
|
|
m_partialFill = 0;
|
|
processInputBlock();
|
|
copy(m_input, m_input + m_stepSize, hole);
|
|
}
|
|
}
|
|
|
|
double estimateTempo()
|
|
{
|
|
if (m_partialFill > 0) {
|
|
int hole = m_blockSize - m_stepSize;
|
|
copy(m_input + hole, m_partial, m_partialFill);
|
|
zero(m_input + hole + m_partialFill, m_stepSize - m_partialFill);
|
|
m_partialFill = 0;
|
|
processInputBlock();
|
|
}
|
|
return finish();
|
|
}
|
|
|
|
std::vector<double> getTempoCandidates() const
|
|
{
|
|
return m_candidates;
|
|
}
|
|
|
|
void reset()
|
|
{
|
|
m_lfdf.clear();
|
|
m_hfdf.clear();
|
|
m_rms.clear();
|
|
m_partialFill = 0;
|
|
}
|
|
|
|
void processInputBlock()
|
|
{
|
|
double rms = 0.0;
|
|
|
|
for (int i = 0; i < m_blockSize; ++i) {
|
|
rms += m_input[i] * m_input[i];
|
|
}
|
|
|
|
rms = sqrt(rms / m_blockSize);
|
|
m_rms.push_back(rms);
|
|
|
|
int lfsize = m_lf->getOutputSize();
|
|
int hfsize = m_hf->getOutputSize();
|
|
|
|
m_lf->forwardMagnitude(m_input, m_frame);
|
|
m_lfdf.push_back(specdiff(m_frame, m_lfprev, lfsize));
|
|
copy(m_lfprev, m_frame, lfsize);
|
|
|
|
m_hf->forwardMagnitude(m_input, m_frame);
|
|
m_hfdf.push_back(specdiff(m_frame, m_hfprev, hfsize));
|
|
copy(m_hfprev, m_frame, hfsize);
|
|
}
|
|
|
|
double finish()
|
|
{
|
|
m_candidates.clear();
|
|
|
|
double hopsPerSec = m_inputSampleRate / m_stepSize;
|
|
int dfLength = m_lfdf.size();
|
|
|
|
// We have no use for any lag beyond 4 bars at minimum bpm
|
|
double barPM = m_minbpm / (4 * m_beatsPerBar);
|
|
int acfLength = Autocorrelation::bpmToLag(barPM, hopsPerSec);
|
|
while (acfLength > dfLength) acfLength /= 2;
|
|
|
|
Autocorrelation acfcalc(dfLength, acfLength);
|
|
|
|
double *acf = new double[acfLength];
|
|
double *temp = new double[acfLength];
|
|
|
|
zero(acf, acfLength);
|
|
|
|
acfcalc.acfUnityNormalised(m_lfdf.data(), temp);
|
|
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i];
|
|
|
|
acfcalc.acfUnityNormalised(m_hfdf.data(), temp);
|
|
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.5;
|
|
|
|
acfcalc.acfUnityNormalised(m_rms.data(), temp);
|
|
for (int i = 0; i < acfLength; ++i) acf[i] += temp[i] * 0.1;
|
|
|
|
int minlag = Autocorrelation::bpmToLag(m_maxbpm, hopsPerSec);
|
|
int maxlag = Autocorrelation::bpmToLag(m_minbpm, hopsPerSec);
|
|
|
|
if (acfLength < maxlag) {
|
|
// Not enough data
|
|
delete [] acf;
|
|
delete [] temp;
|
|
return 0.0;
|
|
}
|
|
|
|
ACFCombFilter filter(m_beatsPerBar, minlag, maxlag, hopsPerSec);
|
|
int cflen = filter.getFilteredLength();
|
|
double *cf = new double[cflen];
|
|
filter.filter(acf, acfLength, cf);
|
|
unityNormalise(cf, cflen);
|
|
|
|
for (int i = 0; i < cflen; ++i) {
|
|
// perceptual weighting: prefer middling values
|
|
double bpm = Autocorrelation::lagToBpm(minlag + i, hopsPerSec);
|
|
double weight;
|
|
double centre = 130.0;
|
|
if (bpm < centre) {
|
|
weight = 1.0 - pow(fabs(centre - bpm) / 100.0, 2.4);
|
|
} else {
|
|
weight = 1.0 - pow(fabs(centre - bpm) / 80.0, 2.4);
|
|
}
|
|
if (weight < 0.0) weight = 0.0;
|
|
cf[i] *= weight;
|
|
}
|
|
|
|
std::multimap<double, int> candidateMap;
|
|
for (int i = 1; i + 1 < cflen; ++i) {
|
|
if (cf[i] > cf[i-1] && cf[i] > cf[i+1]) {
|
|
candidateMap.insert(std::pair<double, int>(cf[i], i));
|
|
}
|
|
}
|
|
|
|
if (candidateMap.empty()) {
|
|
delete[] cf;
|
|
delete[] acf;
|
|
delete[] temp;
|
|
return 0.0;
|
|
}
|
|
|
|
std::multimap<double, int>::const_iterator ci(candidateMap.end());
|
|
while (ci != candidateMap.begin()) {
|
|
--ci;
|
|
int lag = ci->second + minlag;
|
|
double bpm = filter.refine(lag, acf, acfLength);
|
|
m_candidates.push_back(bpm);
|
|
}
|
|
|
|
delete[] cf;
|
|
delete[] acf;
|
|
delete[] temp;
|
|
|
|
return m_candidates[0];
|
|
}
|
|
|
|
|
|
private:
|
|
float m_inputSampleRate;
|
|
int m_blockSize;
|
|
int m_stepSize;
|
|
int m_lfmin;
|
|
int m_lfmax;
|
|
int m_hfmin;
|
|
int m_hfmax;
|
|
|
|
std::vector<double> m_lfdf;
|
|
std::vector<double> m_hfdf;
|
|
std::vector<double> m_rms;
|
|
|
|
std::vector<double> m_candidates;
|
|
|
|
FourierFilterbank *m_lf;
|
|
FourierFilterbank *m_hf;
|
|
|
|
double *m_input;
|
|
double *m_partial;
|
|
int m_partialFill;
|
|
|
|
double *m_frame;
|
|
double *m_lfprev;
|
|
double *m_hfprev;
|
|
};
|
|
|
|
MiniBPM::MiniBPM(float sampleRate) :
|
|
m_d(new D(sampleRate))
|
|
{
|
|
}
|
|
|
|
MiniBPM::~MiniBPM()
|
|
{
|
|
delete m_d;
|
|
}
|
|
|
|
void
|
|
MiniBPM::setBPMRange(double min, double max)
|
|
{
|
|
m_d->m_minbpm = min;
|
|
m_d->m_maxbpm = max;
|
|
}
|
|
|
|
void
|
|
MiniBPM::getBPMRange(double &min, double &max) const
|
|
{
|
|
min = m_d->m_minbpm;
|
|
max = m_d->m_maxbpm;
|
|
}
|
|
|
|
void
|
|
MiniBPM::setBeatsPerBar(int bpb)
|
|
{
|
|
m_d->m_beatsPerBar = bpb;
|
|
}
|
|
|
|
int
|
|
MiniBPM::getBeatsPerBar() const
|
|
{
|
|
return m_d->m_beatsPerBar;
|
|
}
|
|
|
|
double
|
|
MiniBPM::estimateTempoOfSamples(const float *samples, int nsamples)
|
|
{
|
|
return m_d->estimateTempoOfSamples(samples, nsamples);
|
|
}
|
|
|
|
void
|
|
MiniBPM::process(const float *samples, int nsamples)
|
|
{
|
|
m_d->process(samples, nsamples);
|
|
}
|
|
|
|
double
|
|
MiniBPM::estimateTempo()
|
|
{
|
|
return m_d->estimateTempo();
|
|
}
|
|
|
|
std::vector<double>
|
|
MiniBPM::getTempoCandidates() const
|
|
{
|
|
return m_d->getTempoCandidates();
|
|
}
|
|
|
|
void
|
|
MiniBPM::reset()
|
|
{
|
|
m_d->reset();
|
|
}
|
|
|
|
}
|
|
|
|
|