532 lines
15 KiB
C++
532 lines
15 KiB
C++
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
|
|
|
/*
|
|
QM DSP Library
|
|
|
|
Centre for Digital Music, Queen Mary, University of London.
|
|
This file copyright 2008-2009 Matthew Davies and QMUL.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version. See the file
|
|
COPYING included with this distribution for more information.
|
|
*/
|
|
|
|
#include "TempoTrackV2.h"
|
|
|
|
#include <cmath>
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
|
|
#include "maths/MathUtilities.h"
|
|
|
|
#define EPS 0.0000008 // just some arbitrary small number
|
|
|
|
TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
|
|
m_rate(rate), m_increment(increment) { }
|
|
TempoTrackV2::~TempoTrackV2() { }
|
|
|
|
void
|
|
TempoTrackV2::filter_df(d_vec_t &df)
|
|
{
|
|
d_vec_t a(3);
|
|
d_vec_t b(3);
|
|
d_vec_t lp_df(df.size());
|
|
|
|
//equivalent in matlab to [b,a] = butter(2,0.4);
|
|
a[0] = 1.0000;
|
|
a[1] = -0.3695;
|
|
a[2] = 0.1958;
|
|
b[0] = 0.2066;
|
|
b[1] = 0.4131;
|
|
b[2] = 0.2066;
|
|
|
|
double inp1 = 0.;
|
|
double inp2 = 0.;
|
|
double out1 = 0.;
|
|
double out2 = 0.;
|
|
|
|
|
|
// forwards filtering
|
|
for (unsigned int i = 0;i < df.size();i++)
|
|
{
|
|
lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
|
|
inp2 = inp1;
|
|
inp1 = df[i];
|
|
out2 = out1;
|
|
out1 = lp_df[i];
|
|
}
|
|
|
|
// copy forwards filtering to df...
|
|
// but, time-reversed, ready for backwards filtering
|
|
for (unsigned int i = 0;i < df.size();i++)
|
|
{
|
|
df[i] = lp_df[df.size()-i-1];
|
|
}
|
|
|
|
for (unsigned int i = 0;i < df.size();i++)
|
|
{
|
|
lp_df[i] = 0.;
|
|
}
|
|
|
|
inp1 = 0.; inp2 = 0.;
|
|
out1 = 0.; out2 = 0.;
|
|
|
|
// backwards filetering on time-reversed df
|
|
for (unsigned int i = 0;i < df.size();i++)
|
|
{
|
|
lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
|
|
inp2 = inp1;
|
|
inp1 = df[i];
|
|
out2 = out1;
|
|
out1 = lp_df[i];
|
|
}
|
|
|
|
// write the re-reversed (i.e. forward) version back to df
|
|
for (unsigned int i = 0;i < df.size();i++)
|
|
{
|
|
df[i] = lp_df[df.size()-i-1];
|
|
}
|
|
}
|
|
|
|
|
|
// MEPD 28/11/12
|
|
// This function now allows for a user to specify an inputtempo (in BPM)
|
|
// and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
|
|
// with a gaussian which is centered around the input tempo
|
|
// Note, if inputtempo = 120 and constraintempo = false, then functionality is
|
|
// as it was before
|
|
void
|
|
TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
|
|
vector<double> &beat_period,
|
|
vector<double> &tempi,
|
|
double inputtempo, bool constraintempo)
|
|
{
|
|
// to follow matlab.. split into 512 sample frames with a 128 hop size
|
|
// calculate the acf,
|
|
// then the rcf.. and then stick the rcfs as columns of a matrix
|
|
// then call viterbi decoding with weight vector and transition matrix
|
|
// and get best path
|
|
|
|
unsigned int wv_len = 128;
|
|
|
|
// MEPD 28/11/12
|
|
// the default value of inputtempo in the beat tracking plugin is 120
|
|
// so if the user specifies a different inputtempo, the rayparam will be updated
|
|
// accordingly.
|
|
// note: 60*44100/512 is a magic number
|
|
// this might (will?) break if a user specifies a different frame rate for the onset detection function
|
|
double rayparam = (60*44100/512)/inputtempo;
|
|
|
|
// these debug statements can be removed.
|
|
// std::cerr << "inputtempo" << inputtempo << std::endl;
|
|
// std::cerr << "rayparam" << rayparam << std::endl;
|
|
// std::cerr << "constraintempo" << constraintempo << std::endl;
|
|
|
|
// make rayleigh weighting curve
|
|
d_vec_t wv(wv_len);
|
|
|
|
// check whether or not to use rayleigh weighting (if constraintempo is false)
|
|
// or use gaussian weighting it (constraintempo is true)
|
|
if (constraintempo)
|
|
{
|
|
for (unsigned int i=0; i<wv.size(); i++)
|
|
{
|
|
// MEPD 28/11/12
|
|
// do a gaussian weighting instead of rayleigh
|
|
wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (unsigned int i=0; i<wv.size(); i++)
|
|
{
|
|
// MEPD 28/11/12
|
|
// standard rayleigh weighting over periodicities
|
|
wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
|
|
}
|
|
}
|
|
|
|
// beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
|
|
unsigned int winlen = 512;
|
|
unsigned int step = 128;
|
|
|
|
// matrix to store output of comb filter bank, increment column of matrix at each frame
|
|
d_mat_t rcfmat;
|
|
int col_counter = -1;
|
|
|
|
// main loop for beat period calculation
|
|
for (unsigned int i=0; i+winlen<df.size(); i+=step)
|
|
{
|
|
// get dfframe
|
|
d_vec_t dfframe(winlen);
|
|
for (unsigned int k=0; k<winlen; k++)
|
|
{
|
|
dfframe[k] = df[i+k];
|
|
}
|
|
// get rcf vector for current frame
|
|
d_vec_t rcf(wv_len);
|
|
get_rcf(dfframe,wv,rcf);
|
|
|
|
rcfmat.push_back( d_vec_t() ); // adds a new column
|
|
col_counter++;
|
|
for (unsigned int j=0; j<rcf.size(); j++)
|
|
{
|
|
rcfmat[col_counter].push_back( rcf[j] );
|
|
}
|
|
}
|
|
|
|
// now call viterbi decoding function
|
|
viterbi_decode(rcfmat,wv,beat_period,tempi);
|
|
}
|
|
|
|
|
|
void
|
|
TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
|
|
{
|
|
// calculate autocorrelation function
|
|
// then rcf
|
|
// just hard code for now... don't really need separate functions to do this
|
|
|
|
// make acf
|
|
|
|
d_vec_t dfframe(dfframe_in);
|
|
|
|
MathUtilities::adaptiveThreshold(dfframe);
|
|
|
|
d_vec_t acf(dfframe.size());
|
|
|
|
|
|
for (unsigned int lag=0; lag<dfframe.size(); lag++)
|
|
{
|
|
double sum = 0.;
|
|
double tmp = 0.;
|
|
|
|
for (unsigned int n=0; n<(dfframe.size()-lag); n++)
|
|
{
|
|
tmp = dfframe[n] * dfframe[n+lag];
|
|
sum += tmp;
|
|
}
|
|
acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
|
|
}
|
|
|
|
// now apply comb filtering
|
|
int numelem = 4;
|
|
|
|
for (unsigned int i = 2;i < rcf.size();i++) // max beat period
|
|
{
|
|
for (int a = 1;a <= numelem;a++) // number of comb elements
|
|
{
|
|
for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
|
|
{
|
|
rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
|
|
}
|
|
}
|
|
}
|
|
|
|
// apply adaptive threshold to rcf
|
|
MathUtilities::adaptiveThreshold(rcf);
|
|
|
|
double rcfsum =0.;
|
|
for (unsigned int i=0; i<rcf.size(); i++)
|
|
{
|
|
rcf[i] += EPS ;
|
|
rcfsum += rcf[i];
|
|
}
|
|
|
|
// normalise rcf to sum to unity
|
|
for (unsigned int i=0; i<rcf.size(); i++)
|
|
{
|
|
rcf[i] /= (rcfsum + EPS);
|
|
}
|
|
}
|
|
|
|
void
|
|
TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
|
|
{
|
|
// following Kevin Murphy's Viterbi decoding to get best path of
|
|
// beat periods through rfcmat
|
|
|
|
// make transition matrix
|
|
d_mat_t tmat;
|
|
for (unsigned int i=0;i<wv.size();i++)
|
|
{
|
|
tmat.push_back ( d_vec_t() ); // adds a new column
|
|
for (unsigned int j=0; j<wv.size(); j++)
|
|
{
|
|
tmat[i].push_back(0.); // fill with zeros initially
|
|
}
|
|
}
|
|
|
|
// variance of Gaussians in transition matrix
|
|
// formed of Gaussians on diagonal - implies slow tempo change
|
|
double sigma = 8.;
|
|
// don't want really short beat periods, or really long ones
|
|
for (unsigned int i=20;i <wv.size()-20; i++)
|
|
{
|
|
for (unsigned int j=20; j<wv.size()-20; j++)
|
|
{
|
|
double mu = static_cast<double>(i);
|
|
tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
|
|
}
|
|
}
|
|
|
|
// parameters for Viterbi decoding... this part is taken from
|
|
// Murphy's matlab
|
|
|
|
d_mat_t delta;
|
|
i_mat_t psi;
|
|
for (unsigned int i=0;i <rcfmat.size(); i++)
|
|
{
|
|
delta.push_back( d_vec_t());
|
|
psi.push_back( i_vec_t());
|
|
for (unsigned int j=0; j<rcfmat[i].size(); j++)
|
|
{
|
|
delta[i].push_back(0.); // fill with zeros initially
|
|
psi[i].push_back(0); // fill with zeros initially
|
|
}
|
|
}
|
|
|
|
|
|
unsigned int T = delta.size();
|
|
|
|
if (T < 2) return; // can't do anything at all meaningful
|
|
|
|
unsigned int Q = delta[0].size();
|
|
|
|
// initialize first column of delta
|
|
for (unsigned int j=0; j<Q; j++)
|
|
{
|
|
delta[0][j] = wv[j] * rcfmat[0][j];
|
|
psi[0][j] = 0;
|
|
}
|
|
|
|
double deltasum = 0.;
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
deltasum += delta[0][i];
|
|
}
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
delta[0][i] /= (deltasum + EPS);
|
|
}
|
|
|
|
|
|
for (unsigned int t=1; t<T; t++)
|
|
{
|
|
d_vec_t tmp_vec(Q);
|
|
|
|
for (unsigned int j=0; j<Q; j++)
|
|
{
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
tmp_vec[i] = delta[t-1][i] * tmat[j][i];
|
|
}
|
|
|
|
delta[t][j] = get_max_val(tmp_vec);
|
|
|
|
psi[t][j] = get_max_ind(tmp_vec);
|
|
|
|
delta[t][j] *= rcfmat[t][j];
|
|
}
|
|
|
|
// normalise current delta column
|
|
double deltasum = 0.;
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
deltasum += delta[t][i];
|
|
}
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
delta[t][i] /= (deltasum + EPS);
|
|
}
|
|
}
|
|
|
|
i_vec_t bestpath(T);
|
|
d_vec_t tmp_vec(Q);
|
|
for (unsigned int i=0; i<Q; i++)
|
|
{
|
|
tmp_vec[i] = delta[T-1][i];
|
|
}
|
|
|
|
// find starting point - best beat period for "last" frame
|
|
bestpath[T-1] = get_max_ind(tmp_vec);
|
|
|
|
// backtrace through index of maximum values in psi
|
|
for (unsigned int t=T-2; t>0 ;t--)
|
|
{
|
|
bestpath[t] = psi[t+1][bestpath[t+1]];
|
|
}
|
|
|
|
// weird but necessary hack -- couldn't get above loop to terminate at t >= 0
|
|
bestpath[0] = psi[1][bestpath[1]];
|
|
|
|
unsigned int lastind = 0;
|
|
for (unsigned int i=0; i<T; i++)
|
|
{
|
|
unsigned int step = 128;
|
|
for (unsigned int j=0; j<step; j++)
|
|
{
|
|
lastind = i*step+j;
|
|
beat_period[lastind] = bestpath[i];
|
|
}
|
|
// std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
|
|
}
|
|
|
|
//fill in the last values...
|
|
for (unsigned int i=lastind; i<beat_period.size(); i++)
|
|
{
|
|
beat_period[i] = beat_period[lastind];
|
|
}
|
|
|
|
for (unsigned int i = 0; i < beat_period.size(); i++)
|
|
{
|
|
tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
|
|
}
|
|
}
|
|
|
|
double
|
|
TempoTrackV2::get_max_val(const d_vec_t &df)
|
|
{
|
|
double maxval = 0.;
|
|
for (unsigned int i=0; i<df.size(); i++)
|
|
{
|
|
if (maxval < df[i])
|
|
{
|
|
maxval = df[i];
|
|
}
|
|
}
|
|
|
|
return maxval;
|
|
}
|
|
|
|
int
|
|
TempoTrackV2::get_max_ind(const d_vec_t &df)
|
|
{
|
|
double maxval = 0.;
|
|
int ind = 0;
|
|
for (unsigned int i=0; i<df.size(); i++)
|
|
{
|
|
if (maxval < df[i])
|
|
{
|
|
maxval = df[i];
|
|
ind = i;
|
|
}
|
|
}
|
|
|
|
return ind;
|
|
}
|
|
|
|
void
|
|
TempoTrackV2::normalise_vec(d_vec_t &df)
|
|
{
|
|
double sum = 0.;
|
|
for (unsigned int i=0; i<df.size(); i++)
|
|
{
|
|
sum += df[i];
|
|
}
|
|
|
|
for (unsigned int i=0; i<df.size(); i++)
|
|
{
|
|
df[i]/= (sum + EPS);
|
|
}
|
|
}
|
|
|
|
// MEPD 28/11/12
|
|
// this function has been updated to allow the "alpha" and "tightness" parameters
|
|
// of the dynamic program to be set by the user
|
|
// the default value of alpha = 0.9 and tightness = 4
|
|
void
|
|
TempoTrackV2::calculateBeats(const vector<double> &df,
|
|
const vector<double> &beat_period,
|
|
vector<double> &beats, double alpha, double tightness)
|
|
{
|
|
if (df.empty() || beat_period.empty()) return;
|
|
|
|
d_vec_t cumscore(df.size()); // store cumulative score
|
|
i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
|
|
d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
|
|
|
|
for (unsigned int i=0; i<df.size(); i++)
|
|
{
|
|
localscore[i] = df[i];
|
|
backlink[i] = -1;
|
|
}
|
|
|
|
//double tightness = 4.;
|
|
//double alpha = 0.9;
|
|
// MEPD 28/11/12
|
|
// debug statements that can be removed.
|
|
// std::cerr << "alpha" << alpha << std::endl;
|
|
// std::cerr << "tightness" << tightness << std::endl;
|
|
|
|
// main loop
|
|
for (unsigned int i=0; i<localscore.size(); i++)
|
|
{
|
|
int prange_min = -2*beat_period[i];
|
|
int prange_max = round(-0.5*beat_period[i]);
|
|
|
|
// transition range
|
|
d_vec_t txwt (prange_max - prange_min + 1);
|
|
d_vec_t scorecands (txwt.size());
|
|
|
|
for (unsigned int j=0;j<txwt.size();j++)
|
|
{
|
|
double mu = static_cast<double> (beat_period[i]);
|
|
txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
|
|
|
|
// IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
|
|
// ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
|
|
|
|
int cscore_ind = i+prange_min+j;
|
|
if (cscore_ind >= 0)
|
|
{
|
|
scorecands[j] = txwt[j] * cumscore[cscore_ind];
|
|
}
|
|
}
|
|
|
|
// find max value and index of maximum value
|
|
double vv = get_max_val(scorecands);
|
|
int xx = get_max_ind(scorecands);
|
|
|
|
cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
|
|
backlink[i] = i+prange_min+xx;
|
|
|
|
// std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
|
|
}
|
|
|
|
// STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
|
|
d_vec_t tmp_vec;
|
|
for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
|
|
{
|
|
tmp_vec.push_back(cumscore[i]);
|
|
}
|
|
|
|
int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
|
|
|
|
// can happen if no results obtained earlier (e.g. input too short)
|
|
if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
|
|
|
|
// USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
|
|
// BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
|
|
i_vec_t ibeats;
|
|
ibeats.push_back(startpoint);
|
|
// std::cerr << "startpoint = " << startpoint << std::endl;
|
|
while (backlink[ibeats.back()] > 0)
|
|
{
|
|
// std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
|
|
int b = ibeats.back();
|
|
if (backlink[b] == b) break; // shouldn't happen... haha
|
|
ibeats.push_back(backlink[b]);
|
|
}
|
|
|
|
// REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
|
|
for (unsigned int i=0; i<ibeats.size(); i++)
|
|
{
|
|
beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
|
|
}
|
|
}
|
|
|
|
|