update/include Queen Mary Vamp plugin set

Robin Gareus 2016-10-06 00:40:33 +02:00
parent 72060df884
commit ee2a1b7bea
16 changed files with 6010 additions and 20 deletions

BarBeatTrack.cpp
@@ -0,0 +1,484 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "BarBeatTrack.h"
#include <dsp/onsets/DetectionFunction.h>
#include <dsp/onsets/PeakPicking.h>
#include <dsp/tempotracking/TempoTrackV2.h>
#include <dsp/tempotracking/DownBeat.h>
#include <maths/MathUtilities.h>
using std::string;
using std::vector;
using std::cerr;
using std::endl;
#ifndef __GNUC__
#include <alloca.h>
#endif
float BarBeatTracker::m_stepSecs = 0.01161; // 512 samples at 44100
class BarBeatTrackerData
{
public:
BarBeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
df = new DetectionFunction(config);
    // decimation factor aims at resampling to c. 3 kHz; must be a power of 2
int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
// std::cerr << "BarBeatTrackerData: factor = " << factor << std::endl;
downBeat = new DownBeat(rate, factor, config.stepSize);
}
~BarBeatTrackerData() {
delete df;
delete downBeat;
}
void reset() {
delete df;
df = new DetectionFunction(dfConfig);
dfOutput.clear();
downBeat->resetAudioBuffer();
origin = Vamp::RealTime::zeroTime;
}
DFConfig dfConfig;
DetectionFunction *df;
DownBeat *downBeat;
vector<double> dfOutput;
Vamp::RealTime origin;
};
BarBeatTracker::BarBeatTracker(float inputSampleRate) :
Vamp::Plugin(inputSampleRate),
m_d(0),
m_bpb(4),
m_alpha(0.9), // changes are as per the BeatTrack.cpp
m_tightness(4.), // changes are as per the BeatTrack.cpp
m_inputtempo(120.), // changes are as per the BeatTrack.cpp
m_constraintempo(false) // changes are as per the BeatTrack.cpp
{
}
BarBeatTracker::~BarBeatTracker()
{
delete m_d;
}
string
BarBeatTracker::getIdentifier() const
{
return "qm-barbeattracker";
}
string
BarBeatTracker::getName() const
{
return "Bar and Beat Tracker";
}
string
BarBeatTracker::getDescription() const
{
return "Estimate bar and beat locations";
}
string
BarBeatTracker::getMaker() const
{
return "Queen Mary, University of London";
}
int
BarBeatTracker::getPluginVersion() const
{
return 3;
}
string
BarBeatTracker::getCopyright() const
{
return "Plugin by Matthew Davies, Christian Landone and Chris Cannam. Copyright (c) 2006-2013 QMUL - All Rights Reserved";
}
BarBeatTracker::ParameterList
BarBeatTracker::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "bpb";
desc.name = "Beats per Bar";
desc.description = "The number of beats in each bar";
desc.minValue = 2;
desc.maxValue = 16;
desc.defaultValue = 4;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
// changes are as per the BeatTrack.cpp
//Alpha Parameter of Beat Tracker
desc.identifier = "alpha";
desc.name = "Alpha";
desc.description = "Inertia - Flexibility Trade Off";
desc.minValue = 0.1;
desc.maxValue = 0.99;
desc.defaultValue = 0.90;
desc.unit = "";
desc.isQuantized = false;
list.push_back(desc);
// We aren't exposing tightness as a parameter, it's fixed at 4
// changes are as per the BeatTrack.cpp
//User input tempo
desc.identifier = "inputtempo";
desc.name = "Tempo Hint";
desc.description = "User-defined tempo on which to centre the tempo preference function";
desc.minValue = 50;
desc.maxValue = 250;
desc.defaultValue = 120;
desc.unit = "BPM";
desc.isQuantized = true;
list.push_back(desc);
// changes are as per the BeatTrack.cpp
desc.identifier = "constraintempo";
desc.name = "Constrain Tempo";
desc.description = "Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh";
desc.minValue = 0;
desc.maxValue = 1;
desc.defaultValue = 0;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.unit = "";
desc.valueNames.clear();
list.push_back(desc);
return list;
}
float
BarBeatTracker::getParameter(std::string name) const
{
if (name == "bpb") {
return m_bpb;
} else if (name == "alpha") {
return m_alpha;
} else if (name == "inputtempo") {
return m_inputtempo;
} else if (name == "constraintempo") {
return m_constraintempo ? 1.0 : 0.0;
}
return 0.0;
}
void
BarBeatTracker::setParameter(std::string name, float value)
{
if (name == "bpb") {
m_bpb = lrintf(value);
} else if (name == "alpha") {
m_alpha = value;
} else if (name == "inputtempo") {
m_inputtempo = value;
} else if (name == "constraintempo") {
m_constraintempo = (value > 0.5);
}
}
bool
BarBeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (m_d) {
delete m_d;
m_d = 0;
}
if (channels < getMinChannelCount() ||
channels > getMaxChannelCount()) {
std::cerr << "BarBeatTracker::initialise: Unsupported channel count: "
<< channels << std::endl;
return false;
}
if (stepSize != getPreferredStepSize()) {
std::cerr << "ERROR: BarBeatTracker::initialise: Unsupported step size for this sample rate: "
<< stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
return false;
}
if (blockSize != getPreferredBlockSize()) {
std::cerr << "WARNING: BarBeatTracker::initialise: Sub-optimal block size for this sample rate: "
<< blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
// return false;
}
DFConfig dfConfig;
dfConfig.DFType = DF_COMPLEXSD;
dfConfig.stepSize = stepSize;
dfConfig.frameLength = blockSize;
dfConfig.dbRise = 3;
dfConfig.adaptiveWhitening = false;
dfConfig.whiteningRelaxCoeff = -1;
dfConfig.whiteningFloor = -1;
m_d = new BarBeatTrackerData(m_inputSampleRate, dfConfig);
m_d->downBeat->setBeatsPerBar(m_bpb);
return true;
}
void
BarBeatTracker::reset()
{
if (m_d) m_d->reset();
}
size_t
BarBeatTracker::getPreferredStepSize() const
{
size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
if (step < 1) step = 1;
// std::cerr << "BarBeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
return step;
}
size_t
BarBeatTracker::getPreferredBlockSize() const
{
size_t theoretical = getPreferredStepSize() * 2;
// I think this is not necessarily going to be a power of two, and
// the host might have a problem with that, but I'm not sure we
// can do much about it here
return theoretical;
}
BarBeatTracker::OutputList
BarBeatTracker::getOutputDescriptors() const
{
OutputList list;
OutputDescriptor beat;
beat.identifier = "beats";
beat.name = "Beats";
beat.description = "Beat locations labelled with metrical position";
beat.unit = "";
beat.hasFixedBinCount = true;
beat.binCount = 0;
beat.sampleType = OutputDescriptor::VariableSampleRate;
beat.sampleRate = 1.0 / m_stepSecs;
OutputDescriptor bars;
bars.identifier = "bars";
bars.name = "Bars";
bars.description = "Bar locations";
bars.unit = "";
bars.hasFixedBinCount = true;
bars.binCount = 0;
bars.sampleType = OutputDescriptor::VariableSampleRate;
bars.sampleRate = 1.0 / m_stepSecs;
OutputDescriptor beatcounts;
beatcounts.identifier = "beatcounts";
beatcounts.name = "Beat Count";
beatcounts.description = "Beat counter function";
beatcounts.unit = "";
beatcounts.hasFixedBinCount = true;
beatcounts.binCount = 1;
beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
beatcounts.sampleRate = 1.0 / m_stepSecs;
OutputDescriptor beatsd;
beatsd.identifier = "beatsd";
beatsd.name = "Beat Spectral Difference";
beatsd.description = "Beat spectral difference function used for bar-line detection";
beatsd.unit = "";
beatsd.hasFixedBinCount = true;
beatsd.binCount = 1;
beatsd.sampleType = OutputDescriptor::VariableSampleRate;
beatsd.sampleRate = 1.0 / m_stepSecs;
list.push_back(beat);
list.push_back(bars);
list.push_back(beatcounts);
list.push_back(beatsd);
return list;
}
BarBeatTracker::FeatureSet
BarBeatTracker::process(const float *const *inputBuffers,
Vamp::RealTime timestamp)
{
if (!m_d) {
cerr << "ERROR: BarBeatTracker::process: "
<< "BarBeatTracker has not been initialised"
<< endl;
return FeatureSet();
}
// We use time domain input, because DownBeat requires it -- so we
// use the time-domain version of DetectionFunction::process which
// does its own FFT. It requires doubles as input, so we need to
// make a temporary copy
// We only support a single input channel
const int fl = m_d->dfConfig.frameLength;
#ifndef __GNUC__
double *dfinput = (double *)alloca(fl * sizeof(double));
#else
double dfinput[fl];
#endif
for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
double output = m_d->df->processTimeDomain(dfinput);
if (m_d->dfOutput.empty()) m_d->origin = timestamp;
// std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
m_d->dfOutput.push_back(output);
// Downsample and store the incoming audio block.
// We have an overlap on the incoming audio stream (step size is
// half block size) -- this function is configured to take only a
// step size's worth, so effectively ignoring the overlap. Note
// however that this means we omit the last blocksize - stepsize
// samples completely for the purposes of barline detection
// (hopefully not a problem)
m_d->downBeat->pushAudioBlock(inputBuffers[0]);
return FeatureSet();
}
BarBeatTracker::FeatureSet
BarBeatTracker::getRemainingFeatures()
{
if (!m_d) {
cerr << "ERROR: BarBeatTracker::getRemainingFeatures: "
<< "BarBeatTracker has not been initialised"
<< endl;
return FeatureSet();
}
return barBeatTrack();
}
BarBeatTracker::FeatureSet
BarBeatTracker::barBeatTrack()
{
vector<double> df;
vector<double> beatPeriod;
vector<double> tempi;
for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
df.push_back(m_d->dfOutput[i]);
beatPeriod.push_back(0.0);
}
if (df.empty()) return FeatureSet();
TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
    // changes are as per the BeatTrack.cpp - allow m_inputtempo and m_constraintempo to be set by the user
tt.calculateBeatPeriod(df, beatPeriod, tempi, m_inputtempo, m_constraintempo);
vector<double> beats;
    // changes are as per the BeatTrack.cpp - allow m_alpha and m_tightness to be set by the user
tt.calculateBeats(df, beatPeriod, beats, m_alpha, m_tightness);
// tt.calculateBeatPeriod(df, beatPeriod, tempi, 0., 0); // use default parameters
// vector<double> beats;
// tt.calculateBeats(df, beatPeriod, beats, 0.9, 4.); // use default parameters until i fix this plugin too
vector<int> downbeats;
size_t downLength = 0;
const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
vector<double> beatsd;
m_d->downBeat->getBeatSD(beatsd);
// std::cerr << "BarBeatTracker: found downbeats at: ";
// for (int i = 0; i < downbeats.size(); ++i) std::cerr << downbeats[i] << " " << std::endl;
FeatureSet returnFeatures;
char label[20];
int dbi = 0;
int beat = 0;
int bar = 0;
if (!downbeats.empty()) {
// get the right number for the first beat; this will be
// incremented before use (at top of the following loop)
int firstDown = downbeats[0];
beat = m_bpb - firstDown - 1;
if (beat == m_bpb) beat = 0;
}
for (size_t i = 0; i < beats.size(); ++i) {
size_t frame = beats[i] * m_d->dfConfig.stepSize;
if (dbi < downbeats.size() && i == downbeats[dbi]) {
beat = 0;
++bar;
++dbi;
} else {
++beat;
}
// outputs are:
//
// 0 -> beats
// 1 -> bars
// 2 -> beat counter function
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
(frame, lrintf(m_inputSampleRate));
sprintf(label, "%d", beat + 1);
feature.label = label;
returnFeatures[0].push_back(feature); // labelled beats
feature.values.push_back(beat + 1);
returnFeatures[2].push_back(feature); // beat function
if (i > 0 && i <= beatsd.size()) {
feature.values.clear();
feature.values.push_back(beatsd[i-1]);
feature.label = "";
returnFeatures[3].push_back(feature); // beat spectral difference
}
if (beat == 0) {
feature.values.clear();
sprintf(label, "%d", bar);
feature.label = label;
returnFeatures[1].push_back(feature); // bars
}
}
return returnFeatures;
}
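
The plugin takes time-domain input and insists on its preferred step size, so a host feeds it overlapping blocks at step-size hops and collects all features from getRemainingFeatures() at the end. A minimal host-side sketch, not part of this commit: it calls the class directly rather than loading it through the Vamp host SDK, and the helper name and buffer handling are invented for illustration.

// Minimal host-side sketch (illustrative, not part of this commit): drive
// BarBeatTracker directly over a mono buffer and print the labelled beats.
#include "BarBeatTrack.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static void runBarBeatTracker(const std::vector<float> &audio, float rate)
{
    BarBeatTracker plugin(rate);
    size_t step = plugin.getPreferredStepSize();   // the plugin requires this step size
    size_t block = plugin.getPreferredBlockSize(); // 2 * step
    if (!plugin.initialise(1, step, block)) return;

    std::vector<float> frame(block, 0.f);
    for (size_t pos = 0; pos + block <= audio.size(); pos += step) {
        std::copy(audio.begin() + pos, audio.begin() + pos + block, frame.begin());
        const float *bufs[1] = { &frame[0] };
        plugin.process(bufs, Vamp::RealTime::frame2RealTime(pos, lrintf(rate)));
    }

    // All features are returned at the end; output 0 holds the labelled beats.
    BarBeatTracker::FeatureSet features = plugin.getRemainingFeatures();
    for (size_t i = 0; i < features[0].size(); ++i) {
        printf("beat %s at %s\n", features[0][i].label.c_str(),
               features[0][i].timestamp.toText().c_str());
    }
}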

BarBeatTrack.h
@@ -0,0 +1,69 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _BAR_BEAT_TRACK_PLUGIN_H_
#define _BAR_BEAT_TRACK_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
class BarBeatTrackerData;
class BarBeatTracker : public Vamp::Plugin
{
public:
BarBeatTracker(float inputSampleRate);
virtual ~BarBeatTracker();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return TimeDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
ParameterList getParameterDescriptors() const;
float getParameter(std::string) const;
void setParameter(std::string, float);
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
protected:
BarBeatTrackerData *m_d;
static float m_stepSecs;
int m_bpb;
FeatureSet barBeatTrack();
// MEPD new protected parameters to allow the user to control these advanced parameters of the beat tracker
// changes are as per the BeatTrack.h
double m_alpha;
double m_tightness;
double m_inputtempo;
bool m_constraintempo;
};
#endif

BeatTrack.cpp
@@ -0,0 +1,584 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "BeatTrack.h"
#include <dsp/onsets/DetectionFunction.h>
#include <dsp/onsets/PeakPicking.h>
#include <dsp/tempotracking/TempoTrack.h>
#include <dsp/tempotracking/TempoTrackV2.h>
using std::string;
using std::vector;
using std::cerr;
using std::endl;
float BeatTracker::m_stepSecs = 0.01161; // 512 samples at 44100
#define METHOD_OLD 0
#define METHOD_NEW 1
class BeatTrackerData
{
public:
BeatTrackerData(const DFConfig &config) : dfConfig(config) {
df = new DetectionFunction(config);
}
~BeatTrackerData() {
delete df;
}
void reset() {
delete df;
df = new DetectionFunction(dfConfig);
dfOutput.clear();
origin = Vamp::RealTime::zeroTime;
}
DFConfig dfConfig;
DetectionFunction *df;
vector<double> dfOutput;
Vamp::RealTime origin;
};
BeatTracker::BeatTracker(float inputSampleRate) :
Vamp::Plugin(inputSampleRate),
m_d(0),
m_method(METHOD_NEW),
m_dfType(DF_COMPLEXSD),
m_whiten(false),
m_alpha(0.9), // MEPD new exposed parameter for beat tracker, default value = 0.9 (as old version)
m_tightness(4.),
m_inputtempo(120.), // MEPD new exposed parameter for beat tracker, default value = 120. (as old version)
m_constraintempo(false) // MEPD new exposed parameter for beat tracker, default value = false (as old version)
// calling the beat tracker with these default parameters will give the same output as the previous existing version
{
}
BeatTracker::~BeatTracker()
{
delete m_d;
}
string
BeatTracker::getIdentifier() const
{
return "qm-tempotracker";
}
string
BeatTracker::getName() const
{
return "Tempo and Beat Tracker";
}
string
BeatTracker::getDescription() const
{
return "Estimate beat locations and tempo";
}
string
BeatTracker::getMaker() const
{
return "Queen Mary, University of London";
}
int
BeatTracker::getPluginVersion() const
{
return 6;
}
string
BeatTracker::getCopyright() const
{
return "Plugin by Christian Landone and Matthew Davies. Copyright (c) 2006-2013 QMUL - All Rights Reserved";
}
BeatTracker::ParameterList
BeatTracker::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "method";
desc.name = "Beat Tracking Method";
desc.description = "Basic method to use ";
desc.minValue = 0;
desc.maxValue = 1;
desc.defaultValue = METHOD_NEW;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.valueNames.push_back("Old");
desc.valueNames.push_back("New");
list.push_back(desc);
desc.identifier = "dftype";
desc.name = "Onset Detection Function Type";
desc.description = "Method used to calculate the onset detection function";
desc.minValue = 0;
desc.maxValue = 4;
desc.defaultValue = 3;
desc.valueNames.clear();
desc.valueNames.push_back("High-Frequency Content");
desc.valueNames.push_back("Spectral Difference");
desc.valueNames.push_back("Phase Deviation");
desc.valueNames.push_back("Complex Domain");
desc.valueNames.push_back("Broadband Energy Rise");
list.push_back(desc);
desc.identifier = "whiten";
desc.name = "Adaptive Whitening";
desc.description = "Normalize frequency bin magnitudes relative to recent peak levels";
desc.minValue = 0;
desc.maxValue = 1;
desc.defaultValue = 0;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.unit = "";
desc.valueNames.clear();
list.push_back(desc);
// MEPD new exposed parameter - used in the dynamic programming part of the beat tracker
//Alpha Parameter of Beat Tracker
desc.identifier = "alpha";
desc.name = "Alpha";
desc.description = "Inertia - Flexibility Trade Off";
desc.minValue = 0.1;
desc.maxValue = 0.99;
desc.defaultValue = 0.90;
desc.unit = "";
desc.isQuantized = false;
list.push_back(desc);
// We aren't exposing tightness as a parameter, it's fixed at 4
// MEPD new exposed parameter - used in the periodicity estimation
//User input tempo
desc.identifier = "inputtempo";
desc.name = "Tempo Hint";
desc.description = "User-defined tempo on which to centre the tempo preference function";
desc.minValue = 50;
desc.maxValue = 250;
desc.defaultValue = 120;
desc.unit = "BPM";
desc.isQuantized = true;
list.push_back(desc);
// MEPD new exposed parameter - used in periodicity estimation
desc.identifier = "constraintempo";
desc.name = "Constrain Tempo";
desc.description = "Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh";
desc.minValue = 0;
desc.maxValue = 1;
desc.defaultValue = 0;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.unit = "";
desc.valueNames.clear();
list.push_back(desc);
return list;
}
float
BeatTracker::getParameter(std::string name) const
{
if (name == "dftype") {
switch (m_dfType) {
case DF_HFC: return 0;
case DF_SPECDIFF: return 1;
case DF_PHASEDEV: return 2;
default: case DF_COMPLEXSD: return 3;
case DF_BROADBAND: return 4;
}
} else if (name == "method") {
return m_method;
} else if (name == "whiten") {
return m_whiten ? 1.0 : 0.0;
} else if (name == "alpha") {
return m_alpha;
} else if (name == "inputtempo") {
return m_inputtempo;
} else if (name == "constraintempo") {
return m_constraintempo ? 1.0 : 0.0;
}
return 0.0;
}
void
BeatTracker::setParameter(std::string name, float value)
{
if (name == "dftype") {
switch (lrintf(value)) {
case 0: m_dfType = DF_HFC; break;
case 1: m_dfType = DF_SPECDIFF; break;
case 2: m_dfType = DF_PHASEDEV; break;
default: case 3: m_dfType = DF_COMPLEXSD; break;
case 4: m_dfType = DF_BROADBAND; break;
}
} else if (name == "method") {
m_method = lrintf(value);
} else if (name == "whiten") {
m_whiten = (value > 0.5);
} else if (name == "alpha") {
m_alpha = value;
} else if (name == "inputtempo") {
m_inputtempo = value;
} else if (name == "constraintempo") {
m_constraintempo = (value > 0.5);
}
}
bool
BeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (m_d) {
delete m_d;
m_d = 0;
}
if (channels < getMinChannelCount() ||
channels > getMaxChannelCount()) {
std::cerr << "BeatTracker::initialise: Unsupported channel count: "
<< channels << std::endl;
return false;
}
if (stepSize != getPreferredStepSize()) {
std::cerr << "ERROR: BeatTracker::initialise: Unsupported step size for this sample rate: "
<< stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
return false;
}
if (blockSize != getPreferredBlockSize()) {
std::cerr << "WARNING: BeatTracker::initialise: Sub-optimal block size for this sample rate: "
<< blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
// return false;
}
DFConfig dfConfig;
dfConfig.DFType = m_dfType;
dfConfig.stepSize = stepSize;
dfConfig.frameLength = blockSize;
dfConfig.dbRise = 3;
dfConfig.adaptiveWhitening = m_whiten;
dfConfig.whiteningRelaxCoeff = -1;
dfConfig.whiteningFloor = -1;
m_d = new BeatTrackerData(dfConfig);
return true;
}
void
BeatTracker::reset()
{
if (m_d) m_d->reset();
}
size_t
BeatTracker::getPreferredStepSize() const
{
size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
// std::cerr << "BeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
return step;
}
size_t
BeatTracker::getPreferredBlockSize() const
{
size_t theoretical = getPreferredStepSize() * 2;
// I think this is not necessarily going to be a power of two, and
// the host might have a problem with that, but I'm not sure we
// can do much about it here
return theoretical;
}
BeatTracker::OutputList
BeatTracker::getOutputDescriptors() const
{
OutputList list;
OutputDescriptor beat;
beat.identifier = "beats";
beat.name = "Beats";
beat.description = "Estimated metrical beat locations";
beat.unit = "";
beat.hasFixedBinCount = true;
beat.binCount = 0;
beat.sampleType = OutputDescriptor::VariableSampleRate;
beat.sampleRate = 1.0 / m_stepSecs;
OutputDescriptor df;
df.identifier = "detection_fn";
df.name = "Onset Detection Function";
df.description = "Probability function of note onset likelihood";
df.unit = "";
df.hasFixedBinCount = true;
df.binCount = 1;
df.hasKnownExtents = false;
df.isQuantized = false;
df.sampleType = OutputDescriptor::OneSamplePerStep;
OutputDescriptor tempo;
tempo.identifier = "tempo";
tempo.name = "Tempo";
tempo.description = "Locked tempo estimates";
tempo.unit = "bpm";
tempo.hasFixedBinCount = true;
tempo.binCount = 1;
tempo.hasKnownExtents = false;
tempo.isQuantized = false;
tempo.sampleType = OutputDescriptor::VariableSampleRate;
tempo.sampleRate = 1.0 / m_stepSecs;
list.push_back(beat);
list.push_back(df);
list.push_back(tempo);
return list;
}
BeatTracker::FeatureSet
BeatTracker::process(const float *const *inputBuffers,
Vamp::RealTime timestamp)
{
if (!m_d) {
cerr << "ERROR: BeatTracker::process: "
<< "BeatTracker has not been initialised"
<< endl;
return FeatureSet();
}
size_t len = m_d->dfConfig.frameLength / 2 + 1;
double *reals = new double[len];
double *imags = new double[len];
// We only support a single input channel
for (size_t i = 0; i < len; ++i) {
reals[i] = inputBuffers[0][i*2];
imags[i] = inputBuffers[0][i*2+1];
}
double output = m_d->df->processFrequencyDomain(reals, imags);
delete[] reals;
delete[] imags;
if (m_d->dfOutput.empty()) m_d->origin = timestamp;
m_d->dfOutput.push_back(output);
FeatureSet returnFeatures;
Feature feature;
feature.hasTimestamp = false;
feature.values.push_back(output);
returnFeatures[1].push_back(feature); // detection function is output 1
return returnFeatures;
}
BeatTracker::FeatureSet
BeatTracker::getRemainingFeatures()
{
if (!m_d) {
cerr << "ERROR: BeatTracker::getRemainingFeatures: "
<< "BeatTracker has not been initialised"
<< endl;
return FeatureSet();
}
if (m_method == METHOD_OLD) return beatTrackOld();
else return beatTrackNew();
}
BeatTracker::FeatureSet
BeatTracker::beatTrackOld()
{
double aCoeffs[] = { 1.0000, -0.5949, 0.2348 };
double bCoeffs[] = { 0.1600, 0.3200, 0.1600 };
TTParams ttParams;
ttParams.winLength = 512;
ttParams.lagLength = 128;
ttParams.LPOrd = 2;
ttParams.LPACoeffs = aCoeffs;
ttParams.LPBCoeffs = bCoeffs;
ttParams.alpha = 9;
ttParams.WinT.post = 8;
ttParams.WinT.pre = 7;
TempoTrack tempoTracker(ttParams);
vector<double> tempi;
vector<int> beats = tempoTracker.process(m_d->dfOutput, &tempi);
FeatureSet returnFeatures;
char label[100];
for (size_t i = 0; i < beats.size(); ++i) {
size_t frame = beats[i] * m_d->dfConfig.stepSize;
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
(frame, lrintf(m_inputSampleRate));
float bpm = 0.0;
int frameIncrement = 0;
if (i < beats.size() - 1) {
frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize;
// one beat is frameIncrement frames, so there are
// samplerate/frameIncrement bps, so
// 60*samplerate/frameIncrement bpm
if (frameIncrement > 0) {
bpm = (60.0 * m_inputSampleRate) / frameIncrement;
bpm = int(bpm * 100.0 + 0.5) / 100.0;
sprintf(label, "%.2f bpm", bpm);
feature.label = label;
}
}
returnFeatures[0].push_back(feature); // beats are output 0
}
double prevTempo = 0.0;
for (size_t i = 0; i < tempi.size(); ++i) {
size_t frame = i * m_d->dfConfig.stepSize * ttParams.lagLength;
// std::cerr << "unit " << i << ", step size " << m_d->dfConfig.stepSize << ", hop " << ttParams.lagLength << ", frame = " << frame << std::endl;
if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) {
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
(frame, lrintf(m_inputSampleRate));
feature.values.push_back(tempi[i]);
sprintf(label, "%.2f bpm", tempi[i]);
feature.label = label;
returnFeatures[2].push_back(feature); // tempo is output 2
prevTempo = tempi[i];
}
}
return returnFeatures;
}
BeatTracker::FeatureSet
BeatTracker::beatTrackNew()
{
vector<double> df;
vector<double> beatPeriod;
vector<double> tempi;
size_t nonZeroCount = m_d->dfOutput.size();
while (nonZeroCount > 0) {
if (m_d->dfOutput[nonZeroCount-1] > 0.0) {
break;
}
--nonZeroCount;
}
// std::cerr << "Note: nonZeroCount was " << m_d->dfOutput.size() << ", is now " << nonZeroCount << std::endl;
for (size_t i = 2; i < nonZeroCount; ++i) { // discard first two elts
df.push_back(m_d->dfOutput[i]);
beatPeriod.push_back(0.0);
}
if (df.empty()) return FeatureSet();
TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
// MEPD - note this function is now passed 2 new parameters, m_inputtempo and m_constraintempo
tt.calculateBeatPeriod(df, beatPeriod, tempi, m_inputtempo, m_constraintempo);
vector<double> beats;
// MEPD - note this function is now passed 2 new parameters, m_alpha and m_tightness
tt.calculateBeats(df, beatPeriod, beats, m_alpha, m_tightness);
FeatureSet returnFeatures;
char label[100];
for (size_t i = 0; i < beats.size(); ++i) {
size_t frame = beats[i] * m_d->dfConfig.stepSize;
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
(frame, lrintf(m_inputSampleRate));
float bpm = 0.0;
int frameIncrement = 0;
if (i+1 < beats.size()) {
frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize;
// one beat is frameIncrement frames, so there are
// samplerate/frameIncrement bps, so
// 60*samplerate/frameIncrement bpm
if (frameIncrement > 0) {
bpm = (60.0 * m_inputSampleRate) / frameIncrement;
bpm = int(bpm * 100.0 + 0.5) / 100.0;
sprintf(label, "%.2f bpm", bpm);
feature.label = label;
}
}
returnFeatures[0].push_back(feature); // beats are output 0
}
double prevTempo = 0.0;
for (size_t i = 0; i < tempi.size(); ++i) {
size_t frame = i * m_d->dfConfig.stepSize;
if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) {
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
(frame, lrintf(m_inputSampleRate));
feature.values.push_back(tempi[i]);
sprintf(label, "%.2f bpm", tempi[i]);
feature.label = label;
returnFeatures[2].push_back(feature); // tempo is output 2
prevTempo = tempi[i];
}
}
return returnFeatures;
}
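
In both beatTrackOld() and beatTrackNew(), each beat's label carries a local tempo derived from the spacing to the following beat: one beat spans frameIncrement samples, so the tempo is 60 * samplerate / frameIncrement BPM, rounded to two decimal places. A self-contained sketch of that arithmetic (the function name and the numbers are illustrative only):

// Local tempo from beat spacing, as used when labelling output 0 above.
#include <cstdio>

static float tempoFromBeatGap(int beatGapInDfFrames, int stepSize, float sampleRate)
{
    int frameIncrement = beatGapInDfFrames * stepSize;   // samples between beats
    float bpm = (60.0 * sampleRate) / frameIncrement;    // beats per minute
    return int(bpm * 100.0 + 0.5) / 100.0;               // round to 2 decimals, as above
}

int main()
{
    // Illustrative values: step size 512 at 44100 Hz, beats 43 df frames apart.
    printf("%.2f bpm\n", tempoFromBeatGap(43, 512, 44100.f));  // prints 120.19
    return 0;
}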

BeatTrack.h
@@ -0,0 +1,72 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _BEAT_TRACK_PLUGIN_H_
#define _BEAT_TRACK_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
class BeatTrackerData;
class BeatTracker : public Vamp::Plugin
{
public:
BeatTracker(float inputSampleRate);
virtual ~BeatTracker();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return FrequencyDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
ParameterList getParameterDescriptors() const;
float getParameter(std::string) const;
void setParameter(std::string, float);
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
protected:
BeatTrackerData *m_d;
int m_method;
int m_dfType;
// MEPD new protected parameters to allow the user to control these advanced parameters of the beat tracker
double m_alpha;
double m_tightness;
double m_inputtempo;
bool m_constraintempo;
bool m_whiten;
static float m_stepSecs;
FeatureSet beatTrackOld();
FeatureSet beatTrackNew();
};
#endif

ChromagramPlugin.cpp
@@ -0,0 +1,416 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "ChromagramPlugin.h"
#include <base/Pitch.h>
#include <dsp/chromagram/Chromagram.h>
using std::string;
using std::vector;
using std::cerr;
using std::endl;
ChromagramPlugin::ChromagramPlugin(float inputSampleRate) :
Vamp::Plugin(inputSampleRate),
m_chromagram(0),
m_step(0),
m_block(0)
{
m_minMIDIPitch = 36;
m_maxMIDIPitch = 96;
m_tuningFrequency = 440;
m_normalise = MathUtilities::NormaliseNone;
m_bpo = 12;
setupConfig();
}
void
ChromagramPlugin::setupConfig()
{
m_config.FS = lrintf(m_inputSampleRate);
m_config.min = Pitch::getFrequencyForPitch
(m_minMIDIPitch, 0, m_tuningFrequency);
m_config.max = Pitch::getFrequencyForPitch
(m_maxMIDIPitch, 0, m_tuningFrequency);
m_config.BPO = m_bpo;
m_config.CQThresh = 0.0054;
m_config.normalise = m_normalise;
m_step = 0;
m_block = 0;
}
ChromagramPlugin::~ChromagramPlugin()
{
delete m_chromagram;
}
string
ChromagramPlugin::getIdentifier() const
{
return "qm-chromagram";
}
string
ChromagramPlugin::getName() const
{
return "Chromagram";
}
string
ChromagramPlugin::getDescription() const
{
return "Extract a series of tonal chroma vectors from the audio";
}
string
ChromagramPlugin::getMaker() const
{
return "Queen Mary, University of London";
}
int
ChromagramPlugin::getPluginVersion() const
{
return 4;
}
string
ChromagramPlugin::getCopyright() const
{
return "Plugin by Chris Cannam and Christian Landone. Copyright (c) 2006-2009 QMUL - All Rights Reserved";
}
ChromagramPlugin::ParameterList
ChromagramPlugin::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "minpitch";
desc.name = "Minimum Pitch";
desc.unit = "MIDI units";
desc.description = "MIDI pitch corresponding to the lowest frequency to be included in the chromagram";
desc.minValue = 0;
desc.maxValue = 127;
desc.defaultValue = 36;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "maxpitch";
desc.name = "Maximum Pitch";
desc.unit = "MIDI units";
desc.description = "MIDI pitch corresponding to the highest frequency to be included in the chromagram";
desc.minValue = 0;
desc.maxValue = 127;
desc.defaultValue = 96;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "tuning";
desc.name = "Tuning Frequency";
desc.unit = "Hz";
desc.description = "Frequency of concert A";
desc.minValue = 360;
desc.maxValue = 500;
desc.defaultValue = 440;
desc.isQuantized = false;
list.push_back(desc);
desc.identifier = "bpo";
desc.name = "Bins per Octave";
desc.unit = "bins";
desc.description = "Number of constant-Q transform bins per octave, and the number of bins for the chromagram outputs";
desc.minValue = 2;
desc.maxValue = 480;
desc.defaultValue = 12;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "normalization";
desc.name = "Normalization";
desc.unit = "";
desc.description = "Normalization for each chromagram output column";
desc.minValue = 0;
desc.maxValue = 2;
desc.defaultValue = 0;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.valueNames.push_back("None");
desc.valueNames.push_back("Unit Sum");
desc.valueNames.push_back("Unit Maximum");
list.push_back(desc);
return list;
}
float
ChromagramPlugin::getParameter(std::string param) const
{
if (param == "minpitch") {
return m_minMIDIPitch;
}
if (param == "maxpitch") {
return m_maxMIDIPitch;
}
if (param == "tuning") {
return m_tuningFrequency;
}
if (param == "bpo") {
return m_bpo;
}
if (param == "normalization") {
return int(m_normalise);
}
std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \""
<< param << "\"" << std::endl;
return 0.0;
}
void
ChromagramPlugin::setParameter(std::string param, float value)
{
if (param == "minpitch") {
m_minMIDIPitch = lrintf(value);
} else if (param == "maxpitch") {
m_maxMIDIPitch = lrintf(value);
} else if (param == "tuning") {
m_tuningFrequency = value;
} else if (param == "bpo") {
m_bpo = lrintf(value);
} else if (param == "normalization") {
m_normalise = MathUtilities::NormaliseType(int(value + 0.0001));
} else {
std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \""
<< param << "\"" << std::endl;
}
setupConfig();
}
bool
ChromagramPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (m_chromagram) {
delete m_chromagram;
m_chromagram = 0;
}
if (channels < getMinChannelCount() ||
channels > getMaxChannelCount()) return false;
m_chromagram = new Chromagram(m_config);
m_binsums = vector<double>(m_config.BPO);
for (int i = 0; i < m_config.BPO; ++i) {
m_binsums[i] = 0.0;
}
m_count = 0;
m_step = m_chromagram->getHopSize();
m_block = m_chromagram->getFrameSize();
if (m_step < 1) m_step = 1;
if (blockSize != m_block) {
std::cerr << "ChromagramPlugin::initialise: ERROR: supplied block size " << blockSize << " differs from required block size " << m_block << ", initialise failing" << std::endl;
delete m_chromagram;
m_chromagram = 0;
return false;
}
if (stepSize != m_step) {
std::cerr << "ChromagramPlugin::initialise: NOTE: supplied step size " << stepSize << " differs from expected step size " << m_step << " (for block size = " << m_block << ")" << std::endl;
}
return true;
}
void
ChromagramPlugin::reset()
{
if (m_chromagram) {
delete m_chromagram;
m_chromagram = new Chromagram(m_config);
for (int i = 0; i < m_config.BPO; ++i) {
m_binsums[i] = 0.0;
}
m_count = 0;
}
}
size_t
ChromagramPlugin::getPreferredStepSize() const
{
if (!m_step) {
Chromagram chroma(m_config);
m_step = chroma.getHopSize();
m_block = chroma.getFrameSize();
if (m_step < 1) m_step = 1;
}
return m_step;
}
size_t
ChromagramPlugin::getPreferredBlockSize() const
{
if (!m_block) {
Chromagram chroma(m_config);
m_step = chroma.getHopSize();
m_block = chroma.getFrameSize();
if (m_step < 1) m_step = 1;
}
return m_block;
}
ChromagramPlugin::OutputList
ChromagramPlugin::getOutputDescriptors() const
{
OutputList list;
OutputDescriptor d;
d.identifier = "chromagram";
d.name = "Chromagram";
d.unit = "";
d.description = "Output of chromagram, as a single vector per process block";
d.hasFixedBinCount = true;
d.binCount = m_config.BPO;
const char *names[] =
{ "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };
if (d.binCount % 12 == 0) {
for (int i = 0; i < 12; ++i) {
int ipc = m_minMIDIPitch % 12;
int index = (i + ipc) % 12;
d.binNames.push_back(names[index]);
for (int j = 0; j < int(d.binCount) / 12 - 1; ++j) {
d.binNames.push_back("");
}
}
} else {
d.binNames.push_back(names[m_minMIDIPitch % 12]);
}
d.hasKnownExtents = (m_normalise != MathUtilities::NormaliseNone);
d.minValue = 0.0;
d.maxValue = (d.hasKnownExtents ? 1.0 : 0.0);
d.isQuantized = false;
d.sampleType = OutputDescriptor::OneSamplePerStep;
list.push_back(d);
d.identifier = "chromameans";
d.name = "Chroma Means";
d.description = "Mean values of chromagram bins across the duration of the input audio";
d.sampleType = OutputDescriptor::FixedSampleRate;
d.sampleRate = 1;
list.push_back(d);
return list;
}
ChromagramPlugin::FeatureSet
ChromagramPlugin::process(const float *const *inputBuffers,
Vamp::RealTime timestamp)
{
if (!m_chromagram) {
cerr << "ERROR: ChromagramPlugin::process: "
<< "Chromagram has not been initialised"
<< endl;
return FeatureSet();
}
double *real = new double[m_block];
double *imag = new double[m_block];
for (size_t i = 0; i <= m_block/2; ++i) {
real[i] = inputBuffers[0][i*2];
if (i > 0) real[m_block - i] = real[i];
imag[i] = inputBuffers[0][i*2+1];
if (i > 0) imag[m_block - i] = imag[i];
}
// cerr << "chromagram: timestamp = " << timestamp << endl;
/*
bool printThis = false;
if (timestamp.sec == 3 && timestamp.nsec < 250000000) {
printThis = true;
}
if (printThis) {
cerr << "\n\nchromagram: timestamp " << timestamp << ": input data starts:" << endl;
for (int i = 0; i < m_block && i < 1000; ++i) {
cerr << real[i] << "," << imag[i] << " ";
}
cerr << endl << "values:" << endl;
}
*/
double *output = m_chromagram->process(real, imag);
delete[] real;
delete[] imag;
Feature feature;
feature.hasTimestamp = false;
for (size_t i = 0; i < m_config.BPO; ++i) {
double value = output[i];
/*
if (printThis) {
cerr << value << " ";
}
*/
if (ISNAN(value)) value = 0.0;
m_binsums[i] += value;
feature.values.push_back(value);
}
feature.label = "";
++m_count;
/*
if (printThis) {
cerr << endl;
}
*/
FeatureSet returnFeatures;
returnFeatures[0].push_back(feature);
return returnFeatures;
}
ChromagramPlugin::FeatureSet
ChromagramPlugin::getRemainingFeatures()
{
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = Vamp::RealTime::zeroTime;
for (size_t i = 0; i < m_config.BPO; ++i) {
double v = m_binsums[i];
if (m_count > 0) v /= m_count;
feature.values.push_back(v);
}
feature.label = "Chromagram bin means";
FeatureSet returnFeatures;
returnFeatures[1].push_back(feature);
return returnFeatures;
}
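
setupConfig() converts the MIDI pitch range and tuning frequency into the constant-Q frequency range via Pitch::getFrequencyForPitch. Under the standard equal-temperament mapping f = concertA * 2^((midi - 69) / 12), the defaults (MIDI 36 to 96, A = 440 Hz) give a range of roughly 65.4 Hz to 2093 Hz. A minimal sketch of that conversion (the helper name is illustrative; real code would call qm-dsp's Pitch class):

// MIDI-to-frequency conversion used to derive the chromagram's min/max range.
#include <cmath>
#include <cstdio>

static double midiToHz(int midi, double concertA)
{
    // Equal temperament: concert A (MIDI 69) scaled by a semitone ratio of 2^(1/12).
    return concertA * std::pow(2.0, (midi - 69) / 12.0);
}

int main()
{
    // Default chromagram range: minpitch 36, maxpitch 96, tuning 440 Hz.
    printf("min = %.1f Hz, max = %.1f Hz\n", midiToHz(36, 440.0), midiToHz(96, 440.0));
    // prints approximately: min = 65.4 Hz, max = 2093.0 Hz
    return 0;
}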

ChromagramPlugin.h
@@ -0,0 +1,72 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _CHROMAGRAM_PLUGIN_H_
#define _CHROMAGRAM_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
#include <dsp/chromagram/Chromagram.h>
class ChromagramPlugin : public Vamp::Plugin
{
public:
ChromagramPlugin(float inputSampleRate);
virtual ~ChromagramPlugin();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return FrequencyDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
ParameterList getParameterDescriptors() const;
float getParameter(std::string) const;
void setParameter(std::string, float);
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
protected:
int m_minMIDIPitch;
int m_maxMIDIPitch;
float m_tuningFrequency;
MathUtilities::NormaliseType m_normalise;
int m_bpo;
void setupConfig();
ChromaConfig m_config;
Chromagram *m_chromagram;
mutable size_t m_step;
mutable size_t m_block;
vector<double> m_binsums;
size_t m_count;
};
#endif

KeyDetect.cpp
@@ -0,0 +1,407 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "KeyDetect.h"
using std::string;
using std::vector;
//using std::cerr;
using std::endl;
#include <cmath>
// Order for circle-of-5ths plotting
static int conversion[24] =
{ 7, 12, 5, 10, 3, 8, 1, 6, 11, 4, 9, 2,
16, 21, 14, 19, 24, 17, 22, 15, 20, 13, 18, 23 };
KeyDetector::KeyDetector(float inputSampleRate) :
Plugin(inputSampleRate),
m_stepSize(0),
m_blockSize(0),
m_tuningFrequency(440),
m_length(10),
m_getKeyMode(0),
m_inputFrame(0),
m_prevKey(-1)
{
}
KeyDetector::~KeyDetector()
{
delete m_getKeyMode;
if ( m_inputFrame ) {
delete [] m_inputFrame;
}
}
string
KeyDetector::getIdentifier() const
{
return "qm-keydetector";
}
string
KeyDetector::getName() const
{
return "Key Detector";
}
string
KeyDetector::getDescription() const
{
return "Estimate the key of the music";
}
string
KeyDetector::getMaker() const
{
return "Queen Mary, University of London";
}
int
KeyDetector::getPluginVersion() const
{
return 4;
}
string
KeyDetector::getCopyright() const
{
return "Plugin by Katy Noland and Christian Landone. Copyright (c) 2006-2009 QMUL - All Rights Reserved";
}
KeyDetector::ParameterList
KeyDetector::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "tuning";
desc.name = "Tuning Frequency";
desc.description = "Frequency of concert A";
desc.unit = "Hz";
desc.minValue = 420;
desc.maxValue = 460;
desc.defaultValue = 440;
desc.isQuantized = false;
list.push_back(desc);
desc.identifier = "length";
desc.name = "Window Length";
desc.unit = "chroma frames";
desc.description = "Number of chroma analysis frames per key estimation";
desc.minValue = 1;
desc.maxValue = 30;
desc.defaultValue = 10;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
return list;
}
float
KeyDetector::getParameter(std::string param) const
{
if (param == "tuning") {
return m_tuningFrequency;
}
if (param == "length") {
return m_length;
}
std::cerr << "WARNING: KeyDetector::getParameter: unknown parameter \""
<< param << "\"" << std::endl;
return 0.0;
}
void
KeyDetector::setParameter(std::string param, float value)
{
if (param == "tuning") {
m_tuningFrequency = value;
} else if (param == "length") {
m_length = int(value + 0.1);
} else {
std::cerr << "WARNING: KeyDetector::setParameter: unknown parameter \""
<< param << "\"" << std::endl;
}
}
bool
KeyDetector::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (m_getKeyMode) {
delete m_getKeyMode;
m_getKeyMode = 0;
}
if (channels < getMinChannelCount() ||
channels > getMaxChannelCount()) return false;
m_getKeyMode = new GetKeyMode(int(m_inputSampleRate + 0.1),
m_tuningFrequency,
m_length, m_length);
m_stepSize = m_getKeyMode->getHopSize();
m_blockSize = m_getKeyMode->getBlockSize();
if (stepSize != m_stepSize || blockSize != m_blockSize) {
std::cerr << "KeyDetector::initialise: ERROR: step/block sizes "
<< stepSize << "/" << blockSize << " differ from required "
<< m_stepSize << "/" << m_blockSize << std::endl;
delete m_getKeyMode;
m_getKeyMode = 0;
return false;
}
m_inputFrame = new double[m_blockSize];
m_prevKey = -1;
m_first = true;
return true;
}
void
KeyDetector::reset()
{
if (m_getKeyMode) {
delete m_getKeyMode;
m_getKeyMode = new GetKeyMode(int(m_inputSampleRate + 0.1),
m_tuningFrequency,
m_length, m_length);
}
if (m_inputFrame) {
for( unsigned int i = 0; i < m_blockSize; i++ ) {
m_inputFrame[ i ] = 0.0;
}
}
m_prevKey = -1;
m_first = true;
}
KeyDetector::OutputList
KeyDetector::getOutputDescriptors() const
{
OutputList list;
float osr = 0.0f;
if (m_stepSize == 0) (void)getPreferredStepSize();
osr = m_inputSampleRate / m_stepSize;
OutputDescriptor d;
d.identifier = "tonic";
d.name = "Tonic Pitch";
d.unit = "";
d.description = "Tonic of the estimated key (from C = 1 to B = 12)";
d.hasFixedBinCount = true;
d.binCount = 1;
d.hasKnownExtents = true;
d.isQuantized = true;
d.minValue = 1;
d.maxValue = 12;
d.quantizeStep = 1;
d.sampleRate = osr;
d.sampleType = OutputDescriptor::VariableSampleRate;
list.push_back(d);
d.identifier = "mode";
d.name = "Key Mode";
d.unit = "";
d.description = "Major or minor mode of the estimated key (major = 0, minor = 1)";
d.hasFixedBinCount = true;
d.binCount = 1;
d.hasKnownExtents = true;
d.isQuantized = true;
d.minValue = 0;
d.maxValue = 1;
d.quantizeStep = 1;
d.sampleRate = osr;
d.sampleType = OutputDescriptor::VariableSampleRate;
list.push_back(d);
d.identifier = "key";
d.name = "Key";
d.unit = "";
d.description = "Estimated key (from C major = 1 to B major = 12 and C minor = 13 to B minor = 24)";
d.hasFixedBinCount = true;
d.binCount = 1;
d.hasKnownExtents = true;
d.isQuantized = true;
d.minValue = 1;
d.maxValue = 24;
d.quantizeStep = 1;
d.sampleRate = osr;
d.sampleType = OutputDescriptor::VariableSampleRate;
list.push_back(d);
d.identifier = "keystrength";
d.name = "Key Strength Plot";
d.unit = "";
d.description = "Correlation of the chroma vector with stored key profile for each major and minor key";
d.hasFixedBinCount = true;
d.binCount = 25;
d.hasKnownExtents = false;
d.isQuantized = false;
d.sampleType = OutputDescriptor::OneSamplePerStep;
for (int i = 0; i < 24; ++i) {
if (i == 12) d.binNames.push_back(" ");
int idx = conversion[i];
std::string label = getKeyName(idx > 12 ? idx-12 : idx,
i >= 12,
true);
d.binNames.push_back(label);
}
list.push_back(d);
return list;
}
KeyDetector::FeatureSet
KeyDetector::process(const float *const *inputBuffers,
Vamp::RealTime now)
{
if (m_stepSize == 0) {
return FeatureSet();
}
FeatureSet returnFeatures;
for ( unsigned int i = 0 ; i < m_blockSize; i++ ) {
m_inputFrame[i] = (double)inputBuffers[0][i];
}
// int key = (m_getKeyMode->process(m_inputFrame) % 24);
int key = m_getKeyMode->process(m_inputFrame);
bool minor = m_getKeyMode->isModeMinor(key);
int tonic = key;
if (tonic > 12) tonic -= 12;
int prevTonic = m_prevKey;
if (prevTonic > 12) prevTonic -= 12;
if (m_first || (tonic != prevTonic)) {
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = now;
// feature.timestamp = now;
feature.values.push_back((float)tonic);
feature.label = getKeyName(tonic, minor, false);
returnFeatures[0].push_back(feature); // tonic
}
if (m_first || (minor != (m_getKeyMode->isModeMinor(m_prevKey)))) {
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = now;
feature.values.push_back(minor ? 1.f : 0.f);
feature.label = (minor ? "Minor" : "Major");
returnFeatures[1].push_back(feature); // mode
}
if (m_first || (key != m_prevKey)) {
Feature feature;
feature.hasTimestamp = true;
feature.timestamp = now;
feature.values.push_back((float)key);
feature.label = getKeyName(tonic, minor, true);
returnFeatures[2].push_back(feature); // key
}
m_prevKey = key;
m_first = false;
Feature ksf;
ksf.values.reserve(25);
double *keystrengths = m_getKeyMode->getKeyStrengths();
for (int i = 0; i < 24; ++i) {
if (i == 12) ksf.values.push_back(-1);
ksf.values.push_back(keystrengths[conversion[i]-1]);
}
ksf.hasTimestamp = false;
returnFeatures[3].push_back(ksf);
return returnFeatures;
}
KeyDetector::FeatureSet
KeyDetector::getRemainingFeatures()
{
return FeatureSet();
}
size_t
KeyDetector::getPreferredStepSize() const
{
if (!m_stepSize) {
GetKeyMode gkm(int(m_inputSampleRate + 0.1),
m_tuningFrequency, m_length, m_length);
m_stepSize = gkm.getHopSize();
m_blockSize = gkm.getBlockSize();
}
return m_stepSize;
}
size_t
KeyDetector::getPreferredBlockSize() const
{
if (!m_blockSize) {
GetKeyMode gkm(int(m_inputSampleRate + 0.1),
m_tuningFrequency, m_length, m_length);
m_stepSize = gkm.getHopSize();
m_blockSize = gkm.getBlockSize();
}
return m_blockSize;
}
std::string
KeyDetector::getKeyName(int index, bool minor, bool includeMajMin) const
{
// Keys are numbered with 1 => C, 12 => B
    // This reflects the chromagram base note being set to C in qm-dsp's GetKeyMode.cpp
static const char *namesMajor[] = {
"C", "Db", "D", "Eb",
"E", "F", "F# / Gb", "G",
"Ab", "A", "Bb", "B"
};
static const char *namesMinor[] = {
"C", "C#", "D", "Eb / D#",
"E", "F", "F#", "G",
"G#", "A", "Bb", "B"
};
if (index < 1 || index > 12) {
return "(unknown)";
}
std::string base;
if (minor) base = namesMinor[index - 1];
else base = namesMajor[index - 1];
if (!includeMajMin) return base;
if (minor) return base + " minor";
else return base + " major";
}
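
The "key" output packs tonic and mode into a single index (C major = 1 to B major = 12, C minor = 13 to B minor = 24), and process() recovers the tonic by subtracting 12 for minor keys. A small decoding sketch for consumers of that output (the function name is illustrative):

// Decode the combined "key" output (1..12 = C..B major, 13..24 = C..B minor)
// into tonic and mode, mirroring the tonic handling in process() above.
#include <cstdio>

static void decodeKey(int key, int &tonic, bool &minor)
{
    minor = (key > 12);
    tonic = minor ? key - 12 : key;   // 1 => C ... 12 => B
}

int main()
{
    int tonic = 0; bool minor = false;
    decodeKey(22, tonic, minor);
    printf("tonic %d, %s\n", tonic, minor ? "minor" : "major");  // tonic 10 (A), minor
    return 0;
}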

KeyDetect.h
@@ -0,0 +1,69 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _GETMODE_PLUGIN_H_
#define _GETMODE_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
#include <dsp/keydetection/GetKeyMode.h>
class KeyDetector : public Vamp::Plugin
{
public:
KeyDetector(float inputSampleRate);
virtual ~KeyDetector();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return TimeDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
ParameterList getParameterDescriptors() const;
float getParameter(std::string) const;
void setParameter(std::string, float);
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
protected:
mutable size_t m_stepSize;
mutable size_t m_blockSize;
float m_tuningFrequency;
int m_length;
std::string getKeyName(int index, bool minor, bool includeMajMin) const;
GetKeyMode* m_getKeyMode;
double* m_inputFrame;
int m_prevKey;
bool m_first;
};
#endif

SimilarityPlugin.cpp
@@ -0,0 +1,939 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
* SimilarityPlugin.cpp
*
* Copyright 2009 Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include <iostream>
#include <cstdio>
#include "SimilarityPlugin.h"
#include "base/Pitch.h"
#include "dsp/mfcc/MFCC.h"
#include "dsp/chromagram/Chromagram.h"
#include "dsp/rateconversion/Decimator.h"
#include "dsp/rhythm/BeatSpectrum.h"
#include "maths/KLDivergence.h"
#include "maths/CosineDistance.h"
#include "maths/MathUtilities.h"
using std::string;
using std::vector;
using std::cerr;
using std::endl;
using std::ostringstream;
const float
SimilarityPlugin::m_noRhythm = 0.009;
const float
SimilarityPlugin::m_allRhythm = 0.991;
SimilarityPlugin::SimilarityPlugin(float inputSampleRate) :
Plugin(inputSampleRate),
m_type(TypeMFCC),
m_mfcc(0),
m_rhythmfcc(0),
m_chromagram(0),
m_decimator(0),
m_featureColumnSize(20),
m_rhythmWeighting(0.5f),
m_rhythmClipDuration(4.f), // seconds
m_rhythmClipOrigin(40.f), // seconds
m_rhythmClipFrameSize(0),
m_rhythmClipFrames(0),
m_rhythmColumnSize(20),
m_blockSize(0),
m_channels(0),
m_processRate(0),
m_frameNo(0),
m_done(false)
{
int rate = lrintf(m_inputSampleRate);
int internalRate = 22050;
int decimationFactor = rate / internalRate;
if (decimationFactor < 1) decimationFactor = 1;
// must be a power of two
while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
m_processRate = rate / decimationFactor; // may be 22050, 24000 etc
}
SimilarityPlugin::~SimilarityPlugin()
{
delete m_mfcc;
delete m_rhythmfcc;
delete m_chromagram;
delete m_decimator;
}
string
SimilarityPlugin::getIdentifier() const
{
return "qm-similarity";
}
string
SimilarityPlugin::getName() const
{
return "Similarity";
}
string
SimilarityPlugin::getDescription() const
{
return "Return a distance matrix for similarity between the input audio channels";
}
string
SimilarityPlugin::getMaker() const
{
return "Queen Mary, University of London";
}
int
SimilarityPlugin::getPluginVersion() const
{
return 1;
}
string
SimilarityPlugin::getCopyright() const
{
return "Plugin by Mark Levy, Kurt Jacobson and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved";
}
size_t
SimilarityPlugin::getMinChannelCount() const
{
return 1;
}
size_t
SimilarityPlugin::getMaxChannelCount() const
{
return 1024;
}
int
SimilarityPlugin::getDecimationFactor() const
{
int rate = lrintf(m_inputSampleRate);
return rate / m_processRate;
}
size_t
SimilarityPlugin::getPreferredStepSize() const
{
if (m_blockSize == 0) calculateBlockSize();
// there is also an assumption to this effect in process()
// (referring to m_fftSize/2 instead of a literal post-decimation
// step size):
return m_blockSize/2;
}
size_t
SimilarityPlugin::getPreferredBlockSize() const
{
if (m_blockSize == 0) calculateBlockSize();
return m_blockSize;
}
void
SimilarityPlugin::calculateBlockSize() const
{
if (m_blockSize != 0) return;
int decimationFactor = getDecimationFactor();
m_blockSize = 2048 * decimationFactor;
}
SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "featureType";
desc.name = "Feature Type";
desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma. Rhythmic: compare beat spectra of short regions.";
desc.unit = "";
desc.minValue = 0;
desc.maxValue = 4;
desc.defaultValue = 1;
desc.isQuantized = true;
desc.quantizeStep = 1;
desc.valueNames.push_back("Timbre");
desc.valueNames.push_back("Timbre and Rhythm");
desc.valueNames.push_back("Chroma");
desc.valueNames.push_back("Chroma and Rhythm");
desc.valueNames.push_back("Rhythm only");
list.push_back(desc);
/*
desc.identifier = "rhythmWeighting";
desc.name = "Influence of Rhythm";
desc.description = "Proportion of similarity measure made up from rhythmic similarity component, from 0 (entirely timbral or chromatic) to 100 (entirely rhythmic).";
desc.unit = "%";
desc.minValue = 0;
desc.maxValue = 100;
desc.defaultValue = 0;
desc.isQuantized = false;
desc.valueNames.clear();
list.push_back(desc);
*/
return list;
}
float
SimilarityPlugin::getParameter(std::string param) const
{
if (param == "featureType") {
if (m_rhythmWeighting > m_allRhythm) {
return 4;
}
switch (m_type) {
case TypeMFCC:
if (m_rhythmWeighting < m_noRhythm) return 0;
else return 1;
break;
case TypeChroma:
if (m_rhythmWeighting < m_noRhythm) return 2;
else return 3;
break;
}
return 1;
// } else if (param == "rhythmWeighting") {
// return nearbyint(m_rhythmWeighting * 100.0);
}
std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \""
<< param << "\"" << std::endl;
return 0.0;
}
void
SimilarityPlugin::setParameter(std::string param, float value)
{
if (param == "featureType") {
int v = int(value + 0.1);
Type newType = m_type;
switch (v) {
case 0: newType = TypeMFCC; m_rhythmWeighting = 0.0f; break;
case 1: newType = TypeMFCC; m_rhythmWeighting = 0.5f; break;
case 2: newType = TypeChroma; m_rhythmWeighting = 0.0f; break;
case 3: newType = TypeChroma; m_rhythmWeighting = 0.5f; break;
case 4: newType = TypeMFCC; m_rhythmWeighting = 1.f; break;
}
if (newType != m_type) m_blockSize = 0;
m_type = newType;
return;
// } else if (param == "rhythmWeighting") {
// m_rhythmWeighting = value / 100;
// return;
}
std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \""
<< param << "\"" << std::endl;
}
SimilarityPlugin::OutputList
SimilarityPlugin::getOutputDescriptors() const
{
OutputList list;
OutputDescriptor similarity;
similarity.identifier = "distancematrix";
similarity.name = "Distance Matrix";
similarity.description = "Distance matrix for similarity metric. Smaller = more similar. Should be symmetrical.";
similarity.unit = "";
similarity.hasFixedBinCount = true;
similarity.binCount = m_channels;
similarity.hasKnownExtents = false;
similarity.isQuantized = false;
similarity.sampleType = OutputDescriptor::FixedSampleRate;
similarity.sampleRate = 1;
m_distanceMatrixOutput = list.size();
list.push_back(similarity);
OutputDescriptor simvec;
simvec.identifier = "distancevector";
simvec.name = "Distance from First Channel";
simvec.description = "Distance vector for similarity of each channel to the first channel. Smaller = more similar.";
simvec.unit = "";
simvec.hasFixedBinCount = true;
simvec.binCount = m_channels;
simvec.hasKnownExtents = false;
simvec.isQuantized = false;
simvec.sampleType = OutputDescriptor::FixedSampleRate;
simvec.sampleRate = 1;
m_distanceVectorOutput = list.size();
list.push_back(simvec);
OutputDescriptor sortvec;
sortvec.identifier = "sorteddistancevector";
sortvec.name = "Ordered Distances from First Channel";
sortvec.description = "Vector of the order of other channels in similarity to the first, followed by distance vector for similarity of each to the first. Smaller = more similar.";
sortvec.unit = "";
sortvec.hasFixedBinCount = true;
sortvec.binCount = m_channels;
sortvec.hasKnownExtents = false;
sortvec.isQuantized = false;
sortvec.sampleType = OutputDescriptor::FixedSampleRate;
sortvec.sampleRate = 1;
m_sortedVectorOutput = list.size();
list.push_back(sortvec);
OutputDescriptor means;
means.identifier = "means";
means.name = "Feature Means";
means.description = "Means of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type.";
means.unit = "";
means.hasFixedBinCount = true;
means.binCount = m_featureColumnSize;
means.hasKnownExtents = false;
means.isQuantized = false;
means.sampleType = OutputDescriptor::FixedSampleRate;
means.sampleRate = 1;
m_meansOutput = list.size();
list.push_back(means);
OutputDescriptor variances;
variances.identifier = "variances";
variances.name = "Feature Variances";
variances.description = "Variances of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type.";
variances.unit = "";
variances.hasFixedBinCount = true;
variances.binCount = m_featureColumnSize;
variances.hasKnownExtents = false;
variances.isQuantized = false;
variances.sampleType = OutputDescriptor::FixedSampleRate;
variances.sampleRate = 1;
m_variancesOutput = list.size();
list.push_back(variances);
OutputDescriptor beatspectrum;
beatspectrum.identifier = "beatspectrum";
beatspectrum.name = "Beat Spectra";
beatspectrum.description = "Rhythmic self-similarity vectors (beat spectra) for the input channels. Feature time (sec) corresponds to input channel. Not returned if rhythm weighting is zero.";
beatspectrum.unit = "";
if (m_rhythmClipFrames > 0) {
beatspectrum.hasFixedBinCount = true;
beatspectrum.binCount = m_rhythmClipFrames / 2;
} else {
beatspectrum.hasFixedBinCount = false;
}
beatspectrum.hasKnownExtents = false;
beatspectrum.isQuantized = false;
beatspectrum.sampleType = OutputDescriptor::FixedSampleRate;
beatspectrum.sampleRate = 1;
m_beatSpectraOutput = list.size();
list.push_back(beatspectrum);
return list;
}
bool
SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (channels < getMinChannelCount()) return false;
// Using more than getMaxChannelCount is not actually a problem
// for us. Using "incorrect" step and block sizes would be fine
// for timbral or chroma similarity, but will break rhythmic
// similarity, so we'd better enforce these.
if (stepSize != getPreferredStepSize()) {
std::cerr << "SimilarityPlugin::initialise: supplied step size "
<< stepSize << " differs from required step size "
<< getPreferredStepSize() << std::endl;
return false;
}
if (blockSize != getPreferredBlockSize()) {
std::cerr << "SimilarityPlugin::initialise: supplied block size "
<< blockSize << " differs from required block size "
<< getPreferredBlockSize() << std::endl;
return false;
}
m_blockSize = blockSize;
m_channels = channels;
m_lastNonEmptyFrame = std::vector<int>(m_channels);
for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1;
m_emptyFrameCount = std::vector<int>(m_channels);
for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0;
m_frameNo = 0;
int decimationFactor = getDecimationFactor();
if (decimationFactor > 1) {
m_decimator = new Decimator(m_blockSize, decimationFactor);
}
if (m_type == TypeMFCC) {
m_featureColumnSize = 20;
MFCCConfig config(m_processRate);
config.fftsize = 2048;
config.nceps = m_featureColumnSize - 1;
config.want_c0 = true;
config.logpower = 1;
m_mfcc = new MFCC(config);
m_fftSize = m_mfcc->getfftlength();
m_rhythmClipFrameSize = m_fftSize / 4;
// std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl;
} else if (m_type == TypeChroma) {
m_featureColumnSize = 12;
// For simplicity, aim to have the chroma fft size equal to
// 2048, the same as the mfcc fft size (so the input block
// size does not depend on the feature type and we can use the
// same processing parameters for rhythm etc). This is also
// why getPreferredBlockSize can confidently return 2048 * the
// decimation factor.
// The fft size for a chromagram is the filterbank Q value
// times the sample rate, divided by the minimum frequency,
// rounded up to the nearest power of two.
double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0);
double fmin = (q * m_processRate) / 2048.0;
// Round fmin up to the nearest MIDI pitch multiple of 12.
// So long as fmin is greater than 12 to start with, this
// should not change the resulting fft size.
int pmin = Pitch::getPitchForFrequency(float(fmin));
pmin = ((pmin / 12) + 1) * 12;
fmin = Pitch::getFrequencyForPitch(pmin);
float fmax = Pitch::getFrequencyForPitch(pmin + 36);
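// Editor's worked example (illustrative, assuming m_processRate == 22050):
// q = 1 / (2^(1/12) - 1) ~= 16.82, so the initial fmin is
// 16.82 * 22050 / 2048 ~= 181 Hz, roughly MIDI pitch 54.  Rounding up to
// the next multiple of 12 gives pmin = 60 (middle C, ~261.6 Hz), fmax
// three octaves higher at MIDI 96 (~2093 Hz), and a chroma FFT length of
// nextPowerOfTwo(16.82 * 22050 / 261.6) = nextPowerOfTwo(~1417) = 2048,
// as intended.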
ChromaConfig config;
config.FS = m_processRate;
config.min = fmin;
config.max = fmax;
config.BPO = 12;
config.CQThresh = 0.0054;
// We don't normalise the chromagram's columns individually;
// we normalise the mean at the end instead
config.normalise = MathUtilities::NormaliseNone;
m_chromagram = new Chromagram(config);
m_fftSize = m_chromagram->getFrameSize();
if (m_fftSize != 2048) {
std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl;
}
// std::cerr << "fftsize = " << m_fftSize << std::endl;
m_rhythmClipFrameSize = m_fftSize / 4;
// std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl;
// std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl;
} else {
std::cerr << "SimilarityPlugin::initialise: internal error: unknown type " << m_type << std::endl;
return false;
}
if (needRhythm()) {
m_rhythmClipFrames =
int(ceil((m_rhythmClipDuration * m_processRate)
/ m_rhythmClipFrameSize));
// std::cerr << "SimilarityPlugin::initialise: rhythm clip requires "
// << m_rhythmClipFrames << " frames of size "
// << m_rhythmClipFrameSize << " at process rate "
// << m_processRate << " ( = "
// << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )"
// << std::endl;
MFCCConfig config(m_processRate);
config.fftsize = m_rhythmClipFrameSize;
config.nceps = m_rhythmColumnSize - 1;
config.want_c0 = true;
config.logpower = 1;
config.window = RectangularWindow; // because no overlap
m_rhythmfcc = new MFCC(config);
}
for (int i = 0; i < m_channels; ++i) {
m_values.push_back(FeatureMatrix());
if (needRhythm()) {
m_rhythmValues.push_back(FeatureColumnQueue());
}
}
m_done = false;
return true;
}
void
SimilarityPlugin::reset()
{
for (int i = 0; i < m_values.size(); ++i) {
m_values[i].clear();
}
for (int i = 0; i < m_rhythmValues.size(); ++i) {
m_rhythmValues[i].clear();
}
for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) {
m_lastNonEmptyFrame[i] = -1;
}
for (int i = 0; i < m_emptyFrameCount.size(); ++i) {
m_emptyFrameCount[i] = 0;
}
m_done = false;
}
SimilarityPlugin::FeatureSet
SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */)
{
if (m_done) {
return FeatureSet();
}
double *dblbuf = new double[m_blockSize];
double *decbuf = dblbuf;
if (m_decimator) decbuf = new double[m_fftSize];
double *raw = new double[std::max(m_featureColumnSize,
m_rhythmColumnSize)];
float threshold = 1e-10;
bool someRhythmFrameNeeded = false;
for (size_t c = 0; c < m_channels; ++c) {
bool empty = true;
for (int i = 0; i < m_blockSize; ++i) {
float val = inputBuffers[c][i];
if (fabs(val) > threshold) empty = false;
dblbuf[i] = val;
}
if (empty) {
if (needRhythm() && ((m_frameNo % 2) == 0)) {
for (int i = 0; i < m_fftSize / m_rhythmClipFrameSize; ++i) {
if (m_rhythmValues[c].size() < m_rhythmClipFrames) {
FeatureColumn mf(m_rhythmColumnSize);
for (int j = 0; j < m_rhythmColumnSize; ++j) {
    mf[j] = 0.0;
}
m_rhythmValues[c].push_back(mf);
}
}
}
m_emptyFrameCount[c]++;
continue;
}
m_lastNonEmptyFrame[c] = m_frameNo;
if (m_decimator) {
m_decimator->process(dblbuf, decbuf);
}
if (needTimbre()) {
FeatureColumn mf(m_featureColumnSize);
if (m_type == TypeMFCC) {
m_mfcc->process(decbuf, raw);
for (int i = 0; i < m_featureColumnSize; ++i) {
mf[i] = raw[i];
}
} else if (m_type == TypeChroma) {
double *chroma = m_chromagram->process(decbuf);
for (int i = 0; i < m_featureColumnSize; ++i) {
mf[i] = chroma[i];
}
}
m_values[c].push_back(mf);
}
// std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl;
if (needRhythm() && ((m_frameNo % 2) == 0)) {
// The incoming frames are overlapping; we only use every
// other one, because we don't want the overlap (it would
// screw up the rhythm)
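// Editor's summary of the loop below (added comment): each non-overlapping
// frame is split into rhythm-MFCC sub-frames of m_rhythmClipFrameSize
// samples, queued per channel up to m_rhythmClipFrames entries.  Once the
// queue is full, older entries are only displaced while the current time
// is still before m_rhythmClipOrigin + m_rhythmClipDuration, so the
// retained clip ends up covering roughly m_rhythmClipDuration seconds
// starting m_rhythmClipOrigin seconds into the input.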
int frameOffset = 0;
while (frameOffset + m_rhythmClipFrameSize <= m_fftSize) {
bool needRhythmFrame = true;
if (m_rhythmValues[c].size() >= m_rhythmClipFrames) {
needRhythmFrame = false;
// assumes hopsize = framesize/2
float current = m_frameNo * (m_fftSize/2) + frameOffset;
current = current / m_processRate;
if (current - m_rhythmClipDuration < m_rhythmClipOrigin) {
needRhythmFrame = true;
m_rhythmValues[c].pop_front();
}
// if (needRhythmFrame) {
// std::cerr << "at current = " <<current << " (frame = " << m_frameNo << "), have " << m_rhythmValues[c].size() << ", need rhythm = " << needRhythmFrame << std::endl;
// }
}
if (needRhythmFrame) {
someRhythmFrameNeeded = true;
m_rhythmfcc->process(decbuf + frameOffset, raw);
FeatureColumn mf(m_rhythmColumnSize);
for (int i = 0; i < m_rhythmColumnSize; ++i) {
mf[i] = raw[i];
}
m_rhythmValues[c].push_back(mf);
}
frameOffset += m_rhythmClipFrameSize;
}
}
}
if (!needTimbre() && !someRhythmFrameNeeded && ((m_frameNo % 2) == 0)) {
// std::cerr << "done!" << std::endl;
m_done = true;
}
if (m_decimator) delete[] decbuf;
delete[] dblbuf;
delete[] raw;
++m_frameNo;
return FeatureSet();
}
SimilarityPlugin::FeatureMatrix
SimilarityPlugin::calculateTimbral(FeatureSet &returnFeatures)
{
FeatureMatrix m(m_channels); // means
FeatureMatrix v(m_channels); // variances
for (int i = 0; i < m_channels; ++i) {
FeatureColumn mean(m_featureColumnSize), variance(m_featureColumnSize);
for (int j = 0; j < m_featureColumnSize; ++j) {
mean[j] = 0.0;
variance[j] = 0.0;
int count;
// We want to take values up to, but not including, the
// last non-empty frame (which may be partial)
int sz = m_lastNonEmptyFrame[i] - m_emptyFrameCount[i];
if (sz < 0) sz = 0;
if (sz >= m_values[i].size()) sz = m_values[i].size()-1;
count = 0;
for (int k = 0; k < sz; ++k) {
double val = m_values[i][k][j];
if (ISNAN(val) || ISINF(val)) continue;
mean[j] += val;
++count;
}
if (count > 0) mean[j] /= count;
count = 0;
for (int k = 0; k < sz; ++k) {
double val = ((m_values[i][k][j] - mean[j]) *
(m_values[i][k][j] - mean[j]));
if (ISNAN(val) || ISINF(val)) continue;
variance[j] += val;
++count;
}
if (count > 0) variance[j] /= count;
}
m[i] = mean;
v[i] = variance;
}
FeatureMatrix distances(m_channels);
if (m_type == TypeMFCC) {
// "Despite the fact that MFCCs extracted from music are
// clearly not Gaussian, [14] showed, somewhat surprisingly,
// that a similarity function comparing single Gaussians
// modelling MFCCs for each track can perform as well as
// mixture models. A great advantage of using single
// Gaussians is that a simple closed form exists for the KL
// divergence." -- Mark Levy, "Lightweight measures for
// timbral similarity of musical audio"
// (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf)
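// Editor's sketch (not taken from the qm-dsp sources): a standard
// symmetrised KL divergence between Gaussians with diagonal covariances
// N(m1, v1) and N(m2, v2) has the closed form
//   0.5 * sum_i [ v1[i]/v2[i] + v2[i]/v1[i]
//                 + (m1[i]-m2[i])^2 * (1/v1[i] + 1/v2[i]) - 2 ].
// Whether KLDivergence::distanceGaussian computes exactly this variant is
// an assumption; it is the kind of closed form the quotation above refers to.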
KLDivergence kld;
for (int i = 0; i < m_channels; ++i) {
for (int j = 0; j < m_channels; ++j) {
double d = kld.distanceGaussian(m[i], v[i], m[j], v[j]);
distances[i].push_back(d);
}
}
} else {
// We use the KL divergence for distributions of discrete
// variables, as chroma are histograms already. Or at least,
// they will be when we've normalised them like this:
for (int i = 0; i < m_channels; ++i) {
MathUtilities::normalise(m[i], MathUtilities::NormaliseUnitSum);
}
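// Editor's note: for unit-sum vectors p and q (as produced by the
// normalisation above), the symmetrised discrete KL divergence is
//   sum_i [ p[i]*log(p[i]/q[i]) + q[i]*log(q[i]/p[i]) ].
// The exact variant, and any guard against zero-valued bins, used by
// KLDivergence::distanceDistribution is an assumption here.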
KLDivergence kld;
for (int i = 0; i < m_channels; ++i) {
for (int j = 0; j < m_channels; ++j) {
double d = kld.distanceDistribution(m[i], m[j], true);
distances[i].push_back(d);
}
}
}
Feature feature;
feature.hasTimestamp = true;
char labelBuffer[100];
for (int i = 0; i < m_channels; ++i) {
feature.timestamp = Vamp::RealTime(i, 0);
sprintf(labelBuffer, "Means for channel %d", i+1);
feature.label = labelBuffer;
feature.values.clear();
for (int k = 0; k < m_featureColumnSize; ++k) {
feature.values.push_back(m[i][k]);
}
returnFeatures[m_meansOutput].push_back(feature);
sprintf(labelBuffer, "Variances for channel %d", i+1);
feature.label = labelBuffer;
feature.values.clear();
for (int k = 0; k < m_featureColumnSize; ++k) {
feature.values.push_back(v[i][k]);
}
returnFeatures[m_variancesOutput].push_back(feature);
}
return distances;
}
SimilarityPlugin::FeatureMatrix
SimilarityPlugin::calculateRhythmic(FeatureSet &returnFeatures)
{
if (!needRhythm()) return FeatureMatrix();
// std::cerr << "SimilarityPlugin::initialise: rhythm clip for channel 0 contains "
// << m_rhythmValues[0].size() << " frames of size "
// << m_rhythmClipFrameSize << " at process rate "
// << m_processRate << " ( = "
// << (float(m_rhythmValues[0].size() * m_rhythmClipFrameSize) / m_processRate) << " sec )"
// << std::endl;
BeatSpectrum bscalc;
CosineDistance cd;
// Our rhythm feature matrix is a deque of vectors for practical
// reasons, but BeatSpectrum::process wants a vector of vectors
// (which is what FeatureMatrix happens to be).
FeatureMatrixSet bsinput(m_channels);
for (int i = 0; i < m_channels; ++i) {
for (int j = 0; j < m_rhythmValues[i].size(); ++j) {
bsinput[i].push_back(m_rhythmValues[i][j]);
}
}
FeatureMatrix bs(m_channels);
for (int i = 0; i < m_channels; ++i) {
bs[i] = bscalc.process(bsinput[i]);
}
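// Editor's note: the pairwise comparison below is a cosine distance
// between whole beat spectra, conventionally
//   d(a, b) = 1 - (a . b) / (|a| * |b|),
// so identical rhythmic profiles score 0 and orthogonal ones score 1.
// The exact convention used by CosineDistance::distance is an assumption.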
FeatureMatrix distances(m_channels);
for (int i = 0; i < m_channels; ++i) {
for (int j = 0; j < m_channels; ++j) {
double d = cd.distance(bs[i], bs[j]);
distances[i].push_back(d);
}
}
Feature feature;
feature.hasTimestamp = true;
char labelBuffer[100];
for (int i = 0; i < m_channels; ++i) {
feature.timestamp = Vamp::RealTime(i, 0);
sprintf(labelBuffer, "Beat spectrum for channel %d", i+1);
feature.label = labelBuffer;
feature.values.clear();
for (int j = 0; j < bs[i].size(); ++j) {
feature.values.push_back(bs[i][j]);
}
returnFeatures[m_beatSpectraOutput].push_back(feature);
}
return distances;
}
double
SimilarityPlugin::getDistance(const FeatureMatrix &timbral,
const FeatureMatrix &rhythmic,
int i, int j)
{
double distance = 1.0;
if (needTimbre()) distance *= timbral[i][j];
if (needRhythm()) distance *= rhythmic[i][j];
return distance;
}
SimilarityPlugin::FeatureSet
SimilarityPlugin::getRemainingFeatures()
{
FeatureSet returnFeatures;
// We want to return a matrix of the distances between channels,
// but Vamp doesn't have a matrix return type so we will actually
// return a series of vectors
FeatureMatrix timbralDistances, rhythmicDistances;
if (needTimbre()) {
timbralDistances = calculateTimbral(returnFeatures);
}
if (needRhythm()) {
rhythmicDistances = calculateRhythmic(returnFeatures);
}
// We give all features a timestamp, otherwise hosts will tend to
// stamp them at the end of the file, which is annoying
Feature feature;
feature.hasTimestamp = true;
Feature distanceVectorFeature;
distanceVectorFeature.label = "Distance from first channel";
distanceVectorFeature.hasTimestamp = true;
distanceVectorFeature.timestamp = Vamp::RealTime::zeroTime;
std::map<double, int> sorted;
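// Editor's note: keying this map on the distance means two channels at
// exactly the same distance from the first channel would collapse into a
// single entry (a std::multimap would keep both); exact floating-point
// ties are unlikely in practice.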
char labelBuffer[100];
for (int i = 0; i < m_channels; ++i) {
feature.timestamp = Vamp::RealTime(i, 0);
feature.values.clear();
for (int j = 0; j < m_channels; ++j) {
double dist = getDistance(timbralDistances, rhythmicDistances, i, j);
feature.values.push_back(dist);
}
sprintf(labelBuffer, "Distances from channel %d", i+1);
feature.label = labelBuffer;
returnFeatures[m_distanceMatrixOutput].push_back(feature);
double fromFirst =
getDistance(timbralDistances, rhythmicDistances, 0, i);
distanceVectorFeature.values.push_back(fromFirst);
sorted[fromFirst] = i;
}
returnFeatures[m_distanceVectorOutput].push_back(distanceVectorFeature);
feature.label = "Order of channels by similarity to first channel";
feature.values.clear();
feature.timestamp = Vamp::RealTime(0, 0);
for (std::map<double, int>::iterator i = sorted.begin();
i != sorted.end(); ++i) {
feature.values.push_back(i->second + 1);
}
returnFeatures[m_sortedVectorOutput].push_back(feature);
feature.label = "Ordered distances of channels from first channel";
feature.values.clear();
feature.timestamp = Vamp::RealTime(1, 0);
for (std::map<double, int>::iterator i = sorted.begin();
i != sorted.end(); ++i) {
feature.values.push_back(i->first);
}
returnFeatures[m_sortedVectorOutput].push_back(feature);
return returnFeatures;
}

View File

@ -0,0 +1,123 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
* SimilarityPlugin.h
*
* Copyright 2008 Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _SIMILARITY_PLUGIN_H_
#define _SIMILARITY_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
#include <vamp-sdk/RealTime.h>
#include <vector>
#include <deque>
class MFCC;
class Chromagram;
class Decimator;
class SimilarityPlugin : public Vamp::Plugin
{
public:
SimilarityPlugin(float inputSampleRate);
virtual ~SimilarityPlugin();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
InputDomain getInputDomain() const { return TimeDomain; }
size_t getMinChannelCount() const;
size_t getMaxChannelCount() const;
SimilarityPlugin::ParameterList getParameterDescriptors() const;
float getParameter(std::string param) const;
void setParameter(std::string param, float value);
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers, Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
protected:
int getDecimationFactor() const;
enum Type {
TypeMFCC,
TypeChroma
};
void calculateBlockSize() const;
bool needRhythm() const { return m_rhythmWeighting > m_noRhythm; }
bool needTimbre() const { return m_rhythmWeighting < m_allRhythm; }
Type m_type;
MFCC *m_mfcc;
MFCC *m_rhythmfcc;
Chromagram *m_chromagram;
Decimator *m_decimator;
int m_featureColumnSize;
float m_rhythmWeighting;
float m_rhythmClipDuration;
float m_rhythmClipOrigin;
int m_rhythmClipFrameSize;
int m_rhythmClipFrames;
int m_rhythmColumnSize;
mutable size_t m_blockSize; // before decimation
size_t m_fftSize; // after decimation
int m_channels;
int m_processRate;
int m_frameNo;
bool m_done;
static const float m_noRhythm;
static const float m_allRhythm;
std::vector<int> m_lastNonEmptyFrame; // per channel
std::vector<int> m_emptyFrameCount; // per channel
mutable int m_distanceMatrixOutput;
mutable int m_distanceVectorOutput;
mutable int m_sortedVectorOutput;
mutable int m_meansOutput;
mutable int m_variancesOutput;
mutable int m_beatSpectraOutput;
typedef std::vector<double> FeatureColumn;
typedef std::vector<FeatureColumn> FeatureMatrix;
typedef std::vector<FeatureMatrix> FeatureMatrixSet;
typedef std::deque<FeatureColumn> FeatureColumnQueue;
typedef std::vector<FeatureColumnQueue> FeatureQueueSet;
FeatureMatrixSet m_values;
FeatureQueueSet m_rhythmValues;
FeatureMatrix calculateTimbral(FeatureSet &returnFeatures);
FeatureMatrix calculateRhythmic(FeatureSet &returnFeatures);
double getDistance(const FeatureMatrix &timbral,
const FeatureMatrix &rhythmic,
int i, int j);
};
#endif

View File

@ -0,0 +1,443 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "TonalChangeDetect.h"
#include <base/Pitch.h>
#include <dsp/chromagram/Chromagram.h>
#include <dsp/tonal/ChangeDetectionFunction.h>
TonalChangeDetect::TonalChangeDetect(float fInputSampleRate)
: Vamp::Plugin(fInputSampleRate),
m_chromagram(0),
m_step(0),
m_block(0),
m_stepDelay(0),
m_origin(Vamp::RealTime::zeroTime),
m_haveOrigin(false)
{
m_minMIDIPitch = 32;
m_maxMIDIPitch = 108;
m_tuningFrequency = 440;
m_iSmoothingWidth = 5;
setupConfig();
}
TonalChangeDetect::~TonalChangeDetect()
{
}
bool TonalChangeDetect::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
if (m_chromagram) {
delete m_chromagram;
m_chromagram = 0;
}
if (channels < getMinChannelCount() ||
channels > getMaxChannelCount()) {
std::cerr << "TonalChangeDetect::initialise: Given channel count " << channels << " outside acceptable range (" << getMinChannelCount() << " to " << getMaxChannelCount() << ")" << std::endl;
return false;
}
m_chromagram = new Chromagram(m_config);
m_step = m_chromagram->getHopSize();
m_block = m_chromagram->getFrameSize();
if (stepSize != m_step) {
std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl;
delete m_chromagram;
m_chromagram = 0;
return false;
}
if (blockSize != m_block) {
std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl;
delete m_chromagram;
m_chromagram = 0;
return false;
}
// m_stepDelay = (blockSize - stepSize) / 2;
// m_stepDelay = m_stepDelay / stepSize;
m_stepDelay = (blockSize - stepSize) / stepSize; //!!! why? seems about right to look at, but...
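// Editor's illustration with hypothetical numbers (the real values come
// from the Chromagram configuration): with a frame of 8192 samples and a
// hop of 2048, m_stepDelay = (8192 - 2048) / 2048 = 3, i.e. the chroma
// output is held back by three hops via the m_pending queue below.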
// std::cerr << "TonalChangeDetect::initialise: step " << stepSize << ", block "
// << blockSize << ", delay " << m_stepDelay << std::endl;
m_vaCurrentVector.resize(12, 0.0);
return true;
}
std::string TonalChangeDetect::getIdentifier() const
{
return "qm-tonalchange";
}
std::string TonalChangeDetect::getName() const
{
return "Tonal Change";
}
std::string TonalChangeDetect::getDescription() const
{
return "Detect and return the positions of harmonic changes such as chord boundaries";
}
std::string TonalChangeDetect::getMaker() const
{
return "Queen Mary, University of London";
}
int TonalChangeDetect::getPluginVersion() const
{
return 2;
}
std::string TonalChangeDetect::getCopyright() const
{
return "Plugin by Martin Gasser and Christopher Harte. Copyright (c) 2006-2009 QMUL - All Rights Reserved";
}
TonalChangeDetect::ParameterList TonalChangeDetect::getParameterDescriptors() const
{
ParameterList list;
ParameterDescriptor desc;
desc.identifier = "smoothingwidth";
desc.name = "Gaussian smoothing";
desc.description = "Window length for the internal smoothing operation, in chroma analysis frames";
desc.unit = "frames";
desc.minValue = 0;
desc.maxValue = 20;
desc.defaultValue = 5;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "minpitch";
desc.name = "Chromagram minimum pitch";
desc.unit = "MIDI units";
desc.description = "Lowest pitch in MIDI units to be included in the chroma analysis";
desc.minValue = 0;
desc.maxValue = 127;
desc.defaultValue = 32;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "maxpitch";
desc.name = "Chromagram maximum pitch";
desc.unit = "MIDI units";
desc.description = "Highest pitch in MIDI units to be included in the chroma analysis";
desc.minValue = 0;
desc.maxValue = 127;
desc.defaultValue = 108;
desc.isQuantized = true;
desc.quantizeStep = 1;
list.push_back(desc);
desc.identifier = "tuning";
desc.name = "Chromagram tuning frequency";
desc.unit = "Hz";
desc.description = "Frequency of concert A in the music under analysis";
desc.minValue = 420;
desc.maxValue = 460;
desc.defaultValue = 440;
desc.isQuantized = false;
list.push_back(desc);
return list;
}
float
TonalChangeDetect::getParameter(std::string param) const
{
if (param == "smoothingwidth") {
return m_iSmoothingWidth;
}
if (param == "minpitch") {
return m_minMIDIPitch;
}
if (param == "maxpitch") {
return m_maxMIDIPitch;
}
if (param == "tuning") {
return m_tuningFrequency;
}
std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \""
<< param << "\"" << std::endl;
return 0.0;
}
void
TonalChangeDetect::setParameter(std::string param, float value)
{
if (param == "minpitch") {
m_minMIDIPitch = lrintf(value);
} else if (param == "maxpitch") {
m_maxMIDIPitch = lrintf(value);
} else if (param == "tuning") {
m_tuningFrequency = value;
}
else if (param == "smoothingwidth") {
m_iSmoothingWidth = int(value);
} else {
std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \""
<< param << "\"" << std::endl;
}
setupConfig();
}
void TonalChangeDetect::setupConfig()
{
m_config.FS = lrintf(m_inputSampleRate);
m_config.min = Pitch::getFrequencyForPitch
(m_minMIDIPitch, 0, m_tuningFrequency);
m_config.max = Pitch::getFrequencyForPitch
(m_maxMIDIPitch, 0, m_tuningFrequency);
m_config.BPO = 12;
m_config.CQThresh = 0.0054;
m_config.normalise = MathUtilities::NormaliseNone;
m_step = 0;
m_block = 0;
}
void
TonalChangeDetect::reset()
{
if (m_chromagram) {
delete m_chromagram;
m_chromagram = new Chromagram(m_config);
}
while (!m_pending.empty()) m_pending.pop();
m_vaCurrentVector.clear();
m_TCSGram.clear();
m_origin = Vamp::RealTime::zeroTime;
m_haveOrigin = false;
}
size_t
TonalChangeDetect::getPreferredStepSize() const
{
if (!m_step) {
Chromagram chroma(m_config);
m_step = chroma.getHopSize();
m_block = chroma.getFrameSize();
}
return m_step;
}
size_t
TonalChangeDetect::getPreferredBlockSize() const
{
if (!m_step) {
Chromagram chroma(m_config);
m_step = chroma.getHopSize();
m_block = chroma.getFrameSize();
}
return m_block;
}
TonalChangeDetect::OutputList TonalChangeDetect::getOutputDescriptors() const
{
OutputList list;
OutputDescriptor hc;
hc.identifier = "tcstransform";
hc.name = "Transform to 6D Tonal Content Space";
hc.unit = "";
hc.description = "Representation of content in a six-dimensional tonal space";
hc.hasFixedBinCount = true;
hc.binCount = 6;
hc.hasKnownExtents = true;
hc.minValue = -1.0;
hc.maxValue = 1.0;
hc.isQuantized = false;
hc.sampleType = OutputDescriptor::OneSamplePerStep;
OutputDescriptor d;
d.identifier = "tcfunction";
d.name = "Tonal Change Detection Function";
d.unit = "";
d.description = "Estimate of the likelihood of a tonal change occurring within each spectral frame";
d.minValue = 0;
d.maxValue = 2;
d.hasFixedBinCount = true;
d.binCount = 1;
d.hasKnownExtents = false;
d.isQuantized = false;
d.sampleType = OutputDescriptor::VariableSampleRate;
double dStepSecs = double(getPreferredStepSize()) / m_inputSampleRate;
d.sampleRate = 1.0f / dStepSecs;
OutputDescriptor changes;
changes.identifier = "changepositions";
changes.name = "Tonal Change Positions";
changes.unit = "";
changes.description = "Estimated locations of tonal changes";
changes.hasFixedBinCount = true;
changes.binCount = 0;
changes.hasKnownExtents = false;
changes.isQuantized = false;
changes.sampleType = OutputDescriptor::VariableSampleRate;
changes.sampleRate = 1.0 / dStepSecs;
list.push_back(hc);
list.push_back(d);
list.push_back(changes);
return list;
}
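// Editor's overview, inferred from the calls below rather than from any
// original documentation: each block is passed through the Chromagram to
// obtain a 12-bin chroma vector, which is L1-normalised and projected by
// TonalEstimator::transform2TCS into a 6-dimensional tonal centroid
// vector; the sequence of these vectors (m_TCSGram) is then analysed in
// getRemainingFeatures() by ChangeDetectionFunction, and local maxima of
// the resulting curve are reported as tonal change positions.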
TonalChangeDetect::FeatureSet
TonalChangeDetect::process(const float *const *inputBuffers,
Vamp::RealTime timestamp)
{
if (!m_chromagram) {
cerr << "ERROR: TonalChangeDetect::process: "
<< "Chromagram has not been initialised"
<< endl;
return FeatureSet();
}
if (!m_haveOrigin) m_origin = timestamp;
// convert float* to double*
double *tempBuffer = new double[m_block];
for (size_t i = 0; i < m_block; ++i) {
tempBuffer[i] = inputBuffers[0][i];
}
double *output = m_chromagram->process(tempBuffer);
delete[] tempBuffer;
for (size_t i = 0; i < 12; i++)
{
m_vaCurrentVector[i] = output[i];
}
FeatureSet returnFeatures;
if (m_stepDelay == 0) {
m_vaCurrentVector.normalizeL1();
TCSVector tcsVector = m_TonalEstimator.transform2TCS(m_vaCurrentVector);
m_TCSGram.addTCSVector(tcsVector);
Feature feature;
feature.hasTimestamp = false;
for (int i = 0; i < 6; i++)
{ feature.values.push_back(static_cast<float>(tcsVector[i])); }
feature.label = "";
returnFeatures[0].push_back(feature);
return returnFeatures;
}
if (m_pending.size() == m_stepDelay) {
ChromaVector v = m_pending.front();
v.normalizeL1();
TCSVector tcsVector = m_TonalEstimator.transform2TCS(v);
m_TCSGram.addTCSVector(tcsVector);
Feature feature;
feature.hasTimestamp = false;
for (int i = 0; i < 6; i++)
{ feature.values.push_back(static_cast<float>(tcsVector[i])); }
feature.label = "";
returnFeatures[0].push_back(feature);
m_pending.pop();
} else {
returnFeatures[0].push_back(Feature());
m_TCSGram.addTCSVector(TCSVector());
}
m_pending.push(m_vaCurrentVector);
return returnFeatures;
}
TonalChangeDetect::FeatureSet TonalChangeDetect::getRemainingFeatures()
{
FeatureSet returnFeatures;
while (!m_pending.empty()) {
ChromaVector v = m_pending.front();
v.normalizeL1();
TCSVector tcsVector = m_TonalEstimator.transform2TCS(v);
m_TCSGram.addTCSVector(tcsVector);
Feature feature;
feature.hasTimestamp = false;
for (int i = 0; i < 6; i++)
{ feature.values.push_back(static_cast<float>(tcsVector[i])); }
feature.label = "";
returnFeatures[0].push_back(feature);
m_pending.pop();
}
ChangeDFConfig dfc;
dfc.smoothingWidth = double(m_iSmoothingWidth);
ChangeDetectionFunction df(dfc);
ChangeDistance d = df.process(m_TCSGram);
for (int i = 0; i < d.size(); i++)
{
double dCurrent = d[i];
double dPrevious = d[i > 0 ? i - 1 : i];
double dNext = d[i < d.size()-1 ? i + 1 : i];
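// (Editor's note: at the ends of the curve the neighbour index clamps to
//  i itself, so the first and last frames can never satisfy the strict
//  local-maximum test below.)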
Feature feature;
feature.label = "";
feature.hasTimestamp = true;
feature.timestamp = m_origin +
Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate);
feature.values.push_back(dCurrent);
returnFeatures[1].push_back(feature);
if (dCurrent > dPrevious && dCurrent > dNext)
{
Feature featurePeak;
featurePeak.label = "";
featurePeak.hasTimestamp = true;
featurePeak.timestamp = m_origin +
Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate);
returnFeatures[2].push_back(featurePeak);
}
}
return returnFeatures;
}

View File

@ -0,0 +1,84 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM Vamp Plugin Set
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef _TONALCHANGEDETECT_
#define _TONALCHANGEDETECT_
#include <vamp-sdk/Plugin.h>
#include <dsp/chromagram/Chromagram.h>
#include <dsp/tonal/TonalEstimator.h>
#include <dsp/tonal/TCSgram.h>
#include <queue>
#include <vector>
#include <valarray>
class TonalChangeDetect : public Vamp::Plugin
{
public:
TonalChangeDetect(float fInputSampleRate);
virtual ~TonalChangeDetect();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return TimeDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
ParameterList getParameterDescriptors() const;
float getParameter(std::string) const;
void setParameter(std::string, float);
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
private:
void setupConfig();
ChromaConfig m_config;
Chromagram *m_chromagram;
TonalEstimator m_TonalEstimator;
mutable size_t m_step;
mutable size_t m_block;
size_t m_stepDelay;
std::queue<ChromaVector> m_pending;
ChromaVector m_vaCurrentVector;
TCSGram m_TCSGram;
int m_iSmoothingWidth; // smoothing window size
int m_minMIDIPitch; // chromagram parameters
int m_maxMIDIPitch;
float m_tuningFrequency;
Vamp::RealTime m_origin;
bool m_haveOrigin;
};
#endif // _TONALCHANGEDETECT_

File diff suppressed because it is too large

View File

@ -0,0 +1,81 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
#ifndef _TRANSCRIPTION_PLUGIN_H_
#define _TRANSCRIPTION_PLUGIN_H_
#include <vamp-sdk/Plugin.h>
class Transcription : public Vamp::Plugin
{
public:
Transcription(float inputSampleRate);
virtual ~Transcription();
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
void reset();
InputDomain getInputDomain() const { return TimeDomain; }
std::string getIdentifier() const;
std::string getName() const;
std::string getDescription() const;
std::string getMaker() const;
int getPluginVersion() const;
std::string getCopyright() const;
size_t getPreferredStepSize() const;
size_t getPreferredBlockSize() const;
OutputList getOutputDescriptors() const;
FeatureSet process(const float *const *inputBuffers,
Vamp::RealTime timestamp);
FeatureSet getRemainingFeatures();
protected:
size_t m_stepSize;
size_t m_blockSize;
double * m_SoundIn;
size_t m_SampleN;
size_t m_AllocN;
bool m_Excess;
Vamp::RealTime m_Base;
/*
void sofacomplexMex(double *y, double *z, int ncols,double StartNote,double NoteInterval1,double NoteNum,double C,double D);
void FindMaxN( double *InputArray, int InputLen,int MaxOrder);
double SumF(double *InputArray,int Start, int End);
int round10(int x) ;
void ConToPitch1250(double *In, int InLen);
void Norm1(double *In, int InLen);
void Smooth(double *In, int InLen,int smoothLen);
void FindPeaks(double *In, int InLen,double *Out1,double *Out2, int db, int db2, int db3);
void ConFrom1050To960(double *In, double *out, int InputLen);
void Move( double *InputArray, int InputLen,int m);
double SumArray( double *InputArray, int InputHLen, int InputVLen);
double Sum( double *InputArray, int InputHLen);
void MeanV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void SumV( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void SumV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void MaxV( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void MaxV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void MinArray( double *InputArray, int InputHLen, int InputVLen, double MinValue);
void MaxArray( double *InputArray, int InputHLen, int InputVLen, double MaxValue);
double GetMaxValue( double *InputArray, int InputHLen, int InputVLen);
void RemoveNoise( double *InputArray, int InputHLen, int InputVLen);
double MeanArray( double *InputArray, int InputHLen, int InputVLen);
void Mydiff( double *InputArray, int InputHLen, int InputVLen,int n);
void PeakDetect(double *In, int InLen);
void MeanV( double *InputArray, int InputHLen, int InputVLen, double *OutArray);
void Edetect(double *InputArray, int InputHLen, int InputVLen, double MinT, double db1,double *OutOne);
void OnsetDetection2(double *In,int InputLen,double *OutOne,double a,double b);
void PitchEstimation(double *In, int InLen, double *OutArray,double *OutArray2);
void DoMultiPitch(double *In, int RLen,int CLen, double *Out1, double *Out2);
int OnsetToArray(double *In, int Len, double *OutStart,double *OutEnd);
void dbfunction( double *InputArray, int InputHLen, int InputVLen,double *OutArray);
void Transcribe(int Len,int inputLen,double *SoundIn,double *out,double *outArray2,double *outArray3);*/
};
#endif

View File

@ -41,26 +41,42 @@
#include <vamp/vamp.h>
#include <vamp-sdk/PluginAdapter.h>
#include "ZeroCrossing.h"
#include "SpectralCentroid.h"
#include "PercussionOnsetDetector.h"
#include "AmplitudeFollower.h"
#include "OnsetDetect.h"
#include "BarBeatTrack.h"
#include "BeatTrack.h"
#include "ChromagramPlugin.h"
#include "EBUr128.h"
#include "KeyDetect.h"
#include "OnsetDetect.h"
#include "PercussionOnsetDetector.h"
#include "SimilarityPlugin.h"
#include "SpectralCentroid.h"
#include "TonalChangeDetect.h"
#include "Transcription.h"
#include "TruePeak.h"
#include "ZeroCrossing.h"
#ifdef HAVE_AUBIO
#include "Onset.h"
#endif
static Vamp::PluginAdapter<ZeroCrossing> zeroCrossingAdapter;
static Vamp::PluginAdapter<SpectralCentroid> spectralCentroidAdapter;
static Vamp::PluginAdapter<PercussionOnsetDetector> percussionOnsetAdapter;
static Vamp::PluginAdapter<AmplitudeFollower> amplitudeAdapter;
static Vamp::PluginAdapter<OnsetDetector> onsetDetectorAdapter;
static Vamp::PluginAdapter<VampEBUr128> VampEBUr128Adapter;
static Vamp::PluginAdapter<VampTruePeak> VampTruePeakAdapter;
static Vamp::PluginAdapter<AmplitudeFollower> AmplitudeFollowerAdapter;
static Vamp::PluginAdapter<BarBeatTracker> BarBeatTrackerAdapter;
static Vamp::PluginAdapter<BeatTracker> BeatTrackerAdapter;
static Vamp::PluginAdapter<ChromagramPlugin> ChromagramPluginAdapter;
static Vamp::PluginAdapter<VampEBUr128> EBUr128Adapter;
static Vamp::PluginAdapter<KeyDetector> KeyDetectorAdapter;
static Vamp::PluginAdapter<OnsetDetector> OnsetDetectorAdapter;
static Vamp::PluginAdapter<PercussionOnsetDetector> PercussionOnsetDetectorAdapter;
static Vamp::PluginAdapter<SimilarityPlugin> SimilarityPluginAdapter;
static Vamp::PluginAdapter<SpectralCentroid> SpectralCentroidAdapter;
static Vamp::PluginAdapter<TonalChangeDetect> TonalChangeDetectAdapter;
static Vamp::PluginAdapter<Transcription> TranscriptionAdapter;
static Vamp::PluginAdapter<VampTruePeak> TruePeakAdapter;
static Vamp::PluginAdapter<ZeroCrossing> ZeroCrossingAdapter;
#ifdef HAVE_AUBIO
static Vamp::PluginAdapter<Onset> onsetAdapter;
static Vamp::PluginAdapter<Onset> OnsetAdapter;
#endif
const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version,
@ -69,15 +85,22 @@ const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version,
if (version < 1) return 0;
switch (index) {
case 0: return zeroCrossingAdapter.getDescriptor();
case 1: return spectralCentroidAdapter.getDescriptor();
case 2: return percussionOnsetAdapter.getDescriptor();
case 3: return amplitudeAdapter.getDescriptor();
case 4: return onsetDetectorAdapter.getDescriptor();
case 5: return VampEBUr128Adapter.getDescriptor();
case 6: return VampTruePeakAdapter.getDescriptor();
case 0: return AmplitudeFollowerAdapter.getDescriptor();
case 1: return BarBeatTrackerAdapter.getDescriptor();
case 2: return BeatTrackerAdapter.getDescriptor();
case 3: return ChromagramPluginAdapter.getDescriptor();
case 4: return EBUr128Adapter.getDescriptor();
case 5: return KeyDetectorAdapter.getDescriptor();
case 6: return OnsetDetectorAdapter.getDescriptor();
case 7: return PercussionOnsetDetectorAdapter.getDescriptor();
case 8: return SimilarityPluginAdapter.getDescriptor();
case 9: return SpectralCentroidAdapter.getDescriptor();
case 10: return TonalChangeDetectAdapter.getDescriptor();
case 11: return TranscriptionAdapter.getDescriptor();
case 12: return TruePeakAdapter.getDescriptor();
case 13: return ZeroCrossingAdapter.getDescriptor();
#ifdef HAVE_AUBIO
case 7: return onsetAdapter.getDescriptor();
case 14: return OnsetAdapter.getDescriptor();
#endif
default: return 0;
}

View File

@ -38,11 +38,18 @@ def build(bld):
obj.source = '''
plugins.cpp
AmplitudeFollower.cpp
BarBeatTrack.cpp
BeatTrack.cpp
ChromagramPlugin.cpp
EBUr128.cpp
ebu_r128_proc.cc
KeyDetect.cpp
OnsetDetect.cpp
PercussionOnsetDetector.cpp
SimilarityPlugin.cpp
SpectralCentroid.cpp
TonalChangeDetect.cpp
Transcription.cpp
TruePeak.cpp
ZeroCrossing.cpp
'''