Include vamp-pyin
In preparation for captainMorgan's pitch analysis script.
This commit is contained in:
parent
1c8b6e1b42
commit
63994f3b82
|
@ -0,0 +1,499 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "LocalCandidatePYIN.h"
|
||||
#include "MonoPitch.h"
|
||||
#include "YinUtil.h"
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <cstdio>
|
||||
#include <sstream>
|
||||
// #include <iostream>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <map>
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
using Vamp::RealTime;
|
||||
|
||||
|
||||
LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
|
||||
Plugin(inputSampleRate),
|
||||
m_channels(0),
|
||||
m_stepSize(256),
|
||||
m_blockSize(2048),
|
||||
m_fmin(40),
|
||||
m_fmax(700),
|
||||
m_oPitchTrackCandidates(0),
|
||||
m_threshDistr(2.0f),
|
||||
m_outputUnvoiced(0.0f),
|
||||
m_preciseTime(0.0f),
|
||||
m_pitchProb(0),
|
||||
m_timestamp(0),
|
||||
m_nCandidate(13)
|
||||
{
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::~LocalCandidatePYIN()
|
||||
{
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getIdentifier() const
|
||||
{
|
||||
return "localcandidatepyin";
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getName() const
|
||||
{
|
||||
return "Local Candidate PYIN";
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getDescription() const
|
||||
{
|
||||
return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getMaker() const
|
||||
{
|
||||
return "Matthias Mauch";
|
||||
}
|
||||
|
||||
int
|
||||
LocalCandidatePYIN::getPluginVersion() const
|
||||
{
|
||||
// Increment this each time you release a version that behaves
|
||||
// differently from the previous one
|
||||
return 2;
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getCopyright() const
|
||||
{
|
||||
return "GPL";
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::InputDomain
|
||||
LocalCandidatePYIN::getInputDomain() const
|
||||
{
|
||||
return TimeDomain;
|
||||
}
|
||||
|
||||
size_t
|
||||
LocalCandidatePYIN::getPreferredBlockSize() const
|
||||
{
|
||||
return 2048;
|
||||
}
|
||||
|
||||
size_t
|
||||
LocalCandidatePYIN::getPreferredStepSize() const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
size_t
|
||||
LocalCandidatePYIN::getMinChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t
|
||||
LocalCandidatePYIN::getMaxChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::ParameterList
|
||||
LocalCandidatePYIN::getParameterDescriptors() const
|
||||
{
|
||||
ParameterList list;
|
||||
|
||||
ParameterDescriptor d;
|
||||
|
||||
d.identifier = "threshdistr";
|
||||
d.name = "Yin threshold distribution";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 7.0f;
|
||||
d.defaultValue = 2.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
d.valueNames.push_back("Uniform");
|
||||
d.valueNames.push_back("Beta (mean 0.10)");
|
||||
d.valueNames.push_back("Beta (mean 0.15)");
|
||||
d.valueNames.push_back("Beta (mean 0.20)");
|
||||
d.valueNames.push_back("Beta (mean 0.30)");
|
||||
d.valueNames.push_back("Single Value 0.10");
|
||||
d.valueNames.push_back("Single Value 0.15");
|
||||
d.valueNames.push_back("Single Value 0.20");
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "outputunvoiced";
|
||||
d.valueNames.clear();
|
||||
d.name = "Output estimates classified as unvoiced?";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 2.0f;
|
||||
d.defaultValue = 0.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
d.valueNames.push_back("No");
|
||||
d.valueNames.push_back("Yes");
|
||||
d.valueNames.push_back("Yes, as negative frequencies");
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "precisetime";
|
||||
d.valueNames.clear();
|
||||
d.name = "Use non-standard precise YIN timing (slow).";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 1.0f;
|
||||
d.defaultValue = 0.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
list.push_back(d);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
float
|
||||
LocalCandidatePYIN::getParameter(string identifier) const
|
||||
{
|
||||
if (identifier == "threshdistr") {
|
||||
return m_threshDistr;
|
||||
}
|
||||
if (identifier == "outputunvoiced") {
|
||||
return m_outputUnvoiced;
|
||||
}
|
||||
if (identifier == "precisetime") {
|
||||
return m_preciseTime;
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
void
|
||||
LocalCandidatePYIN::setParameter(string identifier, float value)
|
||||
{
|
||||
if (identifier == "threshdistr")
|
||||
{
|
||||
m_threshDistr = value;
|
||||
}
|
||||
if (identifier == "outputunvoiced")
|
||||
{
|
||||
m_outputUnvoiced = value;
|
||||
}
|
||||
if (identifier == "precisetime")
|
||||
{
|
||||
m_preciseTime = value;
|
||||
}
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::ProgramList
|
||||
LocalCandidatePYIN::getPrograms() const
|
||||
{
|
||||
ProgramList list;
|
||||
return list;
|
||||
}
|
||||
|
||||
string
|
||||
LocalCandidatePYIN::getCurrentProgram() const
|
||||
{
|
||||
return ""; // no programs
|
||||
}
|
||||
|
||||
void
|
||||
LocalCandidatePYIN::selectProgram(string name)
|
||||
{
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::OutputList
|
||||
LocalCandidatePYIN::getOutputDescriptors() const
|
||||
{
|
||||
OutputList outputs;
|
||||
|
||||
OutputDescriptor d;
|
||||
|
||||
d.identifier = "pitchtrackcandidates";
|
||||
d.name = "Pitch track candidates";
|
||||
d.description = "Multiple candidate pitch tracks.";
|
||||
d.unit = "Hz";
|
||||
d.hasFixedBinCount = false;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = m_fmin;
|
||||
d.maxValue = 500; //!!!???
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
|
||||
return outputs;
|
||||
}
|
||||
|
||||
bool
|
||||
LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
||||
{
|
||||
if (channels < getMinChannelCount() ||
|
||||
channels > getMaxChannelCount()) return false;
|
||||
|
||||
/*
|
||||
std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
|
||||
<< ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
m_channels = channels;
|
||||
m_stepSize = stepSize;
|
||||
m_blockSize = blockSize;
|
||||
|
||||
reset();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
LocalCandidatePYIN::reset()
|
||||
{
|
||||
m_pitchProb.clear();
|
||||
m_timestamp.clear();
|
||||
/*
|
||||
std::cerr << "LocalCandidatePYIN::reset"
|
||||
<< ", blockSize = " << m_blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::FeatureSet
|
||||
LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
||||
{
|
||||
int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
|
||||
timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
|
||||
|
||||
double *dInputBuffers = new double[m_blockSize];
|
||||
for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
||||
|
||||
size_t yinBufferSize = m_blockSize/2;
|
||||
double* yinBuffer = new double[yinBufferSize];
|
||||
if (!m_preciseTime) YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
||||
else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
||||
|
||||
delete [] dInputBuffers;
|
||||
|
||||
YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
|
||||
|
||||
float minFrequency = 60;
|
||||
float maxFrequency = 900;
|
||||
vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
|
||||
m_threshDistr,
|
||||
yinBufferSize,
|
||||
m_inputSampleRate/maxFrequency,
|
||||
m_inputSampleRate/minFrequency);
|
||||
|
||||
vector<pair<double, double> > tempPitchProb;
|
||||
for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
|
||||
{
|
||||
if (peakProbability[iBuf] > 0)
|
||||
{
|
||||
double currentF0 =
|
||||
m_inputSampleRate * (1.0 /
|
||||
YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
|
||||
double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
|
||||
tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
|
||||
}
|
||||
}
|
||||
m_pitchProb.push_back(tempPitchProb);
|
||||
m_timestamp.push_back(timestamp);
|
||||
|
||||
delete[] yinBuffer;
|
||||
|
||||
return FeatureSet();
|
||||
}
|
||||
|
||||
LocalCandidatePYIN::FeatureSet
|
||||
LocalCandidatePYIN::getRemainingFeatures()
|
||||
{
|
||||
// timestamp -> candidate number -> value
|
||||
map<RealTime, map<int, float> > featureValues;
|
||||
|
||||
// std::cerr << "in remaining features" << std::endl;
|
||||
|
||||
if (m_pitchProb.empty()) {
|
||||
return FeatureSet();
|
||||
}
|
||||
|
||||
// MONO-PITCH STUFF
|
||||
MonoPitch mp;
|
||||
size_t nFrame = m_timestamp.size();
|
||||
vector<vector<float> > pitchTracks;
|
||||
vector<float> freqSum = vector<float>(m_nCandidate);
|
||||
vector<float> freqNumber = vector<float>(m_nCandidate);
|
||||
vector<float> freqMean = vector<float>(m_nCandidate);
|
||||
|
||||
boost::math::normal normalDist(0, 8); // semitones sd
|
||||
float maxNormalDist = boost::math::pdf(normalDist, 0);
|
||||
|
||||
// Viterbi-decode multiple times with different frequencies emphasised
|
||||
for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
||||
{
|
||||
pitchTracks.push_back(vector<float>(nFrame));
|
||||
vector<vector<pair<double,double> > > tempPitchProb;
|
||||
float centrePitch = 45 + 3 * iCandidate;
|
||||
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
|
||||
tempPitchProb.push_back(vector<pair<double,double> >());
|
||||
float sumProb = 0;
|
||||
float pitch = 0;
|
||||
float prob = 0;
|
||||
for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
||||
{
|
||||
pitch = m_pitchProb[iFrame][iProb].first;
|
||||
prob = m_pitchProb[iFrame][iProb].second *
|
||||
boost::math::pdf(normalDist, pitch-centrePitch) /
|
||||
maxNormalDist * 2;
|
||||
sumProb += prob;
|
||||
tempPitchProb[iFrame].push_back(
|
||||
pair<double,double>(pitch,prob));
|
||||
}
|
||||
for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
||||
{
|
||||
tempPitchProb[iFrame][iProb].second /= sumProb;
|
||||
}
|
||||
}
|
||||
|
||||
vector<float> mpOut = mp.process(tempPitchProb);
|
||||
float prevFreq = 0;
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
||||
{
|
||||
if (mpOut[iFrame] > 0) {
|
||||
|
||||
pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
|
||||
freqSum[iCandidate] += mpOut[iFrame];
|
||||
freqNumber[iCandidate]++;
|
||||
prevFreq = mpOut[iFrame];
|
||||
|
||||
}
|
||||
}
|
||||
freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
|
||||
}
|
||||
|
||||
// find near duplicate pitch tracks
|
||||
vector<size_t> duplicates;
|
||||
for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
||||
for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
|
||||
size_t countEqual = 0;
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
||||
{
|
||||
if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
|
||||
fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
|
||||
countEqual++;
|
||||
}
|
||||
// std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
|
||||
if (countEqual * 1.0 / nFrame > 0.8) {
|
||||
if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
|
||||
duplicates.push_back(jCandidate);
|
||||
} else if (iCandidate < jCandidate) {
|
||||
duplicates.push_back(iCandidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// now find non-duplicate pitch tracks
|
||||
map<int, int> candidateActuals;
|
||||
map<int, std::string> candidateLabels;
|
||||
|
||||
vector<vector<float> > outputFrequencies;
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>());
|
||||
|
||||
int actualCandidateNumber = 0;
|
||||
for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
||||
{
|
||||
bool isDuplicate = false;
|
||||
for (size_t i = 0; i < duplicates.size(); ++i) {
|
||||
|
||||
if (duplicates[i] == iCandidate) {
|
||||
isDuplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
|
||||
{
|
||||
std::ostringstream convert;
|
||||
convert << actualCandidateNumber++;
|
||||
candidateLabels[iCandidate] = convert.str();
|
||||
candidateActuals[iCandidate] = actualCandidateNumber;
|
||||
// std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
||||
{
|
||||
if (pitchTracks[iCandidate][iFrame] > 0)
|
||||
{
|
||||
// featureValues[m_timestamp[iFrame]][iCandidate] =
|
||||
// pitchTracks[iCandidate][iFrame];
|
||||
outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
|
||||
} else {
|
||||
outputFrequencies[iFrame].push_back(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
// fs[m_oPitchTrackCandidates].push_back(f);
|
||||
}
|
||||
|
||||
// adapt our features so as to return a stack of candidate values
|
||||
// per frame
|
||||
|
||||
FeatureSet fs;
|
||||
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
|
||||
Feature f;
|
||||
f.hasTimestamp = true;
|
||||
f.timestamp = m_timestamp[iFrame];
|
||||
f.values = outputFrequencies[iFrame];
|
||||
fs[0].push_back(f);
|
||||
}
|
||||
|
||||
// I stopped using Chris's map stuff below because I couldn't get my head around it
|
||||
//
|
||||
// for (map<RealTime, map<int, float> >::const_iterator i =
|
||||
// featureValues.begin(); i != featureValues.end(); ++i) {
|
||||
// Feature f;
|
||||
// f.hasTimestamp = true;
|
||||
// f.timestamp = i->first;
|
||||
// int nextCandidate = candidateActuals.begin()->second;
|
||||
// for (map<int, float>::const_iterator j =
|
||||
// i->second.begin(); j != i->second.end(); ++j) {
|
||||
// while (candidateActuals[j->first] > nextCandidate) {
|
||||
// f.values.push_back(0);
|
||||
// ++nextCandidate;
|
||||
// }
|
||||
// f.values.push_back(j->second);
|
||||
// nextCandidate = j->first + 1;
|
||||
// }
|
||||
// //!!! can't use labels?
|
||||
// fs[0].push_back(f);
|
||||
// }
|
||||
|
||||
return fs;
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COLocalCandidatePYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _LOCALCANDIDATEPYIN_H_
|
||||
#define _LOCALCANDIDATEPYIN_H_
|
||||
|
||||
#include <vamp-sdk/Plugin.h>
|
||||
|
||||
#include "Yin.h"
|
||||
|
||||
class LocalCandidatePYIN : public Vamp::Plugin
|
||||
{
|
||||
public:
|
||||
LocalCandidatePYIN(float inputSampleRate);
|
||||
virtual ~LocalCandidatePYIN();
|
||||
|
||||
std::string getIdentifier() const;
|
||||
std::string getName() const;
|
||||
std::string getDescription() const;
|
||||
std::string getMaker() const;
|
||||
int getPluginVersion() const;
|
||||
std::string getCopyright() const;
|
||||
|
||||
InputDomain getInputDomain() const;
|
||||
size_t getPreferredBlockSize() const;
|
||||
size_t getPreferredStepSize() const;
|
||||
size_t getMinChannelCount() const;
|
||||
size_t getMaxChannelCount() const;
|
||||
|
||||
ParameterList getParameterDescriptors() const;
|
||||
float getParameter(std::string identifier) const;
|
||||
void setParameter(std::string identifier, float value);
|
||||
|
||||
ProgramList getPrograms() const;
|
||||
std::string getCurrentProgram() const;
|
||||
void selectProgram(std::string name);
|
||||
|
||||
OutputList getOutputDescriptors() const;
|
||||
|
||||
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
|
||||
void reset();
|
||||
|
||||
FeatureSet process(const float *const *inputBuffers,
|
||||
Vamp::RealTime timestamp);
|
||||
|
||||
FeatureSet getRemainingFeatures();
|
||||
|
||||
protected:
|
||||
size_t m_channels;
|
||||
size_t m_stepSize;
|
||||
size_t m_blockSize;
|
||||
float m_fmin;
|
||||
float m_fmax;
|
||||
|
||||
mutable int m_oPitchTrackCandidates;
|
||||
|
||||
float m_threshDistr;
|
||||
float m_outputUnvoiced;
|
||||
float m_preciseTime;
|
||||
vector<vector<pair<double, double> > > m_pitchProb;
|
||||
vector<Vamp::RealTime> m_timestamp;
|
||||
size_t m_nCandidate;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,63 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MEAN_FILTER_H_
|
||||
#define _MEAN_FILTER_H_
|
||||
|
||||
class MeanFilter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Construct a non-causal mean filter with filter length flen,
|
||||
* that replaces each sample N with the mean of samples
|
||||
* [N-floor(F/2) .. N+floor(F/2)] where F is the filter length.
|
||||
* Only odd F are supported.
|
||||
*/
|
||||
MeanFilter(int flen) : m_flen(flen) { }
|
||||
~MeanFilter() { }
|
||||
|
||||
/**
|
||||
* Filter the n samples in "in" and place the results in "out"
|
||||
*/
|
||||
void filter(const double *in, double *out, const int n) {
|
||||
filterSubsequence(in, out, n, n, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter the n samples starting at the given offset in the
|
||||
* m-element array "in" and place the results in the n-element
|
||||
* array "out"
|
||||
*/
|
||||
void filterSubsequence(const double *in, double *out,
|
||||
const int m, const int n,
|
||||
const int offset) {
|
||||
int half = m_flen/2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
double v = 0;
|
||||
int n = 0;
|
||||
for (int j = -half; j <= half; ++j) {
|
||||
int ix = i + j + offset;
|
||||
if (ix >= 0 && ix < m) {
|
||||
v += in[ix];
|
||||
++n;
|
||||
}
|
||||
}
|
||||
out[i] = v / n;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int m_flen;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,61 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "MonoNote.h"
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
MonoNote::MonoNote() :
|
||||
hmm()
|
||||
{
|
||||
}
|
||||
|
||||
MonoNote::~MonoNote()
|
||||
{
|
||||
}
|
||||
|
||||
const vector<MonoNote::FrameOutput>
|
||||
MonoNote::process(const vector<vector<pair<double, double> > > pitchProb)
|
||||
{
|
||||
vector<vector<double> > obsProb;
|
||||
for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame)
|
||||
{
|
||||
obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
|
||||
}
|
||||
|
||||
vector<double> *scale = new vector<double>(pitchProb.size());
|
||||
|
||||
vector<MonoNote::FrameOutput> out;
|
||||
|
||||
vector<int> path = hmm.decodeViterbi(obsProb, scale);
|
||||
|
||||
for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
|
||||
{
|
||||
double currPitch = -1;
|
||||
int stateKind = 0;
|
||||
|
||||
currPitch = hmm.par.minPitch + (path[iFrame]/hmm.par.nSPP) * 1.0/hmm.par.nPPS;
|
||||
stateKind = (path[iFrame]) % hmm.par.nSPP + 1;
|
||||
|
||||
out.push_back(FrameOutput(iFrame, currPitch, stateKind));
|
||||
// std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl;
|
||||
}
|
||||
delete scale;
|
||||
return(out);
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MONONOTE_H_
|
||||
#define _MONONOTE_H_
|
||||
|
||||
#include "MonoNoteHMM.h"
|
||||
#include "MonoNoteParameters.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <exception>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
class MonoNote {
|
||||
public:
|
||||
MonoNote();
|
||||
virtual ~MonoNote();
|
||||
|
||||
struct FrameOutput {
|
||||
size_t frameNumber;
|
||||
double pitch;
|
||||
size_t noteState; // unvoiced, attack, stable, release, inter
|
||||
FrameOutput() : frameNumber(0), pitch(-1.0), noteState(0) { }
|
||||
FrameOutput(size_t _frameNumber, double _pitch, size_t _noteState) :
|
||||
frameNumber(_frameNumber), pitch(_pitch), noteState(_noteState) { }
|
||||
};
|
||||
// pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs
|
||||
const vector<FrameOutput> process(const vector<vector<pair<double, double> > > pitchProb);
|
||||
private:
|
||||
MonoNoteHMM hmm;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,208 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "MonoNoteHMM.h"
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
MonoNoteHMM::MonoNoteHMM() :
|
||||
par()
|
||||
{
|
||||
build();
|
||||
}
|
||||
|
||||
const vector<double>
|
||||
MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
|
||||
{
|
||||
// pitchProb is a list of pairs (pitches and their probabilities)
|
||||
|
||||
size_t nCandidate = pitchProb.size();
|
||||
|
||||
// what is the probability of pitched
|
||||
double pIsPitched = 0;
|
||||
for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
|
||||
{
|
||||
// pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
|
||||
pIsPitched += pitchProb[iCandidate].second;
|
||||
}
|
||||
|
||||
// pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight);
|
||||
pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight;
|
||||
|
||||
vector<double> out = vector<double>(par.n);
|
||||
double tempProbSum = 0;
|
||||
for (size_t i = 0; i < par.n; ++i)
|
||||
{
|
||||
if (i % par.nSPP != 2)
|
||||
{
|
||||
// std::cerr << getMidiPitch(i) << std::endl;
|
||||
double tempProb = 0;
|
||||
if (nCandidate > 0)
|
||||
{
|
||||
double minDist = 10000.0;
|
||||
double minDistProb = 0;
|
||||
size_t minDistCandidate = 0;
|
||||
for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
|
||||
{
|
||||
double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first);
|
||||
if (currDist < minDist)
|
||||
{
|
||||
minDist = currDist;
|
||||
minDistProb = pitchProb[iCandidate].second;
|
||||
minDistCandidate = iCandidate;
|
||||
}
|
||||
}
|
||||
tempProb = std::pow(minDistProb, par.yinTrust) *
|
||||
boost::math::pdf(pitchDistr[i],
|
||||
pitchProb[minDistCandidate].first);
|
||||
} else {
|
||||
tempProb = 1;
|
||||
}
|
||||
tempProbSum += tempProb;
|
||||
out[i] = tempProb;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < par.n; ++i)
|
||||
{
|
||||
if (i % par.nSPP != 2)
|
||||
{
|
||||
if (tempProbSum > 0)
|
||||
{
|
||||
out[i] = out[i] / tempProbSum * pIsPitched;
|
||||
}
|
||||
} else {
|
||||
out[i] = (1-pIsPitched) / (par.nPPS * par.nS);
|
||||
}
|
||||
}
|
||||
|
||||
return(out);
|
||||
}
|
||||
|
||||
void
|
||||
MonoNoteHMM::build()
|
||||
{
|
||||
// the states are organised as follows:
|
||||
// 0-2. lowest pitch
|
||||
// 0. attack state
|
||||
// 1. stable state
|
||||
// 2. silent state
|
||||
// 3-5. second-lowest pitch
|
||||
// 3. attack state
|
||||
// ...
|
||||
|
||||
// observation distributions
|
||||
for (size_t iState = 0; iState < par.n; ++iState)
|
||||
{
|
||||
pitchDistr.push_back(boost::math::normal(0,1));
|
||||
if (iState % par.nSPP == 2)
|
||||
{
|
||||
// silent state starts tracking
|
||||
init.push_back(1.0/(par.nS * par.nPPS));
|
||||
} else {
|
||||
init.push_back(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
|
||||
{
|
||||
size_t index = iPitch * par.nSPP;
|
||||
double mu = par.minPitch + iPitch * 1.0/par.nPPS;
|
||||
pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack);
|
||||
pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable);
|
||||
pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy
|
||||
}
|
||||
|
||||
boost::math::normal noteDistanceDistr(0, par.sigma2Note);
|
||||
|
||||
for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
|
||||
{
|
||||
// loop through all notes and set sparse transition probabilities
|
||||
size_t index = iPitch * par.nSPP;
|
||||
|
||||
// transitions from attack state
|
||||
from.push_back(index);
|
||||
to.push_back(index);
|
||||
transProb.push_back(par.pAttackSelftrans);
|
||||
|
||||
from.push_back(index);
|
||||
to.push_back(index+1);
|
||||
transProb.push_back(1-par.pAttackSelftrans);
|
||||
|
||||
// transitions from stable state
|
||||
from.push_back(index+1);
|
||||
to.push_back(index+1); // to itself
|
||||
transProb.push_back(par.pStableSelftrans);
|
||||
|
||||
from.push_back(index+1);
|
||||
to.push_back(index+2); // to silent
|
||||
transProb.push_back(par.pStable2Silent);
|
||||
|
||||
// the "easy" transitions from silent state
|
||||
from.push_back(index+2);
|
||||
to.push_back(index+2);
|
||||
transProb.push_back(par.pSilentSelftrans);
|
||||
|
||||
|
||||
// the more complicated transitions from the silent
|
||||
double probSumSilent = 0;
|
||||
|
||||
vector<double> tempTransProbSilent;
|
||||
for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch)
|
||||
{
|
||||
int fromPitch = iPitch;
|
||||
int toPitch = jPitch;
|
||||
double semitoneDistance =
|
||||
std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;
|
||||
|
||||
// if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
|
||||
if (semitoneDistance == 0 ||
|
||||
(semitoneDistance > par.minSemitoneDistance
|
||||
&& semitoneDistance < par.maxJump))
|
||||
{
|
||||
size_t toIndex = jPitch * par.nSPP; // note attack index
|
||||
|
||||
double tempWeightSilent = boost::math::pdf(noteDistanceDistr,
|
||||
semitoneDistance);
|
||||
probSumSilent += tempWeightSilent;
|
||||
|
||||
tempTransProbSilent.push_back(tempWeightSilent);
|
||||
|
||||
from.push_back(index+2);
|
||||
to.push_back(toIndex);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
|
||||
{
|
||||
transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double
|
||||
MonoNoteHMM::getMidiPitch(size_t index)
|
||||
{
|
||||
return pitchDistr[index].mean();
|
||||
}
|
||||
|
||||
double
|
||||
MonoNoteHMM::getFrequency(size_t index)
|
||||
{
|
||||
return 440 * pow(2, (pitchDistr[index].mean()-69)/12);
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MONONOTEHMM_H_
|
||||
#define _MONONOTEHMM_H_
|
||||
|
||||
#include "MonoNoteParameters.h"
|
||||
#include "SparseHMM.h"
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
|
||||
using std::vector;
|
||||
|
||||
class MonoNoteHMM : public SparseHMM
|
||||
{
|
||||
public:
|
||||
MonoNoteHMM();
|
||||
const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
|
||||
double getMidiPitch(size_t index);
|
||||
double getFrequency(size_t index);
|
||||
void build();
|
||||
MonoNoteParameters par;
|
||||
vector<boost::math::normal> pitchDistr;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,44 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "MonoNoteParameters.h"
|
||||
|
||||
MonoNoteParameters::MonoNoteParameters() :
|
||||
minPitch(35),
|
||||
nPPS(3),
|
||||
nS(69),
|
||||
nSPP(3), // states per pitch
|
||||
n(0),
|
||||
initPi(0),
|
||||
pAttackSelftrans(0.9),
|
||||
pStableSelftrans(0.99),
|
||||
pStable2Silent(0.01),
|
||||
pSilentSelftrans(0.9999),
|
||||
sigma2Note(0.7),
|
||||
maxJump(13),
|
||||
pInterSelftrans(0.0),
|
||||
priorPitchedProb(.7),
|
||||
priorWeight(0.5),
|
||||
minSemitoneDistance(.5),
|
||||
sigmaYinPitchAttack(5),
|
||||
sigmaYinPitchStable(0.8),
|
||||
sigmaYinPitchInter(.1),
|
||||
yinTrust(0.1)
|
||||
{
|
||||
// just in case someone put in a silly value for pRelease2Unvoiced
|
||||
n = nPPS * nS * nSPP;
|
||||
}
|
||||
|
||||
MonoNoteParameters::~MonoNoteParameters()
|
||||
{
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MONONOTEPARAMETERS_H_
|
||||
#define _MONONOTEPARAMETERS_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <exception>
|
||||
|
||||
using std::vector;
|
||||
|
||||
class MonoNoteParameters
|
||||
{
|
||||
public:
|
||||
MonoNoteParameters();
|
||||
virtual ~MonoNoteParameters();
|
||||
|
||||
// model architecture parameters
|
||||
size_t minPitch; // lowest pitch in MIDI notes
|
||||
size_t nPPS; // number of pitches per semitone
|
||||
size_t nS; // number of semitones
|
||||
size_t nSPP; // number of states per pitch
|
||||
size_t n; // number of states (will be calcualted from other parameters)
|
||||
|
||||
// initial state probabilities
|
||||
vector<double> initPi;
|
||||
|
||||
// transition parameters
|
||||
double pAttackSelftrans;
|
||||
double pStableSelftrans;
|
||||
double pStable2Silent;
|
||||
double pSilentSelftrans;
|
||||
double sigma2Note; // standard deviation of next note Gaussian distribution
|
||||
double maxJump;
|
||||
double pInterSelftrans;
|
||||
|
||||
double priorPitchedProb;
|
||||
double priorWeight;
|
||||
|
||||
double minSemitoneDistance; // minimum distance for a transition
|
||||
|
||||
double sigmaYinPitchAttack;
|
||||
double sigmaYinPitchStable;
|
||||
double sigmaYinPitchInter;
|
||||
|
||||
double yinTrust;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,81 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "MonoPitch.h"
|
||||
#include "MonoPitchHMM.h"
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
MonoPitch::MonoPitch() :
|
||||
hmm()
|
||||
{
|
||||
}
|
||||
|
||||
MonoPitch::~MonoPitch()
|
||||
{
|
||||
}
|
||||
|
||||
const vector<float>
|
||||
MonoPitch::process(const vector<vector<pair<double, double> > > pitchProb)
|
||||
{
|
||||
// std::cerr << "before observation prob calculation" << std::endl;
|
||||
vector<vector<double> > obsProb;
|
||||
for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame)
|
||||
{
|
||||
obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
|
||||
}
|
||||
|
||||
vector<double> *scale = new vector<double>(0);
|
||||
|
||||
vector<float> out;
|
||||
|
||||
// std::cerr << "before Viterbi decoding" << obsProb.size() << "ng" << obsProb[1].size() << std::endl;
|
||||
vector<int> path = hmm.decodeViterbi(obsProb, scale);
|
||||
// std::cerr << "after Viterbi decoding" << std::endl;
|
||||
|
||||
for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
|
||||
{
|
||||
// std::cerr << path[iFrame] << " " << hmm.m_freqs[path[iFrame]] << std::endl;
|
||||
float hmmFreq = hmm.m_freqs[path[iFrame]];
|
||||
float bestFreq = 0;
|
||||
float leastDist = 10000;
|
||||
if (hmmFreq > 0)
|
||||
{
|
||||
// This was a Yin estimate, so try to get original pitch estimate back
|
||||
// ... a bit hacky, since we could have direclty saved the frequency
|
||||
// that was assigned to the HMM bin in hmm.calculateObsProb -- but would
|
||||
// have had to rethink the interface of that method.
|
||||
for (size_t iPitch = 0; iPitch < pitchProb[iFrame].size(); ++iPitch)
|
||||
{
|
||||
float freq = 440. * std::pow(2, (pitchProb[iFrame][iPitch].first - 69)/12);
|
||||
float dist = std::abs(hmmFreq-freq);
|
||||
if (dist < leastDist)
|
||||
{
|
||||
leastDist = dist;
|
||||
bestFreq = freq;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bestFreq = hmmFreq;
|
||||
}
|
||||
out.push_back(bestFreq);
|
||||
}
|
||||
delete scale;
|
||||
return(out);
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MONOPITCH_H_
|
||||
#define _MONOPITCH_H_
|
||||
|
||||
#include "MonoPitchHMM.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <exception>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
class MonoPitch {
|
||||
public:
|
||||
MonoPitch();
|
||||
virtual ~MonoPitch();
|
||||
|
||||
// pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs
|
||||
const vector<float> process(const vector<vector<pair<double, double> > > pitchProb);
|
||||
private:
|
||||
MonoPitchHMM hmm;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,153 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "MonoPitchHMM.h"
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
MonoPitchHMM::MonoPitchHMM() :
|
||||
m_minFreq(61.735),
|
||||
m_nBPS(5),
|
||||
m_nPitch(0),
|
||||
m_transitionWidth(0),
|
||||
m_selfTrans(0.99),
|
||||
m_yinTrust(.5),
|
||||
m_freqs(0)
|
||||
{
|
||||
m_transitionWidth = 5*(m_nBPS/2) + 1;
|
||||
m_nPitch = 69 * m_nBPS;
|
||||
m_freqs = vector<double>(2*m_nPitch);
|
||||
for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
||||
{
|
||||
m_freqs[iPitch] = m_minFreq * std::pow(2, iPitch * 1.0 / (12 * m_nBPS));
|
||||
m_freqs[iPitch+m_nPitch] = -m_freqs[iPitch];
|
||||
}
|
||||
build();
|
||||
}
|
||||
|
||||
const vector<double>
|
||||
MonoPitchHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
|
||||
{
|
||||
vector<double> out = vector<double>(2*m_nPitch+1);
|
||||
double probYinPitched = 0;
|
||||
// BIN THE PITCHES
|
||||
for (size_t iPair = 0; iPair < pitchProb.size(); ++iPair)
|
||||
{
|
||||
double freq = 440. * std::pow(2, (pitchProb[iPair].first - 69)/12);
|
||||
if (freq <= m_minFreq) continue;
|
||||
double d = 0;
|
||||
double oldd = 1000;
|
||||
for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
||||
{
|
||||
d = std::abs(freq-m_freqs[iPitch]);
|
||||
if (oldd < d && iPitch > 0)
|
||||
{
|
||||
// previous bin must have been the closest
|
||||
out[iPitch-1] = pitchProb[iPair].second;
|
||||
probYinPitched += out[iPitch-1];
|
||||
break;
|
||||
}
|
||||
oldd = d;
|
||||
}
|
||||
}
|
||||
|
||||
double probReallyPitched = m_yinTrust * probYinPitched;
|
||||
// std::cerr << probReallyPitched << " " << probYinPitched << std::endl;
|
||||
// damn, I forget what this is all about...
|
||||
for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
||||
{
|
||||
if (probYinPitched > 0) out[iPitch] *= (probReallyPitched/probYinPitched) ;
|
||||
out[iPitch+m_nPitch] = (1 - probReallyPitched) / m_nPitch;
|
||||
}
|
||||
// out[2*m_nPitch] = m_yinTrust * (1 - probYinPitched);
|
||||
return(out);
|
||||
}
|
||||
|
||||
void
|
||||
MonoPitchHMM::build()
|
||||
{
|
||||
// INITIAL VECTOR
|
||||
init = vector<double>(2*m_nPitch, 1.0 / 2*m_nPitch);
|
||||
|
||||
// TRANSITIONS
|
||||
for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
||||
{
|
||||
int theoreticalMinNextPitch = static_cast<int>(iPitch)-static_cast<int>(m_transitionWidth/2);
|
||||
int minNextPitch = iPitch>m_transitionWidth/2 ? iPitch-m_transitionWidth/2 : 0;
|
||||
int maxNextPitch = iPitch<m_nPitch-m_transitionWidth/2 ? iPitch+m_transitionWidth/2 : m_nPitch-1;
|
||||
|
||||
// WEIGHT VECTOR
|
||||
double weightSum = 0;
|
||||
vector<double> weights;
|
||||
for (size_t i = minNextPitch; i <= maxNextPitch; ++i)
|
||||
{
|
||||
if (i <= iPitch)
|
||||
{
|
||||
weights.push_back(i-theoreticalMinNextPitch+1);
|
||||
// weights.push_back(i-theoreticalMinNextPitch+1+m_transitionWidth/2);
|
||||
} else {
|
||||
weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch));
|
||||
// weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch)+m_transitionWidth/2);
|
||||
}
|
||||
weightSum += weights[weights.size()-1];
|
||||
}
|
||||
|
||||
// std::cerr << minNextPitch << " " << maxNextPitch << std::endl;
|
||||
// TRANSITIONS TO CLOSE PITCH
|
||||
for (size_t i = minNextPitch; i <= maxNextPitch; ++i)
|
||||
{
|
||||
from.push_back(iPitch);
|
||||
to.push_back(i);
|
||||
transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
|
||||
|
||||
from.push_back(iPitch);
|
||||
to.push_back(i+m_nPitch);
|
||||
transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
|
||||
|
||||
from.push_back(iPitch+m_nPitch);
|
||||
to.push_back(i+m_nPitch);
|
||||
transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
|
||||
// transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
|
||||
|
||||
from.push_back(iPitch+m_nPitch);
|
||||
to.push_back(i);
|
||||
transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
|
||||
// transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
|
||||
}
|
||||
|
||||
// TRANSITION TO UNVOICED
|
||||
// from.push_back(iPitch+m_nPitch);
|
||||
// to.push_back(2*m_nPitch);
|
||||
// transProb.push_back(1-m_selfTrans);
|
||||
|
||||
// TRANSITION FROM UNVOICED TO PITCH
|
||||
// from.push_back(2*m_nPitch);
|
||||
// to.push_back(iPitch+m_nPitch);
|
||||
// transProb.push_back(1.0/m_nPitch);
|
||||
}
|
||||
// UNVOICED SELFTRANSITION
|
||||
// from.push_back(2*m_nPitch);
|
||||
// to.push_back(2*m_nPitch);
|
||||
// transProb.push_back(m_selfTrans);
|
||||
|
||||
// for (size_t i = 0; i < from.size(); ++i) {
|
||||
// std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl;
|
||||
// }
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _MONOPITCHHMM_H_
|
||||
#define _MONOPITCHHMM_H_
|
||||
|
||||
#include "SparseHMM.h"
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
|
||||
using std::vector;
|
||||
|
||||
class MonoPitchHMM : public SparseHMM
|
||||
{
|
||||
public:
|
||||
MonoPitchHMM();
|
||||
const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
|
||||
// double getMidiPitch(size_t index);
|
||||
// double getFrequency(size_t index);
|
||||
void build();
|
||||
double m_minFreq; // 82.40689f/2
|
||||
size_t m_nBPS;
|
||||
size_t m_nPitch;
|
||||
size_t m_transitionWidth;
|
||||
double m_selfTrans;
|
||||
double m_yinTrust;
|
||||
vector<double> m_freqs;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,608 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "PYinVamp.h"
|
||||
#include "MonoNote.h"
|
||||
#include "MonoPitch.h"
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using Vamp::RealTime;
|
||||
|
||||
|
||||
PYinVamp::PYinVamp(float inputSampleRate) :
|
||||
Plugin(inputSampleRate),
|
||||
m_channels(0),
|
||||
m_stepSize(256),
|
||||
m_blockSize(2048),
|
||||
m_fmin(40),
|
||||
m_fmax(1600),
|
||||
m_yin(2048, inputSampleRate, 0.0),
|
||||
m_oF0Candidates(0),
|
||||
m_oF0Probs(0),
|
||||
m_oVoicedProb(0),
|
||||
m_oCandidateSalience(0),
|
||||
m_oSmoothedPitchTrack(0),
|
||||
m_oNotes(0),
|
||||
m_threshDistr(2.0f),
|
||||
m_outputUnvoiced(0.0f),
|
||||
m_preciseTime(0.0f),
|
||||
m_lowAmp(0.1f),
|
||||
m_onsetSensitivity(0.7f),
|
||||
m_pruneThresh(0.1f),
|
||||
m_pitchProb(0),
|
||||
m_timestamp(0),
|
||||
m_level(0)
|
||||
{
|
||||
}
|
||||
|
||||
PYinVamp::~PYinVamp()
|
||||
{
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getIdentifier() const
|
||||
{
|
||||
return "pyin";
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getName() const
|
||||
{
|
||||
return "pYin";
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getDescription() const
|
||||
{
|
||||
return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getMaker() const
|
||||
{
|
||||
return "Matthias Mauch";
|
||||
}
|
||||
|
||||
int
|
||||
PYinVamp::getPluginVersion() const
|
||||
{
|
||||
// Increment this each time you release a version that behaves
|
||||
// differently from the previous one
|
||||
return 2;
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getCopyright() const
|
||||
{
|
||||
return "GPL";
|
||||
}
|
||||
|
||||
PYinVamp::InputDomain
|
||||
PYinVamp::getInputDomain() const
|
||||
{
|
||||
return TimeDomain;
|
||||
}
|
||||
|
||||
size_t
|
||||
PYinVamp::getPreferredBlockSize() const
|
||||
{
|
||||
return 2048;
|
||||
}
|
||||
|
||||
size_t
|
||||
PYinVamp::getPreferredStepSize() const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
size_t
|
||||
PYinVamp::getMinChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t
|
||||
PYinVamp::getMaxChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
PYinVamp::ParameterList
|
||||
PYinVamp::getParameterDescriptors() const
|
||||
{
|
||||
ParameterList list;
|
||||
|
||||
ParameterDescriptor d;
|
||||
|
||||
d.identifier = "threshdistr";
|
||||
d.name = "Yin threshold distribution";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 7.0f;
|
||||
d.defaultValue = 2.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
d.valueNames.push_back("Uniform");
|
||||
d.valueNames.push_back("Beta (mean 0.10)");
|
||||
d.valueNames.push_back("Beta (mean 0.15)");
|
||||
d.valueNames.push_back("Beta (mean 0.20)");
|
||||
d.valueNames.push_back("Beta (mean 0.30)");
|
||||
d.valueNames.push_back("Single Value 0.10");
|
||||
d.valueNames.push_back("Single Value 0.15");
|
||||
d.valueNames.push_back("Single Value 0.20");
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "outputunvoiced";
|
||||
d.valueNames.clear();
|
||||
d.name = "Output estimates classified as unvoiced?";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 2.0f;
|
||||
d.defaultValue = 0.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
d.valueNames.push_back("No");
|
||||
d.valueNames.push_back("Yes");
|
||||
d.valueNames.push_back("Yes, as negative frequencies");
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "precisetime";
|
||||
d.valueNames.clear();
|
||||
d.name = "Use non-standard precise YIN timing (slow).";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 1.0f;
|
||||
d.defaultValue = 0.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "lowampsuppression";
|
||||
d.valueNames.clear();
|
||||
d.name = "Suppress low amplitude pitch estimates.";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 1.0f;
|
||||
d.defaultValue = 0.1f;
|
||||
d.isQuantized = false;
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "onsetsensitivity";
|
||||
d.valueNames.clear();
|
||||
d.name = "Onset sensitivity";
|
||||
d.description = "Adds additional note onsets when RMS increases.";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 1.0f;
|
||||
d.defaultValue = 0.7f;
|
||||
d.isQuantized = false;
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "prunethresh";
|
||||
d.valueNames.clear();
|
||||
d.name = "Duration pruning threshold.";
|
||||
d.description = "Prune notes that are shorter than this value.";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 0.2f;
|
||||
d.defaultValue = 0.1f;
|
||||
d.isQuantized = false;
|
||||
list.push_back(d);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
float
|
||||
PYinVamp::getParameter(string identifier) const
|
||||
{
|
||||
if (identifier == "threshdistr") {
|
||||
return m_threshDistr;
|
||||
}
|
||||
if (identifier == "outputunvoiced") {
|
||||
return m_outputUnvoiced;
|
||||
}
|
||||
if (identifier == "precisetime") {
|
||||
return m_preciseTime;
|
||||
}
|
||||
if (identifier == "lowampsuppression") {
|
||||
return m_lowAmp;
|
||||
}
|
||||
if (identifier == "onsetsensitivity") {
|
||||
return m_onsetSensitivity;
|
||||
}
|
||||
if (identifier == "prunethresh") {
|
||||
return m_pruneThresh;
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
void
|
||||
PYinVamp::setParameter(string identifier, float value)
|
||||
{
|
||||
if (identifier == "threshdistr")
|
||||
{
|
||||
m_threshDistr = value;
|
||||
}
|
||||
if (identifier == "outputunvoiced")
|
||||
{
|
||||
m_outputUnvoiced = value;
|
||||
}
|
||||
if (identifier == "precisetime")
|
||||
{
|
||||
m_preciseTime = value;
|
||||
}
|
||||
if (identifier == "lowampsuppression")
|
||||
{
|
||||
m_lowAmp = value;
|
||||
}
|
||||
if (identifier == "onsetsensitivity")
|
||||
{
|
||||
m_onsetSensitivity = value;
|
||||
}
|
||||
if (identifier == "prunethresh")
|
||||
{
|
||||
m_pruneThresh = value;
|
||||
}
|
||||
}
|
||||
|
||||
PYinVamp::ProgramList
|
||||
PYinVamp::getPrograms() const
|
||||
{
|
||||
ProgramList list;
|
||||
return list;
|
||||
}
|
||||
|
||||
string
|
||||
PYinVamp::getCurrentProgram() const
|
||||
{
|
||||
return ""; // no programs
|
||||
}
|
||||
|
||||
void
|
||||
PYinVamp::selectProgram(string name)
|
||||
{
|
||||
}
|
||||
|
||||
PYinVamp::OutputList
|
||||
PYinVamp::getOutputDescriptors() const
|
||||
{
|
||||
OutputList outputs;
|
||||
|
||||
OutputDescriptor d;
|
||||
|
||||
int outputNumber = 0;
|
||||
|
||||
d.identifier = "f0candidates";
|
||||
d.name = "F0 Candidates";
|
||||
d.description = "Estimated fundamental frequency candidates.";
|
||||
d.unit = "Hz";
|
||||
d.hasFixedBinCount = false;
|
||||
// d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = m_fmin;
|
||||
d.maxValue = 500;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_oF0Candidates = outputNumber++;
|
||||
|
||||
d.identifier = "f0probs";
|
||||
d.name = "Candidate Probabilities";
|
||||
d.description = "Probabilities of estimated fundamental frequency candidates.";
|
||||
d.unit = "";
|
||||
d.hasFixedBinCount = false;
|
||||
// d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_oF0Probs = outputNumber++;
|
||||
|
||||
d.identifier = "voicedprob";
|
||||
d.name = "Voiced Probability";
|
||||
d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
|
||||
d.unit = "";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_oVoicedProb = outputNumber++;
|
||||
|
||||
d.identifier = "candidatesalience";
|
||||
d.name = "Candidate Salience";
|
||||
d.description = "Candidate Salience";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = m_blockSize / 2;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_oCandidateSalience = outputNumber++;
|
||||
|
||||
d.identifier = "smoothedpitchtrack";
|
||||
d.name = "Smoothed Pitch Track";
|
||||
d.description = ".";
|
||||
d.unit = "Hz";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = false;
|
||||
// d.minValue = 0;
|
||||
// d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_oSmoothedPitchTrack = outputNumber++;
|
||||
|
||||
d.identifier = "notes";
|
||||
d.name = "Notes";
|
||||
d.description = "Derived fixed-pitch note frequencies";
|
||||
// d.unit = "MIDI unit";
|
||||
d.unit = "Hz";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = false;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::VariableSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = true;
|
||||
outputs.push_back(d);
|
||||
m_oNotes = outputNumber++;
|
||||
|
||||
return outputs;
|
||||
}
|
||||
|
||||
bool
|
||||
PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
||||
{
|
||||
if (channels < getMinChannelCount() ||
|
||||
channels > getMaxChannelCount()) return false;
|
||||
|
||||
/*
|
||||
std::cerr << "PYinVamp::initialise: channels = " << channels
|
||||
<< ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
m_channels = channels;
|
||||
m_stepSize = stepSize;
|
||||
m_blockSize = blockSize;
|
||||
|
||||
reset();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
PYinVamp::reset()
|
||||
{
|
||||
m_yin.setThresholdDistr(m_threshDistr);
|
||||
m_yin.setFrameSize(m_blockSize);
|
||||
m_yin.setFast(!m_preciseTime);
|
||||
|
||||
m_pitchProb.clear();
|
||||
m_timestamp.clear();
|
||||
m_level.clear();
|
||||
/*
|
||||
std::cerr << "PYinVamp::reset"
|
||||
<< ", blockSize = " << m_blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
}
|
||||
|
||||
PYinVamp::FeatureSet
|
||||
PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
|
||||
{
|
||||
int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
|
||||
timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
|
||||
|
||||
FeatureSet fs;
|
||||
|
||||
float rms = 0;
|
||||
|
||||
double *dInputBuffers = new double[m_blockSize];
|
||||
for (size_t i = 0; i < m_blockSize; ++i) {
|
||||
dInputBuffers[i] = inputBuffers[0][i];
|
||||
rms += inputBuffers[0][i] * inputBuffers[0][i];
|
||||
}
|
||||
rms /= m_blockSize;
|
||||
rms = sqrt(rms);
|
||||
|
||||
bool isLowAmplitude = (rms < m_lowAmp);
|
||||
|
||||
Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
|
||||
delete [] dInputBuffers;
|
||||
|
||||
m_level.push_back(yo.rms);
|
||||
|
||||
// First, get the things out of the way that we don't want to output
|
||||
// immediately, but instead save for later.
|
||||
vector<pair<double, double> > tempPitchProb;
|
||||
for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
|
||||
{
|
||||
double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
|
||||
if (!isLowAmplitude)
|
||||
{
|
||||
tempPitchProb.push_back(pair<double, double>
|
||||
(tempPitch, yo.freqProb[iCandidate].second));
|
||||
} else {
|
||||
float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
|
||||
tempPitchProb.push_back(pair<double, double>
|
||||
(tempPitch, yo.freqProb[iCandidate].second*factor));
|
||||
}
|
||||
}
|
||||
m_pitchProb.push_back(tempPitchProb);
|
||||
m_timestamp.push_back(timestamp);
|
||||
|
||||
// F0 CANDIDATES
|
||||
Feature f;
|
||||
f.hasTimestamp = true;
|
||||
f.timestamp = timestamp;
|
||||
for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
||||
{
|
||||
f.values.push_back(yo.freqProb[i].first);
|
||||
}
|
||||
fs[m_oF0Candidates].push_back(f);
|
||||
|
||||
// VOICEDPROB
|
||||
f.values.clear();
|
||||
float voicedProb = 0;
|
||||
for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
||||
{
|
||||
f.values.push_back(yo.freqProb[i].second);
|
||||
voicedProb += yo.freqProb[i].second;
|
||||
}
|
||||
fs[m_oF0Probs].push_back(f);
|
||||
|
||||
f.values.push_back(voicedProb);
|
||||
fs[m_oVoicedProb].push_back(f);
|
||||
|
||||
// SALIENCE -- maybe this should eventually disappear
|
||||
f.values.clear();
|
||||
float salienceSum = 0;
|
||||
for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
||||
{
|
||||
f.values.push_back(yo.salience[iBin]);
|
||||
salienceSum += yo.salience[iBin];
|
||||
}
|
||||
fs[m_oCandidateSalience].push_back(f);
|
||||
|
||||
return fs;
|
||||
}
|
||||
|
||||
PYinVamp::FeatureSet
|
||||
PYinVamp::getRemainingFeatures()
|
||||
{
|
||||
FeatureSet fs;
|
||||
Feature f;
|
||||
f.hasTimestamp = true;
|
||||
f.hasDuration = false;
|
||||
|
||||
if (m_pitchProb.empty()) {
|
||||
return fs;
|
||||
}
|
||||
|
||||
// MONO-PITCH STUFF
|
||||
MonoPitch mp;
|
||||
vector<float> mpOut = mp.process(m_pitchProb);
|
||||
for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
|
||||
{
|
||||
if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
|
||||
f.timestamp = m_timestamp[iFrame];
|
||||
f.values.clear();
|
||||
if (m_outputUnvoiced == 1)
|
||||
{
|
||||
f.values.push_back(fabs(mpOut[iFrame]));
|
||||
} else {
|
||||
f.values.push_back(mpOut[iFrame]);
|
||||
}
|
||||
|
||||
fs[m_oSmoothedPitchTrack].push_back(f);
|
||||
}
|
||||
|
||||
// MONO-NOTE STUFF
|
||||
// std::cerr << "Mono Note Stuff" << std::endl;
|
||||
MonoNote mn;
|
||||
std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
|
||||
for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
|
||||
std::vector<std::pair<double, double> > temp;
|
||||
if (mpOut[iFrame] > 0)
|
||||
{
|
||||
double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
|
||||
temp.push_back(std::pair<double,double>(tempPitch, .9));
|
||||
}
|
||||
smoothedPitch.push_back(temp);
|
||||
}
|
||||
// vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
|
||||
vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
|
||||
|
||||
// turning feature into a note feature
|
||||
f.hasTimestamp = true;
|
||||
f.hasDuration = true;
|
||||
f.values.clear();
|
||||
|
||||
int onsetFrame = 0;
|
||||
bool isVoiced = 0;
|
||||
bool oldIsVoiced = 0;
|
||||
size_t nFrame = m_pitchProb.size();
|
||||
|
||||
float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
|
||||
|
||||
std::vector<float> notePitchTrack; // collects pitches for one note at a time
|
||||
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
||||
{
|
||||
isVoiced = mnOut[iFrame].noteState < 3
|
||||
&& smoothedPitch[iFrame].size() > 0
|
||||
&& (iFrame >= nFrame-2
|
||||
|| ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
|
||||
// std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl;
|
||||
if (isVoiced && iFrame != nFrame-1)
|
||||
{
|
||||
if (oldIsVoiced == 0) // beginning of a note
|
||||
{
|
||||
onsetFrame = iFrame;
|
||||
}
|
||||
float pitch = smoothedPitch[iFrame][0].first;
|
||||
notePitchTrack.push_back(pitch); // add to the note's pitch track
|
||||
} else { // not currently voiced
|
||||
if (oldIsVoiced == 1) // end of note
|
||||
{
|
||||
// std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl;
|
||||
if (notePitchTrack.size() >= minNoteFrames)
|
||||
{
|
||||
std::sort(notePitchTrack.begin(), notePitchTrack.end());
|
||||
float medianPitch = notePitchTrack[notePitchTrack.size()/2];
|
||||
float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
|
||||
f.values.clear();
|
||||
f.values.push_back(medianFreq);
|
||||
f.timestamp = m_timestamp[onsetFrame];
|
||||
f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
|
||||
fs[m_oNotes].push_back(f);
|
||||
}
|
||||
notePitchTrack.clear();
|
||||
}
|
||||
}
|
||||
oldIsVoiced = isVoiced;
|
||||
}
|
||||
return fs;
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _PYINVAMP_H_
|
||||
#define _PYINVAMP_H_
|
||||
|
||||
#include <vamp-sdk/Plugin.h>
|
||||
|
||||
#include "Yin.h"
|
||||
|
||||
class PYinVamp : public Vamp::Plugin
|
||||
{
|
||||
public:
|
||||
PYinVamp(float inputSampleRate);
|
||||
virtual ~PYinVamp();
|
||||
|
||||
std::string getIdentifier() const;
|
||||
std::string getName() const;
|
||||
std::string getDescription() const;
|
||||
std::string getMaker() const;
|
||||
int getPluginVersion() const;
|
||||
std::string getCopyright() const;
|
||||
|
||||
InputDomain getInputDomain() const;
|
||||
size_t getPreferredBlockSize() const;
|
||||
size_t getPreferredStepSize() const;
|
||||
size_t getMinChannelCount() const;
|
||||
size_t getMaxChannelCount() const;
|
||||
|
||||
ParameterList getParameterDescriptors() const;
|
||||
float getParameter(std::string identifier) const;
|
||||
void setParameter(std::string identifier, float value);
|
||||
|
||||
ProgramList getPrograms() const;
|
||||
std::string getCurrentProgram() const;
|
||||
void selectProgram(std::string name);
|
||||
|
||||
OutputList getOutputDescriptors() const;
|
||||
|
||||
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
|
||||
void reset();
|
||||
|
||||
FeatureSet process(const float *const *inputBuffers,
|
||||
Vamp::RealTime timestamp);
|
||||
|
||||
FeatureSet getRemainingFeatures();
|
||||
|
||||
protected:
|
||||
size_t m_channels;
|
||||
size_t m_stepSize;
|
||||
size_t m_blockSize;
|
||||
float m_fmin;
|
||||
float m_fmax;
|
||||
Yin m_yin;
|
||||
|
||||
mutable int m_oF0Candidates;
|
||||
mutable int m_oF0Probs;
|
||||
mutable int m_oVoicedProb;
|
||||
mutable int m_oCandidateSalience;
|
||||
mutable int m_oSmoothedPitchTrack;
|
||||
mutable int m_oNotes;
|
||||
|
||||
float m_threshDistr;
|
||||
float m_outputUnvoiced;
|
||||
float m_preciseTime;
|
||||
float m_lowAmp;
|
||||
float m_onsetSensitivity;
|
||||
float m_pruneThresh;
|
||||
vector<vector<pair<double, double> > > m_pitchProb;
|
||||
vector<Vamp::RealTime> m_timestamp;
|
||||
vector<float> m_level;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,145 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "SparseHMM.h"
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
const vector<double>
|
||||
SparseHMM::calculateObsProb(const vector<pair<double, double> > data)
|
||||
{
|
||||
// dummy (virtual?) implementation to be overloaded
|
||||
return(vector<double>());
|
||||
}
|
||||
|
||||
const std::vector<int>
|
||||
SparseHMM::decodeViterbi(std::vector<vector<double> > obsProb,
|
||||
vector<double> *scale)
|
||||
{
|
||||
if (obsProb.size() < 1) {
|
||||
return vector<int>();
|
||||
}
|
||||
|
||||
size_t nState = init.size();
|
||||
size_t nFrame = obsProb.size();
|
||||
|
||||
// check for consistency
|
||||
size_t nTrans = transProb.size();
|
||||
|
||||
// declaring variables
|
||||
std::vector<double> delta = std::vector<double>(nState);
|
||||
std::vector<double> oldDelta = std::vector<double>(nState);
|
||||
vector<vector<int> > psi; // "matrix" of remembered indices of the best transitions
|
||||
vector<int> path = vector<int>(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label)
|
||||
|
||||
double deltasum = 0;
|
||||
|
||||
// initialise first frame
|
||||
for (size_t iState = 0; iState < nState; ++iState)
|
||||
{
|
||||
oldDelta[iState] = init[iState] * obsProb[0][iState];
|
||||
// std::cerr << iState << " ----- " << init[iState] << std::endl;
|
||||
deltasum += oldDelta[iState];
|
||||
}
|
||||
|
||||
for (size_t iState = 0; iState < nState; ++iState)
|
||||
{
|
||||
oldDelta[iState] /= deltasum; // normalise (scale)
|
||||
// std::cerr << oldDelta[iState] << std::endl;
|
||||
}
|
||||
|
||||
scale->push_back(1.0/deltasum);
|
||||
psi.push_back(vector<int>(nState,0));
|
||||
|
||||
// rest of forward step
|
||||
for (size_t iFrame = 1; iFrame < nFrame; ++iFrame)
|
||||
{
|
||||
deltasum = 0;
|
||||
psi.push_back(vector<int>(nState,0));
|
||||
|
||||
// calculate best previous state for every current state
|
||||
size_t fromState;
|
||||
size_t toState;
|
||||
double currentTransProb;
|
||||
double currentValue;
|
||||
|
||||
// this is the "sparse" loop
|
||||
for (size_t iTrans = 0; iTrans < nTrans; ++iTrans)
|
||||
{
|
||||
fromState = from[iTrans];
|
||||
toState = to[iTrans];
|
||||
currentTransProb = transProb[iTrans];
|
||||
|
||||
currentValue = oldDelta[fromState] * currentTransProb;
|
||||
if (currentValue > delta[toState])
|
||||
{
|
||||
delta[toState] = currentValue; // will be multiplied by the right obs later!
|
||||
psi[iFrame][toState] = fromState;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t jState = 0; jState < nState; ++jState)
|
||||
{
|
||||
delta[jState] *= obsProb[iFrame][jState];
|
||||
deltasum += delta[jState];
|
||||
}
|
||||
|
||||
if (deltasum > 0)
|
||||
{
|
||||
for (size_t iState = 0; iState < nState; ++iState)
|
||||
{
|
||||
oldDelta[iState] = delta[iState] / deltasum; // normalise (scale)
|
||||
delta[iState] = 0;
|
||||
}
|
||||
scale->push_back(1.0/deltasum);
|
||||
} else
|
||||
{
|
||||
std::cerr << "WARNING: Viterbi has been fed some zero probabilities, at least they become zero at frame " << iFrame << " in combination with the model." << std::endl;
|
||||
for (size_t iState = 0; iState < nState; ++iState)
|
||||
{
|
||||
oldDelta[iState] = 1.0/nState;
|
||||
delta[iState] = 0;
|
||||
}
|
||||
scale->push_back(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
// initialise backward step
|
||||
double bestValue = 0;
|
||||
for (size_t iState = 0; iState < nState; ++iState)
|
||||
{
|
||||
double currentValue = oldDelta[iState];
|
||||
if (currentValue > bestValue)
|
||||
{
|
||||
bestValue = currentValue;
|
||||
path[nFrame-1] = iState;
|
||||
}
|
||||
}
|
||||
|
||||
// rest of backward step
|
||||
for (int iFrame = nFrame-2; iFrame != -1; --iFrame)
|
||||
{
|
||||
path[iFrame] = psi[iFrame+1][path[iFrame+1]];
|
||||
}
|
||||
|
||||
// for (size_t iState = 0; iState < nState; ++iState)
|
||||
// {
|
||||
// // std::cerr << psi[2][iState] << std::endl;
|
||||
// }
|
||||
|
||||
return path;
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _SPARSEHMM_H_
|
||||
#define _SPARSEHMM_H_
|
||||
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
class SparseHMM
|
||||
{
|
||||
public:
|
||||
virtual const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
|
||||
const std::vector<int> decodeViterbi(std::vector<vector<double> > obs,
|
||||
vector<double> *scale);
|
||||
vector<double> init;
|
||||
vector<size_t> from;
|
||||
vector<size_t> to;
|
||||
vector<double> transProb;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,153 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "Yin.h"
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
#include "MeanFilter.h"
|
||||
#include "YinUtil.h"
|
||||
|
||||
#include <vector>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
||||
using std::vector;
|
||||
|
||||
Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh, bool fast) :
|
||||
m_frameSize(frameSize),
|
||||
m_inputSampleRate(inputSampleRate),
|
||||
m_thresh(thresh),
|
||||
m_threshDistr(2),
|
||||
m_yinBufferSize(frameSize/2),
|
||||
m_fast(fast)
|
||||
{
|
||||
if (frameSize & (frameSize-1)) {
|
||||
// throw "N must be a power of two";
|
||||
}
|
||||
}
|
||||
|
||||
Yin::~Yin()
|
||||
{
|
||||
}
|
||||
|
||||
Yin::YinOutput
|
||||
Yin::process(const double *in) const {
|
||||
|
||||
double* yinBuffer = new double[m_yinBufferSize];
|
||||
|
||||
// calculate aperiodicity function for all periods
|
||||
if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize);
|
||||
else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize);
|
||||
|
||||
YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize);
|
||||
|
||||
int tau = 0;
|
||||
tau = YinUtil::absoluteThreshold(yinBuffer, m_yinBufferSize, m_thresh);
|
||||
|
||||
double interpolatedTau;
|
||||
double aperiodicity;
|
||||
double f0;
|
||||
|
||||
if (tau!=0)
|
||||
{
|
||||
interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize);
|
||||
f0 = m_inputSampleRate * (1.0 / interpolatedTau);
|
||||
} else {
|
||||
interpolatedTau = 0;
|
||||
f0 = 0;
|
||||
}
|
||||
double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize);
|
||||
aperiodicity = yinBuffer[abs(tau)];
|
||||
// std::cerr << aperiodicity << std::endl;
|
||||
if (tau < 0) f0 = -f0;
|
||||
|
||||
Yin::YinOutput yo(f0, 1-aperiodicity, rms);
|
||||
for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf)
|
||||
{
|
||||
yo.salience.push_back(yinBuffer[iBuf] < 1 ? 1-yinBuffer[iBuf] : 0); // why are the values sometimes < 0 if I don't check?
|
||||
}
|
||||
|
||||
delete [] yinBuffer;
|
||||
return yo;
|
||||
}
|
||||
|
||||
Yin::YinOutput
|
||||
Yin::processProbabilisticYin(const double *in) const {
|
||||
|
||||
double* yinBuffer = new double[m_yinBufferSize];
|
||||
|
||||
// calculate aperiodicity function for all periods
|
||||
if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize);
|
||||
else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize);
|
||||
|
||||
YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize);
|
||||
|
||||
vector<double> peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, m_yinBufferSize);
|
||||
|
||||
// calculate overall "probability" from peak probability
|
||||
double probSum = 0;
|
||||
for (size_t iBin = 0; iBin < m_yinBufferSize; ++iBin)
|
||||
{
|
||||
probSum += peakProbability[iBin];
|
||||
}
|
||||
double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize);
|
||||
Yin::YinOutput yo(0,0,rms);
|
||||
for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf)
|
||||
{
|
||||
yo.salience.push_back(peakProbability[iBuf]);
|
||||
if (peakProbability[iBuf] > 0)
|
||||
{
|
||||
double currentF0 =
|
||||
m_inputSampleRate * (1.0 /
|
||||
YinUtil::parabolicInterpolation(yinBuffer, iBuf, m_yinBufferSize));
|
||||
yo.freqProb.push_back(pair<double, double>(currentF0, peakProbability[iBuf]));
|
||||
}
|
||||
}
|
||||
|
||||
// std::cerr << yo.freqProb.size() << std::endl;
|
||||
|
||||
delete [] yinBuffer;
|
||||
return yo;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
Yin::setThreshold(double parameter)
|
||||
{
|
||||
m_thresh = static_cast<float>(parameter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
Yin::setThresholdDistr(float parameter)
|
||||
{
|
||||
m_threshDistr = static_cast<size_t>(parameter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
Yin::setFrameSize(size_t parameter)
|
||||
{
|
||||
m_frameSize = parameter;
|
||||
m_yinBufferSize = m_frameSize/2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
Yin::setFast(bool parameter)
|
||||
{
|
||||
m_fast = parameter;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _YIN_H_
|
||||
#define _YIN_H_
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
#include "MeanFilter.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <exception>
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
|
||||
|
||||
|
||||
class Yin
|
||||
{
|
||||
public:
|
||||
Yin(size_t frameSize, size_t inputSampleRate, double thresh = 0.2, bool fast = true);
|
||||
virtual ~Yin();
|
||||
|
||||
struct YinOutput {
|
||||
double f0;
|
||||
double periodicity;
|
||||
double rms;
|
||||
vector<double> salience;
|
||||
vector<pair<double, double> > freqProb;
|
||||
YinOutput() : f0(0), periodicity(0), rms(0),
|
||||
salience(vector<double>(0)), freqProb(vector<pair<double, double> >(0)) { }
|
||||
YinOutput(double _f, double _p, double _r) :
|
||||
f0(_f), periodicity(_p), rms(_r),
|
||||
salience(vector<double>(0)), freqProb(vector<pair<double, double> >(0)) { }
|
||||
YinOutput(double _f, double _p, double _r, vector<double> _salience) :
|
||||
f0(_f), periodicity(_p), rms(_r), salience(_salience),
|
||||
freqProb(vector<pair<double, double> >(0)) { }
|
||||
};
|
||||
|
||||
int setThreshold(double parameter);
|
||||
int setThresholdDistr(float parameter);
|
||||
int setFrameSize(size_t frameSize);
|
||||
int setFast(bool fast);
|
||||
// int setRemoveUnvoiced(bool frameSize);
|
||||
YinOutput process(const double *in) const;
|
||||
YinOutput processProbabilisticYin(const double *in) const;
|
||||
|
||||
private:
|
||||
mutable size_t m_frameSize;
|
||||
mutable size_t m_inputSampleRate;
|
||||
mutable double m_thresh;
|
||||
mutable size_t m_threshDistr;
|
||||
mutable size_t m_yinBufferSize;
|
||||
mutable bool m_fast;
|
||||
// mutable bool m_removeUnvoiced;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,363 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "YinUtil.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/math/distributions.hpp>
|
||||
|
||||
void
|
||||
YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
|
||||
{
|
||||
yinBuffer[0] = 0;
|
||||
double delta ;
|
||||
int startPoint = 0;
|
||||
int endPoint = 0;
|
||||
for (int i = 1; i < yinBufferSize; ++i) {
|
||||
yinBuffer[i] = 0;
|
||||
startPoint = yinBufferSize/2 - i/2;
|
||||
endPoint = startPoint + yinBufferSize;
|
||||
for (int j = startPoint; j < endPoint; ++j) {
|
||||
delta = in[i+j] - in[j];
|
||||
yinBuffer[i] += delta * delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
|
||||
{
|
||||
|
||||
// DECLARE AND INITIALISE
|
||||
// initialisation of most of the arrays here was done in a separate function,
|
||||
// with all the arrays as members of the class... moved them back here.
|
||||
|
||||
size_t frameSize = 2 * yinBufferSize;
|
||||
|
||||
double *audioTransformedReal = new double[frameSize];
|
||||
double *audioTransformedImag = new double[frameSize];
|
||||
double *nullImag = new double[frameSize];
|
||||
double *kernel = new double[frameSize];
|
||||
double *kernelTransformedReal = new double[frameSize];
|
||||
double *kernelTransformedImag = new double[frameSize];
|
||||
double *yinStyleACFReal = new double[frameSize];
|
||||
double *yinStyleACFImag = new double[frameSize];
|
||||
double *powerTerms = new double[yinBufferSize];
|
||||
|
||||
for (size_t j = 0; j < yinBufferSize; ++j)
|
||||
{
|
||||
yinBuffer[j] = 0.; // set to zero
|
||||
powerTerms[j] = 0.; // set to zero
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < frameSize; ++j)
|
||||
{
|
||||
nullImag[j] = 0.;
|
||||
audioTransformedReal[j] = 0.;
|
||||
audioTransformedImag[j] = 0.;
|
||||
kernel[j] = 0.;
|
||||
kernelTransformedReal[j] = 0.;
|
||||
kernelTransformedImag[j] = 0.;
|
||||
yinStyleACFReal[j] = 0.;
|
||||
yinStyleACFImag[j] = 0.;
|
||||
}
|
||||
|
||||
// POWER TERM CALCULATION
|
||||
// ... for the power terms in equation (7) in the Yin paper
|
||||
powerTerms[0] = 0.0;
|
||||
for (size_t j = 0; j < yinBufferSize; ++j) {
|
||||
powerTerms[0] += in[j] * in[j];
|
||||
}
|
||||
|
||||
// now iteratively calculate all others (saves a few multiplications)
|
||||
for (size_t tau = 1; tau < yinBufferSize; ++tau) {
|
||||
powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize];
|
||||
}
|
||||
|
||||
// YIN-STYLE AUTOCORRELATION via FFT
|
||||
// 1. data
|
||||
Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag);
|
||||
|
||||
// 2. half of the data, disguised as a convolution kernel
|
||||
for (size_t j = 0; j < yinBufferSize; ++j) {
|
||||
kernel[j] = in[yinBufferSize-1-j];
|
||||
}
|
||||
Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag);
|
||||
|
||||
// 3. convolution via complex multiplication -- written into
|
||||
for (size_t j = 0; j < frameSize; ++j) {
|
||||
yinStyleACFReal[j] = audioTransformedReal[j]*kernelTransformedReal[j] - audioTransformedImag[j]*kernelTransformedImag[j]; // real
|
||||
yinStyleACFImag[j] = audioTransformedReal[j]*kernelTransformedImag[j] + audioTransformedImag[j]*kernelTransformedReal[j]; // imaginary
|
||||
}
|
||||
Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag);
|
||||
|
||||
// CALCULATION OF difference function
|
||||
// ... according to (7) in the Yin paper.
|
||||
for (size_t j = 0; j < yinBufferSize; ++j) {
|
||||
// taking only the real part
|
||||
yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+yinBufferSize-1];
|
||||
}
|
||||
delete [] audioTransformedReal;
|
||||
delete [] audioTransformedImag;
|
||||
delete [] nullImag;
|
||||
delete [] kernel;
|
||||
delete [] kernelTransformedReal;
|
||||
delete [] kernelTransformedImag;
|
||||
delete [] yinStyleACFReal;
|
||||
delete [] yinStyleACFImag;
|
||||
delete [] powerTerms;
|
||||
}
|
||||
|
||||
void
|
||||
YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize)
|
||||
{
|
||||
size_t tau;
|
||||
|
||||
yinBuffer[0] = 1;
|
||||
|
||||
double runningSum = 0;
|
||||
|
||||
for (tau = 1; tau < yinBufferSize; ++tau) {
|
||||
runningSum += yinBuffer[tau];
|
||||
if (runningSum == 0)
|
||||
{
|
||||
yinBuffer[tau] = 1;
|
||||
} else {
|
||||
yinBuffer[tau] *= tau / runningSum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh)
|
||||
{
|
||||
size_t tau;
|
||||
size_t minTau = 0;
|
||||
double minVal = 1000.;
|
||||
|
||||
// using Joren Six's "loop construct" from TarsosDSP
|
||||
tau = 2;
|
||||
while (tau < yinBufferSize)
|
||||
{
|
||||
if (yinBuffer[tau] < thresh)
|
||||
{
|
||||
while (tau+1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau])
|
||||
{
|
||||
++tau;
|
||||
}
|
||||
return tau;
|
||||
} else {
|
||||
if (yinBuffer[tau] < minVal)
|
||||
{
|
||||
minVal = yinBuffer[tau];
|
||||
minTau = tau;
|
||||
}
|
||||
}
|
||||
++tau;
|
||||
}
|
||||
if (minTau > 0)
|
||||
{
|
||||
return -minTau;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static float uniformDist[100] = {0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000};
|
||||
static float betaDist1[100] = {0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
|
||||
static float betaDist2[100] = {0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
|
||||
static float betaDist3[100] = {0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
|
||||
static float betaDist4[100] = {0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
|
||||
static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
|
||||
static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
|
||||
static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
|
||||
|
||||
std::vector<double>
|
||||
YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0)
|
||||
{
|
||||
size_t minTau = 2;
|
||||
size_t maxTau = yinBufferSize;
|
||||
|
||||
// adapt period range, if necessary
|
||||
if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0;
|
||||
if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0;
|
||||
|
||||
double minWeight = 0.01;
|
||||
size_t tau;
|
||||
std::vector<float> thresholds;
|
||||
std::vector<float> distribution;
|
||||
std::vector<double> peakProb = std::vector<double>(yinBufferSize);
|
||||
|
||||
size_t nThreshold = 100;
|
||||
int nThresholdInt = nThreshold;
|
||||
|
||||
for (int i = 0; i < nThresholdInt; ++i)
|
||||
{
|
||||
switch (prior) {
|
||||
case 0:
|
||||
distribution.push_back(uniformDist[i]);
|
||||
break;
|
||||
case 1:
|
||||
distribution.push_back(betaDist1[i]);
|
||||
break;
|
||||
case 2:
|
||||
distribution.push_back(betaDist2[i]);
|
||||
break;
|
||||
case 3:
|
||||
distribution.push_back(betaDist3[i]);
|
||||
break;
|
||||
case 4:
|
||||
distribution.push_back(betaDist4[i]);
|
||||
break;
|
||||
case 5:
|
||||
distribution.push_back(single10[i]);
|
||||
break;
|
||||
case 6:
|
||||
distribution.push_back(single15[i]);
|
||||
break;
|
||||
case 7:
|
||||
distribution.push_back(single20[i]);
|
||||
break;
|
||||
default:
|
||||
distribution.push_back(uniformDist[i]);
|
||||
}
|
||||
thresholds.push_back(0.01 + i*0.01);
|
||||
}
|
||||
|
||||
|
||||
int currThreshInd = nThreshold-1;
|
||||
tau = minTau;
|
||||
|
||||
// double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight
|
||||
size_t minInd = 0;
|
||||
float minVal = 42.f;
|
||||
// while (currThreshInd != -1 && tau < maxTau)
|
||||
// {
|
||||
// if (yinBuffer[tau] < thresholds[currThreshInd])
|
||||
// {
|
||||
// while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
|
||||
// {
|
||||
// tau++;
|
||||
// }
|
||||
// // tau is now local minimum
|
||||
// // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
|
||||
// if (yinBuffer[tau] < minVal && tau > 2){
|
||||
// minVal = yinBuffer[tau];
|
||||
// minInd = tau;
|
||||
// }
|
||||
// peakProb[tau] += distribution[currThreshInd];
|
||||
// currThreshInd--;
|
||||
// } else {
|
||||
// tau++;
|
||||
// }
|
||||
// }
|
||||
// double nonPeakProb = 1;
|
||||
// for (size_t i = minTau; i < maxTau; ++i)
|
||||
// {
|
||||
// nonPeakProb -= peakProb[i];
|
||||
// }
|
||||
//
|
||||
// std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
|
||||
float sumProb = 0;
|
||||
while (tau+1 < maxTau)
|
||||
{
|
||||
if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau])
|
||||
{
|
||||
while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
|
||||
{
|
||||
tau++;
|
||||
}
|
||||
// tau is now local minimum
|
||||
// std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
|
||||
if (yinBuffer[tau] < minVal && tau > 2){
|
||||
minVal = yinBuffer[tau];
|
||||
minInd = tau;
|
||||
}
|
||||
currThreshInd = nThresholdInt-1;
|
||||
while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) {
|
||||
// std::cerr << distribution[currThreshInd] << std::endl;
|
||||
peakProb[tau] += distribution[currThreshInd];
|
||||
currThreshInd--;
|
||||
}
|
||||
// peakProb[tau] = 1 - yinBuffer[tau];
|
||||
sumProb += peakProb[tau];
|
||||
tau++;
|
||||
} else {
|
||||
tau++;
|
||||
}
|
||||
}
|
||||
|
||||
if (peakProb[minInd] > 1) {
|
||||
std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl;
|
||||
return(std::vector<double>(yinBufferSize));
|
||||
}
|
||||
|
||||
double nonPeakProb = 1;
|
||||
if (sumProb > 0) {
|
||||
for (size_t i = minTau; i < maxTau; ++i)
|
||||
{
|
||||
peakProb[i] = peakProb[i] / sumProb * peakProb[minInd];
|
||||
nonPeakProb -= peakProb[i];
|
||||
}
|
||||
}
|
||||
if (minInd > 0)
|
||||
{
|
||||
// std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl;
|
||||
peakProb[minInd] += nonPeakProb * minWeight;
|
||||
}
|
||||
|
||||
return peakProb;
|
||||
}
|
||||
|
||||
double
|
||||
YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize)
|
||||
{
|
||||
// this is taken almost literally from Joren Six's Java implementation
|
||||
if (tau == yinBufferSize) // not valid anyway.
|
||||
{
|
||||
return static_cast<double>(tau);
|
||||
}
|
||||
|
||||
double betterTau = 0.0;
|
||||
if (tau > 0 && tau < yinBufferSize-1) {
|
||||
float s0, s1, s2;
|
||||
s0 = yinBuffer[tau-1];
|
||||
s1 = yinBuffer[tau];
|
||||
s2 = yinBuffer[tau+1];
|
||||
|
||||
double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0));
|
||||
|
||||
if (abs(adjustment)>1) adjustment = 0;
|
||||
|
||||
betterTau = tau + adjustment;
|
||||
} else {
|
||||
// std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n";
|
||||
betterTau = tau;
|
||||
}
|
||||
return betterTau;
|
||||
}
|
||||
|
||||
double
|
||||
YinUtil::sumSquare(const double *in, const size_t start, const size_t end)
|
||||
{
|
||||
double out = 0;
|
||||
for (size_t i = start; i < end; ++i)
|
||||
{
|
||||
out += in[i] * in[i];
|
||||
}
|
||||
return out;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _YINUTIL_H_
|
||||
#define _YINUTIL_H_
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
#include "MeanFilter.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <exception>
|
||||
|
||||
using std::vector;
|
||||
|
||||
class YinUtil
|
||||
{
|
||||
public:
|
||||
static double sumSquare(const double *in, const size_t startInd, const size_t endInd);
|
||||
static void difference(const double *in, double *yinBuffer, const size_t yinBufferSize);
|
||||
static void fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize);
|
||||
static void slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize);
|
||||
static void cumulativeDifference(double *yinBuffer, const size_t yinBufferSize);
|
||||
static int absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh);
|
||||
static vector<double> yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, size_t minTau = 0, size_t maxTau = 0);
|
||||
static double parabolicInterpolation(const double *yinBuffer, const size_t tau,
|
||||
const size_t yinBufferSize);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,367 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "YinVamp.h"
|
||||
#include "MonoNote.h"
|
||||
|
||||
#include "vamp-sdk/FFT.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using Vamp::RealTime;
|
||||
|
||||
|
||||
YinVamp::YinVamp(float inputSampleRate) :
|
||||
Plugin(inputSampleRate),
|
||||
m_channels(0),
|
||||
m_stepSize(256),
|
||||
m_blockSize(2048),
|
||||
m_fmin(40),
|
||||
m_fmax(1600),
|
||||
m_yin(2048, inputSampleRate, 0.0),
|
||||
m_outNoF0(0),
|
||||
m_outNoPeriodicity(0),
|
||||
m_outNoRms(0),
|
||||
m_outNoSalience(0),
|
||||
m_yinParameter(0.15f),
|
||||
m_outputUnvoiced(2.0f)
|
||||
{
|
||||
}
|
||||
|
||||
YinVamp::~YinVamp()
|
||||
{
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getIdentifier() const
|
||||
{
|
||||
return "yin";
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getName() const
|
||||
{
|
||||
return "Yin";
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getDescription() const
|
||||
{
|
||||
return "A vamp implementation of the Yin algorithm for monophonic frequency estimation.";
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getMaker() const
|
||||
{
|
||||
return "Matthias Mauch";
|
||||
}
|
||||
|
||||
int
|
||||
YinVamp::getPluginVersion() const
|
||||
{
|
||||
// Increment this each time you release a version that behaves
|
||||
// differently from the previous one
|
||||
return 2;
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getCopyright() const
|
||||
{
|
||||
return "GPL";
|
||||
}
|
||||
|
||||
YinVamp::InputDomain
|
||||
YinVamp::getInputDomain() const
|
||||
{
|
||||
return TimeDomain;
|
||||
}
|
||||
|
||||
size_t
|
||||
YinVamp::getPreferredBlockSize() const
|
||||
{
|
||||
return 2048;
|
||||
}
|
||||
|
||||
size_t
|
||||
YinVamp::getPreferredStepSize() const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
size_t
|
||||
YinVamp::getMinChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t
|
||||
YinVamp::getMaxChannelCount() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
YinVamp::ParameterList
|
||||
YinVamp::getParameterDescriptors() const
|
||||
{
|
||||
ParameterList list;
|
||||
|
||||
ParameterDescriptor d;
|
||||
d.identifier = "yinThreshold";
|
||||
d.name = "Yin threshold";
|
||||
d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached.";
|
||||
d.unit = "";
|
||||
d.minValue = 0.025f;
|
||||
d.maxValue = 1.0f;
|
||||
d.defaultValue = 0.15f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 0.025f;
|
||||
|
||||
list.push_back(d);
|
||||
|
||||
d.identifier = "outputunvoiced";
|
||||
d.valueNames.clear();
|
||||
d.name = "Output estimates classified as unvoiced?";
|
||||
d.description = ".";
|
||||
d.unit = "";
|
||||
d.minValue = 0.0f;
|
||||
d.maxValue = 2.0f;
|
||||
d.defaultValue = 2.0f;
|
||||
d.isQuantized = true;
|
||||
d.quantizeStep = 1.0f;
|
||||
d.valueNames.push_back("No");
|
||||
d.valueNames.push_back("Yes");
|
||||
d.valueNames.push_back("Yes, as negative frequencies");
|
||||
list.push_back(d);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
float
|
||||
YinVamp::getParameter(string identifier) const
|
||||
{
|
||||
if (identifier == "yinThreshold") {
|
||||
return m_yinParameter;
|
||||
}
|
||||
if (identifier == "outputunvoiced") {
|
||||
return m_outputUnvoiced;
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
void
|
||||
YinVamp::setParameter(string identifier, float value)
|
||||
{
|
||||
if (identifier == "yinThreshold")
|
||||
{
|
||||
m_yinParameter = value;
|
||||
}
|
||||
if (identifier == "outputunvoiced")
|
||||
{
|
||||
m_outputUnvoiced = value;
|
||||
}
|
||||
}
|
||||
|
||||
YinVamp::ProgramList
|
||||
YinVamp::getPrograms() const
|
||||
{
|
||||
ProgramList list;
|
||||
return list;
|
||||
}
|
||||
|
||||
string
|
||||
YinVamp::getCurrentProgram() const
|
||||
{
|
||||
return ""; // no programs
|
||||
}
|
||||
|
||||
void
|
||||
YinVamp::selectProgram(string name)
|
||||
{
|
||||
}
|
||||
|
||||
YinVamp::OutputList
|
||||
YinVamp::getOutputDescriptors() const
|
||||
{
|
||||
OutputList outputs;
|
||||
|
||||
OutputDescriptor d;
|
||||
|
||||
int outputNumber = 0;
|
||||
|
||||
d.identifier = "f0";
|
||||
d.name = "Estimated f0";
|
||||
d.description = "Estimated fundamental frequency";
|
||||
d.unit = "Hz";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = m_fmin;
|
||||
d.maxValue = 500;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_outNoF0 = outputNumber++;
|
||||
|
||||
d.identifier = "periodicity";
|
||||
d.name = "Periodicity";
|
||||
d.description = "by-product of Yin f0 estimation";
|
||||
d.unit = "";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_outNoPeriodicity = outputNumber++;
|
||||
|
||||
d.identifier = "rms";
|
||||
d.name = "Root mean square";
|
||||
d.description = "Root mean square of the waveform.";
|
||||
d.unit = "";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = 1;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_outNoRms = outputNumber++;
|
||||
|
||||
d.identifier = "salience";
|
||||
d.name = "Salience";
|
||||
d.description = "Yin Salience";
|
||||
d.hasFixedBinCount = true;
|
||||
d.binCount = m_blockSize / 2;
|
||||
d.hasKnownExtents = true;
|
||||
d.minValue = 0;
|
||||
d.maxValue = 1;
|
||||
d.isQuantized = false;
|
||||
d.sampleType = OutputDescriptor::FixedSampleRate;
|
||||
d.sampleRate = (m_inputSampleRate / m_stepSize);
|
||||
d.hasDuration = false;
|
||||
outputs.push_back(d);
|
||||
m_outNoSalience = outputNumber++;
|
||||
|
||||
return outputs;
|
||||
}
|
||||
|
||||
bool
|
||||
YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
||||
{
|
||||
if (channels < getMinChannelCount() ||
|
||||
channels > getMaxChannelCount()) return false;
|
||||
|
||||
/*
|
||||
std::cerr << "YinVamp::initialise: channels = " << channels
|
||||
<< ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
m_channels = channels;
|
||||
m_stepSize = stepSize;
|
||||
m_blockSize = blockSize;
|
||||
|
||||
reset();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
YinVamp::reset()
|
||||
{
|
||||
m_yin.setThreshold(m_yinParameter);
|
||||
m_yin.setFrameSize(m_blockSize);
|
||||
/*
|
||||
std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter)
|
||||
<< ", blockSize = " << m_blockSize
|
||||
<< std::endl;
|
||||
*/
|
||||
}
|
||||
|
||||
YinVamp::FeatureSet
|
||||
YinVamp::process(const float *const *inputBuffers, RealTime timestamp)
|
||||
{
|
||||
timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate));
|
||||
FeatureSet fs;
|
||||
|
||||
double *dInputBuffers = new double[m_blockSize];
|
||||
for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
||||
|
||||
Yin::YinOutput yo = m_yin.process(dInputBuffers);
|
||||
// std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl;
|
||||
Feature f;
|
||||
f.hasTimestamp = true;
|
||||
f.timestamp = timestamp;
|
||||
if (m_outputUnvoiced == 0.0f)
|
||||
{
|
||||
// std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl;
|
||||
if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) {
|
||||
f.values.push_back(yo.f0);
|
||||
fs[m_outNoF0].push_back(f);
|
||||
}
|
||||
} else if (m_outputUnvoiced == 1.0f)
|
||||
{
|
||||
if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) {
|
||||
f.values.push_back(fabs(yo.f0));
|
||||
fs[m_outNoF0].push_back(f);
|
||||
}
|
||||
} else
|
||||
{
|
||||
if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) {
|
||||
f.values.push_back(yo.f0);
|
||||
fs[m_outNoF0].push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
f.values.clear();
|
||||
f.values.push_back(yo.rms);
|
||||
fs[m_outNoRms].push_back(f);
|
||||
|
||||
f.values.clear();
|
||||
for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
||||
{
|
||||
f.values.push_back(yo.salience[iBin]);
|
||||
}
|
||||
fs[m_outNoSalience].push_back(f);
|
||||
|
||||
f.values.clear();
|
||||
// f.values[0] = yo.periodicity;
|
||||
f.values.push_back(yo.periodicity);
|
||||
fs[m_outNoPeriodicity].push_back(f);
|
||||
|
||||
delete [] dInputBuffers;
|
||||
|
||||
return fs;
|
||||
}
|
||||
|
||||
YinVamp::FeatureSet
|
||||
YinVamp::getRemainingFeatures()
|
||||
{
|
||||
FeatureSet fs;
|
||||
return fs;
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef _YINVAMP_H_
|
||||
#define _YINVAMP_H_
|
||||
|
||||
#include <vamp-sdk/Plugin.h>
|
||||
|
||||
#include "Yin.h"
|
||||
|
||||
class YinVamp : public Vamp::Plugin
|
||||
{
|
||||
public:
|
||||
YinVamp(float inputSampleRate);
|
||||
virtual ~YinVamp();
|
||||
|
||||
std::string getIdentifier() const;
|
||||
std::string getName() const;
|
||||
std::string getDescription() const;
|
||||
std::string getMaker() const;
|
||||
int getPluginVersion() const;
|
||||
std::string getCopyright() const;
|
||||
|
||||
InputDomain getInputDomain() const;
|
||||
size_t getPreferredBlockSize() const;
|
||||
size_t getPreferredStepSize() const;
|
||||
size_t getMinChannelCount() const;
|
||||
size_t getMaxChannelCount() const;
|
||||
|
||||
ParameterList getParameterDescriptors() const;
|
||||
float getParameter(std::string identifier) const;
|
||||
void setParameter(std::string identifier, float value);
|
||||
|
||||
ProgramList getPrograms() const;
|
||||
std::string getCurrentProgram() const;
|
||||
void selectProgram(std::string name);
|
||||
|
||||
OutputList getOutputDescriptors() const;
|
||||
|
||||
bool initialise(size_t channels, size_t stepSize, size_t blockSize);
|
||||
void reset();
|
||||
|
||||
FeatureSet process(const float *const *inputBuffers,
|
||||
Vamp::RealTime timestamp);
|
||||
|
||||
FeatureSet getRemainingFeatures();
|
||||
|
||||
protected:
|
||||
size_t m_channels;
|
||||
size_t m_stepSize;
|
||||
size_t m_blockSize;
|
||||
float m_fmin;
|
||||
float m_fmax;
|
||||
Yin m_yin;
|
||||
|
||||
mutable int m_outNoF0;
|
||||
mutable int m_outNoPeriodicity;
|
||||
mutable int m_outNoRms;
|
||||
mutable int m_outNoSalience;
|
||||
|
||||
float m_yinParameter;
|
||||
float m_outputUnvoiced;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,38 @@
|
|||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
pYIN - A fundamental frequency estimator for monophonic audio
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include <vamp/vamp.h>
|
||||
#include <vamp-sdk/PluginAdapter.h>
|
||||
|
||||
#include "PYinVamp.h"
|
||||
#include "YinVamp.h"
|
||||
#include "LocalCandidatePYIN.h"
|
||||
|
||||
static Vamp::PluginAdapter<PYinVamp> pyinvampPluginAdapter;
|
||||
static Vamp::PluginAdapter<YinVamp> yinvampPluginAdapter;
|
||||
static Vamp::PluginAdapter<LocalCandidatePYIN> localCandidatePYINPluginAdapter;
|
||||
|
||||
const VampPluginDescriptor *
|
||||
vampGetPluginDescriptor(unsigned int version, unsigned int index)
|
||||
{
|
||||
if (version < 1) return 0;
|
||||
|
||||
switch (index) {
|
||||
case 0: return pyinvampPluginAdapter.getDescriptor();
|
||||
case 1: return yinvampPluginAdapter.getDescriptor();
|
||||
case 2: return localCandidatePYINPluginAdapter.getDescriptor();
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
#!/usr/bin/env python
|
||||
from waflib.extras import autowaf as autowaf
|
||||
import os
|
||||
|
||||
# Library version (UNIX style major, minor, micro)
|
||||
# major increment <=> incompatible changes
|
||||
# minor increment <=> compatible changes (additions)
|
||||
# micro increment <=> no interface changes
|
||||
VAMP_PYIN_LIB_VERSION = '0.0.0'
|
||||
|
||||
# Variables for 'waf dist'
|
||||
APPNAME = 'libardourvamppyin'
|
||||
VERSION = '1.1'
|
||||
|
||||
# Mandatory variables
|
||||
top = '.'
|
||||
out = 'build'
|
||||
|
||||
def options(opt):
|
||||
autowaf.set_options(opt)
|
||||
|
||||
def configure(conf):
|
||||
conf.load('compiler_cxx')
|
||||
autowaf.configure(conf)
|
||||
|
||||
def build(bld):
|
||||
# Library
|
||||
obj = bld(features = 'cxx cxxshlib')
|
||||
obj.source = '''
|
||||
libmain.cpp
|
||||
PYinVamp.cpp
|
||||
YinVamp.cpp
|
||||
LocalCandidatePYIN.cpp
|
||||
Yin.cpp
|
||||
YinUtil.cpp
|
||||
MonoNote.cpp
|
||||
MonoPitch.cpp
|
||||
MonoNoteParameters.cpp
|
||||
SparseHMM.cpp
|
||||
MonoNoteHMM.cpp
|
||||
MonoPitchHMM.cpp
|
||||
'''
|
||||
obj.export_includes = ['.']
|
||||
obj.includes = ['.']
|
||||
obj.name = 'libardourvamppyin'
|
||||
obj.target = 'ardourvamppyin'
|
||||
obj.use = 'libvampplugin'
|
||||
autowaf.ensure_visible_symbols (obj, True)
|
||||
obj.vnum = VAMP_PYIN_LIB_VERSION
|
||||
obj.install_path = os.path.join(bld.env['LIBDIR'], 'vamp')
|
||||
|
||||
def shutdown():
|
||||
autowaf.shutdown()
|
Loading…
Reference in New Issue