399 lines
11 KiB
C++
399 lines
11 KiB
C++
|
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||
|
|
||
|
/*
|
||
|
* ClusterMeltSegmenter.cpp
|
||
|
*
|
||
|
* Created by Mark Levy on 23/03/2006.
|
||
|
* Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
||
|
|
||
|
This program is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU General Public License as
|
||
|
published by the Free Software Foundation; either version 2 of the
|
||
|
License, or (at your option) any later version. See the file
|
||
|
COPYING included with this distribution for more information.
|
||
|
*/
|
||
|
|
||
|
#include <cfloat>
|
||
|
#include <cmath>
|
||
|
|
||
|
#include "ClusterMeltSegmenter.h"
|
||
|
#include "cluster_segmenter.h"
|
||
|
#include "segment.h"
|
||
|
|
||
|
#include "dsp/transforms/FFT.h"
|
||
|
#include "dsp/chromagram/ConstantQ.h"
|
||
|
#include "dsp/rateconversion/Decimator.h"
|
||
|
#include "dsp/mfcc/MFCC.h"
|
||
|
|
||
|
ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
||
|
window(NULL),
|
||
|
fft(NULL),
|
||
|
constq(NULL),
|
||
|
mfcc(NULL),
|
||
|
featureType(params.featureType),
|
||
|
hopSize(params.hopSize),
|
||
|
windowSize(params.windowSize),
|
||
|
fmin(params.fmin),
|
||
|
fmax(params.fmax),
|
||
|
nbins(params.nbins),
|
||
|
ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
||
|
nHMMStates(params.nHMMStates),
|
||
|
nclusters(params.nclusters),
|
||
|
histogramLength(params.histogramLength),
|
||
|
neighbourhoodLimit(params.neighbourhoodLimit),
|
||
|
decimator(NULL)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::initialise(int fs)
|
||
|
{
|
||
|
samplerate = fs;
|
||
|
|
||
|
if (featureType == FEATURE_TYPE_CONSTQ ||
|
||
|
featureType == FEATURE_TYPE_CHROMA) {
|
||
|
|
||
|
// run internal processing at 11025 or thereabouts
|
||
|
int internalRate = 11025;
|
||
|
int decimationFactor = samplerate / internalRate;
|
||
|
if (decimationFactor < 1) decimationFactor = 1;
|
||
|
|
||
|
// must be a power of two
|
||
|
while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
||
|
|
||
|
if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
||
|
decimationFactor = Decimator::getHighestSupportedFactor();
|
||
|
}
|
||
|
|
||
|
if (decimationFactor > 1) {
|
||
|
decimator = new Decimator(getWindowsize(), decimationFactor);
|
||
|
}
|
||
|
|
||
|
CQConfig config;
|
||
|
config.FS = samplerate / decimationFactor;
|
||
|
config.min = fmin;
|
||
|
config.max = fmax;
|
||
|
config.BPO = nbins;
|
||
|
config.CQThresh = 0.0054;
|
||
|
|
||
|
constq = new ConstantQ(config);
|
||
|
constq->sparsekernel();
|
||
|
|
||
|
ncoeff = constq->getK();
|
||
|
|
||
|
fft = new FFTReal(constq->getfftlength());
|
||
|
|
||
|
} else if (featureType == FEATURE_TYPE_MFCC) {
|
||
|
|
||
|
// run internal processing at 22050 or thereabouts
|
||
|
int internalRate = 22050;
|
||
|
int decimationFactor = samplerate / internalRate;
|
||
|
if (decimationFactor < 1) decimationFactor = 1;
|
||
|
|
||
|
// must be a power of two
|
||
|
while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
||
|
|
||
|
if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
||
|
decimationFactor = Decimator::getHighestSupportedFactor();
|
||
|
}
|
||
|
|
||
|
if (decimationFactor > 1) {
|
||
|
decimator = new Decimator(getWindowsize(), decimationFactor);
|
||
|
}
|
||
|
|
||
|
MFCCConfig config(samplerate / decimationFactor);
|
||
|
config.fftsize = 2048;
|
||
|
config.nceps = 19;
|
||
|
config.want_c0 = true;
|
||
|
|
||
|
mfcc = new MFCC(config);
|
||
|
ncoeff = config.nceps + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
||
|
{
|
||
|
delete window;
|
||
|
delete constq;
|
||
|
delete decimator;
|
||
|
delete fft;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
ClusterMeltSegmenter::getWindowsize()
|
||
|
{
|
||
|
return static_cast<int>(windowSize * samplerate + 0.001);
|
||
|
}
|
||
|
|
||
|
int
|
||
|
ClusterMeltSegmenter::getHopsize()
|
||
|
{
|
||
|
return static_cast<int>(hopSize * samplerate + 0.001);
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
||
|
{
|
||
|
if (featureType == FEATURE_TYPE_CONSTQ ||
|
||
|
featureType == FEATURE_TYPE_CHROMA) {
|
||
|
extractFeaturesConstQ(samples, nsamples);
|
||
|
} else if (featureType == FEATURE_TYPE_MFCC) {
|
||
|
extractFeaturesMFCC(samples, nsamples);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
|
||
|
{
|
||
|
if (!constq) {
|
||
|
std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
|
||
|
<< "No const-q: initialise not called?"
|
||
|
<< std::endl;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (nsamples < getWindowsize()) {
|
||
|
std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
int fftsize = constq->getfftlength();
|
||
|
|
||
|
if (!window || window->getSize() != fftsize) {
|
||
|
delete window;
|
||
|
window = new Window<double>(HammingWindow, fftsize);
|
||
|
}
|
||
|
|
||
|
vector<double> cq(ncoeff);
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
||
|
|
||
|
const double *psource = samples;
|
||
|
int pcount = nsamples;
|
||
|
|
||
|
if (decimator) {
|
||
|
pcount = nsamples / decimator->getFactor();
|
||
|
double *decout = new double[pcount];
|
||
|
decimator->process(samples, decout);
|
||
|
psource = decout;
|
||
|
}
|
||
|
|
||
|
int origin = 0;
|
||
|
|
||
|
// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
||
|
|
||
|
int frames = 0;
|
||
|
|
||
|
double *frame = new double[fftsize];
|
||
|
double *real = new double[fftsize];
|
||
|
double *imag = new double[fftsize];
|
||
|
double *cqre = new double[ncoeff];
|
||
|
double *cqim = new double[ncoeff];
|
||
|
|
||
|
while (origin <= pcount) {
|
||
|
|
||
|
// always need at least one fft window per block, but after
|
||
|
// that we want to avoid having any incomplete ones
|
||
|
if (origin > 0 && origin + fftsize >= pcount) break;
|
||
|
|
||
|
for (int i = 0; i < fftsize; ++i) {
|
||
|
if (origin + i < pcount) {
|
||
|
frame[i] = psource[origin + i];
|
||
|
} else {
|
||
|
frame[i] = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < fftsize/2; ++i) {
|
||
|
double value = frame[i];
|
||
|
frame[i] = frame[i + fftsize/2];
|
||
|
frame[i + fftsize/2] = value;
|
||
|
}
|
||
|
|
||
|
window->cut(frame);
|
||
|
|
||
|
fft->process(false, frame, real, imag);
|
||
|
|
||
|
constq->process(real, imag, cqre, cqim);
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) {
|
||
|
cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
||
|
}
|
||
|
++frames;
|
||
|
|
||
|
origin += fftsize/2;
|
||
|
}
|
||
|
|
||
|
delete [] cqre;
|
||
|
delete [] cqim;
|
||
|
delete [] real;
|
||
|
delete [] imag;
|
||
|
delete [] frame;
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) {
|
||
|
cq[i] /= frames;
|
||
|
}
|
||
|
|
||
|
if (decimator) delete[] psource;
|
||
|
|
||
|
features.push_back(cq);
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
|
||
|
{
|
||
|
if (!mfcc) {
|
||
|
std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
|
||
|
<< "No mfcc: initialise not called?"
|
||
|
<< std::endl;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (nsamples < getWindowsize()) {
|
||
|
std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
int fftsize = mfcc->getfftlength();
|
||
|
|
||
|
vector<double> cc(ncoeff);
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
||
|
|
||
|
const double *psource = samples;
|
||
|
int pcount = nsamples;
|
||
|
|
||
|
if (decimator) {
|
||
|
pcount = nsamples / decimator->getFactor();
|
||
|
double *decout = new double[pcount];
|
||
|
decimator->process(samples, decout);
|
||
|
psource = decout;
|
||
|
}
|
||
|
|
||
|
int origin = 0;
|
||
|
int frames = 0;
|
||
|
|
||
|
double *frame = new double[fftsize];
|
||
|
double *ccout = new double[ncoeff];
|
||
|
|
||
|
while (origin <= pcount) {
|
||
|
|
||
|
// always need at least one fft window per block, but after
|
||
|
// that we want to avoid having any incomplete ones
|
||
|
if (origin > 0 && origin + fftsize >= pcount) break;
|
||
|
|
||
|
for (int i = 0; i < fftsize; ++i) {
|
||
|
if (origin + i < pcount) {
|
||
|
frame[i] = psource[origin + i];
|
||
|
} else {
|
||
|
frame[i] = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
mfcc->process(frame, ccout);
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) {
|
||
|
cc[i] += ccout[i];
|
||
|
}
|
||
|
++frames;
|
||
|
|
||
|
origin += fftsize/2;
|
||
|
}
|
||
|
|
||
|
delete [] ccout;
|
||
|
delete [] frame;
|
||
|
|
||
|
for (int i = 0; i < ncoeff; ++i) {
|
||
|
cc[i] /= frames;
|
||
|
}
|
||
|
|
||
|
if (decimator) delete[] psource;
|
||
|
|
||
|
features.push_back(cc);
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::segment(int m)
|
||
|
{
|
||
|
nclusters = m;
|
||
|
segment();
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
||
|
{
|
||
|
features = f;
|
||
|
featureType = FEATURE_TYPE_UNKNOWN;
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::segment()
|
||
|
{
|
||
|
delete constq;
|
||
|
constq = 0;
|
||
|
delete mfcc;
|
||
|
mfcc = 0;
|
||
|
delete decimator;
|
||
|
decimator = 0;
|
||
|
|
||
|
if (features.size() < histogramLength) return;
|
||
|
/*
|
||
|
std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
||
|
<< " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
||
|
*/
|
||
|
// copy the features to a native array and use the existing C segmenter...
|
||
|
double** arrFeatures = new double*[features.size()];
|
||
|
for (int i = 0; i < features.size(); i++)
|
||
|
{
|
||
|
if (featureType == FEATURE_TYPE_UNKNOWN) {
|
||
|
arrFeatures[i] = new double[features[0].size()];
|
||
|
for (int j = 0; j < features[0].size(); j++)
|
||
|
arrFeatures[i][j] = features[i][j];
|
||
|
} else {
|
||
|
arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
||
|
for (int j = 0; j < ncoeff; j++)
|
||
|
arrFeatures[i][j] = features[i][j];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
q = new int[features.size()];
|
||
|
|
||
|
if (featureType == FEATURE_TYPE_UNKNOWN ||
|
||
|
featureType == FEATURE_TYPE_MFCC)
|
||
|
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
||
|
nclusters, neighbourhoodLimit);
|
||
|
else
|
||
|
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
||
|
nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
||
|
|
||
|
// convert the cluster assignment sequence to a segmentation
|
||
|
makeSegmentation(q, features.size());
|
||
|
|
||
|
// de-allocate arrays
|
||
|
delete [] q;
|
||
|
for (int i = 0; i < features.size(); i++)
|
||
|
delete [] arrFeatures[i];
|
||
|
delete [] arrFeatures;
|
||
|
|
||
|
// clear the features
|
||
|
clear();
|
||
|
}
|
||
|
|
||
|
void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
||
|
{
|
||
|
segmentation.segments.clear();
|
||
|
segmentation.nsegtypes = nclusters;
|
||
|
segmentation.samplerate = samplerate;
|
||
|
|
||
|
Segment segment;
|
||
|
segment.start = 0;
|
||
|
segment.type = q[0];
|
||
|
|
||
|
for (int i = 1; i < len; i++)
|
||
|
{
|
||
|
if (q[i] != q[i-1])
|
||
|
{
|
||
|
segment.end = i * getHopsize();
|
||
|
segmentation.segments.push_back(segment);
|
||
|
segment.type = q[i];
|
||
|
segment.start = segment.end;
|
||
|
}
|
||
|
}
|
||
|
segment.end = len * getHopsize();
|
||
|
segmentation.segments.push_back(segment);
|
||
|
}
|
||
|
|