/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    QM DSP Library

    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/
|
|
|
|
|
|
|
|
#include <cmath>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
#include "MFCC.h"
|
|
|
|
#include "dsp/transforms/FFT.h"
|
|
|
|
#include "base/Window.h"
|
|
|
|
|
|
|
|
/*
 * Build an MFCC extractor from the supplied configuration.
 *
 * Reads config.fftsize, config.logpower, config.FS, config.nceps,
 * config.want_c0 and config.window.  Everything that does not depend on
 * the audio itself is computed here once: the triangular filterbank
 * weights (mfccFilterWeights), the DCT matrix (mfccDCTMatrix) and the
 * working buffers used by process().
 *
 * NOTE(review): none of the calloc() results are checked, so an
 * allocation failure is not detected in this function.
 */
MFCC::MFCC(MFCCConfig config)
{
    /* Fixed filterbank layout: a run of linearly spaced filters followed
       by a run of geometrically spaced ones. */
    lowestFrequency = 66.6666666;
    linearFilters   = 13;
    linearSpacing   = 66.66666666;
    logFilters      = 27;
    logSpacing      = 1.0711703;

    /* FFT size and the transform object itself */
    fftSize = config.fftsize;
    fft     = new FFTReal(fftSize);

    totalFilters = linearFilters + logFilters;
    logPower     = config.logpower;
    samplingRate = config.FS;

    /* Number of cepstral coefficients, and whether C0 is requested */
    nceps   = config.nceps;
    WANT_C0 = (config.want_c0 ? 1 : 0);

    /* Feature vector: one extra slot when C0 is requested */
    ceps = (double*)calloc((WANT_C0 == 1) ? nceps+1 : nceps, sizeof(double));

    /* DCT matrix: nceps+1 rows (row 0 is the DC term), one column per filter */
    const int dctRows = nceps+1;
    mfccDCTMatrix = (double**)calloc(dctRows, sizeof(double*));
    for (int r = 0; r < dctRows; ++r) {
        mfccDCTMatrix[r] = (double*)calloc(totalFilters, sizeof(double));
    }

    /* Filter weights: one row per filter, fftSize columns */
    mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
    for (int f = 0; f < totalFilters; ++f) {
        mfccFilterWeights[f] = (double*)calloc(fftSize, sizeof(double));
    }

    /*
     * Band edge table.  Filter f rises from edges[f], peaks at
     * edges[f+1] and falls back to zero at edges[f+2]; hence the two
     * extra entries beyond totalFilters.
     */
    double *edges = (double*)calloc(totalFilters+2, sizeof(double));

    for (int k = 0; k < linearFilters; ++k) {
        edges[k] = lowestFrequency + ((double)k) * linearSpacing;
    }
    for (int k = linearFilters; k < totalFilters+2; ++k) {
        edges[k] = edges[linearFilters-1] *
            pow(logSpacing, (double)(k-linearFilters+1));
    }

    /* Frequency associated with each FFT bin index */
    double *binFreqs = (double*)calloc(fftSize, sizeof(double));
    for (int b = 0; b < fftSize; ++b) {
        binFreqs[b] = ((double) b / ((double) fftSize ) *
                       (double) samplingRate);
    }

    /* Fill the filter weight matrix, one triangle per row */
    for (int f = 0; f < totalFilters; ++f) {

        const double lo  = edges[f];
        const double mid = edges[f+1];
        const double hi  = edges[f+2];

        /* Peak height chosen as 2 / (width of the triangle's base) */
        const double height = 2./(hi-lo);

        double *row = mfccFilterWeights[f];

        for (int b = 0; b < fftSize; ++b) {

            const double hz = binFreqs[b];

            if ((hz > lo) && (hz <= mid)) {
                /* rising slope, including the peak itself */
                row[b] = height * (hz-lo) / (mid-lo);
            } else if ((hz > mid) && (hz < hi)) {
                /* falling slope */
                row[b] = height * (hi-hz) / (hi-mid);
            } else {
                /* outside this filter's support */
                row[b] = 0.0;
            }
        }
    }

    /*
     * DCT matrix.  Every entry is scale * cos(pi * i * (j + 0.5) / N)
     * with N = totalFilters; row 0 (the DC component) is additionally
     * multiplied by sqrt(2)/2.
     */
    const double pi = 3.14159265358979323846264338327950288;
    const double scale = (1./sqrt((double) totalFilters / 2.));

    for (int r = 0; r < dctRows; ++r) {
        double *row = mfccDCTMatrix[r];
        for (int c = 0; c < totalFilters; ++c) {
            row[c] = scale
                * cos((double) r * ((double) c + 0.5) / (double) totalFilters * pi);
        }
    }

    for (int c = 0; c < totalFilters; ++c) {
        mfccDCTMatrix[0][c] = (sqrt(2.)/2.) * mfccDCTMatrix[0][c];
    }

    /* The analysis window */
    window = new Window<double>(config.window, fftSize);

    /* FFT output buffers */
    realOut = (double*)calloc(fftSize, sizeof(double));
    imagOut = (double*)calloc(fftSize, sizeof(double));

    /* Per-filter log energies and the half-spectrum magnitudes */
    earMag = (double*)calloc(totalFilters, sizeof(double));
    fftMag = (double*)calloc(fftSize/2, sizeof(double));

    /* The lookup tables above are only needed during construction */
    free(edges);
    free(binFreqs);
}
|
|
|
|
|
|
|
|
/*
 * Release everything the constructor allocated: the two row-pointer
 * matrices (rows first, then the pointer table), the flat working
 * buffers, the analysis window and the FFT object.
 */
MFCC::~MFCC()
{
    /* DCT matrix: nceps+1 rows */
    const int dctRows = nceps+1;
    for (int r = 0; r < dctRows; ++r) {
        free(mfccDCTMatrix[r]);
    }
    free(mfccDCTMatrix);

    /* Filter weight matrix: one row per filter */
    for (int f = 0; f < totalFilters; ++f) {
        free(mfccFilterWeights[f]);
    }
    free(mfccFilterWeights);

    /* The feature vector */
    free(ceps);

    /* The analysis window */
    delete window;

    /* Filterbank and spectrum magnitude buffers */
    free(earMag);
    free(fftMag);

    /* FFT output buffers and the FFT itself */
    free(realOut);
    free(imagOut);

    delete fft;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2016-10-05 18:16:44 -04:00
|
|
|
*
|
|
|
|
* Extract the MFCC on the input frame
|
|
|
|
*
|
|
|
|
*/
|
2011-03-02 07:37:39 -05:00
|
|
|
int MFCC::process(const double *inframe, double *outceps)
|
|
|
|
{
|
|
|
|
double *inputData = (double *)malloc(fftSize * sizeof(double));
|
|
|
|
for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
|
|
|
|
|
|
|
window->cut(inputData);
|
2016-10-05 18:16:44 -04:00
|
|
|
|
2011-03-02 07:37:39 -05:00
|
|
|
/* Calculate the fft on the input frame */
|
2016-10-05 18:16:44 -04:00
|
|
|
fft->forward(inputData, realOut, imagOut);
|
2011-03-02 07:37:39 -05:00
|
|
|
|
|
|
|
free(inputData);
|
|
|
|
|
|
|
|
return process(realOut, imagOut, outceps);
|
|
|
|
}
|
|
|
|
|
|
|
|
int MFCC::process(const double *real, const double *imag, double *outceps)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < fftSize/2; ++i) {
|
|
|
|
fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < totalFilters; ++i) {
|
|
|
|
earMag[i] = 0.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Multiply by mfccFilterWeights */
|
|
|
|
for (i = 0; i < totalFilters; i++) {
|
|
|
|
double tmp = 0.0;
|
|
|
|
for (j = 0; j < fftSize/2; j++) {
|
|
|
|
tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
|
|
|
|
}
|
|
|
|
if (tmp > 0) earMag[i] = log10(tmp);
|
|
|
|
else earMag[i] = 0.0;
|
|
|
|
|
|
|
|
if (logPower != 1.0) {
|
|
|
|
earMag[i] = pow(earMag[i], logPower);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-10-05 18:16:44 -04:00
|
|
|
*
|
|
|
|
* Calculate now the cepstral coefficients
|
2011-03-02 07:37:39 -05:00
|
|
|
* with or without the DC component
|
|
|
|
*
|
|
|
|
*/
|
2016-10-05 18:16:44 -04:00
|
|
|
|
2011-03-02 07:37:39 -05:00
|
|
|
if (WANT_C0 == 1) {
|
2016-10-05 18:16:44 -04:00
|
|
|
|
2011-03-02 07:37:39 -05:00
|
|
|
for (i = 0; i < nceps+1; i++) {
|
|
|
|
double tmp = 0.;
|
|
|
|
for (j = 0; j < totalFilters; j++){
|
|
|
|
tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
|
|
|
}
|
|
|
|
outceps[i] = tmp;
|
|
|
|
}
|
|
|
|
}
|
2016-10-05 18:16:44 -04:00
|
|
|
else
|
|
|
|
{
|
2011-03-02 07:37:39 -05:00
|
|
|
for (i = 1; i < nceps+1; i++) {
|
|
|
|
double tmp = 0.;
|
|
|
|
for (j = 0; j < totalFilters; j++){
|
|
|
|
tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
|
|
|
}
|
|
|
|
outceps[i-1] = tmp;
|
|
|
|
}
|
|
|
|
}
|
2016-10-05 18:16:44 -04:00
|
|
|
|
2011-03-02 07:37:39 -05:00
|
|
|
return nceps;
|
|
|
|
}
|
|
|
|
|