226 lines
5.0 KiB
C
226 lines
5.0 KiB
C
|
/*
|
||
|
* cluster.c
|
||
|
* cluster_melt
|
||
|
*
|
||
|
* Created by Mark Levy on 21/02/2006.
|
||
|
* Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
||
|
|
||
|
This program is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU General Public License as
|
||
|
published by the Free Software Foundation; either version 2 of the
|
||
|
License, or (at your option) any later version. See the file
|
||
|
COPYING included with this distribution for more information.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#include "cluster_melt.h"
|
||
|
|
||
|
#define DEFAULT_LAMBDA 0.02;
|
||
|
#define DEFAULT_LIMIT 20;
|
||
|
|
||
|
double kldist(double* a, double* b, int n) {
|
||
|
/* NB assume that all a[i], b[i] are non-negative
|
||
|
because a, b represent probability distributions */
|
||
|
double q, d;
|
||
|
int i;
|
||
|
|
||
|
d = 0;
|
||
|
for (i = 0; i < n; i++)
|
||
|
{
|
||
|
q = (a[i] + b[i]) / 2.0;
|
||
|
if (q > 0)
|
||
|
{
|
||
|
if (a[i] > 0)
|
||
|
d += a[i] * log(a[i] / q);
|
||
|
if (b[i] > 0)
|
||
|
d += b[i] * log(b[i] / q);
|
||
|
}
|
||
|
}
|
||
|
return d;
|
||
|
}
|
||
|
|
||
|
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
|
||
|
double lambda, sum, beta, logsumexp, maxlp;
|
||
|
int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
|
||
|
double** cl; /* reference histograms for each cluster */
|
||
|
int** nc; /* neighbour counts for each histogram */
|
||
|
double** lp; /* soft assignment probs for each histogram */
|
||
|
int* oldc; /* previous hard assignments (to check convergence) */
|
||
|
|
||
|
/* NB h is passed as a 1d row major array */
|
||
|
|
||
|
/* parameter values */
|
||
|
lambda = DEFAULT_LAMBDA;
|
||
|
if (l > 0)
|
||
|
limit = l;
|
||
|
else
|
||
|
limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */
|
||
|
B = 2 * limit + 1;
|
||
|
maxiter0 = 20; /* number of iterations at initial temperature */
|
||
|
maxiter1 = 5; /* number of iterations at subsequent temperatures */
|
||
|
|
||
|
/* allocate memory */
|
||
|
cl = (double**) malloc(k*sizeof(double*));
|
||
|
for (i= 0; i < k; i++)
|
||
|
cl[i] = (double*) malloc(m*sizeof(double));
|
||
|
|
||
|
nc = (int**) malloc(n*sizeof(int*));
|
||
|
for (i= 0; i < n; i++)
|
||
|
nc[i] = (int*) malloc(k*sizeof(int));
|
||
|
|
||
|
lp = (double**) malloc(n*sizeof(double*));
|
||
|
for (i= 0; i < n; i++)
|
||
|
lp[i] = (double*) malloc(k*sizeof(double));
|
||
|
|
||
|
oldc = (int*) malloc(n * sizeof(int));
|
||
|
|
||
|
/* initialise */
|
||
|
for (i = 0; i < k; i++)
|
||
|
{
|
||
|
sum = 0;
|
||
|
for (j = 0; j < m; j++)
|
||
|
{
|
||
|
cl[i][j] = rand(); /* random initial reference histograms */
|
||
|
sum += cl[i][j] * cl[i][j];
|
||
|
}
|
||
|
sum = sqrt(sum);
|
||
|
for (j = 0; j < m; j++)
|
||
|
{
|
||
|
cl[i][j] /= sum; /* normalise */
|
||
|
}
|
||
|
}
|
||
|
//print_array(cl, k, m);
|
||
|
|
||
|
for (i = 0; i < n; i++)
|
||
|
c[i] = 1; /* initially assign all histograms to cluster 1 */
|
||
|
|
||
|
for (a = 0; a < t; a++)
|
||
|
{
|
||
|
beta = Bsched[a];
|
||
|
|
||
|
if (a == 0)
|
||
|
maxiter = maxiter0;
|
||
|
else
|
||
|
maxiter = maxiter1;
|
||
|
|
||
|
for (it = 0; it < maxiter; it++)
|
||
|
{
|
||
|
//if (it == maxiter - 1)
|
||
|
// mexPrintf("hasn't converged after %d iterations\n", maxiter);
|
||
|
|
||
|
for (i = 0; i < n; i++)
|
||
|
{
|
||
|
/* save current hard assignments */
|
||
|
oldc[i] = c[i];
|
||
|
|
||
|
/* calculate soft assignment logprobs for each cluster */
|
||
|
sum = 0;
|
||
|
for (j = 0; j < k; j++)
|
||
|
{
|
||
|
lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
|
||
|
|
||
|
/* update matching neighbour counts for this histogram, based on current hard assignments */
|
||
|
/* old version:
|
||
|
nc[i][j] = 0;
|
||
|
if (i >= limit && i <= n - 1 - limit)
|
||
|
{
|
||
|
for (b = i - limit; b <= i + limit; b++)
|
||
|
{
|
||
|
if (c[b] == j+1)
|
||
|
nc[i][j]++;
|
||
|
}
|
||
|
nc[i][j] = B - nc[i][j];
|
||
|
}
|
||
|
*/
|
||
|
b0 = i - limit;
|
||
|
if (b0 < 0)
|
||
|
b0 = 0;
|
||
|
b1 = i + limit;
|
||
|
if (b1 >= n)
|
||
|
b1 = n - 1;
|
||
|
nc[i][j] = b1 - b0 + 1; /* = B except at edges */
|
||
|
for (b = b0; b <= b1; b++)
|
||
|
if (c[b] == j+1)
|
||
|
nc[i][j]--;
|
||
|
|
||
|
sum += exp(lp[i][j]);
|
||
|
}
|
||
|
|
||
|
/* normalise responsibilities and add duration logprior */
|
||
|
logsumexp = log(sum);
|
||
|
for (j = 0; j < k; j++)
|
||
|
lp[i][j] -= logsumexp + lambda * nc[i][j];
|
||
|
}
|
||
|
//print_array(lp, n, k);
|
||
|
/*
|
||
|
for (i = 0; i < n; i++)
|
||
|
{
|
||
|
for (j = 0; j < k; j++)
|
||
|
mexPrintf("%d ", nc[i][j]);
|
||
|
mexPrintf("\n");
|
||
|
}
|
||
|
*/
|
||
|
|
||
|
|
||
|
/* update the assignments now that we know the duration priors
|
||
|
based on the current assignments */
|
||
|
for (i = 0; i < n; i++)
|
||
|
{
|
||
|
maxlp = lp[i][0];
|
||
|
c[i] = 1;
|
||
|
for (j = 1; j < k; j++)
|
||
|
if (lp[i][j] > maxlp)
|
||
|
{
|
||
|
maxlp = lp[i][j];
|
||
|
c[i] = j+1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* break if assignments haven't changed */
|
||
|
i = 0;
|
||
|
while (i < n && oldc[i] == c[i])
|
||
|
i++;
|
||
|
if (i == n)
|
||
|
break;
|
||
|
|
||
|
/* update reference histograms now we know new responsibilities */
|
||
|
for (j = 0; j < k; j++)
|
||
|
{
|
||
|
for (b = 0; b < m; b++)
|
||
|
{
|
||
|
cl[j][b] = 0;
|
||
|
for (i = 0; i < n; i++)
|
||
|
{
|
||
|
cl[j][b] += exp(lp[i][j]) * h[i*m+b];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sum = 0;
|
||
|
for (i = 0; i < n; i++)
|
||
|
sum += exp(lp[i][j]);
|
||
|
for (b = 0; b < m; b++)
|
||
|
cl[j][b] /= sum; /* normalise */
|
||
|
}
|
||
|
|
||
|
//print_array(cl, k, m);
|
||
|
//mexPrintf("\n\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* free memory */
|
||
|
for (i = 0; i < k; i++)
|
||
|
free(cl[i]);
|
||
|
free(cl);
|
||
|
for (i = 0; i < n; i++)
|
||
|
free(nc[i]);
|
||
|
free(nc);
|
||
|
for (i = 0; i < n; i++)
|
||
|
free(lp[i]);
|
||
|
free(lp);
|
||
|
free(oldc);
|
||
|
}
|
||
|
|
||
|
|