#include <cmath>   // for M_PI

#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include "GaussMix.h"
#include <plearn/math/pl_erf.h>
#include <plearn/math/plapack.h>
#include <plearn/math/random.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

GaussMix::GaussMix()
    : PDistribution(),
      alpha_min(1e-5),
      epsilon(1e-6),
      kmeans_iterations(3),
      L(1),
      n_eigen(-1),
      sigma_min(1e-5),
      type("spherical")
{
    nstages = 10;
}

PLEARN_IMPLEMENT_OBJECT(GaussMix,
    "Gaussian mixture, either set non-parametrically or trained by EM.",
    "GaussMix implements a mixture of L Gaussians.\n"
    "There are 4 possible parametrization types:\n"
    " - spherical : Gaussians have covar matrix = diag(sigma). Parameter used: sigma.\n"
    " - diagonal  : Gaussians have covar matrix = diag(sigma_i). Parameter used: diags.\n"
    " - general   : Gaussians have an unconstrained covariance matrix.\n"
    "               The user specifies the number 'n_eigen' of eigenvectors kept when\n"
    "               decomposing the covariance matrix. The remaining eigenvectors are\n"
    "               considered as having a fixed eigenvalue equal to the next highest\n"
    "               eigenvalue in the decomposition.\n"
    " - factor    : (not implemented!) as in the general case, the Gaussians are defined\n"
    "               with K<=D vectors (through a KxD matrix 'V'), but these need not be\n"
    "               orthogonal/orthonormal.\n"
    "               The covariance matrix used will be V(t)V + psi, with psi a D-vector\n"
    "               (through parameter diags).\n"
    "Two parameters are common to all 4 types:\n"
    " - alpha : the mixing weight of each Gaussian\n"
    " - mu    : their centers\n"
);
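
// The density implemented by this class is the mixture
//
//     p(y) = sum_{j=1}^L alpha_j * N(y; mu_j, C_j),
//
// where the form of the covariance matrices C_j depends on the 'type' option
// above.
//
// A minimal usage sketch (hypothetical caller-side code, assuming the usual
// PLearn PLearner workflow; 'data' stands for any VMat whose inputsize is the
// data dimension):
//
//     PP<GaussMix> gm = new GaussMix();
//     gm->L = 3;                         // mixture of 3 Gaussians
//     gm->type = "general";
//     gm->build();
//     gm->setTrainingSet(data);          // inherited from PLearner
//     gm->train();                       // EM, for 'nstages' stages
//     real ld = gm->log_density(point);  // log p(point)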

void GaussMix::declareOptions(OptionList& ol)
{
    // Build options.

    declareOption(ol, "L", &GaussMix::L, OptionBase::buildoption,
        "Number of Gaussians in the mixture.");

    declareOption(ol, "type", &GaussMix::type, OptionBase::buildoption,
        "A string: 'spherical', 'diagonal', 'general' or 'factor'.\n"
        "This is the type of covariance matrix for each Gaussian.\n"
        " - spherical : spherical covariance matrix sigma * I\n"
        " - diagonal  : diagonal covariance matrix\n"
        " - general   : unconstrained covariance matrix (defined by its eigenvectors)\n"
        " - factor    : (not implemented) represented by Ks[i] principal components\n");

    declareOption(ol, "n_eigen", &GaussMix::n_eigen, OptionBase::buildoption,
        "If type == 'general', the number of eigenvectors used for the covariance matrix.\n"
        "The remaining eigenvectors will be given an eigenvalue equal to the next highest\n"
        "eigenvalue. If set to -1, all eigenvectors will be kept.");

    declareOption(ol, "alpha_min", &GaussMix::alpha_min, OptionBase::buildoption,
        "The minimum weight for each Gaussian.");

    declareOption(ol, "sigma_min", &GaussMix::sigma_min, OptionBase::buildoption,
        "The minimum standard deviation allowed.");

    declareOption(ol, "epsilon", &GaussMix::epsilon, OptionBase::buildoption,
        "A small number to check for near-zero probabilities.");

    declareOption(ol, "kmeans_iterations", &GaussMix::kmeans_iterations, OptionBase::buildoption,
        "The maximum number of iterations performed in the initial K-means algorithm.");

    // Learnt options.

    declareOption(ol, "alpha", &GaussMix::alpha, OptionBase::learntoption,
        "Coefficients of each Gaussian.\n"
        "They sum to 1 and are positive: they can be interpreted as priors P(Gaussian j).\n");

    declareOption(ol, "eigenvalues", &GaussMix::eigenvalues, OptionBase::learntoption,
        "The eigenvalues associated with the principal eigenvectors (element (j,k) is the\n"
        "k-th eigenvalue of the j-th Gaussian).");

    declareOption(ol, "eigenvectors", &GaussMix::eigenvectors, OptionBase::learntoption,
        "The principal eigenvectors of each Gaussian (for the 'general' type). They are\n"
        "stored in the rows of the matrices.");

    declareOption(ol, "D", &GaussMix::D, OptionBase::learntoption,
        "Number of dimensions in input space.");

    declareOption(ol, "diags", &GaussMix::diags, OptionBase::learntoption,
        "The element (i,j) is the stddev of Gaussian j on the i-th dimension.");

    declareOption(ol, "log_coeff", &GaussMix::log_coeff, OptionBase::learntoption,
        "The log of the constant part in the p(x) equation: log(1/sqrt((2*pi)^D * Det(C))).");

    declareOption(ol, "log_p_j_x", &GaussMix::log_p_j_x, OptionBase::learntoption,
        "The log of p(j|x), where x is the input part.");

    declareOption(ol, "log_p_x_j_alphaj", &GaussMix::log_p_x_j_alphaj, OptionBase::learntoption,
        "The log of p(x|j) * alpha_j, where x is the input part.");

    declareOption(ol, "mu", &GaussMix::mu, OptionBase::learntoption,
        "These are the centers of each Gaussian, stored in rows.");

    declareOption(ol, "mu_y_x", &GaussMix::mu_y_x, OptionBase::learntoption,
        "The expectation E[Y | x] for each Gaussian.");

    declareOption(ol, "n_eigen_computed", &GaussMix::n_eigen_computed, OptionBase::learntoption,
        "The actual number of principal components computed when type == 'general'.");

    declareOption(ol, "nsamples", &GaussMix::nsamples, OptionBase::learntoption,
        "The number of samples in the training set.");

    declareOption(ol, "p_j_x", &GaussMix::p_j_x, OptionBase::learntoption,
        "The probability p(j|x), where x is the input part (computed as exp(log_p_j_x)).");

    declareOption(ol, "sigma", &GaussMix::sigma, OptionBase::learntoption,
        "The standard deviation in all directions, for type == 'spherical'.\n");

    // Now call the parent class' declareOptions.
    inherited::declareOptions(ol);
}

void GaussMix::build()
{
    inherited::build();
    build_();
}

void GaussMix::build_()
{
    alpha.resize(L);
    log_p_x_j_alphaj.resize(L);
    log_p_j_x.resize(L);
    p_j_x.resize(L);
    cov_x.resize(0);
    cov_y_x.resize(0);
    eigenvectors.resize(0);
    eigenvectors_x.resize(0);
    eigenvectors_y_x.resize(0);
    full_cov.resize(0);
    log_coeff.resize(0);
    sigma.resize(0);
    y_x_mat.resize(0);
    if (type == "spherical") {
        sigma.resize(L);
    } else if (type == "diagonal") {
        // No additional storage needed.
    } else if (type == "general") {
        cov_x.resize(L);
        cov_y_x.resize(L);
        eigenvectors.resize(L);
        eigenvectors_x.resize(L);
        eigenvectors_y_x.resize(L);
        full_cov.resize(L);
        log_coeff.resize(L);
        y_x_mat.resize(L);
    } else {
        PLERROR("In GaussMix::build_ - Not implemented for this type");
    }
    if (n_input == 0) {
        // No input part: p(j | x) reduces to the prior p(j) = alpha_j.
        for (int j = 0; j < L; j++) {
            log_p_j_x[j] = log(alpha[j]);
        }
        p_j_x << alpha;
    }
    PDistribution::finishConditionalBuild();
}
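
// M-step helper: re-estimate each Gaussian from the current posteriors. For
// Gaussian j, the statistics are computed over the training set weighted by
// w_ij = initial_weight_i * P(j | s_i) (see updateSampleWeights), i.e.
//
//     mu_j = sum_i w_ij s_i / sum_i w_ij,
//     C_j  = sum_i w_ij (s_i - mu_j)(s_i - mu_j)' / sum_i w_ij
//
// (up to the exact normalization convention used by
// computeInputMeanAndVariance / computeInputMeanAndCovar). C_j is then
// reduced according to 'type': mean variance for 'spherical', its diagonal
// for 'diagonal', its eigen-decomposition for 'general'.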
void GaussMix::computeMeansAndCovariances() {
    VMat weighted_train_set;
    Vec sum_columns(L);
    columnSum(posteriors, sum_columns);
    for (int j = 0; j < L; j++) {
        if (sum_columns[j] < epsilon) {
            PLWARNING("In GaussMix::computeMeansAndCovariances - A posterior is almost zero");
        }
        VMat weights(columnmatrix(updated_weights(j)));
        weighted_train_set = new ConcatColumnsVMatrix(
            new SubVMatrix(train_set, 0, 0, nsamples, D), weights);
        weighted_train_set->defineSizes(D, 0, 1);
        if (type == "spherical") {
            Vec variance(D);
            Vec center = mu(j);
            computeInputMeanAndVariance(weighted_train_set, center, variance);
            sigma[j] = sqrt(mean(variance));
#ifdef BOUNDCHECK
            if (isnan(sigma[j])) {
                PLWARNING("In GaussMix::computeMeansAndCovariances - A standard deviation is nan");
            }
#endif
        } else if (type == "diagonal") {
            Vec variance(D);
            Vec center = mu(j);
            computeInputMeanAndVariance(weighted_train_set, center, variance);
            for (int i = 0; i < D; i++) {
                diags(i,j) = sqrt(variance[i]);
            }
        } else if (type == "general") {
            Mat covar(D, D);
            Vec center = mu(j);
            computeInputMeanAndCovar(weighted_train_set, center, covar);
            Vec eigenvals = eigenvalues(j);
            eigenVecOfSymmMat(covar, n_eigen_computed, eigenvals, eigenvectors[j]);
        } else {
            PLERROR("In GaussMix::computeMeansAndCovariances - Not implemented for this type of Gaussian");
        }
    }
}
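
// Log-density computation. For the 'general' type, with eigen-decomposition
// C = sum_k lambda_k v_k v_k' and only the leading n_eig - 1 eigenvalues kept
// (all remaining ones replaced by lambda0 = lambda_{n_eig}), the quadratic
// form in the Gaussian exponent can be written
//
//     (y-mu)' C^{-1} (y-mu) = ||y-mu||^2 / lambda0
//         + sum_{k < n_eig-1} (1/lambda_k - 1/lambda0) (v_k . (y-mu))^2,
//
// which is what the code below evaluates (after clamping each lambda_k to
// sigma_min^2). log_coeff[j] holds the normalization constant computed in
// precomputeStuff().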
real GaussMix::computeLogLikelihood(const Vec& y, int j, bool is_input) const {
    static real p, sig;
    static Vec mu_y;
    static int size;
    static int start;
    static Vec eigenvals;
    static Mat eigenvecs;
    if (type == "spherical" || type == "diagonal") {
        // Easy case: the covariance matrix is diagonal.
        if (is_input) {
            mu_y = mu(j).subVec(0, n_input);
            size = n_input;
            start = 0;
        } else {
            mu_y = mu(j).subVec(n_input, n_target);
            size = n_target;
            start = n_input;
        }
    }
    if (type == "spherical") {
        p = 0.0;
        for (int k = 0; k < size; k++) {
            p += gauss_log_density_stddev(y[k], mu_y[k], max(sigma_min, sigma[j]));
#ifdef BOUNDCHECK
            if (isnan(p)) {
                PLWARNING("In GaussMix::computeLogLikelihood - Density is nan");
            }
#endif
        }
        return p;
    } else if (type == "diagonal") {
        p = 0.0;
        for (int k = 0; k < size; k++) {
            sig = diags(k + start, j);
            p += gauss_log_density_stddev(y[k], mu_y[k], max(sigma_min, sig));
        }
        return p;
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::computeLogLikelihood - Marginalization not implemented for the general type");
        static real t;
        static Vec y_centered;
        static real squared_norm_y_centered;
        static real one_over_lambda0;
        static real lambda0;
        static real lambda;
        static int n_eig;
        if (is_input) {
            mu_y = mu(j).subVec(0, n_input);
            size = n_input;
            eigenvals = eigenvalues_x(j);
            eigenvecs = eigenvectors_x[j];
            n_eig = n_input;
        } else {
            size = n_target;
            if (n_input > 0) {
                // Conditional density p(y | x, j).
                mu_y = mu_y_x(j);
                eigenvals = eigenvalues_y_x(j);
                eigenvecs = eigenvectors_y_x[j];
                n_eig = n_target;
            } else {
                mu_y = mu(j);
                eigenvals = eigenvalues(j);
                eigenvecs = eigenvectors[j];
                n_eig = n_eigen_computed;
            }
        }
        t = log_coeff[j];
        y_centered.resize(size);
        y_centered << y;
        y_centered -= mu_y;
        squared_norm_y_centered = pownorm(y_centered);
        real var_min = sigma_min * sigma_min;
        lambda0 = max(var_min, eigenvals[n_eig - 1]);
#ifdef BOUNDCHECK
        if (lambda0 <= 0)
            PLERROR("GaussMix::computeLogLikelihood(y,%d), var_min=%g, lambda0=%g\n", j, var_min, lambda0);
#endif
        one_over_lambda0 = 1.0 / lambda0;
        // Common term of the quadratic form: ||y - mu||^2 / lambda0.
        t -= 0.5 * one_over_lambda0 * squared_norm_y_centered;
        // Corrections along the leading eigenvector directions.
        for (int k = 0; k < n_eig - 1; k++) {
            lambda = max(var_min, eigenvals[k]);
#ifdef BOUNDCHECK
            if (lambda <= 0)
                PLERROR("GaussMix::computeLogLikelihood(y,%d), var_min=%g, lambda(%d)=%g\n", j, var_min, k, lambda);
#endif
            if (lambda > lambda0)
                t -= 0.5 * (1.0 / lambda - one_over_lambda0) * square(dot(eigenvecs(k), y_centered));
        }
        return t;
    } else {
        PLERROR("In GaussMix::computeLogLikelihood - Not implemented for this type of Gaussian");
        return 0;
    }
}
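
// E-step: compute the posterior of each Gaussian on each training sample,
//
//     P(j | s_i) = alpha_j p(s_i | j) / sum_l alpha_l p(s_i | l),
//
// working in the log domain (with logadd) to avoid underflow.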
void GaussMix::computePosteriors() {
    sample_row.resize(D);
    log_likelihood_post.resize(L);
    real log_sum_likelihood;
    for (int i = 0; i < nsamples; i++) {
        train_set->getSubRow(i, 0, sample_row);
        // First compute the likelihood p(s_i | j) * alpha_j.
        for (int j = 0; j < L; j++) {
            log_likelihood_post[j] = computeLogLikelihood(sample_row, j) + log(alpha[j]);
#ifdef BOUNDCHECK
            if (isnan(log_likelihood_post[j])) {
                PLWARNING("In GaussMix::computePosteriors - computeLogLikelihood returned nan");
            }
#endif
        }
        log_sum_likelihood = logadd(log_likelihood_post);
        for (int j = 0; j < L; j++) {
            // Normalize to obtain the posterior P(j | s_i).
            posteriors(i, j) = exp(log_likelihood_post[j] - log_sum_likelihood);
        }
    }
    // Update the sample weights according to the new posteriors.
    updateSampleWeights();
}

bool GaussMix::computeWeights() {
    bool replaced_gaussian = false;
    if (L == 1)
        alpha[0] = 1;
    else {
        alpha.fill(0);
        for (int i = 0; i < nsamples; i++) {
            for (int j = 0; j < L; j++)
                alpha[j] += posteriors(i,j);
        }
        alpha /= real(nsamples);
        for (int j = 0; j < L && !replaced_gaussian; j++) {
            if (alpha[j] < alpha_min) {
                // alpha[j] is too small: this Gaussian is replaced with a new
                // one, so that it hopefully covers the data better.
                replaceGaussian(j);
                replaced_gaussian = true;
            }
        }
    }
    return replaced_gaussian;
}
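
// Conditional expectation of the target part: since the mixture posterior
// p(j | x) has already been computed by setInput, we have
//
//     E[Y | x] = sum_j p(j | x) E[Y | x, j],
//
// where E[Y | x, j] is mu_y_x(j) for the 'general' type, and simply the
// target part of mu(j) when the covariance is (block-)diagonal.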
void GaussMix::expectation(Vec& result) const
{
    static Vec mu_y;
    result.resize(n_target);
    if (type == "spherical" || type == "diagonal") {
        result.clear();
        for (int j = 0; j < L; j++) {
            result += mu(j).subVec(n_input, n_target) * p_j_x[j];
        }
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::expectation - Marginalization not implemented");
        result.clear();
        bool is_simple = (n_input == 0 && n_margin == 0);
        for (int j = 0; j < L; j++) {
            if (is_simple) {
                mu_y = mu(j).subVec(0, n_target);
            } else {
                mu_y = mu_y_x(j);
            }
            result += mu_y * p_j_x[j];
        }
    } else {
        PLERROR("In GaussMix::expectation - Not implemented for this type");
    }
}

void GaussMix::forget()
{
    stage = 0;
}

void GaussMix::generate(Vec& x) const
{
    generateFromGaussian(x, -1);
}
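
// Sampling: first draw a component j from p(j | x) (unless one is imposed),
// then draw from that Gaussian. For the 'general' type this uses the same
// eigen-decomposition as computeLogLikelihood: with n ~ N(0, I),
//
//     s = mu + sqrt(lambda0) * n
//            + sum_{k < n_eig-1} sqrt(lambda_k - lambda0) * n_k * v_k
//
// has covariance sum_k (lambda_k - lambda0) v_k v_k' + lambda0 * I, i.e. the
// covariance matrix of the Gaussian.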
void GaussMix::generateFromGaussian(Vec& s, int given_gaussian) const {
    static Vec mu_y;
    int j;
    if (given_gaussian < 0) {
        j = multinomial_sample(p_j_x);
    } else {
        j = given_gaussian % alpha.length();
    }
    s.resize(n_target);
    if (type == "spherical") {
        mu_y = mu(j).subVec(n_input, n_target);
        for (int k = 0; k < n_target; k++) {
            s[k] = gaussian_mu_sigma(mu_y[k], sigma[j]);
        }
    } else if (type == "diagonal") {
        mu_y = mu(j).subVec(n_input, n_target);
        for (int k = 0; k < n_target; k++) {
            s[k] = gaussian_mu_sigma(mu_y[k], diags(k + n_input, j));
        }
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::generateFromGaussian - Marginalization not implemented for the general type");
        static Vec norm;
        static real lambda0;
        static int n_eig;
        static Vec eigenvals;
        static Mat eigenvecs;
        if (n_input == 0) {
            n_eig = n_eigen_computed;
            eigenvals = eigenvalues(j);
            eigenvecs = eigenvectors[j];
            mu_y = mu(j).subVec(0, n_target);
        } else {
            n_eig = n_target;
            eigenvals = eigenvalues_y_x(j);
            eigenvecs = eigenvectors_y_x[j];
            mu_y = mu_y_x(j);
        }
        norm.resize(n_eig - 1);
        fill_random_normal(norm);
        real var_min = sigma_min * sigma_min;
        lambda0 = max(var_min, eigenvals[n_eig - 1]);
        s.fill(0);
        for (int k = 0; k < n_eig - 1; k++) {
            s += sqrt(max(var_min, eigenvals[k]) - lambda0) * norm[k] * eigenvecs(k);
        }
        norm.resize(n_target);
        fill_random_normal(norm);
        s += norm * sqrt(lambda0);
        s += mu_y;
    } else {
        PLERROR("In GaussMix::generate - Unknown mixture type");
    }
}

int GaussMix::getNEigenComputed() const {
    return n_eigen_computed;
}

Mat GaussMix::getEigenvectors(int j) const {
    return eigenvectors[j];
}

Vec GaussMix::getEigenvals(int j) const {
    return eigenvalues(j);
}
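
// Standard (Lloyd) K-means, used to initialize the centers before EM:
// pick nclust distinct samples as initial centers, then alternately assign
// each sample to its closest center and move each center to the weighted
// mean of its assigned samples, until the assignment is stable or maxit
// iterations have been performed.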
void GaussMix::kmeans(VMat samples, int nclust, TVec<int>& clust_idx, Mat& clust, int maxit)
{
    int nsamples = samples.length();
    Mat newclust(nclust, samples->inputsize());
    clust.resize(nclust, samples->inputsize());
    clust_idx.resize(nsamples);

    Vec input(samples->inputsize());
    Vec target(samples->targetsize());
    real weight;

    Vec samples_per_cluster(nclust);
    TVec<int> old_clust_idx(nsamples);
    bool ok = false;

    // Initialize the clusters with nclust distinct samples drawn at random.
    Vec start_idx(nclust, -1.0);
    int val;
    for (int i = 0; i < nclust; i++)
    {
        bool ok = false;
        while (!ok)
        {
            ok = true;
            val = rand() % nsamples;
            for (int j = 0; j < nclust && start_idx[j] != -1.0; j++)
                if (start_idx[j] == val)
                {
                    ok = false;
                    break;
                }
        }
        start_idx[i] = val;
        samples->getExample(val, input, target, weight);
        clust(i) << input;
    }

    // Iterate until the assignment no longer changes, or maxit is reached.
    while (!ok && maxit--)
    {
        newclust.clear();
        samples_per_cluster.clear();
        old_clust_idx << clust_idx;
        for (int i = 0; i < nsamples; i++)
        {
            samples->getExample(i, input, target, weight);
            real dist, bestdist = 1E300;
            int bestclust = 0;
            if (nclust > 1)
                for (int j = 0; j < nclust; j++)
                    if ((dist = pownorm(clust(j) - input)) < bestdist)
                    {
                        bestdist = dist;
                        bestclust = j;
                    }
            clust_idx[i] = bestclust;
            samples_per_cluster[bestclust] += weight;
            newclust(bestclust) += input * weight;
        }
        for (int i = 0; i < nclust; i++)
            if (samples_per_cluster[i] > 0)
                newclust(i) /= samples_per_cluster[i];
        clust << newclust;
        ok = true;
        if (nclust > 1)
            for (int i = 0; i < nsamples; i++)
                if (old_clust_idx[i] != clust_idx[i])
                {
                    ok = false;
                    break;
                }
    }
}

real GaussMix::log_density(const Vec& y) const
{
#ifdef BOUNDCHECK
    if (stage == 0)
        PLERROR("GaussMix::log_density was called while model was not trained");
#endif
    log_likelihood_dens.resize(L);
    // log_likelihood_dens[j] = log(p(y | j) * p(j | x)).
    for (int j = 0; j < L; j++) {
        log_likelihood_dens[j] = computeLogLikelihood(y, j) + log_p_j_x[j];
#ifdef BOUNDCHECK
        if (isnan(log_likelihood_dens[j])) {
            PLWARNING("In GaussMix::log_density - computeLogLikelihood returned nan");
        }
#endif
    }
    return logadd(log_likelihood_dens);
}

void GaussMix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(sample_row, copies);
    deepCopyField(log_likelihood_post, copies);
    deepCopyField(log_likelihood_dens, copies);
    deepCopyField(x_minus_mu_x, copies);
    deepCopyField(mu_target, copies);
    deepCopyField(eigenvalues, copies);
    deepCopyField(eigenvectors, copies);
    deepCopyField(diags, copies);
    deepCopyField(log_coeff, copies);
    deepCopyField(posteriors, copies);
    deepCopyField(initial_weights, copies);
    deepCopyField(updated_weights, copies);
    deepCopyField(alpha, copies);
    deepCopyField(mu, copies);
    deepCopyField(sigma, copies);
    PLERROR("In GaussMix::makeDeepCopyFromShallowCopy - Need to complete the implementation");
}
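
// Precompute, for each Gaussian, the log of the normalization constant
//
//     log_coeff[j] = -0.5 * (D * log(2*pi) + log det(C_j)),
//
// where, for the 'general' type, log det(C_j) is the sum of the log
// eigenvalues, the D - n_eigen_computed missing ones being set equal to the
// smallest computed eigenvalue (and all of them clamped to sigma_min^2).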
void GaussMix::precomputeStuff() {
    if (type == "spherical") {
        // Nothing to do.
    } else if (type == "diagonal") {
        // Nothing to do.
    } else if (type == "general") {
        // Compute the log of the determinant, then the constant coefficient.
        real var_min = sigma_min * sigma_min;
        for (int j = 0; j < L; j++) {
            real log_det = 0;
            for (int k = 0; k < n_eigen_computed; k++) {
#ifdef BOUNDCHECK
                if (var_min < epsilon && eigenvalues(j,k) < epsilon) {
                    PLWARNING("In GaussMix::precomputeStuff - An eigenvalue is near zero");
                }
#endif
                log_det += log(max(var_min, eigenvalues(j,k)));
            }
            if (D - n_eigen_computed > 0) {
                // The D - n_eigen_computed remaining eigenvalues are all taken
                // equal to the last one that was computed.
                log_det += log(max(var_min, eigenvalues(j, n_eigen_computed - 1))) * (D - n_eigen_computed);
            }
            log_coeff[j] = -0.5 * (D * log(2 * M_PI) + log_det);
        }
    } else {
        PLERROR("In GaussMix::precomputeStuff - Not implemented for this type");
    }
}

void GaussMix::replaceGaussian(int j) {
    // Find the Gaussian with the highest weight.
    int high = 0;
    for (int k = 1; k < L; k++) {
        if (alpha[k] > alpha[high]) {
            high = k;
        }
    }
    // Generate the new center from this Gaussian.
    Vec new_center = mu(j);
    generateFromGaussian(new_center, high);
    // Copy the covariance.
    if (type == "spherical") {
        sigma[j] = sigma[high];
    } else if (type == "diagonal") {
        diags.column(j) << diags.column(high);
    } else if (type == "general") {
        eigenvalues(j) << eigenvalues(high);
        eigenvectors[j] << eigenvectors[high];
        log_coeff[j] = log_coeff[high];
    } else {
        PLERROR("In GaussMix::replaceGaussian - Not implemented for this type");
    }
    // The new Gaussian takes half of the weight of the copied one.
    alpha[high] /= 2.0;
    alpha[j] = alpha[high];
}

void GaussMix::resetGenerator(long g_seed) const
{
    manual_seed(g_seed);
}

void GaussMix::resizeStuffBeforeTraining() {
    nsamples = train_set->length();
    D = train_set->inputsize();
    initial_weights.resize(nsamples);
    mu.resize(L, D);
    posteriors.resize(nsamples, L);
    updated_weights.resize(L, nsamples);
    diags.resize(0,0);
    eigenvalues.resize(0,0);
    eigenvalues_x.resize(0,0);
    eigenvalues_y_x.resize(0,0);
    if (type == "diagonal") {
        diags.resize(D, L);
    } else if (type == "general") {
        if (n_eigen == -1 || n_eigen == D) {
            // We need to compute all the eigenvectors.
            n_eigen_computed = D;
        } else {
            n_eigen_computed = n_eigen + 1;
        }
        for (int i = 0; i < L; i++) {
            eigenvectors[i].resize(n_eigen_computed, D);
        }
        eigenvalues.resize(L, n_eigen_computed);
    } else if (type == "spherical") {
        // Nothing to resize.
    } else {
        PLERROR("In GaussMix::resizeStuffBeforeTraining - Not implemented for this type");
    }
}
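
// Condition the mixture on an input x. For each Gaussian, with the joint
// covariance split into blocks C_xx, C_xy, C_yx, C_yy, the standard Gaussian
// conditioning formula gives
//
//     E[Y | x, j] = mu_y + C_yx C_xx^{-1} (x - mu_x),
//
// where C_yx C_xx^{-1} has been precomputed in y_x_mat[j] (see
// updateFromConditionalSorting). The mixture weights are then updated to
//
//     p(j | x) = alpha_j p(x | j) / sum_l alpha_l p(x | l).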
void GaussMix::setInput(const Vec& input) const {
    inherited::setInput(input);
    // Nothing to do if the model is not trained or there is no input part.
    if (stage == 0 || n_input == 0) {
        return;
    }
    if (type == "spherical") {
        // Nothing to precompute.
    } else if (type == "diagonal") {
        // Nothing to precompute.
    } else if (type == "general") {
        // Compute E[Y | x, j] for each Gaussian.
        x_minus_mu_x.resize(n_input);
        for (int j = 0; j < L; j++) {
            x_minus_mu_x << input;
            x_minus_mu_x -= mu(j).subVec(0, n_input);
            mu_target = mu_y_x(j);
            product(mu_target, y_x_mat[j], x_minus_mu_x);
            mu_target += mu(j).subVec(n_input, n_target);
        }
    } else {
        PLERROR("In GaussMix::setInput - Not implemented for this type");
    }
    for (int j = 0; j < L; j++) {
        log_p_x_j_alphaj[j] = computeLogLikelihood(input, j, true) + log(alpha[j]);
    }
    real log_p_x = logadd(log_p_x_j_alphaj);
    real t;
    for (int j = 0; j < L; j++) {
        t = log_p_x_j_alphaj[j] - log_p_x;
        log_p_j_x[j] = t;
        p_j_x[j] = exp(t);
    }
}
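
// EM training. The first stage initializes the posteriors with hard K-means
// assignments; every subsequent stage alternates an E-step (computePosteriors)
// with an M-step (computeWeights, computeMeansAndCovariances), replacing on
// the fly any Gaussian whose weight falls below alpha_min.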
void GaussMix::train()
{
    // Check whether training is needed.
    if (stage >= nstages) {
        PLWARNING("In GaussMix::train - The learner is already trained");
        return;
    }
    // Check that a training set has been supplied.
    if (!train_set) {
        PLERROR("In GaussMix::train - No training set specified");
    }

    // Training is always performed on the full joint distribution.
    TVec<int> old_flags;
    bool restore_flags = ensureFullJointDistribution(old_flags);

    // Resize everything according to the training set.
    resizeStuffBeforeTraining();

    if (stage == 0) {
        // Copy the sample weights from the training set (uniform if none).
        if (train_set->weightsize() <= 0) {
            initial_weights.fill(1);
        } else {
            Vec tmp1;
            Vec tmp2;
            real w;
            for (int i = 0; i < nsamples; i++) {
                train_set->getExample(i, tmp1, tmp2, w);
                initial_weights[i] = w;
            }
        }
        // Initialize the centers with K-means, and set the posteriors to
        // hard cluster memberships.
        TVec<int> clust_idx;
        kmeans(train_set, L, clust_idx, mu, kmeans_iterations);
        posteriors.fill(0);
        for (int i = 0; i < nsamples; i++) {
            posteriors(i, clust_idx[i]) = 1;
        }
        updateSampleWeights();
        computeWeights();
        computeMeansAndCovariances();
        precomputeStuff();
    }

    Vec sample(D);
    bool replaced_gaussian = false;
    ProgressBar* pb = 0;
    int n_steps = nstages - stage;
    if (report_progress) {
        pb = new ProgressBar("Training GaussMix", n_steps);
    }
    while (stage < nstages) {
        // E-step, repeated as long as a Gaussian had to be replaced.
        do {
            computePosteriors();
            replaced_gaussian = computeWeights();
        } while (replaced_gaussian);
        // M-step.
        computeMeansAndCovariances();
        precomputeStuff();
        stage++;
        if (pb) {
            pb->update(n_steps - nstages + stage);
        }
    }
    if (pb)
        delete pb;

    // Restore the conditional flags if they were modified for training.
    if (restore_flags) {
        setConditionalFlags(old_flags);
    }

    // Reset the conditional quantities; they are set up again by build()
    // (and recomputed in setInput when there is an input part).
    log_p_j_x.clear();

    build();
}
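
// Recompute everything that depends on the input/target split. For the
// 'general' type this rebuilds the full covariance from its eigenvectors,
// inverts the input block C_xx through its own eigen-decomposition, and
// derives the conditional covariance
//
//     C_{y|x} = C_yy - C_yx C_xx^{-1} C_xy,
//
// whose eigen-decomposition is stored for use in computeLogLikelihood and
// generateFromGaussian.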
void GaussMix::updateFromConditionalSorting() {
    static Mat inv_cov_x, tmp_cov, work_mat1, work_mat2;
    static Vec eigenvals;
    sortFromFlags(mu);
    if (type == "spherical") {
        // Nothing to do.
    } else if (type == "diagonal") {
        sortFromFlags(diags, false, true);
    } else if (type == "general") {
        if (n_margin > 0) {
            PLERROR("In GaussMix::updateFromConditionalSorting - Marginalization not implemented for the general type");
        }
        if ((n_input == 0 && n_margin == 0) || stage == 0) {
            // Nothing to do: either there is no conditioning, or the model
            // has not been trained yet.
            return;
        }
        tmp_cov.resize(D, D);
        real var_min = square(sigma_min);
        real lambda0;
        eigenvalues_x.resize(L, n_input);
        eigenvalues_y_x.resize(L, n_target);
        inv_cov_x.resize(n_input, n_input);
        mu_y_x.resize(L, n_target);
        work_mat1.resize(n_target, n_input);
        work_mat2.resize(n_target, n_target);
        for (int j = 0; j < L; j++) {
            sortFromFlags(eigenvectors[j]);
            // Rebuild the full covariance matrix from its eigen-decomposition:
            // C = sum_k (lambda_k - lambda0) v_k v_k' + lambda0 I, where
            // lambda0 stands in for the eigenvalues that were not computed.
            full_cov[j].resize(D, D);
            tmp_cov = full_cov[j];
            eigenvals = eigenvalues(j);
            lambda0 = max(var_min, eigenvals[n_eigen_computed - 1]);
            tmp_cov.clear();
            for (int k = 0; k < n_eigen_computed - 1; k++) {
                tmp_cov += (max(var_min, eigenvals[k]) - lambda0) *
                    product(columnmatrix(eigenvectors[j](k)),
                            rowmatrix(eigenvectors[j](k)));
            }
            for (int i = 0; i < D; i++) {
                tmp_cov(i,i) += lambda0;
            }
            // Extract the covariance of the input part.
            cov_x[j] = full_cov[j].subMat(0, 0, n_input, n_input);
            // Compute its eigen-decomposition...
            eigenvectors_x[j].resize(n_input, n_input);
            eigenvals = eigenvalues_x(j);
            eigenVecOfSymmMat(cov_x[j], n_input, eigenvals, eigenvectors_x[j]);
            // ... and use it to compute its inverse.
            inv_cov_x.clear();
            lambda0 = max(var_min, eigenvals[n_input - 1]);
            real one_over_lambda0 = 1 / lambda0;
            for (int k = 0; k < n_input - 1; k++) {
                inv_cov_x += (1 / max(var_min, eigenvals[k]) - one_over_lambda0) *
                    product(columnmatrix(eigenvectors_x[j](k)),
                            rowmatrix(eigenvectors_x[j](k)));
            }
            for (int i = 0; i < n_input; i++) {
                inv_cov_x(i,i) += one_over_lambda0;
            }
            // Compute the conditional covariance C_{y|x}.
            cov_y_x[j].resize(n_target, n_target);
            cov_y_x[j] << full_cov[j].subMat(n_input, n_input, n_target, n_target);
            tmp_cov = full_cov[j].subMat(n_input, 0, n_target, n_input);
            product(work_mat1, tmp_cov, inv_cov_x);
            productTranspose(work_mat2, work_mat1, tmp_cov);
            cov_y_x[j] -= work_mat2;
            y_x_mat[j].resize(n_target, n_input);
            y_x_mat[j] << work_mat1;
            // Compute the eigen-decomposition of the conditional covariance.
            eigenvectors_y_x[j].resize(n_target, n_target);
            eigenvals = eigenvalues_y_x(j);
            eigenVecOfSymmMat(cov_y_x[j], n_target, eigenvals, eigenvectors_y_x[j]);
        }
    } else {
        PLERROR("In GaussMix::updateFromConditionalSorting - Not implemented for this type");
    }
}

void GaussMix::updateSampleWeights() {
    // w_ij = initial_weight_i * P(j | s_i).
    for (int j = 0; j < L; j++) {
        updated_weights(j) << initial_weights;
        columnmatrix(updated_weights(j)) *= posteriors.column(j);
    }
}

real GaussMix::survival_fn(const Vec& x) const
{
    PLERROR("survival_fn not implemented for GaussMix");
    return 0.0;
}

real GaussMix::cdf(const Vec& x) const
{
    PLERROR("cdf not implemented for GaussMix");
    return 0.0;
}

void GaussMix::variance(Mat& cov) const
{
    PLERROR("variance not implemented for GaussMix");
}

} // end of namespace PLearn