#include <cmath>   // for M_PI

#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include "GaussMix.h"
#include <plearn/math/pl_erf.h>
#include <plearn/math/plapack.h>
#include <plearn/math/random.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

GaussMix::GaussMix()
    : PDistribution(),
      alpha_min(1e-5),
      epsilon(1e-6),
      kmeans_iterations(3),
      L(1),
      n_eigen(-1),
      sigma_min(1e-5),
      type("spherical")
{
    nstages = 10;
}

PLEARN_IMPLEMENT_OBJECT(GaussMix,
    "Gaussian mixture, either set non-parametrically or trained by EM.",
    "GaussMix implements a mixture of L Gaussians.\n"
    "There are 4 possible parametrization types:\n"
    " - spherical : Gaussians have covar matrix = diag(sigma). Parameter used: sigma.\n"
    " - diagonal  : Gaussians have covar matrix = diag(sigma_i). Parameter used: diags.\n"
    " - general   : Gaussians have an unconstrained covariance matrix.\n"
    "               The user specifies the number 'n_eigen' of eigenvectors kept when\n"
    "               decomposing the covariance matrix. The remaining eigenvectors are\n"
    "               considered as having a fixed eigenvalue equal to the next highest\n"
    "               eigenvalue in the decomposition.\n"
    " - factor    : (not implemented!) as in the general case, the Gaussians are defined\n"
    "               with K<=D vectors (through a KxD matrix 'V'), but these need not be\n"
    "               orthogonal/orthonormal.\n"
    "               The covariance matrix used will be V(t)V + psi, with psi a D-vector\n"
    "               (through parameter diags).\n"
    "Two parameters are common to all 4 types:\n"
    " - alpha : the mixing weight of each Gaussian\n"
    " - mu    : their centers\n"
);
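
// The density implemented by this class is the mixture
//
//     p(y) = sum_{j=1}^L alpha_j * N(y; mu_j, C_j),
//
// where the form of the covariance matrices C_j depends on the 'type' option
// above.
//
// A minimal usage sketch (hypothetical caller-side code, assuming the usual
// PLearn PLearner workflow; 'data' stands for any VMat whose inputsize is the
// data dimension):
//
//     PP<GaussMix> gm = new GaussMix();
//     gm->L = 3;                         // mixture of 3 Gaussians
//     gm->type = "general";
//     gm->build();
//     gm->setTrainingSet(data);          // inherited from PLearner
//     gm->train();                       // EM, for 'nstages' stages
//     real ld = gm->log_density(point);  // log p(point)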

void GaussMix::declareOptions(OptionList& ol)
{
    // Build options.

    declareOption(ol, "L", &GaussMix::L, OptionBase::buildoption,
        "Number of Gaussians in the mixture.");

    declareOption(ol, "type", &GaussMix::type, OptionBase::buildoption,
        "A string: 'spherical', 'diagonal', 'general' or 'factor'.\n"
        "This is the type of covariance matrix for each Gaussian.\n"
        " - spherical : spherical covariance matrix sigma * I\n"
        " - diagonal  : diagonal covariance matrix\n"
        " - general   : unconstrained covariance matrix (defined by its eigenvectors)\n"
        " - factor    : (not implemented) represented by Ks[i] principal components\n");

    declareOption(ol, "n_eigen", &GaussMix::n_eigen, OptionBase::buildoption,
        "If type == 'general', the number of eigenvectors used for the covariance matrix.\n"
        "The remaining eigenvectors will be given an eigenvalue equal to the next highest\n"
        "eigenvalue. If set to -1, all eigenvectors will be kept.");

    declareOption(ol, "alpha_min", &GaussMix::alpha_min, OptionBase::buildoption,
        "The minimum weight for each Gaussian.");

    declareOption(ol, "sigma_min", &GaussMix::sigma_min, OptionBase::buildoption,
        "The minimum standard deviation allowed.");

    declareOption(ol, "epsilon", &GaussMix::epsilon, OptionBase::buildoption,
        "A small number to check for near-zero probabilities.");

    declareOption(ol, "kmeans_iterations", &GaussMix::kmeans_iterations, OptionBase::buildoption,
        "The maximum number of iterations performed in the initial K-means algorithm.");

    // Learnt options.

    declareOption(ol, "alpha", &GaussMix::alpha, OptionBase::learntoption,
        "Coefficients of each Gaussian.\n"
        "They sum to 1 and are positive: they can be interpreted as priors P(Gaussian j).\n");

    declareOption(ol, "eigenvalues", &GaussMix::eigenvalues, OptionBase::learntoption,
        "The eigenvalues associated with the principal eigenvectors (element (j,k) is the\n"
        "k-th eigenvalue of the j-th Gaussian).");

    declareOption(ol, "eigenvectors", &GaussMix::eigenvectors, OptionBase::learntoption,
        "The principal eigenvectors of each Gaussian (for the 'general' type). They are\n"
        "stored in the rows of the matrices.");

    declareOption(ol, "D", &GaussMix::D, OptionBase::learntoption,
        "Number of dimensions in input space.");

    declareOption(ol, "diags", &GaussMix::diags, OptionBase::learntoption,
        "The element (i,j) is the stddev of Gaussian j on the i-th dimension.");

    declareOption(ol, "log_coeff", &GaussMix::log_coeff, OptionBase::learntoption,
        "The log of the constant part in the p(x) equation: log(1/sqrt((2*pi)^D * Det(C))).");

    declareOption(ol, "log_p_j_x", &GaussMix::log_p_j_x, OptionBase::learntoption,
        "The log of p(j|x), where x is the input part.");

    declareOption(ol, "log_p_x_j_alphaj", &GaussMix::log_p_x_j_alphaj, OptionBase::learntoption,
        "The log of p(x|j) * alpha_j, where x is the input part.");

    declareOption(ol, "mu", &GaussMix::mu, OptionBase::learntoption,
        "These are the centers of each Gaussian, stored in rows.");

    declareOption(ol, "mu_y_x", &GaussMix::mu_y_x, OptionBase::learntoption,
        "The expectation E[Y | x] for each Gaussian.");

    declareOption(ol, "n_eigen_computed", &GaussMix::n_eigen_computed, OptionBase::learntoption,
        "The actual number of principal components computed when type == 'general'.");

    declareOption(ol, "nsamples", &GaussMix::nsamples, OptionBase::learntoption,
        "The number of samples in the training set.");

    declareOption(ol, "p_j_x", &GaussMix::p_j_x, OptionBase::learntoption,
        "The probability p(j|x), where x is the input part (computed as exp(log_p_j_x)).");

    declareOption(ol, "sigma", &GaussMix::sigma, OptionBase::learntoption,
        "The standard deviation in all directions, for type == 'spherical'.\n");

    // Now call the parent class' declareOptions.
    inherited::declareOptions(ol);
}

void GaussMix::build()
{
    inherited::build();
    build_();
}

void GaussMix::build_()
{
    alpha.resize(L);
    log_p_x_j_alphaj.resize(L);
    log_p_j_x.resize(L);
    p_j_x.resize(L);
    cov_x.resize(0);
    cov_y_x.resize(0);
    eigenvectors.resize(0);
    eigenvectors_x.resize(0);
    eigenvectors_y_x.resize(0);
    full_cov.resize(0);
    log_coeff.resize(0);
    sigma.resize(0);
    y_x_mat.resize(0);
    if (type == "spherical") {
        sigma.resize(L);
    } else if (type == "diagonal") {
        // No additional storage needed.
    } else if (type == "general") {
        cov_x.resize(L);
        cov_y_x.resize(L);
        eigenvectors.resize(L);
        eigenvectors_x.resize(L);
        eigenvectors_y_x.resize(L);
        full_cov.resize(L);
        log_coeff.resize(L);
        y_x_mat.resize(L);
    } else {
        PLERROR("In GaussMix::build_ - Not implemented for this type");
    }
    if (n_input == 0) {
        // No input part: p(j | x) reduces to the prior p(j) = alpha_j.
        for (int j = 0; j < L; j++) {
            log_p_j_x[j] = log(alpha[j]);
        }
        p_j_x << alpha;
    }
    PDistribution::finishConditionalBuild();
}
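
// M-step helper: re-estimate each Gaussian from the current posteriors. For
// Gaussian j, the statistics are computed over the training set weighted by
// w_ij = initial_weight_i * P(j | s_i) (see updateSampleWeights), i.e.
//
//     mu_j = sum_i w_ij s_i / sum_i w_ij,
//     C_j  = sum_i w_ij (s_i - mu_j)(s_i - mu_j)' / sum_i w_ij
//
// (up to the exact normalization convention used by
// computeInputMeanAndVariance / computeInputMeanAndCovar). C_j is then
// reduced according to 'type': mean variance for 'spherical', its diagonal
// for 'diagonal', its eigen-decomposition for 'general'.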
void GaussMix::computeMeansAndCovariances() {
    VMat weighted_train_set;
    Vec sum_columns(L);
    columnSum(posteriors, sum_columns);
    for (int j = 0; j < L; j++) {
        if (sum_columns[j] < epsilon) {
            PLWARNING("In GaussMix::computeMeansAndCovariances - A posterior is almost zero");
        }
        VMat weights(columnmatrix(updated_weights(j)));
        weighted_train_set = new ConcatColumnsVMatrix(
            new SubVMatrix(train_set, 0, 0, nsamples, D), weights);
        weighted_train_set->defineSizes(D, 0, 1);
        if (type == "spherical") {
            Vec variance(D);
            Vec center = mu(j);
            computeInputMeanAndVariance(weighted_train_set, center, variance);
            sigma[j] = sqrt(mean(variance));
#ifdef BOUNDCHECK
            if (isnan(sigma[j])) {
                PLWARNING("In GaussMix::computeMeansAndCovariances - A standard deviation is nan");
            }
#endif
        } else if (type == "diagonal") {
            Vec variance(D);
            Vec center = mu(j);
            computeInputMeanAndVariance(weighted_train_set, center, variance);
            for (int i = 0; i < D; i++) {
                diags(i,j) = sqrt(variance[i]);
            }
        } else if (type == "general") {
            Mat covar(D, D);
            Vec center = mu(j);
            computeInputMeanAndCovar(weighted_train_set, center, covar);
            Vec eigenvals = eigenvalues(j);
            eigenVecOfSymmMat(covar, n_eigen_computed, eigenvals, eigenvectors[j]);
        } else {
            PLERROR("In GaussMix::computeMeansAndCovariances - Not implemented for this type of Gaussian");
        }
    }
}
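
// Log-density computation. For the 'general' type, with eigen-decomposition
// C = sum_k lambda_k v_k v_k' and only the leading n_eig - 1 eigenvalues kept
// (all remaining ones replaced by lambda0 = lambda_{n_eig}), the quadratic
// form in the Gaussian exponent can be written
//
//     (y-mu)' C^{-1} (y-mu) = ||y-mu||^2 / lambda0
//         + sum_{k < n_eig-1} (1/lambda_k - 1/lambda0) (v_k . (y-mu))^2,
//
// which is what the code below evaluates (after clamping each lambda_k to
// sigma_min^2). log_coeff[j] holds the normalization constant computed in
// precomputeStuff().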
real GaussMix::computeLogLikelihood(const Vec& y, int j, bool is_input) const {
    static real p, sig;
    static Vec mu_y;
    static int size;
    static int start;
    static Vec eigenvals;
    static Mat eigenvecs;
    if (type == "spherical" || type == "diagonal") {
        // Easy case: the covariance matrix is diagonal.
        if (is_input) {
            mu_y = mu(j).subVec(0, n_input);
            size = n_input;
            start = 0;
        } else {
            mu_y = mu(j).subVec(n_input, n_target);
            size = n_target;
            start = n_input;
        }
    }
    if (type == "spherical") {
        p = 0.0;
        for (int k = 0; k < size; k++) {
            p += gauss_log_density_stddev(y[k], mu_y[k], max(sigma_min, sigma[j]));
#ifdef BOUNDCHECK
            if (isnan(p)) {
                PLWARNING("In GaussMix::computeLogLikelihood - Density is nan");
            }
#endif
        }
        return p;
    } else if (type == "diagonal") {
        p = 0.0;
        for (int k = 0; k < size; k++) {
            sig = diags(k + start, j);
            p += gauss_log_density_stddev(y[k], mu_y[k], max(sigma_min, sig));
        }
        return p;
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::computeLogLikelihood - Marginalization not implemented for the general type");
        static real t;
        static Vec y_centered;
        static real squared_norm_y_centered;
        static real one_over_lambda0;
        static real lambda0;
        static real lambda;
        static int n_eig;
        if (is_input) {
            mu_y = mu(j).subVec(0, n_input);
            size = n_input;
            eigenvals = eigenvalues_x(j);
            eigenvecs = eigenvectors_x[j];
            n_eig = n_input;
        } else {
            size = n_target;
            if (n_input > 0) {
                // Conditional density p(y | x, j).
                mu_y = mu_y_x(j);
                eigenvals = eigenvalues_y_x(j);
                eigenvecs = eigenvectors_y_x[j];
                n_eig = n_target;
            } else {
                mu_y = mu(j);
                eigenvals = eigenvalues(j);
                eigenvecs = eigenvectors[j];
                n_eig = n_eigen_computed;
            }
        }
        t = log_coeff[j];
        y_centered.resize(size);
        y_centered << y;
        y_centered -= mu_y;
        squared_norm_y_centered = pownorm(y_centered);
        real var_min = sigma_min * sigma_min;
        lambda0 = max(var_min, eigenvals[n_eig - 1]);
#ifdef BOUNDCHECK
        if (lambda0 <= 0)
            PLERROR("GaussMix::computeLogLikelihood(y,%d), var_min=%g, lambda0=%g\n", j, var_min, lambda0);
#endif
        one_over_lambda0 = 1.0 / lambda0;
        // Common term of the quadratic form: ||y - mu||^2 / lambda0.
        t -= 0.5 * one_over_lambda0 * squared_norm_y_centered;
        // Corrections along the leading eigenvector directions.
        for (int k = 0; k < n_eig - 1; k++) {
            lambda = max(var_min, eigenvals[k]);
#ifdef BOUNDCHECK
            if (lambda <= 0)
                PLERROR("GaussMix::computeLogLikelihood(y,%d), var_min=%g, lambda(%d)=%g\n", j, var_min, k, lambda);
#endif
            if (lambda > lambda0)
                t -= 0.5 * (1.0 / lambda - one_over_lambda0) * square(dot(eigenvecs(k), y_centered));
        }
        return t;
    } else {
        PLERROR("In GaussMix::computeLogLikelihood - Not implemented for this type of Gaussian");
        return 0;
    }
}
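
// E-step: compute the posterior of each Gaussian on each training sample,
//
//     P(j | s_i) = alpha_j p(s_i | j) / sum_l alpha_l p(s_i | l),
//
// working in the log domain (with logadd) to avoid underflow.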
void GaussMix::computePosteriors() {
    sample_row.resize(D);
    log_likelihood_post.resize(L);
    real log_sum_likelihood;
    for (int i = 0; i < nsamples; i++) {
        train_set->getSubRow(i, 0, sample_row);
        // First compute the likelihood p(s_i | j) * alpha_j.
        for (int j = 0; j < L; j++) {
            log_likelihood_post[j] = computeLogLikelihood(sample_row, j) + log(alpha[j]);
#ifdef BOUNDCHECK
            if (isnan(log_likelihood_post[j])) {
                PLWARNING("In GaussMix::computePosteriors - computeLogLikelihood returned nan");
            }
#endif
        }
        log_sum_likelihood = logadd(log_likelihood_post);
        for (int j = 0; j < L; j++) {
            // Normalize to obtain the posterior P(j | s_i).
            posteriors(i, j) = exp(log_likelihood_post[j] - log_sum_likelihood);
        }
    }
    // Update the sample weights according to the new posteriors.
    updateSampleWeights();
}

bool GaussMix::computeWeights() {
    bool replaced_gaussian = false;
    if (L == 1)
        alpha[0] = 1;
    else {
        alpha.fill(0);
        for (int i = 0; i < nsamples; i++) {
            for (int j = 0; j < L; j++)
                alpha[j] += posteriors(i,j);
        }
        alpha /= real(nsamples);
        for (int j = 0; j < L && !replaced_gaussian; j++) {
            if (alpha[j] < alpha_min) {
                // alpha[j] is too small: this Gaussian is replaced with a new
                // one, so that it hopefully covers the data better.
                replaceGaussian(j);
                replaced_gaussian = true;
            }
        }
    }
    return replaced_gaussian;
}
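
// Conditional expectation of the target part: since the mixture posterior
// p(j | x) has already been computed by setInput, we have
//
//     E[Y | x] = sum_j p(j | x) E[Y | x, j],
//
// where E[Y | x, j] is mu_y_x(j) for the 'general' type, and simply the
// target part of mu(j) when the covariance is (block-)diagonal.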
void GaussMix::expectation(Vec& result) const
{
    static Vec mu_y;
    result.resize(n_target);
    if (type == "spherical" || type == "diagonal") {
        result.clear();
        for (int j = 0; j < L; j++) {
            result += mu(j).subVec(n_input, n_target) * p_j_x[j];
        }
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::expectation - Marginalization not implemented");
        result.clear();
        bool is_simple = (n_input == 0 && n_margin == 0);
        for (int j = 0; j < L; j++) {
            if (is_simple) {
                mu_y = mu(j).subVec(0, n_target);
            } else {
                mu_y = mu_y_x(j);
            }
            result += mu_y * p_j_x[j];
        }
    } else {
        PLERROR("In GaussMix::expectation - Not implemented for this type");
    }
}

void GaussMix::forget()
{
    stage = 0;
}

void GaussMix::generate(Vec& x) const
{
    generateFromGaussian(x, -1);
}
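
// Sampling: first draw a component j from p(j | x) (unless one is imposed),
// then draw from that Gaussian. For the 'general' type this uses the same
// eigen-decomposition as computeLogLikelihood: with n ~ N(0, I),
//
//     s = mu + sqrt(lambda0) * n
//            + sum_{k < n_eig-1} sqrt(lambda_k - lambda0) * n_k * v_k
//
// has covariance sum_k (lambda_k - lambda0) v_k v_k' + lambda0 * I, i.e. the
// covariance matrix of the Gaussian.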
void GaussMix::generateFromGaussian(Vec& s, int given_gaussian) const {
    static Vec mu_y;
    int j;
    if (given_gaussian < 0) {
        j = multinomial_sample(p_j_x);
    } else {
        j = given_gaussian % alpha.length();
    }
    s.resize(n_target);
    if (type == "spherical") {
        mu_y = mu(j).subVec(n_input, n_target);
        for (int k = 0; k < n_target; k++) {
            s[k] = gaussian_mu_sigma(mu_y[k], sigma[j]);
        }
    } else if (type == "diagonal") {
        mu_y = mu(j).subVec(n_input, n_target);
        for (int k = 0; k < n_target; k++) {
            s[k] = gaussian_mu_sigma(mu_y[k], diags(k + n_input, j));
        }
    } else if (type == "general") {
        if (n_margin > 0)
            PLERROR("In GaussMix::generateFromGaussian - Marginalization not implemented for the general type");
        static Vec norm;
        static real lambda0;
        static int n_eig;
        static Vec eigenvals;
        static Mat eigenvecs;
        if (n_input == 0) {
            n_eig = n_eigen_computed;
            eigenvals = eigenvalues(j);
            eigenvecs = eigenvectors[j];
            mu_y = mu(j).subVec(0, n_target);
        } else {
            n_eig = n_target;
            eigenvals = eigenvalues_y_x(j);
            eigenvecs = eigenvectors_y_x[j];
            mu_y = mu_y_x(j);
        }
        norm.resize(n_eig - 1);
        fill_random_normal(norm);
        real var_min = sigma_min * sigma_min;
        lambda0 = max(var_min, eigenvals[n_eig - 1]);
        s.fill(0);
        for (int k = 0; k < n_eig - 1; k++) {
            s += sqrt(max(var_min, eigenvals[k]) - lambda0) * norm[k] * eigenvecs(k);
        }
        norm.resize(n_target);
        fill_random_normal(norm);
        s += norm * sqrt(lambda0);
        s += mu_y;
    } else {
        PLERROR("In GaussMix::generate - Unknown mixture type");
    }
}

int GaussMix::getNEigenComputed() const {
    return n_eigen_computed;
}

Mat GaussMix::getEigenvectors(int j) const {
    return eigenvectors[j];
}

Vec GaussMix::getEigenvals(int j) const {
    return eigenvalues(j);
}
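
// Standard (Lloyd) K-means, used to initialize the centers before EM:
// pick nclust distinct samples as initial centers, then alternately assign
// each sample to its closest center and move each center to the weighted
// mean of its assigned samples, until the assignment is stable or maxit
// iterations have been performed.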
void GaussMix::kmeans(VMat samples, int nclust, TVec<int>& clust_idx, Mat& clust, int maxit)
{
    int nsamples = samples.length();
    Mat newclust(nclust, samples->inputsize());
    clust.resize(nclust, samples->inputsize());
    clust_idx.resize(nsamples);

    Vec input(samples->inputsize());
    Vec target(samples->targetsize());
    real weight;

    Vec samples_per_cluster(nclust);
    TVec<int> old_clust_idx(nsamples);
    bool ok = false;

    // Initialize the clusters with nclust distinct samples drawn at random.
    Vec start_idx(nclust, -1.0);
    int val;
    for (int i = 0; i < nclust; i++)
    {
        bool ok = false;
        while (!ok)
        {
            ok = true;
            val = rand() % nsamples;
            for (int j = 0; j < nclust && start_idx[j] != -1.0; j++)
                if (start_idx[j] == val)
                {
                    ok = false;
                    break;
                }
        }
        start_idx[i] = val;
        samples->getExample(val, input, target, weight);
        clust(i) << input;
    }

    // Iterate until the assignment no longer changes, or maxit is reached.
    while (!ok && maxit--)
    {
        newclust.clear();
        samples_per_cluster.clear();
        old_clust_idx << clust_idx;
        for (int i = 0; i < nsamples; i++)
        {
            samples->getExample(i, input, target, weight);
            real dist, bestdist = 1E300;
            int bestclust = 0;
            if (nclust > 1)
                for (int j = 0; j < nclust; j++)
                    if ((dist = pownorm(clust(j) - input)) < bestdist)
                    {
                        bestdist = dist;
                        bestclust = j;
                    }
            clust_idx[i] = bestclust;
            samples_per_cluster[bestclust] += weight;
            newclust(bestclust) += input * weight;
        }
        for (int i = 0; i < nclust; i++)
            if (samples_per_cluster[i] > 0)
                newclust(i) /= samples_per_cluster[i];
        clust << newclust;
        ok = true;
        if (nclust > 1)
            for (int i = 0; i < nsamples; i++)
                if (old_clust_idx[i] != clust_idx[i])
                {
                    ok = false;
                    break;
                }
    }
}

real GaussMix::log_density(const Vec& y) const
{
#ifdef BOUNDCHECK
    if (stage == 0)
        PLERROR("GaussMix::log_density was called while model was not trained");
#endif
    log_likelihood_dens.resize(L);
    // log_likelihood_dens[j] = log(p(y | j) * p(j | x)).
    for (int j = 0; j < L; j++) {
        log_likelihood_dens[j] = computeLogLikelihood(y, j) + log_p_j_x[j];
#ifdef BOUNDCHECK
        if (isnan(log_likelihood_dens[j])) {
            PLWARNING("In GaussMix::log_density - computeLogLikelihood returned nan");
        }
#endif
    }
    return logadd(log_likelihood_dens);
}

void GaussMix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(sample_row, copies);
    deepCopyField(log_likelihood_post, copies);
    deepCopyField(log_likelihood_dens, copies);
    deepCopyField(x_minus_mu_x, copies);
    deepCopyField(mu_target, copies);
    deepCopyField(eigenvalues, copies);
    deepCopyField(eigenvectors, copies);
    deepCopyField(diags, copies);
    deepCopyField(log_coeff, copies);
    deepCopyField(posteriors, copies);
    deepCopyField(initial_weights, copies);
    deepCopyField(updated_weights, copies);
    deepCopyField(alpha, copies);
    deepCopyField(mu, copies);
    deepCopyField(sigma, copies);
    PLERROR("In GaussMix::makeDeepCopyFromShallowCopy - Need to complete the implementation");
}
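
// Precompute, for each Gaussian, the log of the normalization constant
//
//     log_coeff[j] = -0.5 * (D * log(2*pi) + log det(C_j)),
//
// where, for the 'general' type, log det(C_j) is the sum of the log
// eigenvalues, the D - n_eigen_computed missing ones being set equal to the
// smallest computed eigenvalue (and all of them clamped to sigma_min^2).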
void GaussMix::precomputeStuff() {
    if (type == "spherical") {
        // Nothing to do.
    } else if (type == "diagonal") {
        // Nothing to do.
    } else if (type == "general") {
        // Compute the log of the determinant, then the constant coefficient.
        real var_min = sigma_min * sigma_min;
        for (int j = 0; j < L; j++) {
            real log_det = 0;
            for (int k = 0; k < n_eigen_computed; k++) {
#ifdef BOUNDCHECK
                if (var_min < epsilon && eigenvalues(j,k) < epsilon) {
                    PLWARNING("In GaussMix::precomputeStuff - An eigenvalue is near zero");
                }
#endif
                log_det += log(max(var_min, eigenvalues(j,k)));
            }
            if (D - n_eigen_computed > 0) {
                // The D - n_eigen_computed remaining eigenvalues are all taken
                // equal to the last one that was computed.
                log_det += log(max(var_min, eigenvalues(j, n_eigen_computed - 1))) * (D - n_eigen_computed);
            }
            log_coeff[j] = -0.5 * (D * log(2 * M_PI) + log_det);
        }
    } else {
        PLERROR("In GaussMix::precomputeStuff - Not implemented for this type");
    }
}

void GaussMix::replaceGaussian(int j) {
    // Find the Gaussian with the highest weight.
    int high = 0;
    for (int k = 1; k < L; k++) {
        if (alpha[k] > alpha[high]) {
            high = k;
        }
    }
    // Generate the new center from this Gaussian.
    Vec new_center = mu(j);
    generateFromGaussian(new_center, high);
    // Copy the covariance.
    if (type == "spherical") {
        sigma[j] = sigma[high];
    } else if (type == "diagonal") {
        diags.column(j) << diags.column(high);
    } else if (type == "general") {
        eigenvalues(j) << eigenvalues(high);
        eigenvectors[j] << eigenvectors[high];
        log_coeff[j] = log_coeff[high];
    } else {
        PLERROR("In GaussMix::replaceGaussian - Not implemented for this type");
    }
    // The new Gaussian takes half of the weight of the copied one.
    alpha[high] /= 2.0;
    alpha[j] = alpha[high];
}

void GaussMix::resetGenerator(long g_seed) const
{
    manual_seed(g_seed);
}

void GaussMix::resizeStuffBeforeTraining() {
    nsamples = train_set->length();
    D = train_set->inputsize();
    initial_weights.resize(nsamples);
    mu.resize(L, D);
    posteriors.resize(nsamples, L);
    updated_weights.resize(L, nsamples);
    diags.resize(0,0);
    eigenvalues.resize(0,0);
    eigenvalues_x.resize(0,0);
    eigenvalues_y_x.resize(0,0);
    if (type == "diagonal") {
        diags.resize(D, L);
    } else if (type == "general") {
        if (n_eigen == -1 || n_eigen == D) {
            // We need to compute all the eigenvectors.
            n_eigen_computed = D;
        } else {
            n_eigen_computed = n_eigen + 1;
        }
        for (int i = 0; i < L; i++) {
            eigenvectors[i].resize(n_eigen_computed, D);
        }
        eigenvalues.resize(L, n_eigen_computed);
    } else if (type == "spherical") {
        // Nothing to resize.
    } else {
        PLERROR("In GaussMix::resizeStuffBeforeTraining - Not implemented for this type");
    }
}
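
// Condition the mixture on an input x. For each Gaussian, with the joint
// covariance split into blocks C_xx, C_xy, C_yx, C_yy, the standard Gaussian
// conditioning formula gives
//
//     E[Y | x, j] = mu_y + C_yx C_xx^{-1} (x - mu_x),
//
// where C_yx C_xx^{-1} has been precomputed in y_x_mat[j] (see
// updateFromConditionalSorting). The mixture weights are then updated to
//
//     p(j | x) = alpha_j p(x | j) / sum_l alpha_l p(x | l).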
void GaussMix::setInput(const Vec& input) const {
    inherited::setInput(input);
    // Nothing to do if the model is not trained or there is no input part.
    if (stage == 0 || n_input == 0) {
        return;
    }
    if (type == "spherical") {
        // Nothing to precompute.
    } else if (type == "diagonal") {
        // Nothing to precompute.
    } else if (type == "general") {
        // Compute E[Y | x, j] for each Gaussian.
        x_minus_mu_x.resize(n_input);
        for (int j = 0; j < L; j++) {
            x_minus_mu_x << input;
            x_minus_mu_x -= mu(j).subVec(0, n_input);
            mu_target = mu_y_x(j);
            product(mu_target, y_x_mat[j], x_minus_mu_x);
            mu_target += mu(j).subVec(n_input, n_target);
        }
    } else {
        PLERROR("In GaussMix::setInput - Not implemented for this type");
    }
    for (int j = 0; j < L; j++) {
        log_p_x_j_alphaj[j] = computeLogLikelihood(input, j, true) + log(alpha[j]);
    }
    real log_p_x = logadd(log_p_x_j_alphaj);
    real t;
    for (int j = 0; j < L; j++) {
        t = log_p_x_j_alphaj[j] - log_p_x;
        log_p_j_x[j] = t;
        p_j_x[j] = exp(t);
    }
}
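
// EM training. The first stage initializes the posteriors with hard K-means
// assignments; every subsequent stage alternates an E-step (computePosteriors)
// with an M-step (computeWeights, computeMeansAndCovariances), replacing on
// the fly any Gaussian whose weight falls below alpha_min.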
void GaussMix::train()
{
    // Check whether training is needed.
    if (stage >= nstages) {
        PLWARNING("In GaussMix::train - The learner is already trained");
        return;
    }
    // Check that a training set has been supplied.
    if (!train_set) {
        PLERROR("In GaussMix::train - No training set specified");
    }

    // Training is always performed on the full joint distribution.
    TVec<int> old_flags;
    bool restore_flags = ensureFullJointDistribution(old_flags);

    // Resize everything according to the training set.
    resizeStuffBeforeTraining();

    if (stage == 0) {
        // Copy the sample weights from the training set (uniform if none).
        if (train_set->weightsize() <= 0) {
            initial_weights.fill(1);
        } else {
            Vec tmp1;
            Vec tmp2;
            real w;
            for (int i = 0; i < nsamples; i++) {
                train_set->getExample(i, tmp1, tmp2, w);
                initial_weights[i] = w;
            }
        }
        // Initialize the centers with K-means, and set the posteriors to
        // hard cluster memberships.
        TVec<int> clust_idx;
        kmeans(train_set, L, clust_idx, mu, kmeans_iterations);
        posteriors.fill(0);
        for (int i = 0; i < nsamples; i++) {
            posteriors(i, clust_idx[i]) = 1;
        }
        updateSampleWeights();
        computeWeights();
        computeMeansAndCovariances();
        precomputeStuff();
    }

    Vec sample(D);
    bool replaced_gaussian = false;
    ProgressBar* pb = 0;
    int n_steps = nstages - stage;
    if (report_progress) {
        pb = new ProgressBar("Training GaussMix", n_steps);
    }
    while (stage < nstages) {
        // E-step, repeated as long as a Gaussian had to be replaced.
        do {
            computePosteriors();
            replaced_gaussian = computeWeights();
        } while (replaced_gaussian);
        // M-step.
        computeMeansAndCovariances();
        precomputeStuff();
        stage++;
        if (pb) {
            pb->update(n_steps - nstages + stage);
        }
    }
    if (pb)
        delete pb;

    // Restore the conditional flags if they were modified for training.
    if (restore_flags) {
        setConditionalFlags(old_flags);
    }

    // Reset the conditional quantities; they are set up again by build()
    // (and recomputed in setInput when there is an input part).
    log_p_j_x.clear();

    build();
}
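
// Recompute everything that depends on the input/target split. For the
// 'general' type this rebuilds the full covariance from its eigenvectors,
// inverts the input block C_xx through its own eigen-decomposition, and
// derives the conditional covariance
//
//     C_{y|x} = C_yy - C_yx C_xx^{-1} C_xy,
//
// whose eigen-decomposition is stored for use in computeLogLikelihood and
// generateFromGaussian.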
void GaussMix::updateFromConditionalSorting() {
    static Mat inv_cov_x, tmp_cov, work_mat1, work_mat2;
    static Vec eigenvals;
    sortFromFlags(mu);
    if (type == "spherical") {
        // Nothing to do.
    } else if (type == "diagonal") {
        sortFromFlags(diags, false, true);
    } else if (type == "general") {
        if (n_margin > 0) {
            PLERROR("In GaussMix::updateFromConditionalSorting - Marginalization not implemented for the general type");
        }
        if ((n_input == 0 && n_margin == 0) || stage == 0) {
            // Nothing to do: either there is no conditioning, or the model
            // has not been trained yet.
            return;
        }
        tmp_cov.resize(D, D);
        real var_min = square(sigma_min);
        real lambda0;
        eigenvalues_x.resize(L, n_input);
        eigenvalues_y_x.resize(L, n_target);
        inv_cov_x.resize(n_input, n_input);
        mu_y_x.resize(L, n_target);
        work_mat1.resize(n_target, n_input);
        work_mat2.resize(n_target, n_target);
        for (int j = 0; j < L; j++) {
            sortFromFlags(eigenvectors[j]);
            // Rebuild the full covariance matrix from its eigen-decomposition:
            // C = sum_k (lambda_k - lambda0) v_k v_k' + lambda0 I, where
            // lambda0 stands in for the eigenvalues that were not computed.
            full_cov[j].resize(D, D);
            tmp_cov = full_cov[j];
            eigenvals = eigenvalues(j);
            lambda0 = max(var_min, eigenvals[n_eigen_computed - 1]);
            tmp_cov.clear();
            for (int k = 0; k < n_eigen_computed - 1; k++) {
                tmp_cov += (max(var_min, eigenvals[k]) - lambda0) *
                    product(columnmatrix(eigenvectors[j](k)),
                            rowmatrix(eigenvectors[j](k)));
            }
            for (int i = 0; i < D; i++) {
                tmp_cov(i,i) += lambda0;
            }
            // Extract the covariance of the input part.
            cov_x[j] = full_cov[j].subMat(0, 0, n_input, n_input);
            // Compute its eigen-decomposition...
            eigenvectors_x[j].resize(n_input, n_input);
            eigenvals = eigenvalues_x(j);
            eigenVecOfSymmMat(cov_x[j], n_input, eigenvals, eigenvectors_x[j]);
            // ... and use it to compute its inverse.
            inv_cov_x.clear();
            lambda0 = max(var_min, eigenvals[n_input - 1]);
            real one_over_lambda0 = 1 / lambda0;
            for (int k = 0; k < n_input - 1; k++) {
                inv_cov_x += (1 / max(var_min, eigenvals[k]) - one_over_lambda0) *
                    product(columnmatrix(eigenvectors_x[j](k)),
                            rowmatrix(eigenvectors_x[j](k)));
            }
            for (int i = 0; i < n_input; i++) {
                inv_cov_x(i,i) += one_over_lambda0;
            }
            // Compute the conditional covariance C_{y|x}.
            cov_y_x[j].resize(n_target, n_target);
            cov_y_x[j] << full_cov[j].subMat(n_input, n_input, n_target, n_target);
            tmp_cov = full_cov[j].subMat(n_input, 0, n_target, n_input);
            product(work_mat1, tmp_cov, inv_cov_x);
            productTranspose(work_mat2, work_mat1, tmp_cov);
            cov_y_x[j] -= work_mat2;
            y_x_mat[j].resize(n_target, n_input);
            y_x_mat[j] << work_mat1;
            // Compute the eigen-decomposition of the conditional covariance.
            eigenvectors_y_x[j].resize(n_target, n_target);
            eigenvals = eigenvalues_y_x(j);
            eigenVecOfSymmMat(cov_y_x[j], n_target, eigenvals, eigenvectors_y_x[j]);
        }
    } else {
        PLERROR("In GaussMix::updateFromConditionalSorting - Not implemented for this type");
    }
}

void GaussMix::updateSampleWeights() {
    // w_ij = initial_weight_i * P(j | s_i).
    for (int j = 0; j < L; j++) {
        updated_weights(j) << initial_weights;
        columnmatrix(updated_weights(j)) *= posteriors.column(j);
    }
}

real GaussMix::survival_fn(const Vec& x) const
{
    PLERROR("survival_fn not implemented for GaussMix");
    return 0.0;
}

real GaussMix::cdf(const Vec& x) const
{
    PLERROR("cdf not implemented for GaussMix");
    return 0.0;
}

void GaussMix::variance(Mat& cov) const
{
    PLERROR("variance not implemented for GaussMix");
}

} // end of namespace PLearn