#include <plearn/vmat/CenteredVMatrix.h>
#include <plearn/vmat/GetInputVMatrix.h>
#include "PCA.h"
#include <plearn/math/plapack.h>
#include <plearn/math/random.h>
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

PCA::PCA()
    : algo("classical"),
      ncomponents(2),
      sigmasq(0),
      normalize(true)
{
}

PLEARN_IMPLEMENT_OBJECT(
    PCA,
    "Performs a Principal Component Analysis preprocessing (projecting on the principal directions).",
    "This learner finds the empirical covariance matrix of the input part of\n"
    "the training data, and learns to project its input vectors along the\n"
    "principal eigenvectors of that matrix, optionally scaling by the inverse\n"
    "of the square root of the eigenvalues (to obtain 'sphered', i.e.\n"
    "Normal(0,I) data).\n"
    "Alternative EM algorithms are provided, which may be useful when there is\n"
    "a lot of data or the dimension is very high.\n"
);
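
// Minimal usage sketch (illustrative only; 'dataset' and 'input' are assumed
// to be a VMat and a Vec supplied by the caller -- the 'em' branch of train()
// below drives an auxiliary PCA instance in exactly this way):
//
//   PCA pca;
//   pca.ncomponents = 10;
//   pca.setTrainingSet(dataset);
//   pca.train();
//   Vec projection(pca.outputsize());
//   pca.computeOutput(input, projection);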

void PCA::declareOptions(OptionList& ol)
{
    declareOption(ol, "ncomponents", &PCA::ncomponents, OptionBase::buildoption,
                  "The number of principal components to keep (that's also the outputsize).");

    declareOption(ol, "sigmasq", &PCA::sigmasq, OptionBase::buildoption,
                  "This gets added to the diagonal of the covariance matrix prior to eigen-decomposition.");

    declareOption(ol, "normalize", &PCA::normalize, OptionBase::buildoption,
                  "If true, we divide by sqrt(eigenval) after projecting on the eigenvec.");

    declareOption(ol, "algo", &PCA::algo, OptionBase::buildoption,
                  "The algorithm used to perform the Principal Component Analysis:\n"
                  " - 'classical' : compute the eigenvectors of the covariance matrix\n"
                  " - 'em' : EM algorithm from \"EM algorithms for PCA and SPCA\" by S. Roweis\n"
                  " - 'em_orth' : a variant of 'em', where orthogonal components are directly computed");

    declareOption(ol, "mu", &PCA::mu, OptionBase::learntoption,
                  "The (weighted) mean of the samples");

    declareOption(ol, "eigenvals", &PCA::eigenvals, OptionBase::learntoption,
                  "The ncomponents eigenvalues corresponding to the principal directions kept");

    declareOption(ol, "eigenvecs", &PCA::eigenvecs, OptionBase::learntoption,
                  "A ncomponents x inputsize matrix containing the principal eigenvectors");

    inherited::declareOptions(ol);
}

void PCA::build()
{
    inherited::build();
    build_();
}

void PCA::build_()
{
}
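
// Test cost: the squared reconstruction error between 'input' and its
// reconstruction from the PCA 'output'.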
void PCA::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                  const Vec& target, Vec& costs) const
{
    static Vec reconstructed_input;
    reconstruct(output, reconstructed_input);
    costs.resize(1);
    costs[0] = powdistance(input, reconstructed_input);
}
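
// Projects (input - mu) on each kept eigenvector; when 'normalize' is true,
// each coordinate is further divided by sqrt(eigenval) to sphere the output.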
void PCA::computeOutput(const Vec& input, Vec& output) const
{
    static Vec x;
    x.resize(input.length());
    x << input;
    x -= mu;
    output.resize(ncomponents);

    if (normalize)
    {
        for (int i = 0; i < ncomponents; i++)
            output[i] = dot(x, eigenvecs(i)) / sqrt(eigenvals[i]);
    }
    else
    {
        for (int i = 0; i < ncomponents; i++)
            output[i] = dot(x, eigenvecs(i));
    }
}

void PCA::forget()
{
    stage = 0;
}

TVec<string> PCA::getTestCostNames() const
{
    return TVec<string>(1, "squared_reconstruction_error");
}

TVec<string> PCA::getTrainCostNames() const
{
    return TVec<string>();
}

void PCA::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(mu, copies);
    deepCopyField(eigenvals, copies);
    deepCopyField(eigenvecs, copies);
}

int PCA::outputsize() const
{
    return ncomponents;
}

void PCA::train()
{
    Mat covarmat;
    if (stage < 1)
    {
        ProgressBar* pb = 0;
        if (algo == "classical") {
            if (ncomponents > train_set->inputsize())
                PLERROR("In PCA::train - You asked for %d components, but the training set inputsize is only %d",
                        ncomponents, train_set->inputsize());
            if (report_progress) {
                pb = new ProgressBar("Training PCA", 2);
            }
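            // Classical PCA: estimate the input mean and covariance matrix,
            // then keep the 'ncomponents' leading eigenpairs.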
            computeInputMeanAndCovar(train_set, mu, covarmat);
            if (pb)
                pb->update(1);
            eigenVecOfSymmMat(covarmat, ncomponents, eigenvals, eigenvecs);
            if (pb)
                pb->update(2);
            stage = 1;
        } else if (algo == "em") {
            int n = train_set->length();
            int p = train_set->inputsize();
            int k = ncomponents;

            Mat C(k, p);
            fill_random_normal(C);

            VMat centered_data = new CenteredVMatrix(new GetInputVMatrix(train_set));
            Vec sample_mean = static_cast<CenteredVMatrix*>((VMatrix*) centered_data)->getMu();
            mu.resize(sample_mean.length());
            mu << sample_mean;
            Mat Y = centered_data.toMat();
            Mat X(n, k);
            Mat tmp_k_k(k, k);
            Mat tmp_k_k_2(k, k);
            Mat tmp_p_k(p, k);
            Mat tmp_k_n(k, n);

            if (report_progress)
                pb = new ProgressBar("Training EM PCA", nstages - stage);
            int init_stage = stage;
            while (stage < nstages) {
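                // E-step: X = Y C' (C C')^-1, the least-squares coordinates
                // of the centered data Y on the current components C.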
                productTranspose(tmp_k_k, C, C);
                matInvert(tmp_k_k, tmp_k_k_2);
                transposeProduct(tmp_p_k, C, tmp_k_k_2);
                product(X, Y, tmp_p_k);
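
                // M-step: C = (X' X)^-1 X' Y, the regression of the data
                // on the coordinates estimated in the E-step.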
                transposeProduct(tmp_k_k, X, X);
                matInvert(tmp_k_k, tmp_k_k_2);
                productTranspose(tmp_k_n, tmp_k_k_2, X);
                product(C, tmp_k_n, Y);
                stage++;
                if (report_progress)
                    pb->update(stage - init_stage);
            }
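
            // EM only recovers the subspace spanned by the leading components,
            // not the eigenvectors themselves: orthonormalize C, run an exact
            // PCA on the data projected in that subspace, then rotate the
            // result back to the input space.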
            int n_base = GramSchmidtOrthogonalization(C);
            if (n_base != k) {
                PLWARNING("In PCA::train - The rows of C are not linearly independent");
            }

            productTranspose(X, Y, C);

            PCA true_pca;
            VMat proj_data(X);
            true_pca.ncomponents = k;
            true_pca.normalize = 0;
            true_pca.setTrainingSet(proj_data);
            true_pca.train();

            eigenvecs.resize(k, p);
            product(eigenvecs, true_pca.eigenvecs, C);
            eigenvals.resize(k);
            eigenvals << true_pca.eigenvals;
        } else if (algo == "em_orth") {
            int n = train_set->length();
            int p = train_set->inputsize();
            int k = ncomponents;

            Mat C(k, p);
            fill_random_normal(C);

            GramSchmidtOrthogonalization(C);

            VMat centered_data = new CenteredVMatrix(new GetInputVMatrix(train_set));
            Vec sample_mean = static_cast<CenteredVMatrix*>((VMatrix*) centered_data)->getMu();
            mu.resize(sample_mean.length());
            mu << sample_mean;
            Mat Y = centered_data.toMat();
            Mat Y_copy(n, p);
            Mat X(n, k);
            Mat tmp_k_k(k, k);
            Mat tmp_k_k_2(k, k);
            Mat tmp_p_k(p, k);
            Mat tmp_k_n(k, n);
            Mat tmp_n_1(n, 1);
            Mat tmp_n_p(n, p);
            Mat X_j, C_j;
            Mat x_j_prime_x_j(1, 1);

            if (report_progress)
                pb = new ProgressBar("Training EM PCA", nstages - stage);
            int init_stage = stage;
            Y_copy << Y;
            while (stage < nstages) {
                Y << Y_copy;
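                // Estimate each component in turn: project the data on it,
                // re-estimate it by least squares, renormalize it, then deflate
                // Y by removing that component's contribution so that the
                // following components come out orthogonal to it.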
                for (int j = 0; j < k; j++) {
                    C_j = C.subMatRows(j, 1);
                    X_j = X.subMatColumns(j, 1);
                    productTranspose(X_j, Y, C_j);
                    transposeProduct(x_j_prime_x_j, X_j, X_j);
                    transposeProduct(C_j, X_j, Y);
                    C_j /= x_j_prime_x_j(0, 0);
                    PLearn::normalize(C_j, 2.0);
                    // Deflation Y <- Y - (Y C_j') C_j, computed as -(-Y + (Y C_j') C_j).
                    productTranspose(tmp_n_1, Y, C_j);
                    negateElements(Y);
                    productAcc(Y, tmp_n_1, C_j);
                    negateElements(Y);
                }
                stage++;
                if (report_progress)
                    pb->update(stage - init_stage);
            }
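
            // Sanity check: the learned components should be orthonormal.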
            for (int i = 0; i < k; i++) {
                for (int j = i; j < k; j++) {
                    real dot_i_j = dot(C(i), C(j));
                    if (i != j) {
                        if (abs(dot_i_j) > 1e-6) {
                            PLWARNING("In PCA::train - It looks like some vectors are not orthogonal");
                        }
                    } else {
                        if (abs(dot_i_j - 1) > 1e-6) {
                            PLWARNING("In PCA::train - It looks like a vector is not normalized");
                        }
                    }
                }
            }

            Y << Y_copy;
            productTranspose(X, Y, C);
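
            // With orthonormal components, the variance of each projected
            // coordinate estimates the corresponding eigenvalue.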
            VMat X_vm(X);
            Vec mean_proj, var_proj;
            computeMeanAndVariance(X_vm, mean_proj, var_proj);
            eigenvals.resize(k);
            eigenvals << var_proj;

            eigenvecs.resize(k, p);
            eigenvecs << C;
        } else {
            PLERROR("In PCA::train - Unknown value for 'algo'");
        }
        if (pb)
            delete pb;
    }
    else {
        PLWARNING("In PCA::train - The learner has already been trained, skipping training");
    }
}
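
// Reconstruction: input ~ mu + sum_i output[i] * v_i, first undoing the
// 1/sqrt(eigenval) scaling when 'normalize' was used at projection time.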
void PCA::reconstruct(const Vec& output, Vec& input) const
{
    input.resize(mu.length());
    input << mu;

    int n = output.length();
    if (normalize)
    {
        for (int i = 0; i < n; i++)
            multiplyAcc(input, eigenvecs(i), output[i] * sqrt(eigenvals[i]));
    }
    else
    {
        for (int i = 0; i < n; i++)
            multiplyAcc(input, eigenvecs(i), output[i]);
    }
}

} // namespace PLearn