
PLS.cc

// -*- C++ -*-

// PLS.cc
//
// Copyright (C) 2004 Olivier Delalleau
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: PLS.cc,v 1.10 2004/07/21 16:30:58 chrish42 Exp $
 ******************************************************* */

// Authors: Olivier Delalleau

#include <plearn/math/plapack.h>
#include "PLS.h"
#include <plearn/vmat/ShiftAndRescaleVMatrix.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/math/TMat_maths_impl.h>
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

PLS::PLS()
    : m(-1),
      p(-1),
      k(1),
      method("kernel"),
      precision(1e-6),
      output_the_score(0),
      output_the_target(1)
{}

PLEARN_IMPLEMENT_OBJECT(PLS,
    "Partial Least Squares Regression (PLSR).",
    "You can use this learner to perform regression and / or dimensionality\n"
    "reduction.\n"
    "PLS regression assumes the target Y and the data X are linked through:\n"
    "  Y = T.Q' + E\n"
    "  X = T.P' + F\n"
    "The underlying coefficients T (the 'scores') and the loading matrices\n"
    "Q and P are sought. It is then possible to compute the prediction y for\n"
    "a new input x, as well as its score vector t (its representation in\n"
    "lower-dimensional coordinates).\n"
    "The available algorithms to perform PLS (chosen by the 'method' option) are:\n"
    "\n"
    " ==== PLS1 ====\n"
The\n" 00078 "following algorithm is taken from 'Factor Analysis in Chemistry', with an\n" 00079 "additional loop that (I believe) was missing:\n" 00080 " (1) Let X (n x p) = the centered and normalized input data\n" 00081 " Let y (n x 1) = the centered and normalized target data\n" 00082 " Let k be the number of components extracted\n" 00083 " (2) s = y\n" 00084 " (3) lx' = s' X, s = X lx (normalized)\n" 00085 " (4) If s has changed by more than 'precision', loop to (3)\n" 00086 " (5) ly = s' y\n" 00087 " (6) lx' = s' X\n" 00088 " (7) Store s, lx and ly in the columns of respectively T, P and Q\n" 00089 " (8) X = X - s lx', y = y - s ly, loop to (2) k times\n" 00090 " (9) Set W = (T P')^(+) T, where the ^(+) is the right pseudoinverse\n" 00091 "\n" 00092 " ==== Kernel ====\n" 00093 "The code implements a NIPALS-PLS-like algorithm, which is a so-called\n" 00094 "'kernel' algorithm (faster than more classical implementations).\n" 00095 "The algorithm, inspired from 'Factor Analysis in Chemistry' and above all\n" 00096 "www.statsoftinc.com/textbook/stpls.html, is the following:\n" 00097 " (1) Let X (n x p) = the centered and normalized input data\n" 00098 " Let Y (n x m) = the centered and normalized target data\n" 00099 " Let k be the number of components extracted\n" 00100 " (2) Initialize A_0 = X'Y, M_0 = X'X, C_0 = Identity(p), and h = 0\n" 00101 " (3) q_h = largest eigenvector of B_h = A_h' A_h, found by the NIPALS method:\n" 00102 " (3.a) q_h = a (normalized) randomn column of B_h\n" 00103 " (3.b) q_h = B_h q_h\n" 00104 " (3.c) normalize q_h\n" 00105 " (3.d) if q_h has changed by more than 'precision', go to (b)\n" 00106 " (4) w_h = C_h A_h q_h, normalize w_h and store it in a column of W (p x k)\n" 00107 " (5) p_h = M_h w_h, c_h = w_h' p_h, p_h = p_h / c_h and store it in a column\n" 00108 " of P (p x k)\n" 00109 " (6) q_h = A_h' w_h / c_h, and store it in a column of Q (m x k)\n" 00110 " (7) A_h+1 = A_h - c_h p_h q_h'\n" 00111 " M_h+1 = M_h - c_h p_h p_h',\n" 00112 " C_h+1 = C_h - w_h p_h\n" 00113 " (8) h = h+1, and if h < k, go to (3)\n" 00114 "\n" 00115 "The result is then given by:\n" 00116 " - Y = X B, with B (p x m) = W Q'\n" 00117 " - T = X W, where T is the score (reduced coordinates)\n" 00118 "\n" 00119 "You can choose to have the score (T) and / or the target (Y) in the output\n" 00120 "of the learner (default is target only, i.e. regression)." 00121 ); 00122 00124 // declareOptions // 00126 void PLS::declareOptions(OptionList& ol) 00127 { 00128 // Build options. 00129 00130 declareOption(ol, "k", &PLS::k, OptionBase::buildoption, 00131 "The number of components (factors) computed."); 00132 00133 declareOption(ol, "method", &PLS::method, OptionBase::buildoption, 00134 "The PLS algorithm used ('pls1' or 'kernel', see help for more details).\n"); 00135 00136 declareOption(ol, "output_the_score", &PLS::output_the_score, OptionBase::buildoption, 00137 "If set to 1, then the score (the low-dimensional representation of the input)\n" 00138 "will be included in the output (before the target)."); 00139 00140 declareOption(ol, "output_the_target", &PLS::output_the_target, OptionBase::buildoption, 00141 "If set to 1, then (the prediction of) the target will be included in the\n" 00142 "output (after the score)."); 00143 00144 // Learnt options. 
    declareOption(ol, "B", &PLS::B, OptionBase::learntoption,
        "The regression matrix in Y = X.B + E.");

    declareOption(ol, "m", &PLS::m, OptionBase::learntoption,
        "Used to store the target size.");

    declareOption(ol, "mean_input", &PLS::mean_input, OptionBase::learntoption,
        "The mean of the input data X.");

    declareOption(ol, "mean_target", &PLS::mean_target, OptionBase::learntoption,
        "The mean of the target data Y.");

    declareOption(ol, "p", &PLS::p, OptionBase::learntoption,
        "Used to store the input size.");

    declareOption(ol, "precision", &PLS::precision, OptionBase::buildoption,
        "The precision to which we compute the eigenvectors.");

    declareOption(ol, "stddev_input", &PLS::stddev_input, OptionBase::learntoption,
        "The standard deviation of the input data X.");

    declareOption(ol, "stddev_target", &PLS::stddev_target, OptionBase::learntoption,
        "The standard deviation of the target data Y.");

    declareOption(ol, "W", &PLS::W, OptionBase::learntoption,
        "The regression matrix in T = X.W.");

    // Now call the parent class' declareOptions.
    inherited::declareOptions(ol);
}

///////////
// build //
///////////
void PLS::build()
{
    inherited::build();
    build_();
}

////////////
// build_ //
////////////
void PLS::build_()
{
    if (train_set) {
        this->m = train_set->targetsize();
        this->p = train_set->inputsize();
        mean_input.resize(p);
        stddev_input.resize(p);
        mean_target.resize(m);
        stddev_target.resize(m);
        if (train_set->weightsize() > 0) {
            PLWARNING("In PLS::build_ - The train set has weights, but the optimization algorithm won't use them");
        }
        // Check method consistency.
        if (method == "pls1") {
            // Make sure the target is 1-dimensional.
            if (m != 1) {
                PLERROR("In PLS::build_ - With the 'pls1' method, the target should be 1-dimensional");
            }
        } else if (method == "kernel") {
            // Everything should be ok.
        } else {
            PLERROR("In PLS::build_ - Unknown value for option 'method'");
        }
    }
    if (!output_the_score && !output_the_target) {
        // Weird, we don't want any output?
        PLWARNING("In PLS::build_ - There will be no output");
    }
}

/////////////////////////////
// computeCostsFromOutputs //
/////////////////////////////
void PLS::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                  const Vec& target, Vec& costs) const
{
    // No cost computed.
}

///////////////////
// computeOutput //
///////////////////
void PLS::computeOutput(const Vec& input, Vec& output) const
{
    static Vec input_copy;
    if (W.width() == 0)
        PLERROR("In PLS::computeOutput - The model was not trained");
    // Compute the output from the input.
    int nout = outputsize();
    output.resize(nout);
    // First normalize the input.
    input_copy.resize(this->p);
    input_copy << input;
    input_copy -= mean_input;
    input_copy /= stddev_input;
    int target_start = 0;
    if (output_the_score) {
        transposeProduct(output.subVec(0, this->k), W, input_copy);
        target_start = this->k;
    }
    if (output_the_target) {
        if (this->m > 0) {
            Vec target = output.subVec(target_start, this->m);
            transposeProduct(target, B, input_copy);
            target *= stddev_target;
            target += mean_target;
        } else {
            // This is just a safety check, since it should never happen.
            PLWARNING("In PLS::computeOutput - You asked to output the target, but the target size is <= 0");
        }
    }
}

////////////
// forget //
////////////
void PLS::forget()
{
    stage = 0;
    // Free memory.
    B = Mat();
    W = Mat();
}

//////////////////////
// getTestCostNames //
//////////////////////
TVec<string> PLS::getTestCostNames() const
{
    // No cost computed.
    TVec<string> t;
    return t;
}

///////////////////////
// getTrainCostNames //
///////////////////////
TVec<string> PLS::getTrainCostNames() const
{
    // No cost computed.
    TVec<string> t;
    return t;
}

/////////////////////////////////
// makeDeepCopyFromShallowCopy //
/////////////////////////////////
void PLS::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);

    // Deep-copy the matrix and vector fields.
    deepCopyField(B, copies);
    deepCopyField(mean_input, copies);
    deepCopyField(mean_target, copies);
    deepCopyField(stddev_input, copies);
    deepCopyField(stddev_target, copies);
    deepCopyField(W, copies);
}

///////////////////////
// NIPALSEigenvector //
///////////////////////
// Computes the (normalized) eigenvector of m associated with its largest
// eigenvalue, using NIPALS power iterations up to the given precision.
void PLS::NIPALSEigenvector(const Mat& m, Vec& v, real precision)
{
    int n = v.length();
    Vec w(n);
    v << m.column(0);
    normalize(v, 2.0);
    bool ok = false;
    while (!ok) {
        w << v;
        product(v, m, w);
        normalize(v, 2.0);
        ok = true;
        for (int i = 0; i < n && ok; i++) {
            if (fabs(v[i] - w[i]) > precision) {
                ok = false;
            }
        }
    }
}

////////////////
// outputsize //
////////////////
int PLS::outputsize() const
{
    int os = 0;
    if (output_the_score) {
        os += this->k;
    }
    if (output_the_target && m >= 0) {
        // If m < 0, the target size is not known yet, thus we
        // shouldn't report it here.
        os += this->m;
    }
    return os;
}

///////////
// train //
///////////
void PLS::train()
{
    if (stage == 1) {
        // Already trained.
        if (verbosity >= 1) {
            cout << "Skipping PLS training" << endl;
        }
        return;
    }
    if (verbosity >= 1) {
        cout << "PLS training started" << endl;
    }

    // Construct the centered and normalized training set, for the input
    // as well as the target part.
    if (verbosity >= 2) {
        cout << " Normalization of the data" << endl;
    }
    VMat input_part = new SubVMatrix(
        train_set, 0, 0, train_set->length(), train_set->inputsize());
    VMat target_part = new SubVMatrix(
        train_set, 0, train_set->inputsize(), train_set->length(), train_set->targetsize());
    PP<ShiftAndRescaleVMatrix> X_vmat = new ShiftAndRescaleVMatrix();
    X_vmat->vm = input_part;
    X_vmat->build();
    // ShiftAndRescaleVMatrix stores -mean in 'shift' and 1/stddev in 'scale'.
    mean_input << X_vmat->shift;
    stddev_input << X_vmat->scale;
    negateElements(mean_input);
    invertElements(stddev_input);
    PP<ShiftAndRescaleVMatrix> Y_vmat = new ShiftAndRescaleVMatrix();
    Y_vmat->vm = target_part;
    Y_vmat->build();
    mean_target << Y_vmat->shift;
    stddev_target << Y_vmat->scale;
    negateElements(mean_target);
    invertElements(stddev_target);

    // Some common initialization.
    W.resize(p, k);
    Mat P(p, k);
    Mat Q(m, k);
    int n = X_vmat->length();
    VMat X_vmatrix = static_cast<ShiftAndRescaleVMatrix*>(X_vmat);
    VMat Y_vmatrix = static_cast<ShiftAndRescaleVMatrix*>(Y_vmat);

    if (method == "kernel") {
        // Initialize the various coefficients.
        if (verbosity >= 2) {
            cout << " Initialization of the coefficients" << endl;
        }
        Vec ph(p);
        Vec qh(m);
        Vec wh(p);
        Vec tmp(p);
        real ch;
        Mat Ah = transposeProduct(X_vmatrix, Y_vmatrix); // A_0 = X'Y.
        Mat Mh = transposeProduct(X_vmatrix, X_vmatrix); // M_0 = X'X.
        Mat Ch(p, p);                                    // Initialized to Identity(p).
        Mat Ah_t_Ah;
        Mat update_Ah(p, m);
        Mat update_Mh(p, p);
        Mat update_Ch(p, p);
        for (int i = 0; i < p; i++) {
            for (int j = i + 1; j < p; j++) {
                Ch(i,j) = Ch(j,i) = 0;
            }
            Ch(i,i) = 1;
        }

        // Iterate k times to find the first k factors.
        ProgressBar* pb = 0;
        if (report_progress) {
            pb = new ProgressBar("Computing the components", k);
        }
        for (int h = 0; h < this->k; h++) {
            // Step (3): q_h = largest eigenvector of A_h' A_h.
            Ah_t_Ah = transposeProduct(Ah, Ah);
            if (m == 1) {
                // No need to compute the eigenvector.
                qh[0] = 1;
            } else {
                NIPALSEigenvector(Ah_t_Ah, qh, precision);
            }
            // Step (4): w_h = C_h A_h q_h, normalized.
            product(tmp, Ah, qh);
            product(wh, Ch, tmp);
            normalize(wh, 2.0);
            W.column(h) << wh;
            // Step (5): p_h = M_h w_h / c_h, with c_h = w_h' p_h.
            product(ph, Mh, wh);
            ch = dot(wh, ph);
            ph /= ch;
            P.column(h) << ph;
            // Step (6): q_h = A_h' w_h / c_h.
            transposeProduct(qh, Ah, wh);
            qh /= ch;
            Q.column(h) << qh;
            // Step (7): update A_h, M_h and C_h.
            Mat ph_mat(p, 1, ph);
            Mat qh_mat(m, 1, qh);
            Mat wh_mat(p, 1, wh);
            update_Ah = productTranspose(ph_mat, qh_mat);
            update_Ah *= ch;
            Ah -= update_Ah;
            update_Mh = productTranspose(ph_mat, ph_mat);
            update_Mh *= ch;
            Mh -= update_Mh;
            update_Ch = productTranspose(wh_mat, ph_mat);
            Ch -= update_Ch;
            if (pb)
                pb->update(h + 1);
        }
        if (pb)
            delete pb;
    } else if (method == "pls1") {
        Vec s(n);
        Vec old_s(n);
        Vec y(n);
        Vec lx(p);
        Vec ly(1);
        Mat T(n, k);
        Mat X = X_vmatrix->toMat();
        y << Y_vmatrix->toMat();
        ProgressBar* pb = 0;
        if (report_progress) {
            pb = new ProgressBar("Computing the components", k);
        }
        for (int h = 0; h < k; h++) {
            // Steps (2)-(4): iterate s = X lx, lx' = s' X until s is stable.
            s << y;
            normalize(s, 2.0);
            bool finished = false;
            while (!finished) {
                old_s << s;
                transposeProduct(lx, X, s);
                product(s, X, lx);
                normalize(s, 2.0);
                if (dist(old_s, s, 2) < precision) {
                    finished = true;
                }
            }
            // Steps (5)-(7): compute the loadings, store s, lx and ly.
            ly[0] = dot(s, y);
            transposeProduct(lx, X, s);
            T.column(h) << s;
            P.column(h) << lx;
            Q.column(h) << ly;
            // Step (8): deflation, X = X - s lx' and y = y - s ly.
            for (int i = 0; i < n; i++) {
                for (int j = 0; j < p; j++) {
                    X(i,j) -= s[i] * lx[j];
                }
                y[i] -= s[i] * ly[0];
            }
            if (pb)
                pb->update(h + 1);
        }
        if (pb)
            delete pb;
        if (verbosity >= 2) {
            cout << " Computation of the corresponding coefficients" << endl;
        }
        // Step (9): W = (T P')^(+) T, computed through the SVD of T P'.
        Mat tmp(n, p);
        productTranspose(tmp, T, P);
        Mat U, Vt;
        Vec D;
        real safeguard = 1.1; // Because the SVD may crash otherwise.
        SVD(tmp, U, D, Vt, 'A', safeguard);
        for (int i = 0; i < D.length(); i++) {
            if (abs(D[i]) < precision) {
                D[i] = 0;
            } else {
                D[i] = 1.0 / D[i];
            }
        }
        Mat tmp2(n, p);
        tmp2.fill(0);
        for (int i = 0; i < D.length(); i++) {
            if (D[i] != 0) {
                tmp2(i) << D[i] * Vt(i);
            }
        }
        product(tmp, U, tmp2);
        transposeProduct(W, tmp, T);
    }
    B.resize(p, m);
    productTranspose(B, W, Q);
    if (verbosity >= 1) {
        cout << "PLS training ended" << endl;
    }
    stage = 1;
}

} // end of namespace PLearn
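
For readers who want to try the learner, here is a minimal usage sketch. It is not part of PLS.cc: the construction of the training VMat from a Mat, the defineSizes() call and the setTrainingSet() call follow the usual PLearn conventions and should be treated as assumptions; only the PLS options and methods used below (k, method, output_the_score, build(), train(), outputsize(), computeOutput()) are taken from this file.

// Hypothetical driver, not part of PLS.cc. Assumes standard PLearn
// conventions: a VMat can be built from a Mat, defineSizes() declares the
// input/target/weight split, and PLearner provides setTrainingSet().
#include <plearn/vmat/VMat.h>
#include "PLS.h"

using namespace PLearn;

int main()
{
    // Toy data set: 10 samples, each row = 3 inputs followed by 1 target.
    Mat data(10, 4);
    for (int i = 0; i < data.length(); i++) {
        for (int j = 0; j < 3; j++)
            data(i, j) = ((i + 1) * (j + 2)) % 7;     // Arbitrary, non-collinear inputs.
        data(i, 3) = 2 * data(i, 0) - data(i, 1) + 1; // Target linear in the inputs.
    }
    VMat train_vmat(data);
    train_vmat->defineSizes(3, 1, 0); // inputsize, targetsize, weightsize.

    PP<PLS> learner = new PLS();
    learner->k = 2;                   // Extract two components.
    learner->method = "pls1";         // 1-dimensional target, so both methods apply.
    learner->output_the_score = 1;    // Output the score t as well as the prediction.
    learner->setTrainingSet(train_vmat);
    learner->build();
    learner->train();

    Vec input(3), output(learner->outputsize());
    input << data(0).subVec(0, 3);
    learner->computeOutput(input, output);
    // 'output' now holds the k score coordinates, followed by the predicted target.
    return 0;
}

Setting the options directly on the object, as above, is just the most compact illustration; in practice PLearn learners are usually configured through the option mechanism declared in declareOptions().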
