PLearn: ProjectionErrorVariable.cc Source File

00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal 00006 // Copyright (C) 2001-2002 Nicolas Chapados, Ichiro Takeuchi, Jean-Sebastien Senecal 00007 // Copyright (C) 2002 Xiangdong Wang, Christian Dorion 00008 00009 // Redistribution and use in source and binary forms, with or without 00010 // modification, are permitted provided that the following conditions are met: 00011 // 00012 // 1. Redistributions of source code must retain the above copyright 00013 // notice, this list of conditions and the following disclaimer. 00014 // 00015 // 2. Redistributions in binary form must reproduce the above copyright 00016 // notice, this list of conditions and the following disclaimer in the 00017 // documentation and/or other materials provided with the distribution. 00018 // 00019 // 3. The name of the authors may not be used to endorse or promote 00020 // products derived from this software without specific prior written 00021 // permission. 00022 // 00023 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00024 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00025 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00026 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00028 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00029 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00030 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00031 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00032 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 // 00034 // This file is part of the PLearn library. For more information on the PLearn 00035 // library, go to the PLearn Web site at www.plearn.org 00036 00037 00038 /* ******************************************************* 00039 * $Id: ProjectionErrorVariable.cc,v 1.15 2004/08/10 21:29:45 yoshua Exp $ 00040 * This file is part of the PLearn library. 00041 ******************************************************* */ 00042 00043 #include "ProjectionErrorVariable.h" 00044 #include "Var_operators.h" 00045 #include <plearn/math/plapack.h> 00046 //#include "Var_utils.h" 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 00054 PLEARN_IMPLEMENT_OBJECT(ProjectionErrorVariable, 00055 "Computes the projection error of a set of vectors on a non-orthogonal basis.\n", 00056 "The first input is a set of n_dim vectors (possibly seen as a single vector of their concatenation) f_i, each in R^n\n" 00057 "The second input is a set of T vectors (possibly seen as a single vector of their concatenation) t_j, each in R^n\n" 00058 "There are several options that control which kind of projection error is actually computed:\n" 00059 "If !use_subspace_distance {the recommended setting}, the output is\n" 00060 " sum_j min_w || t_j - sum_i w_i f_i ||^2 / ||t_j||^2\n" 00061 " where the denominator can be eliminated (not recommended) by turning off the\n" 00062 " normalize_by_neighbor_distance option. In this expression, w is a local\n" 00063 " n_dim-vector that is optmized analytically.\n" 00064 " If the 'ordered_vectors' is set, the gradient is not computed truthfully\n" 00065 " but in such a way as to induce a natural ordering among the vectors f_i.\n" 00066 " For each f_i, the above criterion is applied using a projection that\n" 00067 " involves only the first i vectors f_1...f_i. In this way the first vector f_1\n" 00068 " tries to *explain* the vectors t_j as well as possible with a single dimension,\n" 00069 " and the vector f_2 learns to *explain* what f_2 did not already predict, etc...\n" 00070 " When this option is set, we also choose the w_i in the same greedy way, starting\n" 00071 " from w_1 chosen to minimize the projection error wrt f_1, w_2 chosen to minimize the\n" 00072 " residual projection error left on f_2, etc... Hence the cost minimized wrt f_k on neighbor j is\n" 00073 " ||t_j - sum_{i<=k} w_i f_i||^2 / ||t_j||^2\n" 00074 " (this cost is minimized to choose w_k, and to get a gradient on f_k as well).\n" 00075 " In that case no SVD is used, instead one obtains an analytic solution for w_k:\n" 00076 " w_k = (t_j . f_k - sum_{i<k} w_i f_i . f_k)/||f_k||^2.\n" 00077 " The output produced by fprop is sum_j || t_j - sum_i w_i f_i ||^2 / ||t_j||^2\n" 00078 " where the w_i are chosen as in the previous equation.\n" 00079 "However, if use_subspace_distance (not recommended), the output is\n" 00080 " min_{w,u} || sum_i w_i f_i - sum_j u_j t_j ||^2 .\n" 00081 "In both cases, if norm_penalization>0, an extra term is added:\n" 00082 " norm_penalization * sum_i (||f_i||^2 - 1)^2.\n" 00083 "The 'epsilon' and 'regularization' options are used to regularize the SVD-based matrix\n" 00084 "inversion involved in minimizing for w: only the singular values of F' that are\n" 00085 "above 'epsilon' are inverted (and their singular vectors considered, and then they\n" 00086 "are incremented by 'regularization' before inverting.\n" 00087 ); 00088 00089 ProjectionErrorVariable::ProjectionErrorVariable(Variable* input1, Variable* input2, int n_, 00090 bool normalize_by_neighbor_distance_, 00091 bool use_subspace_distance_, 00092 real norm_penalization_, real epsilon_, 00093 real regularization_, bool ordered_vectors_) 00094 : inherited(input1, input2, 1, 1), n(n_), use_subspace_distance(use_subspace_distance_), 00095 normalize_by_neighbor_distance(normalize_by_neighbor_distance_), norm_penalization(norm_penalization_), 00096 epsilon(epsilon_), regularization(regularization_), ordered_vectors(ordered_vectors_) 00097 { 00098 build_(); 00099 } 00100 00101 void 00102 ProjectionErrorVariable::build() 00103 { 00104 inherited::build(); 00105 build_(); 00106 } 00107 00108 void 00109 ProjectionErrorVariable::build_() 00110 { 00111 if (input1 && input2) { 00112 if ((input1->length()==1 && input1->width()>1) || 00113 (input1->width()==1 && input1->length()>1)) 00114 { 00115 if (n<0) PLERROR("ProjectionErrorVariable: Either the input should be matrices or n should be specified\n"); 00116 n_dim = input1->size()/n; 00117 if (n_dim*n != input1->size()) 00118 PLERROR("ProjectErrorVariable: the first input size should be an integer multiple of n"); 00119 } 00120 else 00121 n_dim = input1->length(); 00122 if ((input2->length()==1 && input2->width()>1) || 00123 (input2->width()==1 && input2->length()>1)) 00124 { 00125 if (n<0) PLERROR("ProjectionErrorVariable: Either the input should be matrices or n should be specified\n"); 00126 T = input2->size()/n; 00127 if (T*n != input2->size()) 00128 PLERROR("ProjectErrorVariable: the second input size should be an integer multiple of n"); 00129 } 00130 else 00131 T = input2->length(); 00132 00133 F = input1->value.toMat(n_dim,n); 00134 dF = input1->gradient.toMat(n_dim,n); 00135 TT = input2->value.toMat(T,n); 00136 if (n<0) n = input1->width(); 00137 if (input2->width()!=n) 00138 PLERROR("ProjectErrorVariable: the two arguments have inconsistant sizes"); 00139 if (n_dim>n) 00140 PLERROR("ProjectErrorVariable: n_dim should be less than data dimension n"); 00141 if (!use_subspace_distance) 00142 { 00143 if (ordered_vectors) 00144 { 00145 norm_f.resize(n_dim); 00146 } 00147 else 00148 { 00149 V.resize(n_dim,n_dim); 00150 Ut.resize(n,n); 00151 B.resize(n_dim,n); 00152 VVt.resize(n_dim,n_dim); 00153 } 00154 fw_minus_t.resize(T,n); 00155 w.resize(T,n_dim); 00156 one_over_norm_T.resize(T); 00157 } 00158 else 00159 { 00160 wwuu.resize(n_dim+T); 00161 ww = wwuu.subVec(0,n_dim); 00162 uu = wwuu.subVec(n_dim,T); 00163 wwuuM = wwuu.toMat(1,n_dim+T); 00164 rhs.resize(n_dim+T); 00165 rhs.subVec(0,n_dim).fill(-1.0); 00166 A.resize(n_dim+T,n_dim+T); 00167 A11 = A.subMat(0,0,n_dim,n_dim); 00168 A12 = A.subMat(0,n_dim,n_dim,T); 00169 A21 = A.subMat(n_dim,0,T,n_dim); 00170 A22 = A.subMat(n_dim,n_dim,T,T); 00171 Tu.resize(n); 00172 FT.resize(n_dim+T,n); 00173 FT1 = FT.subMat(0,0,n_dim,n); 00174 FT2 = FT.subMat(n_dim,0,T,n); 00175 Ut.resize(n,n); 00176 V.resize(n_dim+T,n_dim+T); 00177 } 00178 fw.resize(n); 00179 if (norm_penalization>0) 00180 norm_err.resize(n_dim); 00181 } 00182 } 00183 00184 00185 void ProjectionErrorVariable::recomputeSize(int& len, int& wid) const 00186 { 00187 len = 1; 00188 wid = 1; 00189 } 00190 00191 void ProjectionErrorVariable::fprop() 00192 { 00193 // Let F the input1 matrix with rows f_i. 00194 // IF use_subspace_distance THEN 00195 // We need to solve the system 00196 // | FF' -FT'| |w| | 1 | 00197 // | | | | = | | 00198 // |-TF' TT'| |u| | 0 | 00199 // in (w,u), and then scale both down by ||w|| so as to enforce ||w||=1. 00200 // 00201 // ELSE IF !ordered_vectors 00202 // We need to solve the system 00203 // F F' w_j = F t_j 00204 // for each t_j in order to find the solution w of 00205 // min_{w_j} || t_j - sum_i w_{ji} f_i ||^2 00206 // for each j. Then sum over j the above square errors. 00207 // Let F' = U S V' the SVD of F'. Then 00208 // w_j = (F F')^{-1} F t_j = (V S U' U S V')^{-1} F t_j = V S^{-2} V' F t_j. 00209 // Note that we can pre-compute 00210 // B = V S^{-2} V' F = V S^{-1} U' 00211 // and 00212 // w_j = B t_j is our solution. 00213 // ELSE (ordered_vectors && !use_subspace_distance) 00214 // for each j 00215 // for each k 00216 // w_{jk} = (t_j . f_k - sum_{i<k} w_i f_i . f_k)/||f_k||^2 00217 // cost = sum_j || t_j - sum_i w_i f_i||^2 / ||t_j||^2 00218 // ENDIF 00219 // 00220 // if norm_penalization>0 then also add the following term: 00221 // norm_penalization * sum_i (||f_i||^2 - 1)^2 00222 // 00223 real cost = 0; 00224 if (use_subspace_distance) 00225 { 00226 // use SVD of (F' -T') 00227 FT1 << F; 00228 multiply(FT2,TT,-1.0); 00229 lapackSVD(FT, Ut, S, V); 00230 wwuu.clear();// 00231 for (int k=0;k<S.length();k++) 00232 { 00233 real s_k = S[k]; 00234 real sv = s_k+ regularization; 00235 real coef = 1/(sv * sv); 00236 if (s_k>epsilon) // ignore the components that have too small singular value (more robust solution) 00237 { 00238 real sum_first_elements = 0; 00239 for (int j=0;j<n_dim;j++) 00240 sum_first_elements += V(j,k); 00241 for (int i=0;i<n_dim+T;i++) 00242 wwuu[i] += V(i,k) * sum_first_elements * coef; 00243 } 00244 } 00245 00246 static bool debugging=false; 00247 if (debugging) 00248 { 00249 productTranspose(A11,F,F); 00250 productTranspose(A12,F,TT); 00251 A12 *= -1.0; 00252 Vec res(ww.length()); 00253 product(res,A11,ww); 00254 productAcc(res,A12,uu); 00255 res -= 1.0; 00256 cout << "norm of error in w equations: " << norm(res) << endl; 00257 Vec res2(uu.length()); 00258 transposeProduct(res2,A12,ww); 00259 productTranspose(A22,TT,TT); 00260 productAcc(res2,A22,uu); 00261 cout << "norm of error in u equations: " << norm(res2) << endl; 00262 } 00263 // scale w and u so that ||w|| = 1 00264 real wnorm = sum(ww); // norm(ww); 00265 wwuu *= 1.0/wnorm; 00266 00267 // compute the cost = ||F'w - T'u||^2 00268 transposeProduct(fw,F,ww); 00269 transposeProduct(Tu,TT,uu); 00270 fw -= Tu; 00271 cost = pownorm(fw); 00272 } 00273 else // PART THAT IS REALLY USED STARTS HERE 00274 if (ordered_vectors) 00275 { 00276 // compute 1/||f_k||^2 into norm_f 00277 for (int k=0;k<n_dim;k++) 00278 { 00279 Vec fk = F(k); 00280 norm_f[k] = 1.0/pownorm(fk); 00281 } 00282 for(int j=0; j<T;j++) 00283 { 00284 Vec tj = TT(j); 00285 Vec wj = w(j); 00286 // w_{jk} = (t_j . f_k - sum_{i<k} w_i f_i . f_k)/||f_k||^2 00287 for (int k=0;k<n_dim;k++) 00288 { 00289 Vec fk = F(k); 00290 real s = dot(tj,fk); 00291 for (int i=0;i<k;i++) 00292 s -= wj[i] * dot(F(i),fk); 00293 wj[k] = s * norm_f[k]; 00294 } 00295 transposeProduct(fw, F, wj); // fw = sum_i w_ji f_i = z_m 00296 Vec fw_minus_tj = fw_minus_t(j); 00297 substract(fw,tj,fw_minus_tj); // -z_n = z_m - z 00298 if (normalize_by_neighbor_distance) // THAT'S THE ONE WHICH WORKS WELL: 00299 { 00300 one_over_norm_T[j] = 1.0/pownorm(tj); // = 1/||z|| 00301 cost += sumsquare(fw_minus_tj)*one_over_norm_T[j]; // = ||z_n||^2 / ||z||^2 00302 } 00303 else 00304 cost += sumsquare(fw_minus_tj); 00305 } 00306 } 00307 else 00308 { 00309 static Mat F_copy; 00310 F_copy.resize(F.length(),F.width()); 00311 F_copy << F; 00312 // N.B. this is the SVD of F' 00313 lapackSVD(F_copy, Ut, S, V); 00314 B.clear(); 00315 for (int k=0;k<S.length();k++) 00316 { 00317 real s_k = S[k]; 00318 if (s_k>epsilon) // ignore the components that have too small singular value (more robust solution) 00319 { 00320 s_k += regularization; 00321 real coef = 1/s_k; 00322 for (int i=0;i<n_dim;i++) 00323 { 00324 real* Bi = B[i]; 00325 for (int j=0;j<n;j++) 00326 Bi[j] += V(i,k)*Ut(k,j)*coef; 00327 } 00328 } 00329 } 00330 // now we have B, we can compute the w's and the cost 00331 for(int j=0; j<T;j++) 00332 { 00333 Vec tj = TT(j); 00334 00335 Vec wj = w(j); 00336 product(wj, B, tj); // w_j = B * t_j = projection weights for neighbor j 00337 transposeProduct(fw, F, wj); // fw = sum_i w_ji f_i = z_m 00338 00339 Vec fw_minus_tj = fw_minus_t(j); 00340 substract(fw,tj,fw_minus_tj); // -z_n = z_m - z 00341 if (normalize_by_neighbor_distance) // THAT'S THE ONE WHICH WORKS WELL: 00342 { 00343 one_over_norm_T[j] = 1.0/pownorm(tj); // = 1/||z|| 00344 cost += sumsquare(fw_minus_tj)*one_over_norm_T[j]; // = ||z_n||^2 / ||z||^2 00345 } 00346 else 00347 cost += sumsquare(fw_minus_tj); 00348 } 00349 } 00350 if (norm_penalization>0) 00351 { 00352 real penalization=0; 00353 for (int i=0;i<n_dim;i++) 00354 { 00355 Vec f_i = F(i); 00356 norm_err[i] = pownorm(f_i)-1; 00357 penalization += norm_err[i]*norm_err[i]; 00358 } 00359 cost += norm_penalization*penalization; 00360 } 00361 value[0] = cost/real(T); 00362 } 00363 00364 00365 void ProjectionErrorVariable::bprop() 00366 { 00367 // calcule dcost/F et incremente input1->matGadient avec cette valeur 00368 // keeping w fixed 00369 // 00370 // IF use_subspace_distance 00371 // dcost/dF = w (F'w - T'u)' 00372 // 00373 // ELSE IF ordered_vectors 00374 // dcost_k/df_k = sum_j 2(sum_{i<=k} w_i f_i - t_j) w_k/||t_j|| 00375 // 00376 // ELSE 00377 // dcost/dfw = 2 (fw - t_j)/||t_j|| 00378 // dfw/df_i = w_i 00379 // so 00380 // dcost/df_i = sum_j 2(fw - t_j) w_i/||t_j|| 00381 // 00382 // IF norm_penalization>0 00383 // add the following to the gradient of f_i: 00384 // norm_penalization*2*(||f_i||^2 - 1)*f_i 00385 // N.B. WE CONSIDER THE input2 (t_j's) TO BE FIXED AND DO NOT 00386 // COMPUTE THE GRADIENT WRT to input2. IF THE USE OF THIS 00387 // OBJECT CHANGES THIS MAY HAVE TO BE REVISED. 00388 // 00389 00390 if (use_subspace_distance) 00391 { 00392 externalProductScaleAcc(dF,ww,fw,gradient[0]); 00393 if (norm_penalization>0) 00394 for (int i=0;i<n_dim;i++) 00395 { 00396 Vec df_i = dF(i); // n-vector 00397 multiplyAcc(df_i, F(i), gradient[0]*norm_penalization*2*norm_err[i]); 00398 } 00399 } 00400 else if (ordered_vectors) 00401 { 00402 for (int j=0;j<T;j++) 00403 { 00404 fw.clear(); 00405 Vec wj = w(j); 00406 Vec fw_minus_tj = fw_minus_t(j); // n-vector 00407 Vec tj = TT(j); 00408 for (int k=0;k<n_dim;k++) 00409 { 00410 Vec f_k = F(k); // n-vector 00411 Vec df_k = dF(k); // n-vector 00412 multiplyAcc(fw,f_k,wj[k]); 00413 substract(fw,tj,fw_minus_tj); 00414 if (normalize_by_neighbor_distance) 00415 multiplyAcc(df_k,fw_minus_tj,gradient[0] * wj[k] * 2 * one_over_norm_T[j]/real(T)); 00416 else 00417 multiplyAcc(df_k,fw_minus_tj,gradient[0] * wj[k] * 2/real(T)); 00418 } 00419 } 00420 } 00421 else 00422 { 00423 for (int j=0;j<T;j++) 00424 { 00425 Vec fw_minus_tj = fw_minus_t(j); // n-vector 00426 Vec wj = w(j); 00427 for (int i=0;i<n_dim;i++) 00428 { 00429 Vec df_i = dF(i); // n-vector 00430 if (normalize_by_neighbor_distance) 00431 multiplyAcc(df_i, fw_minus_tj, gradient[0] * wj[i]*2*one_over_norm_T[j]/real(T)); 00432 else 00433 multiplyAcc(df_i, fw_minus_tj, gradient[0] * wj[i]*2/real(T)); 00434 if (norm_penalization>0) 00435 multiplyAcc(df_i, F(i), gradient[0]*norm_penalization*2*norm_err[i]/real(T)); 00436 } 00437 } 00438 } 00439 } 00440 00441 00442 void ProjectionErrorVariable::symbolicBprop() 00443 { 00444 PLERROR("Not implemented"); 00445 } 00446 00447 } // end of namespace PLearn 00448 00449