// -*- C++ -*-

// distr_maths.cc
// Copyright (C) 2002 Pascal Vincent
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library.
For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 00035 00036 00037 /* ******************************************************* 00038 * $Id: distr_maths.cc,v 1.5 2004/02/29 02:58:05 nova77 Exp $ 00039 * This file is part of the PLearn library. 00040 ******************************************************* */ 00041 00042 00046 #include "distr_maths.h" 00047 #include "TMat_maths.h" 00048 00049 namespace PLearn { 00050 using namespace std; 00051 00052 00053 00054 /* 00055 // ***************************** 00056 // * Using eigen decomposition * 00057 // ***************************** 00058 00059 X the input matrix 00060 Let C be the covariance of X: C = (X-mu)'.(X-mu) 00061 00062 The eigendecomposition: 00063 C = VDV' where the *columns* of V are the orthonormal eigenvectors and D is a diagonal matrix with the eigenvalues lambda_1 ... lambda_d 00064 00065 det(C) = det(D) = product of the eigenvalues 00066 log(det(C)) = \sum_{i=1}^d [ log(lambda_i) ] 00067 00068 inv(C) = V.inv(D).V' (where inv(D) is a diagonal with the inverse of each eigenvalue) 00069 00070 For a gaussian where C is the covariance matrix, mu is the mean (column vector), and x is a column vector, we have 00071 gaussian(x; mu,C) = 1/sqrt((2PI)^d * det(C)) exp( -0.5 (x-mu)'.inv(C).(x-mu) ) 00072 = 1/sqrt((2PI)^d * det(D)) exp( -0.5 (V'(x-mu))'.inv(D).(V'(x-mu)) ) 00073 = exp [ -0.5( d*log(2PI) + log(det(D)) ) -0.5(V'(x-mu))'.inv(D).(V'(x-mu)) ] 00074 \_______________ ____________/ \____________ ____________/ 00075 \/ \/ 00076 logcoef q 00077 00078 The expression q = (V'(x-mu))'.inv(D).(V'(x-mu)) can be understood as: 00079 a) projecting vector x-mu on the orthonormal basis V, 00080 i.e. 
obtaining a transformed x that we shall call y: y = V'(x-mu) 00081 (y corresponds to x, expressed in the coordinate system V) 00082 y_i = V'_i.(x-mu) 00083 00084 b) computing the squared norm of y , after first rescaling each coordinate by a factor 1/sqrt(lambda_i) 00085 (i.e. differences in the directions with large lambda_i are given less importance) 00086 Giving q = sum_i[ 1/lambda_i y_i^2] 00087 00088 If we only keep the first k eigenvalues, and replace the following d-k ones by the same value gamma 00089 i.e. lambda_k+1 = ... = lambda_d = gamma 00090 00091 Then q can be expressed as: 00092 q = \sum_{i=1}^k [ 1/lambda_i y_i^2 ] + 1/gamma \sum_{i=k+1}^d [ y_i^2 ] 00093 00094 But, as y is just x expressed in another orthonormal basis, we have |y|^2 = |x-mu|^2 00095 ( proof: |y|^2 = |V'(x-mu)|^2 = (V'(x-mu))'.(V'(x-mu)) = (x-mu)'.V.V'.(x-mu) = (x-mu)'(x-mu) = |x-mu|^2 ) 00096 00097 Thus, we know \sum_{i=1}^d [ y_i^2 ] = |x-mu|^2 00098 Thus \sum_{i=k+1}^d [ y_i^2 ] = |x-mu|^2 - \sum_{i=1}^k [ y_i^2 ] 00099 00100 Consequently: 00101 q = \sum_{i=1}^k [ 1/lambda_i y_i^2 ] + 1/gamma ( |x-mu|^2 - \sum_{i=1}^k [ y_i^2 ] ) 00102 00103 q = \sum_{i=1}^k [ (1/lambda_i - 1/gamma) y_i^2 ] + 1/gamma |x-mu|^2 00104 00105 q = \sum_{i=1}^k [ (1/lambda_i - 1/gamma) (V'_i.(x-mu))^2 ] + 1/gamma |x-mu|^2 00106 00107 This gives the efficient algorithm implemented below 00108 00109 -------------------------------------------------------------------------------------- 00110 00111 Other possibility: direct computation: 00112 00113 Let's note X~ = X-mu 00114 00115 We have already seen that 00116 For a gaussian where C is the covariance matrix, mu is the mean (column vector), and x is a column vector, we have 00117 gaussian(x; mu,C) = 1/sqrt((2PI)^d * det(C)) exp( -0.5 (x-mu)'.inv(C).(x-mu) ) 00118 = 1/sqrt((2PI)^d * det(D)) exp( -0.5 (V'(x-mu))'.inv(D).(V'(x-mu)) ) 00119 = exp [ -0.5( d*log(2PI) + log(det(D)) ) -0.5 (x-mu)'.inv(C).(x-mu) ] 00120 \_______________ ____________/ \_________ 
________/ 00121 \/ \/ 00122 logcoef q 00123 Let z = inv(C).(x-mu) 00124 ==> z is the solution of C.z = x-mu 00125 And then we have q = (x-mu)'.z 00126 00127 So computing q is simply a matter of solving this linear equation in z, 00128 and then computing q. 00129 00130 From my old paper we had to solve for alpha in the old notation: 00131 " (V'V + lambda.I) alpha = V' (x-N~) " 00132 Which in our current notation corresponds to: 00133 (C + lambda.I) alpha = X~' x~ 00134 If we drop the + lambda.I for now: 00135 alpha = inv(C) X~' x~ 00136 and the "hyperplane distance" is then given by 00137 hd = sqnorm(x~ - X~.alpha) 00138 = sqnorm(x~ - X~.inv(C).X~'.x~) 00139 = sqnorm(x~ - X~.inv(X~'X) 00140 00141 00142 */ 00143 00144 00145 00148 00167 real logOfCompactGaussian(const Vec& x, const Vec& mu, 00168 const Vec& eigenvalues, const Mat& eigenvectors, real gamma, bool add_gamma_to_eigenval) 00169 { 00170 // cerr << "logOfCompact: mu = " << mu << endl; 00171 00172 int i; 00173 int d = x.length(); 00174 static Vec x_minus_mu; 00175 x_minus_mu.resize(d); 00176 real* px = x.data(); 00177 real* pmu = mu.data(); 00178 real* pxmu = x_minus_mu.data(); 00179 00180 real sqnorm_xmu = 0; 00181 for(i=0; i<d; i++) 00182 { 00183 real val = *px++ - *pmu++; 00184 sqnorm_xmu += val*val; 00185 *pxmu++ = val; 00186 } 00187 00188 double log_det = 0.; 00189 double inv_gamma = 1./gamma; 00190 int kk = eigenvalues.length(); 00191 double q = inv_gamma * sqnorm_xmu; 00192 for(i=0; i<kk; i++) 00193 { 00194 double lambda_i = eigenvalues[i]; 00195 if(add_gamma_to_eigenval) 00196 lambda_i += gamma; 00197 if(lambda_i<=0) // we've reached a 0 eigenvalue, stop computation here. 
00198 break; 00199 log_det += log(lambda_i); 00200 q += (1./lambda_i - inv_gamma)*square(dot(eigenvectors(i),x_minus_mu)); 00201 } 00202 if(kk<d) 00203 log_det += log(gamma)*(d-i); 00204 00205 double logp = -0.5*( d*log(2*M_PI) + log_det + q); 00206 // cerr << "logOfCompactGaussian q=" << q << " log_det=" << log_det << " logp=" << logp << endl; 00207 // exit(0); 00208 return real(logp); 00209 } 00210 00211 real logOfNormal(const Vec& x, const Vec& mu, const Mat& C) 00212 { 00213 int n = x.length(); 00214 static Vec x_mu; 00215 static Vec z; 00216 static Vec y; 00217 y.resize(n); 00218 z.resize(n); 00219 x_mu.resize(n); 00220 substract(x,mu,x_mu); 00221 00222 static Mat L; 00223 // Perform Cholesky decomposition 00224 choleskyDecomposition(C, L); 00225 00226 // get the log of the determinant: 00227 // det(C) = square(product_i L_ii) 00228 double logdet = 0; 00229 for(int i=0; i<n; i++) 00230 logdet += log(L(i,i)); 00231 logdet += logdet; 00232 00233 // Finally find z, such that C.z = x-mu 00234 choleskySolve(L, x_mu, z, y); 00235 00236 double q = dot(x_mu, z); 00237 double logp = -0.5*( n*log(2*M_PI) + logdet + q); 00238 // cerr << "logOfNormal q=" << q << " logdet=" << logdet << " logp=" << logp << endl; 00239 return real(logp); 00240 } 00241 00242 real logPFittedGaussian(const Vec& x, const Mat& X, real lambda) 00243 { 00244 static Mat C; 00245 static Vec mu; 00246 computeMeanAndCovar(X, mu, C); 00247 addToDiagonal(C, lambda); 00248 return logOfNormal(x, mu, C); 00249 } 00250 00251 00252 00253 } // end of namespace PLearn 00254