Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

NllSemisphericalGaussianVariable.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal 00006 // Copyright (C) 2001-2002 Nicolas Chapados, Ichiro Takeuchi, Jean-Sebastien Senecal 00007 // Copyright (C) 2002 Xiangdong Wang, Christian Dorion 00008 00009 // Redistribution and use in source and binary forms, with or without 00010 // modification, are permitted provided that the following conditions are met: 00011 // 00012 // 1. Redistributions of source code must retain the above copyright 00013 // notice, this list of conditions and the following disclaimer. 00014 // 00015 // 2. Redistributions in binary form must reproduce the above copyright 00016 // notice, this list of conditions and the following disclaimer in the 00017 // documentation and/or other materials provided with the distribution. 00018 // 00019 // 3. The name of the authors may not be used to endorse or promote 00020 // products derived from this software without specific prior written 00021 // permission. 00022 // 00023 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00024 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00025 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00026 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00028 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00029 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00030 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00031 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00032 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 // 00034 // This file is part of the PLearn library. For more information on the PLearn 00035 // library, go to the PLearn Web site at www.plearn.org 00036 00037 00038 /* ******************************************************* 00039 * $Id: NllSemisphericalGaussianVariable.cc,v 1.2 2004/08/17 15:37:05 larocheh Exp $ 00040 * This file is part of the PLearn library. 00041 ******************************************************* */ 00042 00043 #include <plearn/var/NllSemisphericalGaussianVariable.h> 00044 #include <plearn/var/Var_operators.h> 00045 #include <plearn/math/plapack.h> 00046 //#include "Var_utils.h" 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 00054 PLEARN_IMPLEMENT_OBJECT(NllSemisphericalGaussianVariable, 00055 "Computes the negative log-likelihood of a Gaussian on some data point, depending on the nearest neighbors.", 00056 " This class implements the negative log-likelihood cost of a Markov chain that\n" 00057 " uses semispherical gaussian transition probabilities. The parameters of the\n" 00058 " semispherical gaussians are a tangent plane, two variances,\n" 00059 " one mean and the distance of the point with its nearest neighbors.\n" 00060 " The two variances correspond to the shared variance of every manifold directions\n" 00061 " and of every noise directions. \n" 00062 " This variable is used to do gradient descent on the parameters, but\n" 00063 " not to estimate de likelihood of the Markov chain a some point, which is\n" 00064 " more complex to estimate.\n"); 00065 00066 NllSemisphericalGaussianVariable::NllSemisphericalGaussianVariable(const VarArray& the_varray, bool that_use_noise, real theepsilon) : inherited(the_varray,the_varray[4]->length(),1), 00067 n(varray[0]->width()), use_noise(that_use_noise),epsilon(theepsilon), n_dim(varray[0]->length()), 00068 n_neighbors(varray[4]->length()) 00069 { 00070 build_(); 00071 } 00072 00073 00074 void 00075 NllSemisphericalGaussianVariable::build() 00076 { 00077 inherited::build(); 00078 build_(); 00079 } 00080 00081 void 00082 NllSemisphericalGaussianVariable::build_() 00083 { 00084 00085 // The VarArray constaints the following variables: 00086 // - varray[0] = the tangent plane (n_dim x n) 00087 // - varray[1] = mu(data_point) (n x 1) 00088 // - varray[2] = sigma_manifold (1 x 1) 00089 // - varray[3] = sigma_noise (1 x 1) 00090 // - varray[4] = neighbor_distances (n_neighbors x n) 00091 // - varray[5] = p_target (1 x 1) 00092 // - varray[6] = p_neighbors (n_neighbors x 1) 00093 // - varray[7] = noisy x (n x 1) 00094 00095 if(varray.length() != 9) 00096 PLERROR("In NllSemisphericalGaussianVariable constructor: varray is of length %d but should be of length %d", varray.length(), 7); 00097 00098 if(varray[1]->length() != n || varray[1]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[1] is of size (%d,%d), but should be of size (%d,%d)", 00099 varray[1]->length(), varray[1]->width(), 00100 n_dim, 1); 00101 if(varray[2]->length() != 1 || varray[2]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[2] is of size (%d,%d), but should be of size (%d,%d)", 00102 varray[2]->length(), varray[2]->width(), 00103 1, 1); 00104 if(varray[3]->length() != 1 || varray[3]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[3] is of size (%d,%d), but should be of size (%d,%d)", 00105 varray[3]->length(), varray[3]->width(), 00106 1, 1); 00107 if(varray[4]->width() != n) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[4] is of size (%d,%d), but should be of size (%d,%d)", 00108 varray[4]->length(), varray[4]->width(), 00109 n_neighbors, n); 00110 if(varray[5]->length() != 1 || varray[5]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[5] is of size (%d,%d), but should be of size (%d,%d)", 00111 varray[5]->length(), varray[5]->width(), 00112 1, 1); 00113 if(varray[6]->length() != n_neighbors || varray[6]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[6] is of size (%d,%d), but should be of size (%d,%d)", 00114 varray[6]->length(), varray[6]->width(), n_neighbors, 1); 00115 if(varray[7]->length() != n || varray[7]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[7] is of size (%d,%d), but should be of size (%d,%d)", 00116 varray[7]->length(), varray[7]->width(), n, 1); 00117 if(varray[8]->length() != n || varray[8]->width() != 1) PLERROR("In NllSemisphericalGaussianVariable constructor: varray[8] is of size (%d,%d), but should be of size (%d,%d)", 00118 varray[8]->length(), varray[8]->width(), n, 1); 00119 00120 00121 F = varray[0]->matValue; 00122 mu = varray[1]->value; 00123 sm = varray[2]->value; 00124 sn = varray[3]->value; 00125 diff_y_x = varray[4]->matValue; 00126 00127 z.resize(n_neighbors,n); 00128 zm.resize(n_neighbors,n); 00129 zn.resize(n_neighbors,n); 00130 z_noisy.resize(n_neighbors,n); 00131 zm_noisy.resize(n_neighbors,n); 00132 zn_noisy.resize(n_neighbors,n); 00133 B.resize(n_dim,n); 00134 Ut.resize(n,n); 00135 V.resize(n_dim,n_dim); 00136 w.resize(n_neighbors,n_dim); 00137 00138 p_target = varray[5]->value; 00139 p_neighbors = varray[6]->value; 00140 noise = varray[7]->value; 00141 mu_noisy = varray[8]->value; 00142 } 00143 00144 00145 void NllSemisphericalGaussianVariable::recomputeSize(int& len, int& wid) const 00146 { 00147 len = varray[4]->length(); 00148 wid = 1; 00149 } 00150 00151 void NllSemisphericalGaussianVariable::fprop() 00152 { 00153 // Let F the tangent plan matrix with rows f_i. 00154 // We need to solve the system 00155 // F F' w_j = F z_j 00156 // where z_j is the distance between the data point and the j_th neighbor, 00157 // to find the solution w_j of 00158 // min_{w_j} || z_j - sum_i w_{ji} f_i ||^2 00159 // for each j. Then sum over j the above square errors. 00160 // Let F' = U D V' the SVD of F'. Then 00161 // w_j = (F F')^{-1} F t_j = (V D U' U D V')^{-1} F t_j = V D^{-2} V' V D U' z_j 00162 // = V D^{-1} U' z_j 00163 // = B z_j 00164 // 00165 00166 // Compute w 00167 00168 static Mat F_copy; 00169 F_copy.resize(F.length(),F.width()); 00170 F_copy << F; 00171 // N.B. this is the SVD of F' 00172 lapackSVD(F_copy, Ut, S, V); 00173 B.clear(); 00174 for (int k=0;k<S.length();k++) 00175 { 00176 real s_k = S[k]; 00177 if (s_k>epsilon) // ignore the components that have too small singular value (more robust solution) 00178 { 00179 real coef = 1/s_k; 00180 for (int i=0;i<n_dim;i++) 00181 { 00182 real* Bi = B[i]; 00183 for (int j=0;j<n;j++) 00184 Bi[j] += V(i,k)*Ut(k,j)*coef; 00185 } 00186 } 00187 } 00188 00189 // now that we have B, we can compute the w's and the nll for every neighbors 00190 /* 00191 Vec mean_diff(n); mean_diff.clear(); 00192 for(int j=0; j<n_neighbors;j++) 00193 { 00194 mean_diff += diff_y_x(j); 00195 } 00196 00197 mean_diff /= n_neighbors; 00198 */ 00199 for(int j=0; j<n_neighbors;j++) 00200 { 00201 Vec zj = z(j); 00202 //substract(diff_y_x(j),mean_diff,zj); // z = y - x - mean_diff 00203 substract(diff_y_x(j),mu,zj); // z = y - x - mu(x) 00204 Vec zmj = zm(j); 00205 Vec znj = zn(j); 00206 Vec wj = w(j); 00207 product(wj, B, zj); // w = B * z = projection weights for neighbor j 00208 transposeProduct(zmj, F, wj); // F' w = z_m 00209 substract(zj,zmj,znj); // z_n = z - zm 00210 value[j] = 0.5*(pownorm(zmj,2)/sm[0] + pownorm(znj,2)/sn[0] + n_dim*log(sm[0]) + (n-n_dim)*log(sn[0])) + n/2.0 * Log2Pi; // This value is not really -log(p(y)) 00211 } 00212 00213 // and we can make the noisy zm and zn 00214 00215 for(int j=0; j<n_neighbors;j++) 00216 { 00217 Vec zj_noisy = z_noisy(j); 00218 Vec diff_noisy(n); 00219 substract(diff_y_x(j),noise,diff_noisy); 00220 substract(diff_noisy,mu_noisy,zj_noisy); // z = y - x - mu(x) 00221 Vec zmj_noisy = zm_noisy(j); 00222 Vec znj_noisy = zn_noisy(j); 00223 Vec wj_noisy(n_dim); 00224 product(wj_noisy, B, zj_noisy); // w = B * z = projection weights for neighbor j 00225 transposeProduct(zmj_noisy, F, wj_noisy); // F' w = z_m 00226 substract(zj_noisy,zmj_noisy,znj_noisy); // z_n = z - zm 00227 } 00228 00229 00230 } 00231 00232 00233 void NllSemisphericalGaussianVariable::bprop() 00234 { 00235 00236 00237 for(int neighbor=0; neighbor<n_neighbors; neighbor++) 00238 { 00239 00240 // dNLL/dF 00241 00242 for(int i=0; i<F.length(); i++) 00243 for(int j=0; j<F.width(); j++) 00244 varray[0]->matGradient(i,j) += gradient[neighbor]*exp(-1.0*value[neighbor])*p_target[0]/p_neighbors[neighbor] * (1/sm[0] - 1/sn[0]) * w(neighbor,i) * zn(neighbor,j); 00245 00246 // dNLL/dmu 00247 if(!use_noise) 00248 { 00249 for(int i=0; i<mu.length(); i++) 00250 varray[1]->gradient[i] -= gradient[neighbor]*exp(-1.0*value[neighbor])*p_target[0]/p_neighbors[neighbor] * ( 1/sm[0] * zm(neighbor,i) + 1/sn[0] * zn(neighbor,i)); 00251 } 00252 else 00253 { 00254 // dNLL/dmu with noisy data 00255 00256 for(int i=0; i<mu_noisy.length(); i++) 00257 varray[8]->gradient[i] -= gradient[neighbor]*exp(-1.0*value[neighbor])*p_target[0]/p_neighbors[neighbor] * ( 1/sm[0] * zm_noisy(neighbor,i) + 1/sn[0] * zn_noisy(neighbor,i)); 00258 } 00259 // dNLL/dsm 00260 00261 varray[2]->gradient[0] += gradient[neighbor]*exp(-1.0*value[neighbor])*p_target[0]/p_neighbors[neighbor] * (0.5 * n_dim/sm[0] - pownorm(zm(neighbor),2)/(sm[0]*sm[0])); 00262 00263 // dNLL/dsn 00264 00265 varray[3]->gradient[0] += gradient[neighbor]*exp(-1.0*value[neighbor])*p_target[0]/p_neighbors[neighbor] * (0.5 * (n-n_dim)/sn[0] - pownorm(zn(neighbor),2)/(sn[0]*sn[0])); 00266 } 00267 00268 } 00269 00270 00271 void NllSemisphericalGaussianVariable::symbolicBprop() 00272 { 00273 PLERROR("Not implemented"); 00274 } 00275 00276 } // end of namespace PLearn 00277 00278

Generated on Tue Aug 17 15:59:53 2004 for PLearn by doxygen 1.3.7