// -*- C++ -*-

// IsomapTangentLearner.cc
//
// Copyright (C) 2004 Martin Monperrus
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: IsomapTangentLearner.cc,v 1.5 2004/08/02 16:12:22 monperrm Exp $
 ******************************************************* */

// Authors: Martin Monperrus

#include "Isomap.h"
#include "IsomapTangentLearner.h"
#include <plearn/ker/GeodesicDistanceKernel.h>
#include <plearn/ker/AdditiveNormalizationKernel.h>


namespace PLearn {
using namespace std;

IsomapTangentLearner::IsomapTangentLearner()
    : n_comp(2), knn(10)
    /* ### Initialize all fields to their default value here */
{
    // ### You may or may not want to call build_() to finish building the object
    // build_();
}

PLEARN_IMPLEMENT_OBJECT(
    IsomapTangentLearner,
    "Tangent learning based on the Isomap kernel",
    "Learns the tangent plane of the manifold modelled by Isomap: at a given\n"
    "input point, each of the n_comp output vectors is the derivative, with\n"
    "respect to the input, of one component of the Nystrom out-of-sample\n"
    "extension of the Isomap embedding.\n");
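// A minimal usage sketch (hypothetical, not part of the original file): the
// option names are the ones declared in declareOptions() below, the method
// calls are the generic PLearner interface, and 'trainset' / 'input' are
// assumed to be an existing VMat and Vec.
//
//   PP<IsomapTangentLearner> learner = new IsomapTangentLearner();
//   learner->knn = 10;                    // neighborhood size
//   learner->n_comp = 2;                  // number of tangent vectors
//   learner->setTrainingSet(trainset);
//   learner->build();
//   learner->train();
//   Vec output(learner->outputsize());
//   learner->computeOutput(input, output);
//   // 'output' holds n_comp tangent vectors of length inputsize(),
//   // concatenated row by row.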
void IsomapTangentLearner::declareOptions(OptionList& ol)
{
    // ### Declare all of this object's options here.
    // ### For the "flags" of each option, you should typically specify one of
    // ### OptionBase::buildoption, OptionBase::learntoption or
    // ### OptionBase::tuningoption. Another possible flag to combine with them
    // ### is OptionBase::nosave.

    declareOption(ol, "knn", &IsomapTangentLearner::knn, OptionBase::buildoption,
                  "Number of nearest neighbors taken into account");
    declareOption(ol, "n_comp", &IsomapTangentLearner::n_comp, OptionBase::buildoption,
                  "Number of components");
    declareOption(ol, "iso_learner", &IsomapTangentLearner::iso_learner, OptionBase::learntoption,
                  "The Isomap learner");

    // Now call the parent class' declareOptions
    inherited::declareOptions(ol);
}

void IsomapTangentLearner::build_()
{
    // ### This method should do the real building of the object, according to
    // ### the set options, in *any* situation: initial building from a few
    // ### user-specified options, building of a "reloaded" object from the
    // ### complete set of all serialised options, or re-building after a few
    // ### "tuning" options have been modified. You should assume that the
    // ### parent class' build_() has already been called.

    iso_learner.knn = knn;
    iso_learner.n_comp = n_comp;
    if (train_set)
        iso_learner.setTrainingSet(train_set);
    iso_learner.build();

    // Maybe a VecStatsCollector is needed:
    // PP<VecStatsCollector> train_stats = new VecStatsCollector();
    // learner->setTrainStatsCollector(train_stats);
    // learner->setTrainingSet(trainset);
    // learner->train();
}

// ### Nothing to add here, simply calls build_
void IsomapTangentLearner::build()
{
    inherited::build();
    build_();
}


void IsomapTangentLearner::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);

    // ### Call deepCopyField on all "pointer-like" fields that you wish to be
    // ### deep-copied rather than shallow-copied, e.g.:
    // deepCopyField(trainvec, copies);

    // ### Remove this line when you have fully implemented this method.
    PLERROR("IsomapTangentLearner::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
}


int IsomapTangentLearner::outputsize() const
{
    // The output is the concatenation of n_comp tangent vectors, each of
    // dimension inputsize().
    return inputsize() * n_comp;
}

void IsomapTangentLearner::forget()
{
}

void IsomapTangentLearner::train()
{
    // The role of the train method is to bring the learner up to
    // stage == nstages, updating train_stats with training costs measured
    // on-line in the process. Training is entirely delegated to the inner
    // Isomap learner.
    iso_learner.train();
}


void IsomapTangentLearner::computeOutput(const Vec& input, Vec& output) const
{
    // Compute the output (the n_comp tangent vectors at 'input') from the input.
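    // A sketch of what follows: for each training example x_i, we compute the
    // derivative with respect to x of the additively normalized geodesic
    // kernel used by Isomap (terms that do not depend on x are gathered in a
    // constant c_i):
    //
    //   K(x, x_i) = -1/2 * ( D^2(x, x_i) - (1/n) * sum_j D^2(x, x_j) + c_i )
    //
    // The geodesic distance to x_j is approximated by
    //   D(x, x_j) = ||x - x_N(j)|| + geo(x_N(j), x_j),
    // where x_N(j), the "nearest geodesic neighbour", minimizes this sum among
    // the knn nearest neighbors of x. Hence
    //
    //   dD(x, x_j)/dx = (x - x_N(j)) / ||x - x_N(j)||
    //   dK(x, x_i)/dx = (1/n) * sum_j D(x, x_j) * dD(x, x_j)/dx    <- term1
    //                   - D(x, x_i) * dD(x, x_i)/dx                <- term2
    //
    // These derivatives are stored row by row in diK_dx below.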
    // Retrieve the GeodesicDistanceKernel underlying the Isomap kernel.
    PP<AdditiveNormalizationKernel> ank = dynamic_cast<AdditiveNormalizationKernel*>((Kernel*)iso_learner.kernel);
    PP<GeodesicDistanceKernel> gdk = dynamic_cast<GeodesicDistanceKernel*>((Kernel*)ank->source_kernel);

    output.resize(outputsize());

    Mat k_xi_x;
    // Compute knn+1 nearest neighbors, since the first row may be removed just below.
    gdk->distance_kernel->computeNearestNeighbors(input, k_xi_x, knn + 1);

    Mat k_xi_x_sorted;

    // We assume that the training set contains each example only once. Here we
    // handle the case of computing the tangent plane at a point of the
    // training set: the point itself (distance ~ 0) is dropped.
    if (k_xi_x(0,0) < 1e-9)
        k_xi_x_sorted = k_xi_x.subMatRows(1, k_xi_x.length() - 1);
    else
        k_xi_x_sorted = k_xi_x;

    Mat result(n_comp, inputsize());

    Vec dkdx(inputsize());   // dk/dx
    Vec temp(inputsize());
    Vec term2(inputsize());
    Vec term1(inputsize());

    // Vec tangentVector(inputsize());  // = sum_i v_ik * dk(i)/dx

    int ngn;
    VMat trainset = ank->specify_dataset;
    int n_examples = trainset->length();
    Mat diK_dx(n_examples, inputsize());

    int i, j, nc;
    real D;

    term1 << 0;
    // real threshold = 1e-9;
    for (j = 0; j < n_examples; ++j)
    {
        // ngn minimizes the geodesic distance between input and example j.
        ngn = gdk->computeNearestGeodesicNeighbour(j, k_xi_x_sorted);
        trainset->getRow(ngn, temp);
        temp << (input - temp);
        D = norm(temp) + gdk->geo_distances->get(j, ngn);
        // (problem solved: gdk->distance_kernel->compute... had to be called above)
        // if (norm(temp) > threshold)
        term1 += D * temp / norm(temp);
    }
    term1 /= n_examples;

    for (i = 0; i < n_examples; ++i)
    {
        // Get the nearest geodesic neighbor: ngn minimizes the geodesic
        // distance between input and example i.
        ngn = gdk->computeNearestGeodesicNeighbour(i, k_xi_x_sorted);
        trainset->getRow(ngn, temp);
        temp << (input - temp);  // temp = x - xN

        term2 << 0;
        D = norm(temp) + gdk->geo_distances->get(i, ngn);
        // if (norm(temp) > threshold)
        term2 = D * temp / norm(temp);
        // else
        //     term2.fill(0);

        diK_dx(i) << (term1 - term2);  // exactly the formula from the NIPS paper
    }
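    // The Nystrom out-of-sample extension of the nc-th embedding component is
    //   e_nc(x) = (1/lambda_nc) * sum_i v_(nc,i) * K(x, x_i)
    // (up to a constant factor), so the nc-th tangent vector at x is
    //   d e_nc(x) / dx = (1/lambda_nc) * sum_i v_(nc,i) * dK(x, x_i)/dx.
    // The loop below computes this sum, dropping the 1/lambda_nc factor since
    // only the direction of the tangent vector matters here.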
    for (nc = 0; nc < n_comp; ++nc)
    {
        // Compute the corresponding tangent vector with the Nystrom formula:
        // d e_nc / dx = sum_i v_(nc,i) * dK(x, x_i)/dx

        temp << 0;  // initialization
        for (i = 0; i < n_examples; ++i)
        {
            temp += iso_learner.eigenvectors(nc, i) * diK_dx(i);
        }
        // No normalization by the eigenvalue: it is the direction of the
        // vector that matters, not its norm (besides, everything is
        // normalized to 1 in Matlab to avoid numerical errors).
        // result(nc) << (temp / iso_learner.eigenvalues[nc]);
        result(nc) << temp;
    }

    // toVec: should be mentioned in the help
    output << result.toVec();
}



void IsomapTangentLearner::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                                   const Vec& target, Vec& costs) const
{
    // Compute the costs from the *already* computed output. No cost is
    // computed by this learner.
}

TVec<string> IsomapTangentLearner::getTestCostNames() const
{
    // Return the names of the costs computed by computeCostsFromOutputs
    // (these may or may not be exactly the same as what's returned by
    // getTrainCostNames). No test cost is computed here.
    return TVec<string>();
}

TVec<string> IsomapTangentLearner::getTrainCostNames() const
{
    // Return the names of the objective costs that the train method computes
    // and for which it updates the VecStatsCollector train_stats (these may
    // or may not be exactly the same as what's returned by getTestCostNames).
    // No train cost is computed here.
    return TVec<string>();
}


} // end of namespace PLearn
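// For reference, a hypothetical PLearn script specification of this learner
// (option names match declareOptions() above; the values shown are the
// constructor defaults):
//
//   IsomapTangentLearner(
//       knn = 10;      // number of nearest neighbors
//       n_comp = 2;    // number of components
//   )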