
KNNVMatrix.cc

// -*- C++ -*-

// KNNVMatrix.cc
//
// Copyright (C) 2004 Olivier Delalleau
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: KNNVMatrix.cc,v 1.14 2004/07/21 20:32:40 tihocan Exp $
 ******************************************************* */

// Authors: Olivier Delalleau

#include <plearn/ker/DistanceKernel.h>
#include "KNNVMatrix.h"
#include "SelectRowsVMatrix.h"
#include "SubVMatrix.h"

namespace PLearn {
using namespace std;

////////////////
// KNNVMatrix //
////////////////
KNNVMatrix::KNNVMatrix()
    : knn(6),
      report_progress(1)
{}

PLEARN_IMPLEMENT_OBJECT(KNNVMatrix,
    "A VMatrix that sees the nearest neighbours of each sample in the source VMat.",
    "Each sample is followed by its (knn-1) nearest neighbours.\n"
    "To each row is appended an additional target, which is:\n"
    " - 1 if it is the first of a bag of neighbours,\n"
    " - 2 if it is the last of a bag,\n"
    " - 0 if it is none of these,\n"
    " - 3 if it is both (only for knn == 1).\n"
    "In addition, if a kernel_pij kernel is provided, p_ij is appended to the\n"
    "input part of the VMatrix, where\n"
    " p_ij = K(x_i,x_j) / \\sum_{k \\in knn(i), k != i} K(x_i,x_k)\n"
    "where K = kernel_pij, and j != i (for j == i, p_ij = -1).");

////////////////////
// declareOptions //
////////////////////
void KNNVMatrix::declareOptions(OptionList& ol)
{
    declareOption(ol, "k_nn_mat", &KNNVMatrix::k_nn_mat, OptionBase::buildoption,
        "TODO comment");

    declareOption(ol, "knn", &KNNVMatrix::knn, OptionBase::buildoption,
        "The number of nearest neighbours to consider (including the point itself).");

    declareOption(ol, "kernel_pij", &KNNVMatrix::kernel_pij, OptionBase::buildoption,
        "An optional kernel used to compute the pij weights (see help).");

    declareOption(ol, "report_progress", &KNNVMatrix::report_progress, OptionBase::buildoption,
        "TODO comment");

    // Kinda useless to declare it as an option if we recompute it in build().
    // TODO See how to be more efficient.
    // declareOption(ol, "nn", &KNNVMatrix::nn, OptionBase::learntoption,
    //     "The matrix containing the index of the knn nearest neighbours of\n"
    //     "each data point.");

    // Now call the parent class' declareOptions.
    inherited::declareOptions(ol);
}

///////////
// build //
///////////
void KNNVMatrix::build()
{
    inherited::build();
    build_();
}

////////////
// build_ //
////////////
void KNNVMatrix::build_() {
    if (source) {
        int n = source->length();
        bool recompute_nn = true;
        if (k_nn_mat) {
            if (k_nn_mat->length() > 0) {
                // We are given precomputed k nearest neighbours: good news.
                if (k_nn_mat->length() == source->length()) {
                    if (k_nn_mat->width() < knn) {
                        PLWARNING("In KNNVMatrix::build_ - Not enough neighbours in the given k_nn_mat, will recompute nearest neighbours");
                    } else {
                        // Looks like this is the right thing.
                        recompute_nn = false;
                        nn.resize(n, knn);
                        for (int i = 0; i < n; i++) {
                            k_nn_mat->getSubRow(i, 0, nn(i));
                        }
                    }
                } else {
                    // Lengths differ: maybe the source VMat is a subset of the matrix
                    // whose nearest neighbours have been computed.
                    // Let's try a SelectRowsVMatrix.
                    PP<SelectRowsVMatrix> smat = dynamic_cast<SelectRowsVMatrix*>((VMatrix*) source);
                    if (!smat.isNull() && smat->source->length() == k_nn_mat->length()) {
                        // Bingo!
                        // Safety warning just in case it is not what we want.
                        PLWARNING("In KNNVMatrix::build_ - Will assume the given k_nn_mat has been computed on source's underlying VMat");
                        recompute_nn = false;
                        // Now we need to retrieve the nearest neighbours within the SelectRowsVMatrix.
                        nn.resize(n, knn);
                        Vec store_nn(k_nn_mat->width());
                        for (int i = 0; i < n; i++) {
                            nn(i,0) = i;  // The nearest neighbour is always the point itself.
                            k_nn_mat->getRow(smat->indices[i], store_nn);
                            int k = 1;
                            for (int j = 1; j < knn; j++) {
                                bool ok = false;
                                while (!ok && k < store_nn.length()) {
                                    int q = smat->indices.find(int(store_nn[k]));
                                    if (q >= 0) {
                                        // The k-th nearest neighbour in the underlying VMat is also in smat.
                                        ok = true;
                                        nn(i,j) = q;
                                    }
                                    k++;
                                }
                                if (!ok) {
                                    // We didn't find the j-th nearest neighbour.
                                    PLERROR("In KNNVMatrix::build_ - Not enough neighbours in the SelectRowsVMatrix");
                                }
                            }
                        }
                    } else {
                        // Maybe it's a SubVMatrix of the matrix whose nearest neighbours have been computed.
                        PP<SubVMatrix> smat = dynamic_cast<SubVMatrix*>((VMatrix*) source);
                        if (   !smat.isNull()
                            && smat->parent->length() == k_nn_mat->length()
                            && smat->width() == smat->parent->width()) {
                            // Bingo!
                            // Safety warning just in case it is not what we want.
                            PLWARNING("In KNNVMatrix::build_ - Will assume the given k_nn_mat has been computed on source's parent VMat");
                            recompute_nn = false;
                            nn.resize(n, knn);
                            Vec store_nn(k_nn_mat->width());
                            for (int i = 0; i < n; i++) {
                                nn(i,0) = i;  // The nearest neighbour is always the point itself.
                                k_nn_mat->getRow(i + smat->istart, store_nn);
                                int k = 1;
                                for (int j = 1; j < knn; j++) {
                                    bool ok = false;
                                    while (!ok && k < store_nn.length()) {
                                        // Index relative to smat (istart already subtracted).
                                        int q = int(store_nn[k]) - smat->istart;
                                        if (q >= 0 && q < smat->length()) {
                                            // The k-th nearest neighbour in smat->parent is also in smat.
                                            ok = true;
                                            nn(i,j) = q;
                                        }
                                        k++;
                                    }
                                    if (!ok) {
                                        // We didn't find the j-th nearest neighbour.
                                        PLERROR("In KNNVMatrix::build_ - Not enough neighbours in the SubVMatrix");
                                    }
                                }
                            }
                        } else {
                            // We don't know how this source relates to k_nn_mat.
                            PLWARNING("In KNNVMatrix::build_ - Don't know what to do with k_nn_mat, will recompute the nearest neighbours");
                        }
                    }
                }
            }
        }

        if (recompute_nn) {
            // First make sure we can store the result if needed.
            if (k_nn_mat) {
                if (k_nn_mat->length() > 0) {
                    PLERROR("In KNNVMatrix::build_ - The given k_nn_mat already has data, free it first");
                }
            }
            // Compute the pairwise distances.
            DistanceKernel dk(2);
            if (report_progress) {
                dk.report_progress = true;
                dk.build();
            }
            dk.setDataForKernelMatrix(source);
            Mat distances(n,n);
            dk.computeGramMatrix(distances);
            // Deduce the nearest neighbours.
            nn = dk.computeNeighbourMatrixFromDistanceMatrix(distances);
            // Only keep the knn nearest ones.
            // TODO Free the memory used by the other neighbours.
            // TODO Make the matrix be a TMat<int> instead of a Mat.
            nn.resize(n, knn);
            // Store the result.
            if (k_nn_mat) {
                for (int i = 0; i < n; i++) {
                    k_nn_mat->appendRow(nn(i));
                }
            }
        }

        // Initialize correctly the various fields.
        targetsize_ = source->targetsize() + 1;
        length_ = n * knn;
        width_ = source->width() + 1;
        setMetaInfoFromSource();

        // Compute the p_ij if needed.
        if (kernel_pij) {
            // TODO Report progress if needed.
            inputsize_++;
            width_++;
            kernel_pij->setDataForKernelMatrix(source);
            int n = source->length();
            pij.resize(n, knn-1);
            for (int i = 0; i < n; i++) {
                real sum = 0;
                real k_ij;
                for (int j = 1; j < knn; j++) {
                    // We omit the first nearest neighbour, which is the point itself.
                    k_ij = kernel_pij->evaluate_i_j(i, int(nn(i,j)));
                    pij(i,j-1) = k_ij;
                    sum += k_ij;
                }
                pij.row(i) /= sum;
            }
        }
    }
}

/////////////////////////////////
// makeDeepCopyFromShallowCopy //
/////////////////////////////////
void KNNVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);

    // ### Call deepCopyField on all "pointer-like" fields
    // ### that you wish to be deepCopied rather than
    // ### shallow-copied.
    // ### ex:
    // deepCopyField(trainvec, copies);

    deepCopyField(source_row, copies);
    deepCopyField(nn, copies);
    deepCopyField(pij, copies);
    // Currently commented out because some of the VMats used for k_nn_mat
    // may not implement deep copy correctly.
    // TODO Put back when other VMats are fine.
    // deepCopyField(k_nn_mat, copies);
    deepCopyField(kernel_pij, copies);

    PLWARNING("In KNNVMatrix::makeDeepCopyFromShallowCopy - k_nn_mat will not be deep copied");
    // PLERROR("KNNVMatrix::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
}

//////////////////////
// getSourceIndexOf //
//////////////////////
int KNNVMatrix::getSourceIndexOf(int i, int& i_ref, int& i_n) const {
    i_ref = i / knn;
    i_n = i % knn;
    int i_neighbour_source = int(nn(i_ref, i_n));
    return i_neighbour_source;
}

///////////////
// getNewRow //
///////////////
void KNNVMatrix::getNewRow(int i, const Vec& v) const {
    source_row.resize(source->width());
    int i_n;
    int i_ref;
    int real_i = getSourceIndexOf(i, i_ref, i_n);
    source->getRow(real_i, source_row);
    if (kernel_pij) {
        v.subVec(0, source->inputsize()) << source_row.subVec(0, source->inputsize());
        if (i_n > 0) {
            v[source->inputsize()] = pij(i_ref, i_n - 1);
        } else {
            v[source->inputsize()] = -1;
        }
    } else {
        v.subVec(0, source->inputsize() + source->targetsize())
            << source_row.subVec(0, source->inputsize() + source->targetsize());
    }
    v.subVec(inputsize(), source->targetsize())
        << source_row.subVec(source->inputsize(), source->targetsize());
    v[inputsize() + source->targetsize()] = getTag(i_n);
    if (weightsize() > 0) {
        v.subVec(inputsize() + targetsize(), weightsize())
            << source_row.subVec(source->inputsize() + source->targetsize(), source->weightsize());
    }
}

////////////
// getTag //
////////////
int KNNVMatrix::getTag(int p) const {
    // TODO Better use the constants defined in SumOverBagsVariable.h.
    if (knn == 1) return 3;
    if (p == 0) return 1;
    if (p == knn - 1) return 2;
    return 0;
}

} // end of namespace PLearn
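The listing above determines how KNNVMatrix lays out its rows (each source sample followed by its knn-1 nearest neighbours, with an extra bag tag appended to the target) and how the optional p_ij weights are normalised. The short standalone sketch below is not part of the PLearn source: it is plain C++ with made-up toy values (the sizes, the neighbour table and the kernel values are all assumptions) that only illustrates the row-to-bag mapping of getSourceIndexOf(), the tag convention of getTag(), and the p_ij normalisation performed in build_().

// Illustrative sketch only; independent of the PLearn classes in the listing.
#include <cstdio>

int main() {
    const int n = 3;    // number of source samples (toy value)
    const int knn = 3;  // neighbours per bag, including the point itself

    // Assumed precomputed neighbour indices: nn[i][j] is the j-th nearest
    // neighbour of sample i in the source matrix (nn[i][0] == i).
    int nn[n][knn] = { {0, 1, 2}, {1, 0, 2}, {2, 1, 0} };

    // Assumed raw kernel values K(x_i, x_j) for the (knn-1) true neighbours.
    double K[n][knn - 1] = { {0.5, 0.25}, {0.8, 0.2}, {0.3, 0.3} };

    // The KNNVMatrix has length n * knn; row i maps to bag i_ref = i / knn
    // and neighbour rank i_n = i % knn, as in getSourceIndexOf().
    for (int i = 0; i < n * knn; i++) {
        int i_ref = i / knn;
        int i_n = i % knn;
        int source_index = nn[i_ref][i_n];

        // Bag tag, mirroring getTag(): 1 = first of bag, 2 = last, 0 = middle,
        // 3 = both (only when knn == 1).
        int tag = (knn == 1) ? 3 : (i_n == 0) ? 1 : (i_n == knn - 1) ? 2 : 0;

        // p_ij = K(x_i, x_j) / sum_k K(x_i, x_k) over the bag's true
        // neighbours; by convention p_ij = -1 for the point itself.
        double p_ij = -1;
        if (i_n > 0) {
            double sum = 0;
            for (int j = 0; j < knn - 1; j++)
                sum += K[i_ref][j];
            p_ij = K[i_ref][i_n - 1] / sum;
        }

        printf("row %d: source row %d, tag %d, p_ij % .3f\n",
               i, source_index, tag, p_ij);
    }
    return 0;
}

With these toy values the program prints three bags of three rows each, tagged 1, 0, 2, with p_ij = -1 on the first row of every bag and the remaining p_ij summing to 1 within each bag.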
