Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

GenerateDecisionPlot.cc

Go to the documentation of this file.
00001 00002 // -*- C++ -*- 00003 00004 // GenerateDecisionPlot.cc 00005 // 00006 // Copyright (C) 2003 Pascal Vincent 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 /* ******************************************************* 00037 * $Id: GenerateDecisionPlot.cc,v 1.8 2004/07/21 16:30:57 chrish42 Exp $ 00038 ******************************************************* */ 00039 00041 #include "GenerateDecisionPlot.h" 00042 #include <plearn/math/VecStatsCollector.h> 00043 #include <plearn/vmat/VMat_maths.h> 00044 00045 namespace PLearn { 00046 using namespace std; 00047 00048 00053 void DX_write_2D_fields(ostream& out, const string& basename, TVec<Mat> fields, real x0, real y0, real deltax, real deltay, 00054 TVec<string> fieldnames=TVec<string>()) 00055 { 00056 int nfields = fields.length(); 00057 int nx = fields[0].length(); 00058 int ny = fields[0].width(); 00059 00060 string posname = string("\"") + basename + "_gridpos\""; 00061 00062 out << "object " << posname << " class gridpositions counts " << nx << " " << ny << "\n" 00063 << "origin " << x0 << " " << y0 << "\n" 00064 << "delta " << deltax << " 0 \n" 00065 << "delta 0 " << deltay << " \n\n\n"; 00066 00067 string conname = string("\"") + basename + "_gridcon\""; 00068 00069 out << "object " << conname << " class gridconnections counts " << nx << " " << ny << "\n" 00070 // << "attribute \"element type\" string \"cubes\" \n" 00071 << "attribute \"ref\" string \"positions\" \n\n\n"; 00072 00073 for(int k=0; k<nfields; k++) 00074 { 00075 Mat& m = fields[k]; 00076 string fieldname = tostring(k); 00077 if(fieldnames) 00078 fieldname = fieldnames[k]; 00079 00080 string dataname = string("\"") + basename + "_" + fieldname + "_data\""; 00081 00082 out << "object " << dataname << " class array type float rank 0 items " << nx*ny << " data follows \n"; 00083 for(int i=0; i<nx; i++) 00084 { 00085 for(int j=0; j<ny; j++) 00086 out << m(i,j) << " "; 00087 out << "\n"; 00088 } 00089 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00090 00091 out << "object \"" << fieldname << "\" class field \n" 00092 << "component \"positions\" " << posname << " \n" 00093 << "component \"connections\" " << conname << " \n" 00094 << "component \"data\" " << dataname << " \n\n\n"; 00095 } 00096 } 00097 00098 00099 void DX_write_2D_fields(ostream& out, const string& basename, Vec X, Vec Y, TVec<Mat> fields) 00100 { 00101 int nfields = fields.length(); 00102 int nx = fields[0].length(); 00103 int ny = fields[0].width(); 00104 00105 /* 00106 out << "object \"" << basename << "_X\" class array type float rank 0 items " << nx << " data follows \n"; 00107 for(int i=0; i<nx; i++) 00108 out << X[i] << "\n"; 00109 out << "\n\n"; 00110 00111 out << "object \"" << basename << "_Y\" class array type float rank 0 items " << ny << " data follows \n"; 00112 for(int i=0; i<ny; i++) 00113 out << Y[i] << "\n"; 00114 */ 00115 00116 string posname = string("\"") + basename + "_gridpos\""; 00117 out << "object " << posname << " class array type float rank 1 shape 2 items " << nx*ny << " data follows\n"; 00118 for(int i=0; i<nx; i++) 00119 for(int j=0; j<ny; j++) 00120 out << X[i] << " " << Y[j] << "\n"; 00121 out << "\n\n"; 00122 00123 string conname = string("\"") + basename + "_gridcon\""; 00124 out << "object " << conname << " class gridconnections counts " << nx << " " << ny << "\n" 00125 // << "attribute \"element type\" string \"cubes\" \n" 00126 << "attribute \"ref\" string \"positions\" \n\n\n"; 00127 00128 for(int k=0; k<nfields; k++) 00129 { 00130 Mat& m = fields[k]; 00131 string fieldname = "output" + tostring(k); 00132 string dataname = string("\"") + basename + "_" + fieldname + "_data\""; 00133 00134 out << "object " << dataname << " class array type float rank 0 items " << nx*ny << " data follows \n"; 00135 for(int i=0; i<nx; i++) 00136 { 00137 for(int j=0; j<ny; j++) 00138 out << m(i,j) << " "; 00139 out << "\n"; 00140 } 00141 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00142 00143 out << "object \"" << fieldname << "\" class field \n" 00144 << "component \"positions\" " << posname << " \n" 00145 << "component \"connections\" " << conname << " \n" 00146 << "component \"data\" " << dataname << " \n\n\n"; 00147 } 00148 } 00149 00150 00151 TVec<Mat> computeOutputFields(PP<PLearner> learner, Vec X, Vec Y) 00152 { 00153 int noutputs = learner->outputsize(); 00154 00155 int nx = X.length(); 00156 int ny = Y.length(); 00157 int nfields = noutputs; 00158 TVec<Mat> fields(nfields); 00159 00160 for(int k=0; k<nfields; k++) 00161 fields[k].resize(nx,ny); 00162 00163 Vec input(2); 00164 Vec output(noutputs); 00165 00166 ProgressBar pb("Computing " + tostring(nx) + " x " + tostring(ny) + " output field",nx*ny); 00167 00168 for(int i=0; i<nx; i++) 00169 for(int j=0; j<ny; j++) 00170 { 00171 input[0] = X[i]; 00172 input[1] = Y[j]; 00173 learner->computeOutput(input,output); 00174 // cerr << "in: " << input << " out: " << output << endl; 00175 for(int k=0; k<noutputs; k++) 00176 fields[k](i,j) = output[k]; 00177 pb.update(i*nx+j); 00178 } 00179 00180 return fields; 00181 } 00182 00183 00184 TVec<Mat> computeOutputFields(PP<PLearner> learner, int nx, int ny, real x0, real y0, real deltax, real deltay) 00185 { 00186 int noutputs = learner->outputsize(); 00187 int nfields = noutputs; 00188 00189 TVec<Mat> fields(nfields); 00190 for(int k=0; k<nfields; k++) 00191 fields[k].resize(nx,ny); 00192 00193 Vec input(2); 00194 Vec output(noutputs); 00195 00196 ProgressBar pb("Computing " + tostring(nx) + " x " + tostring(ny) + " output field",nx*ny); 00197 00198 real x = x0; 00199 real y = y0; 00200 for(int i=0; i<nx; i++, x+=deltax) 00201 for(int j=0; j<ny; j++, y+=deltay) 00202 { 00203 input[0] = x; 00204 input[1] = y; 00205 learner->computeOutput(input,output); 00206 // cerr << "in: " << input << " out: " << output << endl; 00207 for(int k=0; k<noutputs; k++) 00208 fields[k](i,j) = output[k]; 00209 pb.update(i*nx+j); 00210 } 00211 00212 return fields; 00213 } 00214 00215 // Finds appropriate x0, y0, deltax, deltay from the dataset range, computes the fields and returns them 00216 // extraspace of .10 means we'll look 10% beyond the data range on every side 00217 TVec<Mat> computeOutputFieldsAutoRange(PP<PLearner> learner, VMat dataset, int nx, int ny, 00218 real& x0, real& y0, real& deltax, real& deltay, real extraspace=.10) 00219 { 00220 Vec minv(2); 00221 Vec maxv(2); 00222 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00223 real extrax = (maxv[0]-minv[0])*extraspace; 00224 x0 = minv[0]-extrax; 00225 deltax = (maxv[0]+extrax-x0)/nx; 00226 real extray = (maxv[1]-minv[1])*extraspace; 00227 y0 = minv[1]-extray; 00228 deltay = (maxv[1]+extray-y0)/ny; 00229 return computeOutputFields(learner, nx, ny, x0, y0, deltax, deltay); 00230 } 00231 00232 00233 void computeXYPositions(VMat dataset, int nx, int ny, Vec& X, Vec& Y, real extraspace=.10) 00234 { 00235 Vec minv(2); 00236 Vec maxv(2); 00237 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00238 real extrax = (maxv[0]-minv[0])*extraspace; 00239 real x0 = minv[0]-extrax; 00240 real deltax = (maxv[0]+extrax-x0)/nx; 00241 real extray = (maxv[1]-minv[1])*extraspace; 00242 real y0 = minv[1]-extray; 00243 real deltay = (maxv[1]+extray-y0)/ny; 00244 00245 set<real> xpos; 00246 set<real> ypos; 00247 int l = dataset.length(); 00248 Vec datapoint(2); 00249 for(int i=0; i<l; i++) 00250 { 00251 dataset->getRow(i,datapoint); 00252 xpos.insert(datapoint[0]); 00253 ypos.insert(datapoint[1]); 00254 } 00255 real x = x0; 00256 for(int i=0; i<nx; i++, x+=deltax) 00257 xpos.insert(x); 00258 real y = y0; 00259 for(int j=0; j<ny; j++, y+=deltay) 00260 ypos.insert(y); 00261 set<real>::iterator it; 00262 X.resize(xpos.size()); 00263 real* xptr = X.data(); 00264 it = xpos.begin(); 00265 while(it!=xpos.end()) 00266 *xptr++ = *it++; 00267 Y.resize(ypos.size()); 00268 real* yptr = Y.data(); 00269 it = ypos.begin(); 00270 while(it!=ypos.end()) 00271 *yptr++ = *it++; 00272 } 00273 00274 00275 00278 void DX_create_dataset_outputs_file(const string& filename, PP<PLearner> learner, VMat dataset) 00279 { 00280 ofstream out(filename.c_str()); 00281 00282 int l = dataset.length(); 00283 int inputsize = learner->inputsize(); 00284 int targetsize = learner->targetsize(); 00285 int outputsize = learner->outputsize(); 00286 00287 // First write data points (input -> target, output) 00288 Vec input(inputsize); 00289 Vec target(targetsize); 00290 real weight; 00291 Vec output(outputsize); 00292 00293 // write 2D positions 00294 out << "object \"dset_pos\" class array type float rank 1 shape " << inputsize << " items " << l << " data follows \n"; 00295 for(int i=0; i<l; i++) 00296 { 00297 dataset->getExample(i,input,target,weight); 00298 for(int j=0; j<inputsize; j++) 00299 out << input[j] << " "; 00300 out << "\n"; 00301 } 00302 out << "\n\n\n"; 00303 00304 // Now write data for those positions (target and output) 00305 if(targetsize+outputsize>0) 00306 { 00307 ProgressBar pb("Computing outputs for dataset points",l); 00308 out << "object \"dset_value\" class array type float rank 1 shape " << targetsize+outputsize << " items " << l << " data follows \n"; 00309 for(int i=0; i<l; i++) 00310 { 00311 dataset->getExample(i,input,target,weight); 00312 for(int j=0; j<targetsize; j++) 00313 out << target[j] << " "; 00314 learner->computeOutput(input, output); 00315 for(int j=0; j<outputsize; j++) 00316 out << output[j] << " "; 00317 out << "\n"; 00318 pb.update(i); 00319 } 00320 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00321 } 00322 00323 // Field is created with two components: "positions" and "data" 00324 out << "object \"dset\" class field \n" 00325 << "component \"positions\" \"dset_pos\" \n"; 00326 if(targetsize+outputsize>0) 00327 out << "component \"data\" \"dset_value\" \n"; 00328 out << "\n\n\n"; 00329 00330 00331 00332 out << "end" << endl; 00333 } 00334 00335 00342 00343 void DX_create_grid_outputs_file(const string& filename, PP<PLearner> learner, VMat dataset, 00344 int nx, int ny, bool include_datapoint_grid=false, 00345 real xmin=MISSING_VALUE, real xmax=MISSING_VALUE, 00346 real ymin=MISSING_VALUE, real ymax=MISSING_VALUE, 00347 real extraspace=.10) 00348 { 00349 ofstream out(filename.c_str()); 00350 00351 double logsum = -FLT_MAX; 00352 00353 int l = dataset.length(); 00354 int inputsize = learner->inputsize(); 00355 int targetsize = learner->targetsize(); 00356 int outputsize = learner->outputsize(); 00357 00358 Vec input(inputsize); 00359 Vec target(targetsize); 00360 real weight; 00361 Vec output(outputsize); 00362 00363 // Create the grid field 00364 00365 set<real> xpos; 00366 set<real> ypos; 00367 00368 // First the regular grid coordinates 00369 Vec minv(2); 00370 Vec maxv(2); 00371 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00372 real extrax = (maxv[0]-minv[0])*extraspace; 00373 real extray = (maxv[1]-minv[1])*extraspace; 00374 if(is_missing(xmin)) 00375 xmin = minv[0]-extrax; 00376 if(is_missing(xmax)) 00377 xmax = maxv[0]+extrax; 00378 if(is_missing(ymin)) 00379 ymin = minv[1]-extray; 00380 if(is_missing(ymax)) 00381 ymax = maxv[1]+extray; 00382 real deltax = (xmax-xmin)/nx; 00383 real deltay = (ymax-ymin)/ny; 00384 00385 real x = xmin; 00386 for(int i=0; i<nx; i++, x+=deltax) 00387 xpos.insert(x); 00388 real y = ymin; 00389 for(int j=0; j<ny; j++, y+=deltay) 00390 ypos.insert(y); 00391 00392 // also include irregular grid coordinates based on coordinates of dataset points? 00393 if(include_datapoint_grid) 00394 { 00395 for(int i=0; i<l; i++) 00396 { 00397 dataset->getExample(i,input,target,weight); 00398 x = input[0]; 00399 y = input[1]; 00400 if(x>xmin && x<xmax) 00401 xpos.insert(x); 00402 if(y>ymin && y<ymax) 00403 ypos.insert(y); 00404 } 00405 } 00406 00407 nx = xpos.size(); 00408 ny = ypos.size(); 00409 set<real>::iterator itx; 00410 set<real>::iterator ity; 00411 00412 out << "object \"outputs_gridpos\" class array type float rank 1 shape 2 items " << nx*ny << " data follows\n"; 00413 for(itx=xpos.begin(); itx!=xpos.end(); ++itx) 00414 for(ity=ypos.begin(); ity!=ypos.end(); ++ity) 00415 out << *itx << " " << *ity << "\n"; 00416 out << "\n\n"; 00417 00418 out << "object \"outputs_gridcon\" class gridconnections counts " << nx << " " << ny << "\n" 00419 // << "attribute \"element type\" string \"cubes\" \n" 00420 << "attribute \"ref\" string \"positions\" \n\n\n"; 00421 00422 out << "object \"outputs_values\" class array type float rank 1 shape " << outputsize << " items " << nx*ny << " data follows \n"; 00423 00424 ProgressBar pb("Computing outputs for grid positions: " + tostring(nx)+"x"+tostring(ny), nx*ny); 00425 int n = 0; 00426 for(itx=xpos.begin(); itx!=xpos.end(); ++itx) 00427 { 00428 input[0] = *itx; 00429 for(ity=ypos.begin(); ity!=ypos.end(); ++ity) 00430 { 00431 input[1] = *ity; 00432 learner->computeOutput(input, output); 00433 for(int j=0; j<outputsize; j++) 00434 out << output[j] << " "; 00435 out << "\n"; 00436 if(logsum==-FLT_MAX) 00437 logsum = output[0]; 00438 else 00439 logsum = logadd(logsum, output[0]); 00440 pb.update(n++); 00441 } 00442 } 00443 pb.close(); 00444 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00445 00446 out << "object \"outputs\" class field \n" 00447 << "component \"positions\" \"outputs_gridpos\" \n" 00448 << "component \"connections\" \"outputs_gridcon\" \n" 00449 << "component \"data\" \"outputs_values\" \n\n\n"; 00450 00451 out << "end" << endl; 00452 00453 double surfelem = deltax*deltay; 00454 double surfintegral = exp(logsum)*surfelem; 00455 cerr << "Estimated integral over sampled domain: " << surfintegral << endl; 00456 } 00457 00458 00459 GenerateDecisionPlot::GenerateDecisionPlot() 00460 :basename("dxplot"), 00461 nx(10), ny(10), 00462 include_datapoint_grid(false), 00463 xmin(MISSING_VALUE), 00464 xmax(MISSING_VALUE), 00465 ymin(MISSING_VALUE), 00466 ymax(MISSING_VALUE) 00467 { 00468 } 00469 00470 PLEARN_IMPLEMENT_OBJECT(GenerateDecisionPlot, "ONE LINE DESCR", "NO HELP"); 00471 00472 void GenerateDecisionPlot::declareOptions(OptionList& ol) 00473 { 00474 // ### Declare all of this object's options here 00475 // ### For the "flags" of each option, you should typically specify 00476 // ### one of OptionBase::buildoption, OptionBase::learntoption or 00477 // ### OptionBase::tuningoption. Another possible flag to be combined with 00478 // ### is OptionBase::nosave 00479 00480 declareOption(ol, "basename", &GenerateDecisionPlot::basename, OptionBase::buildoption, 00481 "Base name of the .dx data file to generate. Running this class will generate\n" 00482 "files basename_dset.dx containing targets and outputs for the given dataset positions\n" 00483 "and basename_outputs.dx containing outputs computed at grid positions\n"); 00484 declareOption(ol, "learner", &GenerateDecisionPlot::learner, OptionBase::buildoption, 00485 "The learner to train/test. Its train_set will be used as the dataset on\n" 00486 "which to base this decision plot (ranges are inferred from it, and decisions\n" 00487 "on the training points are written in basename_dset.dx"); 00488 declareOption(ol, "nx", &GenerateDecisionPlot::nx, OptionBase::buildoption, 00489 "Number of x sample coordinates (grid)"); 00490 declareOption(ol, "ny", &GenerateDecisionPlot::ny, OptionBase::buildoption, 00491 "Number of y sample coordinates (grid)"); 00492 declareOption(ol, "include_datapoint_grid", &GenerateDecisionPlot::include_datapoint_grid, OptionBase::buildoption, 00493 ""); 00494 declareOption(ol, "xmin", &GenerateDecisionPlot::xmin, OptionBase::buildoption, 00495 ""); 00496 declareOption(ol, "xmax", &GenerateDecisionPlot::xmax, OptionBase::buildoption, 00497 ""); 00498 declareOption(ol, "ymin", &GenerateDecisionPlot::ymin, OptionBase::buildoption, 00499 ""); 00500 declareOption(ol, "ymax", &GenerateDecisionPlot::ymax, OptionBase::buildoption, 00501 ""); 00502 declareOption(ol, "save_learner_as", &GenerateDecisionPlot::save_learner_as, OptionBase::buildoption, 00503 "(Optionally) save trained learner in this file (.psave)"); 00504 00505 // Now call the parent class' declareOptions 00506 inherited::declareOptions(ol); 00507 } 00508 00509 void GenerateDecisionPlot::build_() 00510 { 00511 // ### This method should do the real building of the object, 00512 // ### according to set 'options', in *any* situation. 00513 // ### Typical situations include: 00514 // ### - Initial building of an object from a few user-specified options 00515 // ### - Building of a "reloaded" object: i.e. from the complete set of all serialised options. 00516 // ### - Updating or "re-building" of an object after a few "tuning" options have been modified. 00517 // ### You should assume that the parent class' build_() has already been called. 00518 } 00519 00521 void GenerateDecisionPlot::run() 00522 { 00523 VMat dataset = learner->getTrainingSet(); 00524 learner->setTrainStatsCollector(new VecStatsCollector()); 00525 learner->train(); 00526 00527 if(save_learner_as!="") 00528 { 00529 cerr << "Saving trained learner in file " << save_learner_as << endl; 00530 PLearn::save(save_learner_as, *learner); 00531 } 00532 00533 string dset_fname = basename+"_dset.dx"; 00534 cerr << "Computing and writing dataset output field to file " << dset_fname << endl; 00535 DX_create_dataset_outputs_file(dset_fname, learner, dataset); 00536 00537 string outputs_fname = basename+"_outputs.dx"; 00538 cerr << "Computing and writing grid output field to file " << outputs_fname << endl; 00539 DX_create_grid_outputs_file(outputs_fname, learner, dataset, nx, ny, 00540 include_datapoint_grid, 00541 xmin, xmax, ymin, ymax); 00542 cerr << "You can now view those files with OpenDX." << endl; 00543 } 00544 00545 00546 // ### Nothing to add here, simply calls build_ 00547 void GenerateDecisionPlot::build() 00548 { 00549 inherited::build(); 00550 build_(); 00551 } 00552 00553 00554 void GenerateDecisionPlot::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00555 { 00556 inherited::makeDeepCopyFromShallowCopy(copies); 00557 } 00558 00559 } // end of namespace PLearn

Generated on Tue Aug 17 15:54:17 2004 for PLearn by doxygen 1.3.7