Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Grapher.cc

Go to the documentation of this file.
00001 00002 // -*- C++ -*- 00003 00004 // Grapher.cc 00005 // 00006 // Copyright (C) 2003 Pascal Vincent 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 /* ******************************************************* 00037 * $Id: Grapher.cc,v 1.8 2004/07/21 16:30:57 chrish42 Exp $ 00038 ******************************************************* */ 00039 00041 #include "Grapher.h" 00042 #include <plearn/math/VecStatsCollector.h> 00043 #include <plearn/vmat/VMat_maths.h> 00044 #include <plearn/display/GhostScript.h> 00045 #include <plearn/display/Gnuplot.h> 00046 #include <plearn/vmat/RegularGridVMatrix.h> 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 00056 void DX_write_2D_fields(ostream& out, const string& basename, TVec<Mat> fields, real x0, real y0, real deltax, real deltay, 00057 TVec<string> fieldnames=TVec<string>()) 00058 { 00059 int nfields = fields.length(); 00060 int nx = fields[0].length(); 00061 int ny = fields[0].width(); 00062 00063 string posname = string("\"") + basename + "_gridpos\""; 00064 00065 out << "object " << posname << " class gridpositions counts " << nx << " " << ny << "\n" 00066 << "origin " << x0 << " " << y0 << "\n" 00067 << "delta " << deltax << " 0 \n" 00068 << "delta 0 " << deltay << " \n\n\n"; 00069 00070 string conname = string("\"") + basename + "_gridcon\""; 00071 00072 out << "object " << conname << " class gridconnections counts " << nx << " " << ny << "\n" 00073 // << "attribute \"element type\" string \"cubes\" \n" 00074 << "attribute \"ref\" string \"positions\" \n\n\n"; 00075 00076 for(int k=0; k<nfields; k++) 00077 { 00078 Mat& m = fields[k]; 00079 string fieldname = tostring(k); 00080 if(fieldnames) 00081 fieldname = fieldnames[k]; 00082 00083 string dataname = string("\"") + basename + "_" + fieldname + "_data\""; 00084 00085 out << "object " << dataname << " class array type float rank 0 items " << nx*ny << " data follows \n"; 00086 for(int i=0; i<nx; i++) 00087 { 00088 for(int j=0; j<ny; j++) 00089 out << m(i,j) << " "; 00090 out << "\n"; 00091 } 00092 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00093 00094 out << "object \"" << fieldname << "\" class field \n" 00095 << "component \"positions\" " << posname << " \n" 00096 << "component \"connections\" " << conname << " \n" 00097 << "component \"data\" " << dataname << " \n\n\n"; 00098 } 00099 } 00100 00101 00102 void DX_write_2D_fields(ostream& out, const string& basename, Vec X, Vec Y, TVec<Mat> fields) 00103 { 00104 int nfields = fields.length(); 00105 int nx = fields[0].length(); 00106 int ny = fields[0].width(); 00107 00108 /* 00109 out << "object \"" << basename << "_X\" class array type float rank 0 items " << nx << " data follows \n"; 00110 for(int i=0; i<nx; i++) 00111 out << X[i] << "\n"; 00112 out << "\n\n"; 00113 00114 out << "object \"" << basename << "_Y\" class array type float rank 0 items " << ny << " data follows \n"; 00115 for(int i=0; i<ny; i++) 00116 out << Y[i] << "\n"; 00117 */ 00118 00119 string posname = string("\"") + basename + "_gridpos\""; 00120 out << "object " << posname << " class array type float rank 1 shape 2 items " << nx*ny << " data follows\n"; 00121 for(int i=0; i<nx; i++) 00122 for(int j=0; j<ny; j++) 00123 out << X[i] << " " << Y[j] << "\n"; 00124 out << "\n\n"; 00125 00126 string conname = string("\"") + basename + "_gridcon\""; 00127 out << "object " << conname << " class gridconnections counts " << nx << " " << ny << "\n" 00128 // << "attribute \"element type\" string \"cubes\" \n" 00129 << "attribute \"ref\" string \"positions\" \n\n\n"; 00130 00131 for(int k=0; k<nfields; k++) 00132 { 00133 Mat& m = fields[k]; 00134 string fieldname = "output" + tostring(k); 00135 string dataname = string("\"") + basename + "_" + fieldname + "_data\""; 00136 00137 out << "object " << dataname << " class array type float rank 0 items " << nx*ny << " data follows \n"; 00138 for(int i=0; i<nx; i++) 00139 { 00140 for(int j=0; j<ny; j++) 00141 out << m(i,j) << " "; 00142 out << "\n"; 00143 } 00144 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00145 00146 out << "object \"" << fieldname << "\" class field \n" 00147 << "component \"positions\" " << posname << " \n" 00148 << "component \"connections\" " << conname << " \n" 00149 << "component \"data\" " << dataname << " \n\n\n"; 00150 } 00151 } 00152 00153 00154 TVec<Mat> computeOutputFields(PP<PLearner> learner, Vec X, Vec Y) 00155 { 00156 int noutputs = learner->outputsize(); 00157 00158 int nx = X.length(); 00159 int ny = Y.length(); 00160 int nfields = noutputs; 00161 TVec<Mat> fields(nfields); 00162 00163 for(int k=0; k<nfields; k++) 00164 fields[k].resize(nx,ny); 00165 00166 Vec input(2); 00167 Vec output(noutputs); 00168 00169 ProgressBar pb("Computing " + tostring(nx) + " x " + tostring(ny) + " output field",nx*ny); 00170 00171 for(int i=0; i<nx; i++) 00172 for(int j=0; j<ny; j++) 00173 { 00174 input[0] = X[i]; 00175 input[1] = Y[j]; 00176 learner->computeOutput(input,output); 00177 // cerr << "in: " << input << " out: " << output << endl; 00178 for(int k=0; k<noutputs; k++) 00179 fields[k](i,j) = output[k]; 00180 pb.update(i*nx+j); 00181 } 00182 00183 return fields; 00184 } 00185 00186 00187 TVec<Mat> computeOutputFields(PP<PLearner> learner, int nx, int ny, real x0, real y0, real deltax, real deltay) 00188 { 00189 int noutputs = learner->outputsize(); 00190 int nfields = noutputs; 00191 00192 TVec<Mat> fields(nfields); 00193 for(int k=0; k<nfields; k++) 00194 fields[k].resize(nx,ny); 00195 00196 Vec input(2); 00197 Vec output(noutputs); 00198 00199 ProgressBar pb("Computing " + tostring(nx) + " x " + tostring(ny) + " output field",nx*ny); 00200 00201 real x = x0; 00202 real y = y0; 00203 for(int i=0; i<nx; i++, x+=deltax) 00204 for(int j=0; j<ny; j++, y+=deltay) 00205 { 00206 input[0] = x; 00207 input[1] = y; 00208 learner->computeOutput(input,output); 00209 // cerr << "in: " << input << " out: " << output << endl; 00210 for(int k=0; k<noutputs; k++) 00211 fields[k](i,j) = output[k]; 00212 pb.update(i*nx+j); 00213 } 00214 00215 return fields; 00216 } 00217 00218 // Finds appropriate x0, y0, deltax, deltay from the dataset range, computes the fields and returns them 00219 // extraspace of .10 means we'll look 10% beyond the data range on every side 00220 TVec<Mat> computeOutputFieldsAutoRange(PP<PLearner> learner, VMat dataset, int nx, int ny, 00221 real& x0, real& y0, real& deltax, real& deltay, real extraspace=.10) 00222 { 00223 Vec minv(2); 00224 Vec maxv(2); 00225 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00226 real extrax = (maxv[0]-minv[0])*extraspace; 00227 x0 = minv[0]-extrax; 00228 deltax = (maxv[0]+extrax-x0)/nx; 00229 real extray = (maxv[1]-minv[1])*extraspace; 00230 y0 = minv[1]-extray; 00231 deltay = (maxv[1]+extray-y0)/ny; 00232 return computeOutputFields(learner, nx, ny, x0, y0, deltax, deltay); 00233 } 00234 00235 00236 void computeXYPositions(VMat dataset, int nx, int ny, Vec& X, Vec& Y, real extraspace=.10) 00237 { 00238 Vec minv(2); 00239 Vec maxv(2); 00240 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00241 real extrax = (maxv[0]-minv[0])*extraspace; 00242 real x0 = minv[0]-extrax; 00243 real deltax = (maxv[0]+extrax-x0)/nx; 00244 real extray = (maxv[1]-minv[1])*extraspace; 00245 real y0 = minv[1]-extray; 00246 real deltay = (maxv[1]+extray-y0)/ny; 00247 00248 set<real> xpos; 00249 set<real> ypos; 00250 int l = dataset.length(); 00251 Vec datapoint(2); 00252 for(int i=0; i<l; i++) 00253 { 00254 dataset->getRow(i,datapoint); 00255 xpos.insert(datapoint[0]); 00256 ypos.insert(datapoint[1]); 00257 } 00258 real x = x0; 00259 for(int i=0; i<nx; i++, x+=deltax) 00260 xpos.insert(x); 00261 real y = y0; 00262 for(int j=0; j<ny; j++, y+=deltay) 00263 ypos.insert(y); 00264 set<real>::iterator it; 00265 X.resize((int)xpos.size()); 00266 real* xptr = X.data(); 00267 it = xpos.begin(); 00268 while(it!=xpos.end()) 00269 *xptr++ = *it++; 00270 Y.resize((int)ypos.size()); 00271 real* yptr = Y.data(); 00272 it = ypos.begin(); 00273 while(it!=ypos.end()) 00274 *yptr++ = *it++; 00275 } 00276 00277 00278 00281 void DX_create_dataset_outputs_file(const string& filename, PP<PLearner> learner, VMat dataset) 00282 { 00283 ofstream out(filename.c_str()); 00284 00285 int l = dataset.length(); 00286 int inputsize = learner->inputsize(); 00287 int targetsize = learner->targetsize(); 00288 int outputsize = learner->outputsize(); 00289 00290 // First write data points (input -> target, output) 00291 Vec input(inputsize); 00292 Vec target(targetsize); 00293 real weight; 00294 Vec output(outputsize); 00295 00296 // write 2D positions 00297 out << "object \"dset_pos\" class array type float rank 1 shape " << inputsize << " items " << l << " data follows \n"; 00298 for(int i=0; i<l; i++) 00299 { 00300 dataset->getExample(i,input,target,weight); 00301 for(int j=0; j<inputsize; j++) 00302 out << input[j] << " "; 00303 out << "\n"; 00304 } 00305 out << "\n\n\n"; 00306 00307 // Now write data for those positions (target and output) 00308 if(targetsize+outputsize>0) 00309 { 00310 ProgressBar pb("Computing outputs for dataset points",l); 00311 out << "object \"dset_value\" class array type float rank 1 shape " << targetsize+outputsize << " items " << l << " data follows \n"; 00312 for(int i=0; i<l; i++) 00313 { 00314 dataset->getExample(i,input,target,weight); 00315 for(int j=0; j<targetsize; j++) 00316 out << target[j] << " "; 00317 learner->computeOutput(input, output); 00318 for(int j=0; j<outputsize; j++) 00319 out << output[j] << " "; 00320 out << "\n"; 00321 pb.update(i); 00322 } 00323 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00324 } 00325 00326 // Field is created with two components: "positions" and "data" 00327 out << "object \"dset\" class field \n" 00328 << "component \"positions\" \"dset_pos\" \n"; 00329 if(targetsize+outputsize>0) 00330 out << "component \"data\" \"dset_value\" \n"; 00331 out << "\n\n\n"; 00332 00333 00334 00335 out << "end" << endl; 00336 } 00337 00338 00345 00346 void DX_create_grid_outputs_file(const string& filename, PP<PLearner> learner, VMat dataset, 00347 int nx, int ny, bool include_datapoint_grid=false, 00348 real xmin=MISSING_VALUE, real xmax=MISSING_VALUE, 00349 real ymin=MISSING_VALUE, real ymax=MISSING_VALUE, 00350 real extraspace=.10) 00351 { 00352 ofstream out(filename.c_str()); 00353 00354 double logsum = -FLT_MAX; 00355 00356 int l = dataset.length(); 00357 int inputsize = learner->inputsize(); 00358 int targetsize = learner->targetsize(); 00359 int outputsize = learner->outputsize(); 00360 00361 Vec input(inputsize); 00362 Vec target(targetsize); 00363 real weight; 00364 Vec output(outputsize); 00365 00366 // Create the grid field 00367 00368 set<real> xpos; 00369 set<real> ypos; 00370 00371 // First the regular grid coordinates 00372 Vec minv(2); 00373 Vec maxv(2); 00374 computeRange(dataset.subMatColumns(0,2), minv, maxv); 00375 real extrax = (maxv[0]-minv[0])*extraspace; 00376 real extray = (maxv[1]-minv[1])*extraspace; 00377 if(is_missing(xmin)) 00378 xmin = minv[0]-extrax; 00379 if(is_missing(xmax)) 00380 xmax = maxv[0]+extrax; 00381 if(is_missing(ymin)) 00382 ymin = minv[1]-extray; 00383 if(is_missing(ymax)) 00384 ymax = maxv[1]+extray; 00385 real deltax = (xmax-xmin)/nx; 00386 real deltay = (ymax-ymin)/ny; 00387 00388 real x = xmin; 00389 for(int i=0; i<nx; i++, x+=deltax) 00390 xpos.insert(x); 00391 real y = ymin; 00392 for(int j=0; j<ny; j++, y+=deltay) 00393 ypos.insert(y); 00394 00395 // also include irregular grid coordinates based on coordinates of dataset points? 00396 if(include_datapoint_grid) 00397 { 00398 for(int i=0; i<l; i++) 00399 { 00400 dataset->getExample(i,input,target,weight); 00401 x = input[0]; 00402 y = input[1]; 00403 if(x>xmin && x<xmax) 00404 xpos.insert(x); 00405 if(y>ymin && y<ymax) 00406 ypos.insert(y); 00407 } 00408 } 00409 00410 nx = (int)xpos.size(); 00411 ny = (int)ypos.size(); 00412 set<real>::iterator itx; 00413 set<real>::iterator ity; 00414 00415 out << "object \"outputs_gridpos\" class array type float rank 1 shape 2 items " << nx*ny << " data follows\n"; 00416 for(itx=xpos.begin(); itx!=xpos.end(); ++itx) 00417 for(ity=ypos.begin(); ity!=ypos.end(); ++ity) 00418 out << *itx << " " << *ity << "\n"; 00419 out << "\n\n"; 00420 00421 out << "object \"outputs_gridcon\" class gridconnections counts " << nx << " " << ny << "\n" 00422 // << "attribute \"element type\" string \"cubes\" \n" 00423 << "attribute \"ref\" string \"positions\" \n\n\n"; 00424 00425 out << "object \"outputs_values\" class array type float rank 1 shape " << outputsize << " items " << nx*ny << " data follows \n"; 00426 00427 ProgressBar pb("Computing outputs for grid positions: " + tostring(nx)+"x"+tostring(ny), nx*ny); 00428 int n = 0; 00429 for(itx=xpos.begin(); itx!=xpos.end(); ++itx) 00430 { 00431 input[0] = *itx; 00432 for(ity=ypos.begin(); ity!=ypos.end(); ++ity) 00433 { 00434 input[1] = *ity; 00435 learner->computeOutput(input, output); 00436 for(int j=0; j<outputsize; j++) 00437 out << output[j] << " "; 00438 out << "\n"; 00439 if(logsum==-FLT_MAX) 00440 logsum = output[0]; 00441 else 00442 logsum = logadd(logsum, output[0]); 00443 pb.update(n++); 00444 } 00445 } 00446 pb.close(); 00447 out << "attribute \"dep\" string \"positions\" \n\n\n"; 00448 00449 out << "object \"outputs\" class field \n" 00450 << "component \"positions\" \"outputs_gridpos\" \n" 00451 << "component \"connections\" \"outputs_gridcon\" \n" 00452 << "component \"data\" \"outputs_values\" \n\n\n"; 00453 00454 out << "end" << endl; 00455 00456 double surfelem = deltax*deltay; 00457 double surfintegral = exp(logsum)*surfelem; 00458 cerr << "Estimated integral over sampled domain: " << surfintegral << endl; 00459 } 00460 00461 void Grapher::computeAutoGridrange() 00462 { 00463 int d = trainset->inputsize(); 00464 gridrange.resize(d); 00465 for(int j=0; j<d; j++) 00466 gridrange[j] = pair<real,real>(FLT_MAX,-FLT_MAX); 00467 Vec input; 00468 Vec target; 00469 real weight; 00470 int l = trainset.length(); 00471 for(int i=0; i<l; i++) 00472 { 00473 trainset->getExample(i, input, target, weight); 00474 for(int j=0; j<d; j++) 00475 { 00476 real x_j = input[j]; 00477 if(x_j<gridrange[j].first) 00478 gridrange[j].first = x_j; 00479 if(x_j>gridrange[j].second) 00480 gridrange[j].second = x_j; 00481 } 00482 } 00483 00484 // Now add extra 10% 00485 real extra = .10; 00486 for(int j=0; j<d; j++) 00487 { 00488 real extent = extra*(gridrange[j].second-gridrange[j].first); 00489 gridrange[j].first -= extent; 00490 gridrange[j].second += extent; 00491 } 00492 } 00493 00494 00495 Grapher::Grapher() 00496 :basename("dxplot"), task(""), class1_threshold(0.5), 00497 radius(-0.01), bw(false) 00498 { 00499 } 00500 00501 PLEARN_IMPLEMENT_OBJECT(Grapher, "ONE LINE DESCR", "NO HELP"); 00502 00503 void Grapher::declareOptions(OptionList& ol) 00504 { 00505 // ### Declare all of this object's options here 00506 // ### For the "flags" of each option, you should typically specify 00507 // ### one of OptionBase::buildoption, OptionBase::learntoption or 00508 // ### OptionBase::tuningoption. Another possible flag to be combined with 00509 // ### is OptionBase::nosave 00510 00511 declareOption(ol, "basename", &Grapher::basename, OptionBase::buildoption, 00512 "Base name of the .dx data file to generate. Running this class will generate\n" 00513 "files basename_dset.dx containing targets and outputs for the given dataset positions\n" 00514 "and basename_outputs.dx containing outputs computed at grid positions\n"); 00515 declareOption(ol, "task", &Grapher::task, OptionBase::buildoption, 00516 "Desired plotting task. Can be \"1D regression\",\n" 00517 "\"2D clustering\", \"2D density\", \"2D classification\",\n" 00518 "\"2D regression\""); 00519 declareOption(ol, "class1_threshold", &Grapher::class1_threshold, OptionBase::buildoption, 00520 "In the case of 1 output 2D classification, the output threshold to\n" 00521 "have class=1 (below the threshold, the class = 0). The default\n" 00522 "is 0.5, which appropriate for sigmoidal-output learners, but use 0\n" 00523 "if the learner outputs -1/1, such as for SVM's"); 00524 declareOption(ol, "learner", &Grapher::learner, OptionBase::buildoption, 00525 "The learner to train/test"); 00526 declareOption(ol, "trainset", &Grapher::trainset, OptionBase::buildoption, 00527 "The training set to train the learner on\n"); 00528 declareOption(ol, "gridrange", &Grapher::gridrange, OptionBase::buildoption, 00529 "A vector of low:high pairs with as many dimensions as the input space\n" 00530 "ex for 2D: [ -10:10 -3:4 ] \n" 00531 "If empty, it will be automatically inferred from the range of the\n" 00532 "trainset inputs (with an extra 10%)"); 00533 declareOption(ol, "griddim", &Grapher::griddim, OptionBase::buildoption, 00534 "A vector of integers giving the number of sample coordinates\n" 00535 "for each dimension of the grid. Ex for 2D: [ 100 100 ]\n"); 00536 declareOption(ol, "radius", &Grapher::radius, OptionBase::buildoption, 00537 "The radius of the discs around data points.\n" 00538 "(If negative, it's considered to be expressed as a percentage of the x range)\n"); 00539 declareOption(ol, "bw", &Grapher::bw, OptionBase::buildoption, 00540 "Set this to true if you want to generate black and white eps"); 00541 declareOption(ol, "save_learner_as", &Grapher::save_learner_as, OptionBase::buildoption, 00542 "(Optionally) save trained learner in this file (.psave)"); 00543 00544 // Now call the parent class' declareOptions 00545 inherited::declareOptions(ol); 00546 } 00547 00548 void Grapher::build_() 00549 { 00550 // ### This method should do the real building of the object, 00551 // ### according to set 'options', in *any* situation. 00552 // ### Typical situations include: 00553 // ### - Initial building of an object from a few user-specified options 00554 // ### - Building of a "reloaded" object: i.e. from the complete set of all serialised options. 00555 // ### - Updating or "re-building" of an object after a few "tuning" options have been modified. 00556 // ### You should assume that the parent class' build_() has already been called. 00557 } 00558 00559 real color(int colornum, real lightness) 00560 { 00561 real col = 0; 00562 switch(colornum) 00563 { 00564 case 0: 00565 col = rgb2real(lightness,1,1); 00566 break; 00567 case 1: 00568 col = rgb2real(1,lightness,1); 00569 break; 00570 case 2: 00571 col = rgb2real(1,1,lightness); 00572 break; 00573 case 3: 00574 col = rgb2real(1,lightness,lightness); 00575 break; 00576 case 4: 00577 col = rgb2real(lightness,1,lightness); 00578 case 5: 00579 col = rgb2real(lightness,lightness,1); 00580 default: 00581 PLERROR("No color setting for colornum %d", colornum); 00582 } 00583 return col; 00584 } 00585 00586 void Grapher::plot_1D_regression(string basename, VMat trainset, 00587 TVec<int> griddim, TVec< pair<real,real> > gridrange, 00588 VMat gridoutputs, VMat trainoutputs, bool bw) 00589 { 00590 if(griddim.size()!=1) 00591 PLERROR("In Grapher::plot_1D_regression, not a 1D grid!"); 00592 00593 int nx = griddim[0]; 00594 real x = gridrange[0].first; 00595 real w = gridrange[0].second - x; 00596 real dx = w/(nx-1); 00597 int l = trainset.length(); 00598 Mat curve(nx+l,2); 00599 Mat points(l,2); 00600 for(int i=0; i<nx; i++) 00601 { 00602 curve(i,0) = x; 00603 curve(i,1) = gridoutputs(i,0); 00604 x += dx; 00605 } 00606 00607 Vec input(1); 00608 Vec target(1); 00609 Vec output(1); 00610 real weight; 00611 for(int i=0; i<l; i++) 00612 { 00613 trainset->getExample(i, input, target, weight); 00614 trainoutputs->getRow(i, output); 00615 points(i,0) = input[0]; 00616 points(i,1) = target[0]; 00617 curve(nx+i,0) = input[0]; 00618 curve(nx+i,1) = output[0]; 00619 } 00620 00621 sortRows(curve); 00622 00623 saveAscii(basename+"_points.amat", points); 00624 saveAscii(basename+"_curve.amat", curve); 00625 Gnuplot gp; 00626 gp << "plot '" << basename+"_points.amat" << "' with points, '" 00627 << basename + "_curve.amat" << "' with lines" << endl; 00628 00629 pgetline(cin); 00630 } 00631 00632 void Grapher::plot_2D_classification(string epsfname, VMat trainset, 00633 TVec<int> griddim, TVec< pair<real,real> > gridrange, 00634 VMat gridoutputs, real radius, bool bw) 00635 { 00636 cerr << "Plotting 2D classification result" << endl; 00637 if(griddim.size()!=2 || gridrange.size()!=2) 00638 PLERROR("In Grapher::plot_2D_classification griddim and gridrange must be of size 2"); 00639 int nx = griddim[0]; 00640 int ny = griddim[1]; 00641 int nclasses = gridoutputs.width(); 00642 real x = gridrange[0].first; 00643 real y = gridrange[1].first; 00644 real w = gridrange[0].second - x; 00645 real h = gridrange[1].second - y; 00646 if(nclasses<2) 00647 PLERROR("In Grapher::plot_2D_classification number of classes (width of gridoutputs) must be at least 2: it is currently %d",nclasses); 00648 00649 real gswidth = 600; 00650 real gsheight = gswidth/w*h; 00651 GhostScript gs(epsfname, 0, 0, gswidth, gsheight); 00652 gs.mapping(x, y, w, h, 0, 0, gswidth, gsheight); 00653 00654 00655 Mat image(ny,nx); 00656 Vec output(nclasses); 00657 for(int i=0; i<ny; i++) 00658 for(int j=0; j<nx; j++) 00659 { 00660 gridoutputs->getRow((ny-i-1)+ny*j,output); 00661 int winner = argmax(output); 00662 // cout << i << " " << j << " " << output << endl; 00663 // real winnerval = output[winner]; 00664 if(bw) // grayscale 00665 image(i,j) = (winner==0 ?0.7 :0.3); 00666 else // color 00667 image(i,j) = color(winner,0.7); 00668 } 00669 00670 // cout << "IMAGE:" << endl << image << endl; 00671 00672 if(bw) 00673 gs.displayGray(image, x, y, w, h); 00674 else 00675 gs.displayRGB(image, x, y, w, h); 00676 00677 int l = trainset.length(); 00678 Vec input; 00679 Vec target; 00680 real weight; 00681 gs.setgray(0); // black 00682 for(int i=0; i<l; i++) 00683 { 00684 trainset->getExample(i,input,target,weight); 00685 if(target.length()==1) 00686 { 00687 if(bw) 00688 gs.setgray(target[0]==0 ?0 :1); 00689 else 00690 gs.setcolor(color(int(target[0]),0.2)); 00691 } 00692 else 00693 { 00694 if(bw) 00695 gs.setgray(target[0]==0 ?0 :1); 00696 else 00697 gs.setcolor(color(argmax(target),0.2)); 00698 } 00699 gs.fillCircle(input[0],input[1],radius); 00700 } 00701 00702 } 00703 00704 /* 00705 void Grapher::plot_2D_density(Mat gridoutputs) const 00706 { 00707 00708 } 00709 00710 void Grapher::plot_1D_regression(Mat gridoutputs, Mat trainoutputs) const 00711 { 00712 00713 } 00714 */ 00715 00717 void Grapher::run() 00718 { 00719 int l = trainset->length(); 00720 PP<VecStatsCollector> statscol = new VecStatsCollector(); 00721 learner->setTrainStatsCollector(statscol); 00722 learner->setTrainingSet(trainset); 00723 00724 cerr << "*** Training learner on trainset of length " << l << " ... ***" << endl; 00725 learner->train(); 00726 cerr << "Final traincosts: " << statscol->getMean() << endl; 00727 00728 if(save_learner_as!="") 00729 { 00730 cerr << "Saving trained learner in file " << save_learner_as << endl; 00731 PLearn::save(save_learner_as, *learner); 00732 } 00733 00734 00735 cerr << "*** Computing outputs on trainset inputs... ***" << endl; 00736 Mat trainoutputs(l, learner->outputsize()); 00737 learner->use(trainset, trainoutputs); 00738 00739 // Try to determine the task if not specified 00740 if (task == "") { 00741 if(trainset->inputsize()==1 && 00742 trainset->targetsize()==1 && learner->outputsize()==1) 00743 task = "1D regression"; 00744 else if(trainset->inputsize()==2) 00745 { 00746 switch(trainset->targetsize()) 00747 { 00748 case 0: // density estimation or clustering 00749 if(learner->outputsize()>1) 00750 task = "2D clustering"; 00751 else 00752 task = "2D density"; 00753 break; 00754 case 1: // classif or regression 00755 if(learner->outputsize()>1) 00756 task = "2D classification"; 00757 else 00758 task = "2D regression"; 00759 break; 00760 default: 00761 PLERROR("Tasks with targetsize > 1 (multi-regression) not supported"); 00762 } 00763 } 00764 else 00765 PLERROR("Task wih inputsize=%d, targetsize=%d, outputsize=%d not supported", 00766 trainset->inputsize(), trainset->targetsize(), learner->outputsize()); 00767 } 00768 00769 // Now compute outputs on grid 00770 if(gridrange.isEmpty()) 00771 computeAutoGridrange(); 00772 cerr << "*** Computing outputs on " << griddim << " grid... ***" << endl; 00773 VMat gridinputs = new RegularGridVMatrix(griddim, gridrange); 00774 Mat gridoutputs(gridinputs->length(),learner->outputsize()); 00775 learner->use(gridinputs, gridoutputs); 00776 00777 if (task == "2D classification" && learner->outputsize() == 1) { 00778 // Transform a one-class output into a two-class one... 00779 Mat newgridoutputs(gridinputs->length(), 2); 00780 for (int i=0; i<gridinputs->length(); ++i) { 00781 newgridoutputs(i,0) = gridoutputs(i,0) <= class1_threshold; 00782 newgridoutputs(i,1) = gridoutputs(i,0) > class1_threshold; 00783 } 00784 gridoutputs = newgridoutputs; 00785 } 00786 00787 cerr << ">>> TASK PERFORMED: " << task << endl; 00788 00789 string epsfname = basename+".eps"; 00790 cerr << "Creating file: " << epsfname << endl; 00791 00792 if(radius<0) 00793 radius = fabs(radius * (gridrange[0].second-gridrange[0].first)); 00794 00795 if(task=="2D classification" || task=="2D clustering") 00796 plot_2D_classification(epsfname, trainset, griddim, gridrange, gridoutputs, radius, bw); 00797 else if(task=="1D regression") 00798 plot_1D_regression(basename, trainset, griddim, gridrange, gridoutputs, trainoutputs, bw); 00799 // else if(task=="2D regression") 00800 // plot_2D-regression(basename, 00801 /* 00802 else if(task=="2D density") 00803 plot_2D_density(gridoutputs); 00804 */ 00805 00806 // Old DX stuff 00807 /* 00808 string dset_fname = basename+"_dset.dx"; 00809 cerr << "Computing and writing trainset output field to file " << dset_fname << endl; 00810 DX_create_dataset_outputs_file(dset_fname, learner, trainset); 00811 00812 string outputs_fname = basename+"_outputs.dx"; 00813 cerr << "Computing and writing grid output field to file " << outputs_fname << endl; 00814 DX_create_grid_outputs_file(outputs_fname, learner, trainset, nx, ny, 00815 include_datapoint_grid, 00816 xmin, xmax, ymin, ymax); 00817 cerr << "You can now view those files with OpenDX." << endl; 00818 */ 00819 } 00820 00821 00822 // ### Nothing to add here, simply calls build_ 00823 void Grapher::build() 00824 { 00825 inherited::build(); 00826 build_(); 00827 } 00828 00829 00830 void Grapher::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00831 { 00832 inherited::makeDeepCopyFromShallowCopy(copies); 00833 } 00834 00835 } // end of namespace PLearn

Generated on Tue Aug 17 15:54:51 2004 for PLearn by doxygen 1.3.7