00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00041 
#include "GenerateDecisionPlot.h"
00042 
#include <plearn/math/VecStatsCollector.h>
00043 
#include <plearn/vmat/VMat_maths.h>
00044 
00045 
namespace PLearn {
00046 
using namespace std;
00047 
00048 
00053 void DX_write_2D_fields(ostream& out, 
const string& basename, 
TVec<Mat> fields, 
real x0, 
real y0, 
real deltax, 
real deltay, 
00054                         
TVec<string> fieldnames=
TVec<string>())
00055 {
00056   
int nfields = fields.
length();
00057   
int nx = fields[0].
length();
00058   
int ny = fields[0].width();
00059 
00060   
string posname = 
string(
"\"") + basename + 
"_gridpos\"";
00061 
00062   out << 
"object " << posname << 
" class gridpositions counts " << nx << 
" " << ny << 
"\n"
00063       << 
"origin  " << x0 << 
" " << y0 << 
"\n"
00064       << 
"delta   " << deltax << 
" 0 \n"
00065       << 
"delta    0 " << deltay << 
" \n\n\n";
00066 
00067   string conname = string(
"\"") + basename + 
"_gridcon\"";
00068 
00069   out << 
"object " << conname << 
" class gridconnections counts " << nx << 
" " << ny << 
"\n"
00070     
00071       << 
"attribute \"ref\" string \"positions\" \n\n\n";
00072 
00073   
for(
int k=0; 
k<nfields; 
k++)
00074     {
00075       
Mat& m = fields[
k];
00076       string fieldname = 
tostring(
k);
00077       
if(fieldnames)
00078         fieldname = fieldnames[
k];
00079 
00080       string dataname = string(
"\"") + basename + 
"_" + fieldname + 
"_data\"";
00081 
00082       out << 
"object " << dataname << 
" class array type float rank 0 items " << nx*ny << 
" data follows \n";
00083       
for(
int i=0; i<nx; i++)
00084         {
00085           
for(
int j=0; j<ny; j++)
00086             out << m(i,j) << 
" ";
00087           out << 
"\n";
00088         }
00089       out << 
"attribute \"dep\" string \"positions\" \n\n\n";
00090 
00091       out << 
"object \"" << fieldname << 
"\" class field \n"
00092           << 
"component \"positions\" " << posname << 
" \n"
00093           << 
"component \"connections\" " << conname << 
" \n"
00094           << 
"component \"data\" " << dataname << 
" \n\n\n";
00095     }
00096 }
00097 
00098 
00099 void DX_write_2D_fields(ostream& out, 
const string& basename, 
Vec X, 
Vec Y, 
TVec<Mat> fields)
00100 {
00101   
int nfields = fields.
length();
00102   
int nx = fields[0].
length();
00103   
int ny = fields[0].width();
00104 
00105   
00106 
00107 
00108 
00109 
00110 
00111 
00112 
00113 
00114 
00115 
00116   
string posname = 
string(
"\"") + basename + 
"_gridpos\"";
00117   out << 
"object " << posname << 
" class array type float rank 1 shape 2 items " << nx*ny << 
" data follows\n";
00118   
for(
int i=0; i<nx; i++)
00119     
for(
int j=0; j<ny; j++)
00120       out << X[i] << 
" " << Y[j] << 
"\n";
00121   out << 
"\n\n";
00122 
00123   string conname = string(
"\"") + basename + 
"_gridcon\"";
00124   out << 
"object " << conname << 
" class gridconnections counts " << nx << 
" " << ny << 
"\n"
00125     
00126       << 
"attribute \"ref\" string \"positions\" \n\n\n";
00127 
00128   
for(
int k=0; 
k<nfields; 
k++)
00129     {
00130       
Mat& m = fields[
k];
00131       string fieldname = 
"output" + 
tostring(
k);
00132       string dataname = string(
"\"") + basename + 
"_" + fieldname + 
"_data\"";
00133 
00134       out << 
"object " << dataname << 
" class array type float rank 0 items " << nx*ny << 
" data follows \n";
00135       
for(
int i=0; i<nx; i++)
00136         {
00137           
for(
int j=0; j<ny; j++)
00138             out << m(i,j) << 
" ";
00139           out << 
"\n";
00140         }
00141       out << 
"attribute \"dep\" string \"positions\" \n\n\n";
00142 
00143       out << 
"object \"" << fieldname << 
"\" class field \n"
00144           << 
"component \"positions\" " << posname << 
" \n"
00145           << 
"component \"connections\" " << conname << 
" \n"
00146           << 
"component \"data\" " << dataname << 
" \n\n\n";
00147     }
00148 }
00149 
00150 
00151 TVec<Mat> computeOutputFields(
PP<PLearner> learner, 
Vec X, 
Vec Y)
00152 {
00153   
int noutputs = learner->outputsize();
00154 
00155   
int nx = X.
length();
00156   
int ny = Y.
length();
00157   
int nfields = noutputs;
00158   
TVec<Mat> fields(nfields);
00159 
00160   
for(
int k=0; 
k<nfields; 
k++)
00161     fields[
k].
resize(nx,ny);
00162 
00163   
Vec input(2);
00164   
Vec output(noutputs);
00165 
00166   
ProgressBar pb(
"Computing " + 
tostring(nx) + 
" x " + 
tostring(ny) + 
" output field",nx*ny);
00167   
00168   
for(
int i=0; i<nx; i++)
00169     
for(
int j=0; j<ny; j++)
00170       {
00171         input[0] = X[i];
00172         input[1] = Y[j];
00173         learner->computeOutput(input,output);
00174         
00175         
for(
int k=0; 
k<noutputs; 
k++)
00176           fields[
k](i,j) = output[
k];
00177         pb.
update(i*nx+j);
00178       }
00179 
00180   
return fields;
00181 }
00182 
00183 
00184 TVec<Mat> computeOutputFields(
PP<PLearner> learner, 
int nx, 
int ny, 
real x0, 
real y0, 
real deltax, 
real deltay)
00185 {
00186   
int noutputs = learner->outputsize();
00187   
int nfields = noutputs;
00188 
00189   
TVec<Mat> fields(nfields);
00190   
for(
int k=0; 
k<nfields; 
k++)
00191     fields[
k].
resize(nx,ny);
00192 
00193   
Vec input(2);
00194   
Vec output(noutputs);
00195 
00196   
ProgressBar pb(
"Computing " + 
tostring(nx) + 
" x " + 
tostring(ny) + 
" output field",nx*ny);
00197 
00198   
real x = x0;
00199   
real y = y0;
00200   
for(
int i=0; i<nx; i++, 
x+=deltax)
00201     
for(
int j=0; j<ny; j++, y+=deltay)
00202       {
00203         input[0] = 
x;
00204         input[1] = y;
00205         learner->computeOutput(input,output);
00206         
00207         
for(
int k=0; 
k<noutputs; 
k++)
00208           fields[
k](i,j) = output[
k];
00209         pb.
update(i*nx+j);
00210       }
00211 
00212   
return fields;
00213 }
00214 
00215 
00216 
00217 TVec<Mat> computeOutputFieldsAutoRange(
PP<PLearner> learner, 
VMat dataset, 
int nx, 
int ny, 
00218                                        
real& x0, 
real& y0, 
real& deltax, 
real& deltay, 
real extraspace=.10)
00219 {
00220   
Vec minv(2);
00221   
Vec maxv(2);
00222   
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00223   
real extrax = (maxv[0]-minv[0])*extraspace;
00224   x0 = minv[0]-extrax;
00225   deltax = (maxv[0]+extrax-x0)/nx;
00226   
real extray = (maxv[1]-minv[1])*extraspace;
00227   y0 = minv[1]-extray;
00228   deltay = (maxv[1]+extray-y0)/ny;
00229   
return computeOutputFields(learner, nx, ny, x0, y0, deltax, deltay);
00230 }
00231 
00232 
00233 void computeXYPositions(
VMat dataset, 
int nx, 
int ny, 
Vec& X, 
Vec& Y, 
real extraspace=.10)
00234 {
00235   
Vec minv(2);
00236   
Vec maxv(2);
00237   
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00238   
real extrax = (maxv[0]-minv[0])*extraspace;
00239   
real x0 = minv[0]-extrax;
00240   
real deltax = (maxv[0]+extrax-x0)/nx;
00241   
real extray = (maxv[1]-minv[1])*extraspace;
00242   
real y0 = minv[1]-extray;
00243   
real deltay = (maxv[1]+extray-y0)/ny;
00244 
00245   
set<real> xpos;
00246   
set<real> ypos;
00247   
int l = dataset.
length();
00248   
Vec datapoint(2);
00249   
for(
int i=0; i<l; i++)
00250     {
00251       dataset->getRow(i,datapoint);
00252       xpos.insert(datapoint[0]);
00253       ypos.insert(datapoint[1]);
00254     }
00255   
real x = x0;
00256   
for(
int i=0; i<nx; i++, 
x+=deltax)
00257     xpos.insert(
x);
00258   
real y = y0;
00259   
for(
int j=0; j<ny; j++, y+=deltay)
00260     ypos.insert(y);
00261   
set<real>::iterator it;
00262   X.
resize(xpos.size());
00263   
real* xptr = X.
data();
00264   it = xpos.begin();
00265   
while(it!=xpos.end())
00266     *xptr++ = *it++;
00267   Y.
resize(ypos.size());
00268   
real* yptr = Y.
data();
00269   it = ypos.begin();
00270   
while(it!=ypos.end())
00271     *yptr++ = *it++;
00272 }
00273 
00274 
00275 
00278 void DX_create_dataset_outputs_file(
const string& filename, 
PP<PLearner> learner, 
VMat dataset)
00279 {
00280   ofstream out(filename.c_str());
00281 
00282   
int l = dataset.
length();
00283   
int inputsize = learner->inputsize();
00284   
int targetsize = learner->targetsize();
00285   
int outputsize = learner->outputsize();
00286 
00287   
00288   
Vec input(inputsize);
00289   
Vec target(targetsize);
00290   
real weight;
00291   
Vec output(outputsize);
00292 
00293   
00294   out << 
"object \"dset_pos\" class array type float rank 1 shape " << inputsize << 
" items " << l << 
" data follows \n";
00295   
for(
int i=0; i<l; i++)
00296     {
00297       dataset->
getExample(i,input,target,weight);
00298       
for(
int j=0; j<inputsize; j++)
00299         out << input[j] << 
" ";
00300       out << 
"\n";
00301     }
00302   out << 
"\n\n\n";
00303 
00304   
00305   
if(targetsize+outputsize>0)
00306     {
00307       
ProgressBar pb(
"Computing outputs for dataset points",l);
00308       out << 
"object \"dset_value\" class array type float rank 1 shape " << targetsize+outputsize << 
" items " << l << 
" data follows \n";
00309       
for(
int i=0; i<l; i++)
00310         {
00311           dataset->
getExample(i,input,target,weight);
00312           
for(
int j=0; j<targetsize; j++)
00313             out << target[j] << 
" ";
00314           learner->computeOutput(input, output);
00315           
for(
int j=0; j<outputsize; j++)
00316             out << output[j] << 
" ";
00317           out << 
"\n";
00318           pb.
update(i);
00319         }
00320       out << 
"attribute \"dep\" string \"positions\" \n\n\n";
00321     }
00322 
00323   
00324   out << 
"object \"dset\" class field \n"
00325       << 
"component \"positions\" \"dset_pos\" \n";
00326   
if(targetsize+outputsize>0)
00327     out << 
"component \"data\" \"dset_value\" \n";
00328   out << 
"\n\n\n";
00329 
00330 
00331   
00332   out << 
"end" << 
endl;
00333 }
00334 
00335 
00342 
00343 void DX_create_grid_outputs_file(
const string& filename, 
PP<PLearner> learner, 
VMat dataset, 
00344                                   
int nx, 
int ny, 
bool include_datapoint_grid=
false, 
00345                                   
real xmin=MISSING_VALUE, 
real xmax=MISSING_VALUE, 
00346                                   
real ymin=MISSING_VALUE, 
real ymax=MISSING_VALUE,
00347                                   
real extraspace=.10)
00348 {
00349   ofstream out(filename.c_str());
00350 
00351   
double logsum = -FLT_MAX;
00352 
00353   
int l = dataset.
length();
00354   
int inputsize = learner->inputsize();
00355   
int targetsize = learner->targetsize();
00356   
int outputsize = learner->outputsize();
00357 
00358   
Vec input(inputsize);
00359   
Vec target(targetsize);
00360   
real weight;
00361   
Vec output(outputsize);
00362 
00363   
00364 
00365   
set<real> xpos;
00366   
set<real> ypos;
00367 
00368   
00369   
Vec minv(2);
00370   
Vec maxv(2);
00371   
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00372   
real extrax = (maxv[0]-minv[0])*extraspace;
00373   
real extray = (maxv[1]-minv[1])*extraspace;
00374   
if(
is_missing(xmin))
00375     xmin = minv[0]-extrax;
00376   
if(
is_missing(xmax))
00377     xmax = maxv[0]+extrax;
00378   
if(
is_missing(ymin))
00379     ymin = minv[1]-extray;
00380   
if(
is_missing(ymax))
00381     ymax = maxv[1]+extray;
00382   
real deltax = (xmax-xmin)/nx;
00383   
real deltay = (ymax-ymin)/ny;
00384 
00385   
real x = xmin;
00386   
for(
int i=0; i<nx; i++, 
x+=deltax)
00387     xpos.insert(
x);
00388   
real y = ymin;
00389   
for(
int j=0; j<ny; j++, y+=deltay)
00390     ypos.insert(y);
00391 
00392   
00393   
if(include_datapoint_grid) 
00394     {
00395       
for(
int i=0; i<l; i++)
00396         {
00397           dataset->
getExample(i,input,target,weight);
00398           
x = input[0];
00399           y = input[1];
00400           
if(
x>xmin && 
x<xmax)
00401             xpos.insert(
x);
00402           
if(y>ymin && y<ymax)
00403             ypos.insert(y);
00404         }
00405     }
00406 
00407   nx = xpos.size();
00408   ny = ypos.size();
00409   
set<real>::iterator itx;
00410   
set<real>::iterator ity;
00411 
00412   out << 
"object \"outputs_gridpos\" class array type float rank 1 shape 2 items " << nx*ny << 
" data follows\n";
00413   
for(itx=xpos.begin(); itx!=xpos.end(); ++itx)
00414     
for(ity=ypos.begin(); ity!=ypos.end(); ++ity)
00415       out << *itx << 
" " << *ity << 
"\n";
00416   out << 
"\n\n";
00417 
00418   out << 
"object \"outputs_gridcon\" class gridconnections counts " << nx << 
" " << ny << 
"\n"
00419     
00420       << 
"attribute \"ref\" string \"positions\" \n\n\n";
00421 
00422   out << 
"object \"outputs_values\" class array type float rank 1 shape " << outputsize << 
" items " << nx*ny << 
" data follows \n";
00423   
00424   
ProgressBar pb(
"Computing outputs for grid positions: " + 
tostring(nx)+
"x"+
tostring(ny), nx*ny);
00425   
int n = 0;
00426   
for(itx=xpos.begin(); itx!=xpos.end(); ++itx)
00427     {
00428       input[0] = *itx;
00429       
for(ity=ypos.begin(); ity!=ypos.end(); ++ity)
00430         {
00431           input[1] = *ity;
00432           learner->computeOutput(input, output);
00433           
for(
int j=0; j<outputsize; j++)
00434             out << output[j] << 
" ";
00435           out << 
"\n";
00436           
if(logsum==-FLT_MAX)
00437             logsum = output[0];
00438           
else 
00439             logsum = 
logadd(logsum, output[0]);
00440           pb.
update(n++);
00441         }
00442     }
00443   pb.
close();
00444   out << 
"attribute \"dep\" string \"positions\" \n\n\n";
00445 
00446   out << 
"object \"outputs\" class field \n"
00447       << 
"component \"positions\" \"outputs_gridpos\" \n"
00448       << 
"component \"connections\" \"outputs_gridcon\" \n"
00449       << 
"component \"data\" \"outputs_values\" \n\n\n";
00450   
00451   out << 
"end" << 
endl;
00452 
00453   
double surfelem = deltax*deltay;
00454   
double surfintegral = 
exp(logsum)*surfelem;
00455   cerr << 
"Estimated integral over sampled domain: " << surfintegral << 
endl;
00456 }
00457 
00458 
00459 GenerateDecisionPlot::GenerateDecisionPlot() 
00460   :basename("dxplot"),
00461    nx(10), ny(10),
00462    include_datapoint_grid(false),
00463    xmin(
MISSING_VALUE), 
00464    xmax(
MISSING_VALUE), 
00465    ymin(
MISSING_VALUE), 
00466    ymax(
MISSING_VALUE)
00467   {
00468   }
00469 
00470   
// Registers the class together with its user-visible help text (replaces the
// "ONE LINE DESCR" / "NO HELP" placeholders).
PLEARN_IMPLEMENT_OBJECT(GenerateDecisionPlot,
                        "Trains a learner and writes OpenDX files of its outputs on the dataset and on a 2D grid",
                        "Running this object trains the given learner on its training set, then generates\n"
                        "basename_dset.dx, containing targets and outputs for the dataset positions, and\n"
                        "basename_outputs.dx, containing outputs computed at grid positions.\n"
                        "Both files can be viewed with OpenDX.\n");
00471 
00472   void GenerateDecisionPlot::declareOptions(
OptionList& ol)
00473   {
00474     
00475     
00476     
00477     
00478     
00479 
00480     
declareOption(ol, 
"basename", &GenerateDecisionPlot::basename, OptionBase::buildoption,
00481                   
"Base name of the .dx data file to generate. Running this class will generate\n"
00482                   
"files basename_dset.dx containing targets and outputs for the given dataset positions\n"
00483                   
"and basename_outputs.dx containing outputs computed at grid positions\n");
00484     
declareOption(ol, 
"learner", &GenerateDecisionPlot::learner, OptionBase::buildoption,
00485                    
"The learner to train/test. Its train_set will be used as the dataset on\n"
00486                   
"which to base this decision plot (ranges are inferred from it, and decisions\n"
00487                   
"on the training points are written in basename_dset.dx");
00488     
declareOption(ol, 
"nx", &GenerateDecisionPlot::nx, OptionBase::buildoption,
00489                    
"Number of x sample coordinates (grid)");
00490     
declareOption(ol, 
"ny", &GenerateDecisionPlot::ny, OptionBase::buildoption,
00491                    
"Number of y sample coordinates (grid)");
00492     
declareOption(ol, 
"include_datapoint_grid", &GenerateDecisionPlot::include_datapoint_grid, OptionBase::buildoption,
00493                    
"");
00494     
declareOption(ol, 
"xmin", &GenerateDecisionPlot::xmin, OptionBase::buildoption,
00495                    
"");
00496     
declareOption(ol, 
"xmax", &GenerateDecisionPlot::xmax, OptionBase::buildoption,
00497                    
"");
00498     
declareOption(ol, 
"ymin", &GenerateDecisionPlot::ymin, OptionBase::buildoption,
00499                    
"");
00500     
declareOption(ol, 
"ymax", &GenerateDecisionPlot::ymax, OptionBase::buildoption,
00501                    
"");
00502     
declareOption(ol, 
"save_learner_as", &GenerateDecisionPlot::save_learner_as, OptionBase::buildoption,
00503                    
"(Optionally) save trained learner in this file (.psave)");
00504 
00505     
00506     inherited::declareOptions(ol);
00507   }
00508 
00509   void GenerateDecisionPlot::build_()
00510   {
00511     
00512     
00513     
00514     
00515     
00516     
00517     
00518   }
00519 
00521 void GenerateDecisionPlot::run()
00522 {
00523   
VMat dataset = 
learner->getTrainingSet();
00524   
learner->setTrainStatsCollector(
new VecStatsCollector());
00525   
learner->train();
00526 
00527   
if(
save_learner_as!=
"")
00528     {
00529       cerr << 
"Saving trained learner in file " << 
save_learner_as << 
endl;
00530       
PLearn::save(
save_learner_as, *
learner);
00531     }
00532 
00533   
string dset_fname = basename+
"_dset.dx";
00534   cerr << 
"Computing and writing dataset output field to file " << dset_fname << 
endl;
00535   
DX_create_dataset_outputs_file(dset_fname, 
learner, dataset);
00536 
00537   
string outputs_fname = basename+
"_outputs.dx";
00538   cerr << 
"Computing and writing grid output field to file " << outputs_fname << 
endl; 
00539   
DX_create_grid_outputs_file(outputs_fname, 
learner, dataset, 
nx, 
ny, 
00540                                
include_datapoint_grid, 
00541                                
xmin, 
xmax, 
ymin, 
ymax);
00542   cerr << 
"You can now view those files with OpenDX." << 
endl;
00543 }
00544 
00545 
00546   
00547   void GenerateDecisionPlot::build()
00548   {
00549     inherited::build();
00550     
build_();
00551   }
00552 
00553 
00554   void GenerateDecisionPlot::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00555   {
00556     inherited::makeDeepCopyFromShallowCopy(copies);
00557   }
00558 
00559 }