00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00041
#include "GenerateDecisionPlot.h"
00042
#include <plearn/math/VecStatsCollector.h>
00043
#include <plearn/vmat/VMat_maths.h>
00044
00045
namespace PLearn {
00046
using namespace std;
00047
00048
00053 void DX_write_2D_fields(ostream& out,
const string& basename,
TVec<Mat> fields,
real x0,
real y0,
real deltax,
real deltay,
00054
TVec<string> fieldnames=
TVec<string>())
00055 {
00056
int nfields = fields.
length();
00057
int nx = fields[0].
length();
00058
int ny = fields[0].width();
00059
00060
string posname =
string(
"\"") + basename +
"_gridpos\"";
00061
00062 out <<
"object " << posname <<
" class gridpositions counts " << nx <<
" " << ny <<
"\n"
00063 <<
"origin " << x0 <<
" " << y0 <<
"\n"
00064 <<
"delta " << deltax <<
" 0 \n"
00065 <<
"delta 0 " << deltay <<
" \n\n\n";
00066
00067 string conname = string(
"\"") + basename +
"_gridcon\"";
00068
00069 out <<
"object " << conname <<
" class gridconnections counts " << nx <<
" " << ny <<
"\n"
00070
00071 <<
"attribute \"ref\" string \"positions\" \n\n\n";
00072
00073
for(
int k=0;
k<nfields;
k++)
00074 {
00075
Mat& m = fields[
k];
00076 string fieldname =
tostring(
k);
00077
if(fieldnames)
00078 fieldname = fieldnames[
k];
00079
00080 string dataname = string(
"\"") + basename +
"_" + fieldname +
"_data\"";
00081
00082 out <<
"object " << dataname <<
" class array type float rank 0 items " << nx*ny <<
" data follows \n";
00083
for(
int i=0; i<nx; i++)
00084 {
00085
for(
int j=0; j<ny; j++)
00086 out << m(i,j) <<
" ";
00087 out <<
"\n";
00088 }
00089 out <<
"attribute \"dep\" string \"positions\" \n\n\n";
00090
00091 out <<
"object \"" << fieldname <<
"\" class field \n"
00092 <<
"component \"positions\" " << posname <<
" \n"
00093 <<
"component \"connections\" " << conname <<
" \n"
00094 <<
"component \"data\" " << dataname <<
" \n\n\n";
00095 }
00096 }
00097
00098
00099 void DX_write_2D_fields(ostream& out,
const string& basename,
Vec X,
Vec Y,
TVec<Mat> fields)
00100 {
00101
int nfields = fields.
length();
00102
int nx = fields[0].
length();
00103
int ny = fields[0].width();
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
string posname =
string(
"\"") + basename +
"_gridpos\"";
00117 out <<
"object " << posname <<
" class array type float rank 1 shape 2 items " << nx*ny <<
" data follows\n";
00118
for(
int i=0; i<nx; i++)
00119
for(
int j=0; j<ny; j++)
00120 out << X[i] <<
" " << Y[j] <<
"\n";
00121 out <<
"\n\n";
00122
00123 string conname = string(
"\"") + basename +
"_gridcon\"";
00124 out <<
"object " << conname <<
" class gridconnections counts " << nx <<
" " << ny <<
"\n"
00125
00126 <<
"attribute \"ref\" string \"positions\" \n\n\n";
00127
00128
for(
int k=0;
k<nfields;
k++)
00129 {
00130
Mat& m = fields[
k];
00131 string fieldname =
"output" +
tostring(
k);
00132 string dataname = string(
"\"") + basename +
"_" + fieldname +
"_data\"";
00133
00134 out <<
"object " << dataname <<
" class array type float rank 0 items " << nx*ny <<
" data follows \n";
00135
for(
int i=0; i<nx; i++)
00136 {
00137
for(
int j=0; j<ny; j++)
00138 out << m(i,j) <<
" ";
00139 out <<
"\n";
00140 }
00141 out <<
"attribute \"dep\" string \"positions\" \n\n\n";
00142
00143 out <<
"object \"" << fieldname <<
"\" class field \n"
00144 <<
"component \"positions\" " << posname <<
" \n"
00145 <<
"component \"connections\" " << conname <<
" \n"
00146 <<
"component \"data\" " << dataname <<
" \n\n\n";
00147 }
00148 }
00149
00150
00151 TVec<Mat> computeOutputFields(
PP<PLearner> learner,
Vec X,
Vec Y)
00152 {
00153
int noutputs = learner->outputsize();
00154
00155
int nx = X.
length();
00156
int ny = Y.
length();
00157
int nfields = noutputs;
00158
TVec<Mat> fields(nfields);
00159
00160
for(
int k=0;
k<nfields;
k++)
00161 fields[
k].
resize(nx,ny);
00162
00163
Vec input(2);
00164
Vec output(noutputs);
00165
00166
ProgressBar pb(
"Computing " +
tostring(nx) +
" x " +
tostring(ny) +
" output field",nx*ny);
00167
00168
for(
int i=0; i<nx; i++)
00169
for(
int j=0; j<ny; j++)
00170 {
00171 input[0] = X[i];
00172 input[1] = Y[j];
00173 learner->computeOutput(input,output);
00174
00175
for(
int k=0;
k<noutputs;
k++)
00176 fields[
k](i,j) = output[
k];
00177 pb.
update(i*nx+j);
00178 }
00179
00180
return fields;
00181 }
00182
00183
00184 TVec<Mat> computeOutputFields(
PP<PLearner> learner,
int nx,
int ny,
real x0,
real y0,
real deltax,
real deltay)
00185 {
00186
int noutputs = learner->outputsize();
00187
int nfields = noutputs;
00188
00189
TVec<Mat> fields(nfields);
00190
for(
int k=0;
k<nfields;
k++)
00191 fields[
k].
resize(nx,ny);
00192
00193
Vec input(2);
00194
Vec output(noutputs);
00195
00196
ProgressBar pb(
"Computing " +
tostring(nx) +
" x " +
tostring(ny) +
" output field",nx*ny);
00197
00198
real x = x0;
00199
real y = y0;
00200
for(
int i=0; i<nx; i++,
x+=deltax)
00201
for(
int j=0; j<ny; j++, y+=deltay)
00202 {
00203 input[0] =
x;
00204 input[1] = y;
00205 learner->computeOutput(input,output);
00206
00207
for(
int k=0;
k<noutputs;
k++)
00208 fields[
k](i,j) = output[
k];
00209 pb.
update(i*nx+j);
00210 }
00211
00212
return fields;
00213 }
00214
00215
00216
00217 TVec<Mat> computeOutputFieldsAutoRange(
PP<PLearner> learner,
VMat dataset,
int nx,
int ny,
00218
real& x0,
real& y0,
real& deltax,
real& deltay,
real extraspace=.10)
00219 {
00220
Vec minv(2);
00221
Vec maxv(2);
00222
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00223
real extrax = (maxv[0]-minv[0])*extraspace;
00224 x0 = minv[0]-extrax;
00225 deltax = (maxv[0]+extrax-x0)/nx;
00226
real extray = (maxv[1]-minv[1])*extraspace;
00227 y0 = minv[1]-extray;
00228 deltay = (maxv[1]+extray-y0)/ny;
00229
return computeOutputFields(learner, nx, ny, x0, y0, deltax, deltay);
00230 }
00231
00232
00233 void computeXYPositions(
VMat dataset,
int nx,
int ny,
Vec& X,
Vec& Y,
real extraspace=.10)
00234 {
00235
Vec minv(2);
00236
Vec maxv(2);
00237
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00238
real extrax = (maxv[0]-minv[0])*extraspace;
00239
real x0 = minv[0]-extrax;
00240
real deltax = (maxv[0]+extrax-x0)/nx;
00241
real extray = (maxv[1]-minv[1])*extraspace;
00242
real y0 = minv[1]-extray;
00243
real deltay = (maxv[1]+extray-y0)/ny;
00244
00245
set<real> xpos;
00246
set<real> ypos;
00247
int l = dataset.
length();
00248
Vec datapoint(2);
00249
for(
int i=0; i<l; i++)
00250 {
00251 dataset->getRow(i,datapoint);
00252 xpos.insert(datapoint[0]);
00253 ypos.insert(datapoint[1]);
00254 }
00255
real x = x0;
00256
for(
int i=0; i<nx; i++,
x+=deltax)
00257 xpos.insert(
x);
00258
real y = y0;
00259
for(
int j=0; j<ny; j++, y+=deltay)
00260 ypos.insert(y);
00261
set<real>::iterator it;
00262 X.
resize(xpos.size());
00263
real* xptr = X.
data();
00264 it = xpos.begin();
00265
while(it!=xpos.end())
00266 *xptr++ = *it++;
00267 Y.
resize(ypos.size());
00268
real* yptr = Y.
data();
00269 it = ypos.begin();
00270
while(it!=ypos.end())
00271 *yptr++ = *it++;
00272 }
00273
00274
00275
00278 void DX_create_dataset_outputs_file(
const string& filename,
PP<PLearner> learner,
VMat dataset)
00279 {
00280 ofstream out(filename.c_str());
00281
00282
int l = dataset.
length();
00283
int inputsize = learner->inputsize();
00284
int targetsize = learner->targetsize();
00285
int outputsize = learner->outputsize();
00286
00287
00288
Vec input(inputsize);
00289
Vec target(targetsize);
00290
real weight;
00291
Vec output(outputsize);
00292
00293
00294 out <<
"object \"dset_pos\" class array type float rank 1 shape " << inputsize <<
" items " << l <<
" data follows \n";
00295
for(
int i=0; i<l; i++)
00296 {
00297 dataset->
getExample(i,input,target,weight);
00298
for(
int j=0; j<inputsize; j++)
00299 out << input[j] <<
" ";
00300 out <<
"\n";
00301 }
00302 out <<
"\n\n\n";
00303
00304
00305
if(targetsize+outputsize>0)
00306 {
00307
ProgressBar pb(
"Computing outputs for dataset points",l);
00308 out <<
"object \"dset_value\" class array type float rank 1 shape " << targetsize+outputsize <<
" items " << l <<
" data follows \n";
00309
for(
int i=0; i<l; i++)
00310 {
00311 dataset->
getExample(i,input,target,weight);
00312
for(
int j=0; j<targetsize; j++)
00313 out << target[j] <<
" ";
00314 learner->computeOutput(input, output);
00315
for(
int j=0; j<outputsize; j++)
00316 out << output[j] <<
" ";
00317 out <<
"\n";
00318 pb.
update(i);
00319 }
00320 out <<
"attribute \"dep\" string \"positions\" \n\n\n";
00321 }
00322
00323
00324 out <<
"object \"dset\" class field \n"
00325 <<
"component \"positions\" \"dset_pos\" \n";
00326
if(targetsize+outputsize>0)
00327 out <<
"component \"data\" \"dset_value\" \n";
00328 out <<
"\n\n\n";
00329
00330
00331
00332 out <<
"end" <<
endl;
00333 }
00334
00335
00342
00343 void DX_create_grid_outputs_file(
const string& filename,
PP<PLearner> learner,
VMat dataset,
00344
int nx,
int ny,
bool include_datapoint_grid=
false,
00345
real xmin=MISSING_VALUE,
real xmax=MISSING_VALUE,
00346
real ymin=MISSING_VALUE,
real ymax=MISSING_VALUE,
00347
real extraspace=.10)
00348 {
00349 ofstream out(filename.c_str());
00350
00351
double logsum = -FLT_MAX;
00352
00353
int l = dataset.
length();
00354
int inputsize = learner->inputsize();
00355
int targetsize = learner->targetsize();
00356
int outputsize = learner->outputsize();
00357
00358
Vec input(inputsize);
00359
Vec target(targetsize);
00360
real weight;
00361
Vec output(outputsize);
00362
00363
00364
00365
set<real> xpos;
00366
set<real> ypos;
00367
00368
00369
Vec minv(2);
00370
Vec maxv(2);
00371
computeRange(dataset.
subMatColumns(0,2), minv, maxv);
00372
real extrax = (maxv[0]-minv[0])*extraspace;
00373
real extray = (maxv[1]-minv[1])*extraspace;
00374
if(
is_missing(xmin))
00375 xmin = minv[0]-extrax;
00376
if(
is_missing(xmax))
00377 xmax = maxv[0]+extrax;
00378
if(
is_missing(ymin))
00379 ymin = minv[1]-extray;
00380
if(
is_missing(ymax))
00381 ymax = maxv[1]+extray;
00382
real deltax = (xmax-xmin)/nx;
00383
real deltay = (ymax-ymin)/ny;
00384
00385
real x = xmin;
00386
for(
int i=0; i<nx; i++,
x+=deltax)
00387 xpos.insert(
x);
00388
real y = ymin;
00389
for(
int j=0; j<ny; j++, y+=deltay)
00390 ypos.insert(y);
00391
00392
00393
if(include_datapoint_grid)
00394 {
00395
for(
int i=0; i<l; i++)
00396 {
00397 dataset->
getExample(i,input,target,weight);
00398
x = input[0];
00399 y = input[1];
00400
if(
x>xmin &&
x<xmax)
00401 xpos.insert(
x);
00402
if(y>ymin && y<ymax)
00403 ypos.insert(y);
00404 }
00405 }
00406
00407 nx = xpos.size();
00408 ny = ypos.size();
00409
set<real>::iterator itx;
00410
set<real>::iterator ity;
00411
00412 out <<
"object \"outputs_gridpos\" class array type float rank 1 shape 2 items " << nx*ny <<
" data follows\n";
00413
for(itx=xpos.begin(); itx!=xpos.end(); ++itx)
00414
for(ity=ypos.begin(); ity!=ypos.end(); ++ity)
00415 out << *itx <<
" " << *ity <<
"\n";
00416 out <<
"\n\n";
00417
00418 out <<
"object \"outputs_gridcon\" class gridconnections counts " << nx <<
" " << ny <<
"\n"
00419
00420 <<
"attribute \"ref\" string \"positions\" \n\n\n";
00421
00422 out <<
"object \"outputs_values\" class array type float rank 1 shape " << outputsize <<
" items " << nx*ny <<
" data follows \n";
00423
00424
ProgressBar pb(
"Computing outputs for grid positions: " +
tostring(nx)+
"x"+
tostring(ny), nx*ny);
00425
int n = 0;
00426
for(itx=xpos.begin(); itx!=xpos.end(); ++itx)
00427 {
00428 input[0] = *itx;
00429
for(ity=ypos.begin(); ity!=ypos.end(); ++ity)
00430 {
00431 input[1] = *ity;
00432 learner->computeOutput(input, output);
00433
for(
int j=0; j<outputsize; j++)
00434 out << output[j] <<
" ";
00435 out <<
"\n";
00436
if(logsum==-FLT_MAX)
00437 logsum = output[0];
00438
else
00439 logsum =
logadd(logsum, output[0]);
00440 pb.
update(n++);
00441 }
00442 }
00443 pb.
close();
00444 out <<
"attribute \"dep\" string \"positions\" \n\n\n";
00445
00446 out <<
"object \"outputs\" class field \n"
00447 <<
"component \"positions\" \"outputs_gridpos\" \n"
00448 <<
"component \"connections\" \"outputs_gridcon\" \n"
00449 <<
"component \"data\" \"outputs_values\" \n\n\n";
00450
00451 out <<
"end" <<
endl;
00452
00453
double surfelem = deltax*deltay;
00454
double surfintegral =
exp(logsum)*surfelem;
00455 cerr <<
"Estimated integral over sampled domain: " << surfintegral <<
endl;
00456 }
00457
00458
00459 GenerateDecisionPlot::GenerateDecisionPlot()
00460 :basename("dxplot"),
00461 nx(10), ny(10),
00462 include_datapoint_grid(false),
00463 xmin(
MISSING_VALUE),
00464 xmax(
MISSING_VALUE),
00465 ymin(
MISSING_VALUE),
00466 ymax(
MISSING_VALUE)
00467 {
00468 }
00469
00470
// Registers the class with its user-visible help (replaces the unfilled
// "ONE LINE DESCR" / "NO HELP" template placeholders).
PLEARN_IMPLEMENT_OBJECT(
    GenerateDecisionPlot,
    "Trains a learner and writes OpenDX files of its outputs on a 2D grid",
    "Running this object trains the given learner on its training set, then writes\n"
    "two OpenDX files: basename_dset.dx, with the targets and the learner's outputs\n"
    "at the training points, and basename_outputs.dx, with the learner's outputs\n"
    "on a 2D grid spanning the first two input coordinates.\n");
00471
00472 void GenerateDecisionPlot::declareOptions(
OptionList& ol)
00473 {
00474
00475
00476
00477
00478
00479
00480
declareOption(ol,
"basename", &GenerateDecisionPlot::basename, OptionBase::buildoption,
00481
"Base name of the .dx data file to generate. Running this class will generate\n"
00482
"files basename_dset.dx containing targets and outputs for the given dataset positions\n"
00483
"and basename_outputs.dx containing outputs computed at grid positions\n");
00484
declareOption(ol,
"learner", &GenerateDecisionPlot::learner, OptionBase::buildoption,
00485
"The learner to train/test. Its train_set will be used as the dataset on\n"
00486
"which to base this decision plot (ranges are inferred from it, and decisions\n"
00487
"on the training points are written in basename_dset.dx");
00488
declareOption(ol,
"nx", &GenerateDecisionPlot::nx, OptionBase::buildoption,
00489
"Number of x sample coordinates (grid)");
00490
declareOption(ol,
"ny", &GenerateDecisionPlot::ny, OptionBase::buildoption,
00491
"Number of y sample coordinates (grid)");
00492
declareOption(ol,
"include_datapoint_grid", &GenerateDecisionPlot::include_datapoint_grid, OptionBase::buildoption,
00493
"");
00494
declareOption(ol,
"xmin", &GenerateDecisionPlot::xmin, OptionBase::buildoption,
00495
"");
00496
declareOption(ol,
"xmax", &GenerateDecisionPlot::xmax, OptionBase::buildoption,
00497
"");
00498
declareOption(ol,
"ymin", &GenerateDecisionPlot::ymin, OptionBase::buildoption,
00499
"");
00500
declareOption(ol,
"ymax", &GenerateDecisionPlot::ymax, OptionBase::buildoption,
00501
"");
00502
declareOption(ol,
"save_learner_as", &GenerateDecisionPlot::save_learner_as, OptionBase::buildoption,
00503
"(Optionally) save trained learner in this file (.psave)");
00504
00505
00506 inherited::declareOptions(ol);
00507 }
00508
00509 void GenerateDecisionPlot::build_()
00510 {
00511
00512
00513
00514
00515
00516
00517
00518 }
00519
00521 void GenerateDecisionPlot::run()
00522 {
00523
VMat dataset =
learner->getTrainingSet();
00524
learner->setTrainStatsCollector(
new VecStatsCollector());
00525
learner->train();
00526
00527
if(
save_learner_as!=
"")
00528 {
00529 cerr <<
"Saving trained learner in file " <<
save_learner_as <<
endl;
00530
PLearn::save(
save_learner_as, *
learner);
00531 }
00532
00533
string dset_fname = basename+
"_dset.dx";
00534 cerr <<
"Computing and writing dataset output field to file " << dset_fname <<
endl;
00535
DX_create_dataset_outputs_file(dset_fname,
learner, dataset);
00536
00537
string outputs_fname = basename+
"_outputs.dx";
00538 cerr <<
"Computing and writing grid output field to file " << outputs_fname <<
endl;
00539
DX_create_grid_outputs_file(outputs_fname,
learner, dataset,
nx,
ny,
00540
include_datapoint_grid,
00541
xmin,
xmax,
ymin,
ymax);
00542 cerr <<
"You can now view those files with OpenDX." <<
endl;
00543 }
00544
00545
00546
00547 void GenerateDecisionPlot::build()
00548 {
00549 inherited::build();
00550
build_();
00551 }
00552
00553
00554 void GenerateDecisionPlot::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00555 {
00556 inherited::makeDeepCopyFromShallowCopy(copies);
00557 }
00558
00559 }