00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00040
#include "Experiment.h"
00041
00042
#include <plearn/math/VecStatsCollector.h>
00043
#include <plearn/vmat/AsciiVMatrix.h>
00044
#include <plearn/sys/PLMPI.h>
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049 Experiment::Experiment()
00050 :save_models(true), save_initial_models(false),
00051 save_test_outputs(false), save_test_costs(false)
00052 {}
00053
00054
// Registers Experiment with the PLearn object system. The one-line help
// string flags the class as deprecated in favour of PTester; the
// multi-line help is intentionally left empty.
PLEARN_IMPLEMENT_OBJECT(Experiment, "DEPRECATED: use PTester instead", "");
00055
00056 void Experiment::declareOptions(
OptionList& ol)
00057 {
00058
declareOption(ol,
"expdir", &Experiment::expdir, OptionBase::buildoption,
00059
"Path of this experiment's directory in which to save all experiment results (will be created if it does not already exist)");
00060
declareOption(ol,
"learner", &Experiment::learner, OptionBase::buildoption,
00061
"The learner to train/test");
00062
declareOption(ol,
"dataset", &Experiment::dataset, OptionBase::buildoption,
00063
"The dataset to use for training/testing (will be split according to what is specified in the testmethod)\n"
00064
"You may omit this only if your splitter is an ExplicitSplitter");
00065
declareOption(ol,
"splitter", &Experiment::splitter, OptionBase::buildoption,
00066
"The splitter to use to generate one or several train/test pairs from the dataset.");
00067
declareOption(ol,
"save_models", &Experiment::save_models, OptionBase::buildoption,
00068
"If true, the final model#k will be saved in Split#k/final.psave");
00069
declareOption(ol,
"save_initial_models", &Experiment::save_initial_models, OptionBase::buildoption,
00070
"If true, the initial model#k (just after forget() has been called) will be saved in Split#k/initial.psave");
00071
declareOption(ol,
"save_test_outputs", &Experiment::save_test_outputs, OptionBase::buildoption,
00072
"If true, the outputs of the test for split #k will be saved in Split#k/test_outputs.pmat");
00073
declareOption(ol,
"save_test_costs", &Experiment::save_test_costs, OptionBase::buildoption,
00074
"If true, the costs of the test for split #k will be saved in Split#k/test_costs.pmat");
00075 inherited::declareOptions(ol);
00076 }
00077
00078 void Experiment::build_()
00079 {
00080
splitter->setDataSet(
dataset);
00081 }
00082
00083
00084 void Experiment::build()
00085 {
00086 inherited::build();
00087
build_();
00088 }
00089
00090 void Experiment::run()
00091 {
00092
if(
expdir==
"")
00093
PLERROR(
"No expdir specified for Experiment.");
00094
if(!
learner)
00095
PLERROR(
"No learner specified for Experiment.");
00096
if(!
splitter)
00097
PLERROR(
"No splitter specified for Experiment");
00098
00099
if(PLMPI::rank==0)
00100 {
00101
if(
pathexists(
expdir))
00102
PLERROR(
"Directory (or file) %s already exists. First move it out of the way.",
expdir.c_str());
00103
00104
if(!
force_mkdir(
expdir))
00105
PLERROR(
"Could not create experiment directory %s",
expdir.c_str());
00106
00107
00108
PLearn::save(
append_slash(
expdir)+
"experiment.psave", *
this);
00109 }
00110
00111
int nsplits =
splitter->nsplits();
00112
Array<string> testresnames =
learner->testResultsNames();
00113
int ntestres = testresnames.
size();
00114
VecStatsCollector teststats;
00115
00116
00117
VMat results;
00118
00119
if(PLMPI::rank==0)
00120 {
00121
00122
string fname =
append_slash(
expdir)+
"results.amat";
00123
00124
00125
TVec<string> fieldnames(1+ntestres);
00126 fieldnames[0] =
"splitnum";
00127
for(
int k=0;
k<ntestres;
k++)
00128 fieldnames[
k+1] = testresnames[
k];
00129
00130
00131
00132
00133
00134
00135
00136 results =
new AsciiVMatrix(
fname, 1+ntestres, fieldnames,
"# Special values for splitnum are: -1 -> MEAN; -2 -> STDERROR; -3 -> STDDEV");
00137 }
00138
00139
Vec resultrow(1+ntestres);
00140
Vec resultrow_sub = resultrow.
subVec(1,ntestres);
00141
00142
for(
int k=0;
k<nsplits;
k++)
00143 {
00144
TVec<VMat> train_test =
splitter->getSplit(
k);
00145
if(train_test.
size()!=2)
00146
PLERROR(
"Splitter returned a split with %d subsets, instead of the expected 2: train&test",train_test.
size());
00147
VMat trainset = train_test[0];
00148
VMat testset = train_test[1];
00149
string learner_expdir =
append_slash(
expdir)+
"Split"+
tostring(
k);
00150
learner->setExperimentDirectory(learner_expdir);
00151
00152
learner->forget();
00153
if(
save_initial_models)
00154
PLearn::save(learner_expdir+
slash+
"initial.psave",
learner);
00155
00156
learner->setTestDuringTrain(testset);
00157
learner->train(trainset);
00158
if(
save_models)
00159
PLearn::save(learner_expdir+
slash+
"final.psave",
learner);
00160
00161
string test_outputs_fname = learner_expdir+
slash+
"test_outputs.pmat";
00162
if(
save_test_outputs)
00163 test_outputs_fname = learner_expdir+
slash+
"test_outputs.pmat";
00164
string test_costs_fname = learner_expdir+
slash+
"test_costs.pmat";
00165
if(
save_test_costs)
00166 test_costs_fname = learner_expdir+
slash+
"test_costs.pmat";
00167
Vec testres =
learner->test(testset, test_outputs_fname, test_costs_fname);
00168
00169 testres.
println(cout);
00170
00171
00172
if(testres.
length()!=ntestres)
00173
PLERROR(
"In Experiment: length of Vec returned by test (%d) differs from number of names returned by testResultNames (%d)",
00174 testres.
length(), ntestres);
00175
00176 teststats.
update(testres);
00177
if(PLMPI::rank==0)
00178 {
00179 resultrow[0] =
k;
00180 resultrow_sub << testres;
00181 results->appendRow(resultrow);
00182 }
00183 }
00184
00185
if(PLMPI::rank==0)
00186 {
00187
00188 resultrow[0] = -1;
00189 resultrow_sub << teststats.
getMean();
00190 results->appendRow(resultrow);
00191
00192
00193 resultrow[0] = -2;
00194 resultrow_sub << teststats.
getStdError();
00195 results->appendRow(resultrow);
00196
00197
00198 resultrow[0] = -3;
00199 resultrow_sub << teststats.
getStdDev();
00200 results->appendRow(resultrow);
00201 }
00202 }
00203
00204 }