#include "Learner.h"
00046
00047
#include <plearn/io/TmpFilenames.h>
00048
00049
#include <plearn/base/stringutils.h>
00050
#include <plearn/io/MPIStream.h>
00051
#include <plearn/vmat/FileVMatrix.h>
00052
#include <plearn/vmat/RemoveRowsVMatrix.h>
00053
#include <plearn/sys/PLMPI.h>
00054
00055
namespace PLearn {
00056
using namespace std;
00057

Vec Learner::tmp_input;
Vec Learner::tmp_target;
Vec Learner::tmp_weight;
Vec Learner::tmp_output;
Vec Learner::tmp_costs;

PStream& Learner::default_vlog()
{
    static PStream default_vlog(&cout);
    default_vlog.outmode = PStream::raw_ascii;
    return default_vlog;
}

int Learner::use_file_if_bigger = 64000000L;
bool Learner::force_saving_on_all_processes = false;

Learner::Learner(int the_inputsize, int the_targetsize, int the_outputsize)
    : train_objective_stream(0), epoch_(0), distributed_(false),
      inputsize_(the_inputsize), targetsize_(the_targetsize), outputsize_(the_outputsize),
      weightsize_(0), dont_parallelize(false), save_at_every_epoch(false),
      save_objective(true), best_step(0)
{
    test_every = 1;
    minibatch_size = 1;
    setEarlyStopping(-1, 0, 0);
    vlog = default_vlog();
    report_test_progress_every = 10000;
    measure_cpu_time_first = false;
    setTestStatistics(mean_stats() & stderr_stats());
}

PLEARN_IMPLEMENT_ABSTRACT_OBJECT(Learner,
                                 "DEPRECATED CLASS: Derive from PLearner instead",
                                 "NO HELP");

void Learner::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    Object::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(avg_objective, copies);
    deepCopyField(avgsq_objective, copies);
    deepCopyField(test_costfuncs, copies);
    deepCopyField(test_statistics, copies);
}

void Learner::outputResultLineToFile(const string& fname, const Vec& results,
                                     bool append, const string& names)
{
#if __GNUC__ < 3
    ofstream teststream(fname.c_str(), ios::out | (append ? ios::app : 0));
#else
    ofstream teststream(fname.c_str(),
                        ios_base::out | (append ? ios_base::app
                                                : static_cast<ios::openmode>(0)));
#endif
#if __GNUC__ < 3 && !defined(WIN32)
    if (teststream.tellp() == 0)
#else
    if (teststream.tellp() == streampos(0))
#endif
        teststream << "#: epoch " << names << endl;
    teststream << setw(5) << epoch_ << " " << results << endl;
}

string Learner::basename() const
{
    if (!experiment_name.empty())
    {
        PLWARNING("** Warning: the experiment_name system is DEPRECATED, please use the expdir system from now on, through setExperimentDirectory, and don't set an experiment_name. For now I'll be using the specified experiment_name=%s as the default basename for your results, but this won't be supported in the future", experiment_name.c_str());
        return experiment_name;
    }
    else if (expdir.empty())
    {
        PLERROR("Problem in Learner: Please call setExperimentDirectory for your learner prior to calling a train/test");
    }
    else if (!train_set)
    {
        PLWARNING("You should call setTrainingSet at the beginning of the train method in class %s ... Using 'unknown' as alias for now...", classname().c_str());
        return expdir + "unknown";
    }
    else if (train_set->getAlias().empty())
    {
        return expdir + "unknown";
    }
    return expdir + train_set->getAlias();
}

void Learner::declareOptions(OptionList& ol)
{
    declareOption(ol, "inputsize", &Learner::inputsize_, OptionBase::buildoption,
                  "dimensionality of input vector \n");

    declareOption(ol, "outputsize", &Learner::outputsize_, OptionBase::buildoption,
                  "dimensionality of output \n");

    declareOption(ol, "targetsize", &Learner::targetsize_, OptionBase::buildoption,
                  "dimensionality of target \n");

    declareOption(ol, "weightsize", &Learner::weightsize_, OptionBase::buildoption,
                  "Number of weights within target. The last 'weightsize' fields of the target vector will be used as cost weights.\n"
                  "This is usually 0 (no weight) or 1 (1 weight per sample). Special loss functions may be able to give a meaning\n"
                  "to weightsize>1. Not all learners support weights.");

    declareOption(ol, "dont_parallelize", &Learner::dont_parallelize, OptionBase::buildoption,
                  "By default, MPI parallelization done at a given level prevents further parallelization\n"
                  "at levels further down. If true, this means *don't parallelize processing at this level*");

    declareOption(ol, "earlystop_testsetnum", &Learner::earlystop_testsetnum, OptionBase::buildoption,
                  "index of test set (in test_sets) to use for early\n"
                  "stopping (-1 means no early-stopping)\n");

    declareOption(ol, "earlystop_testresultindex", &Learner::earlystop_testresultindex, OptionBase::buildoption,
                  "index of statistic (as returned by test) to use\n");

    declareOption(ol, "earlystop_max_degradation", &Learner::earlystop_max_degradation, OptionBase::buildoption,
                  "maximum degradation in error from last best value\n");

    declareOption(ol, "earlystop_min_value", &Learner::earlystop_min_value, OptionBase::buildoption,
                  "minimum error beyond which we stop\n");

    declareOption(ol, "earlystop_min_improvement", &Learner::earlystop_min_improvement, OptionBase::buildoption,
                  "minimum improvement in error otherwise we stop\n");

    declareOption(ol, "earlystop_relative_changes", &Learner::earlystop_relative_changes, OptionBase::buildoption,
                  "are max_degradation and min_improvement relative?\n");

    declareOption(ol, "earlystop_save_best", &Learner::earlystop_save_best, OptionBase::buildoption,
                  "if yes, then return with saved 'best' model\n");

    declareOption(ol, "earlystop_max_degraded_steps", &Learner::earlystop_max_degraded_steps, OptionBase::buildoption,
                  "max. nb of steps beyond best found (-1 means ignore)\n");

    declareOption(ol, "save_at_every_epoch", &Learner::save_at_every_epoch, OptionBase::buildoption,
                  "save learner at each epoch?\n");

    declareOption(ol, "save_objective", &Learner::save_objective, OptionBase::buildoption,
                  "save objective at each epoch?\n");

    declareOption(ol, "expdir", &Learner::expdir, OptionBase::buildoption,
                  "The directory in which to save results\n");

    declareOption(ol, "test_costfuncs", &Learner::test_costfuncs, OptionBase::buildoption,
                  "The cost functions used by the default useAndCost method\n");

    declareOption(ol, "test_statistics", &Learner::test_statistics, OptionBase::buildoption,
                  "The test statistics used by the default test method\n",
                  "mean_stats() & stderr_stats()");

    declareOption(ol, "test_every", &Learner::test_every, OptionBase::buildoption,
                  "Compute cost on the test set every <test_every> steps (if 0, then no test is done during training)\n");

    declareOption(ol, "minibatch_size", &Learner::minibatch_size, OptionBase::buildoption,
                  "size of blocks over which to perform tests, calling 'apply' if >1, otherwise calling 'use'\n");

    inherited::declareOptions(ol);
}
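
// Usage sketch (illustrative, not part of the original file): the build options
// declared above can also be set by name through Object::setOption; `learner`
// is a hypothetical instance of some concrete subclass:
//
//     learner->setOption("earlystop_testsetnum", "0");
//     learner->setOption("earlystop_max_degradation", "0.1");
//     learner->setOption("minibatch_size", "1");
//     learner->build(); // re-build after changing options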

void Learner::setExperimentDirectory(const string& the_expdir)
{
#if USING_MPI
    if (PLMPI::rank == 0) {
#endif
        if (!force_mkdir(the_expdir))
            PLERROR("In Learner::setExperimentDirectory Could not create experiment directory %s", the_expdir.c_str());
#if USING_MPI
    }
#endif
    expdir = abspath(the_expdir);
}
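
// Usage sketch (illustrative; `MyLearner` and `trainset` are hypothetical):
//
//     PP<Learner> learner = new MyLearner(inputsize, targetsize, outputsize);
//     learner->setExperimentDirectory("exp1/");  // results are saved under exp1/
//     learner->setTrainingSet(trainset);         // the set's alias names the result files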

void Learner::build_()
{
    earlystop_previousval = FLT_MAX;
    earlystop_minval = FLT_MAX;
}

void Learner::build()
{
    inherited::build();
    build_();
}

void Learner::forget()
{
    earlystop_previousval = FLT_MAX;
    earlystop_minval = FLT_MAX;
    epoch_ = 0;
}

void Learner::useAndCostOnTestVec(const VMat& test_set, int i, const Vec& output, const Vec& cost)
{
    tmpvec.resize(test_set.width());
    if (minibatch_size > 1)
    {
        // Pack minibatch_size consecutive rows into contiguous input and target blocks.
        Vec inputvec(inputsize() * minibatch_size);
        Vec targetvec(targetsize() * minibatch_size);
        for (int k = 0; k < minibatch_size; k++)
        {
            test_set->getRow(i + k, tmpvec);
            for (int j = 0; j < inputsize(); j++)
                inputvec[k * inputsize() + j] = tmpvec[j];
            for (int j = 0; j < targetsize(); j++)
                targetvec[k * targetsize() + j] = tmpvec[inputsize() + j];
        }
        useAndCost(inputvec, targetvec, output, cost);
    }
    else
    {
        test_set->getRow(i, tmpvec);
        useAndCost(tmpvec.subVec(0, inputsize()),
                   tmpvec.subVec(inputsize(), targetsize()), output, cost);
    }
}

void Learner::useAndCost(const Vec& input, const Vec& target, Vec output, Vec cost)
{
    use(input, output);
    computeCost(input, target, output, cost);
}

void Learner::computeCost(const Vec& input, const Vec& target, const Vec& output, const Vec& cost)
{
    for (int k = 0; k < test_costfuncs.size(); k++)
        cost[k] = test_costfuncs[k](output, target);
}

void Learner::setTestDuringTrain(ostream& out, int every, Array<VMat> testsets)
{
    testout(&out);
    test_every = every;
    test_sets = testsets;
}

void Learner::openTrainObjectiveStream()
{
    string filename = expdir.empty() ? string("/dev/null") : expdir + "train.objective";
    if (train_objective_stream)
        delete train_objective_stream;
    train_objective_stream = new ofstream(filename.c_str(), ios::out | ios::app);
    ostream& out = *train_objective_stream;
    if (out.bad())
        PLERROR("could not open file %s for appending", filename.c_str());
#if __GNUC__ < 3 && !defined(WIN32)
    if (out.tellp() == 0)
#else
    if (out.tellp() == streampos(0))
#endif
        out << "# epoch | " << join(trainObjectiveNames(), " | ") << endl;
}

ostream& Learner::getTrainObjectiveStream()
{
    if (!train_objective_stream)
        openTrainObjectiveStream();
    return *train_objective_stream;
}

void Learner::openTestResultsStreams()
{
    freeTestResultsStreams();
    int n = test_sets.size();
    test_results_streams.resize(n);
    for (int k = 0; k < n; k++)
    {
        string alias = test_sets[k]->getAlias();
        string filename = alias.empty() ? string("/dev/null") : expdir + alias + ".results";
        test_results_streams[k] = new ofstream(filename.c_str(), ios::out | ios::app);
        ostream& out = *test_results_streams[k];
        if (out.bad())
            PLERROR("In Learner::openTestResultsStreams could not open file %s for appending", filename.c_str());
#if __GNUC__ < 3 && !defined(WIN32)
        if (out.tellp() == 0)
#else
        if (out.tellp() == streampos(0))
#endif
            out << "#: epoch " << join(testResultsNames(), " ") << endl;
    }
}

void Learner::freeTestResultsStreams()
{
    int n = test_results_streams.size();
    for (int k = 0; k < n; k++)
        delete test_results_streams[k];
    test_results_streams.resize(0);
}

ostream& Learner::getTestResultsStream(int k)
{
    if (test_results_streams.size() == 0)
        openTestResultsStreams();
    return *test_results_streams[k];
}

void Learner::setTestDuringTrain(Array<VMat> testsets)
{
    test_sets = testsets;
}

Learner::~Learner()
{
    if (train_objective_stream)
        delete train_objective_stream;
    freeTestResultsStreams();
}

void Learner::setEarlyStopping(int which_testset, int which_testresult,
                               real max_degradation, real min_value,
                               real min_improvement, bool relative_changes,
                               bool save_best, int max_degraded_steps)
{
    earlystop_testsetnum = which_testset;
    earlystop_testresultindex = which_testresult;
    earlystop_max_degradation = max_degradation;
    earlystop_min_value = min_value;
    earlystop_previousval = FLT_MAX;
    earlystop_minval = FLT_MAX;
    earlystop_relative_changes = relative_changes;
    earlystop_min_improvement = min_improvement;
    earlystop_save_best = save_best;
    earlystop_max_degraded_steps = max_degraded_steps;
}
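
// Usage sketch (illustrative; the argument values are hypothetical): stop when
// the first statistic of test_sets[0] degrades by more than 10% relative to the
// best value seen so far, keeping the best model on disk:
//
//     learner->setEarlyStopping(0,      // which_testset:  test_sets[0]
//                               0,      // which_testresult: first statistic
//                               0.10,   // max_degradation
//                               0,      // min_value
//                               0,      // min_improvement
//                               true,   // relative_changes
//                               true);  // save_best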

bool Learner::measure(int step, const Vec& costs)
{
    earlystop_min_value /= minibatch_size;
    if (costs.length() < 1)
        PLERROR("Learner::measure: costs.length_=%d should be >0", costs.length());

    if (((!PLMPI::synchronized && each_cpu_saves_its_errors) || PLMPI::rank == 0) && save_objective)
        outputResultLineToFile(basename() + ".objective", costs, true, join(trainObjectiveNames(), " "));

    bool muststop = false;

    if (((!PLMPI::synchronized && each_cpu_saves_its_errors) || PLMPI::rank == 0) && save_at_every_epoch)
    {
        string fname = basename() + ".epoch" + tostring(epoch()) + ".psave";
        vlog << " >> Saving model in " << fname << endl;
        PLearn::save(fname, *this);
    }
    if ((test_every != 0) && (step % test_every == 0))
    {
        int ntestsets = test_sets.size();
        Array<Vec> test_results(ntestsets);
        for (int n = 0; n < ntestsets; n++)
        {
            test_results[n] = test(test_sets[n]);
            if ((!PLMPI::synchronized && each_cpu_saves_its_errors) || PLMPI::rank == 0)
                outputResultLineToFile(basename() + "." + test_sets[n]->getAlias() + ".hist.results",
                                       test_results[n], true, join(testResultsNames(), " "));
        }

        if (ntestsets > 0 && earlystop_testsetnum >= 0)
        {
            real earlystop_currentval =
                test_results[earlystop_testsetnum][earlystop_testresultindex];
            if ((earlystop_relative_changes &&
                 ((earlystop_currentval - earlystop_minval >
                   earlystop_max_degradation * abs(earlystop_minval))
                  || (earlystop_currentval < earlystop_min_value)
                  || (earlystop_previousval - earlystop_currentval <
                      earlystop_min_improvement * abs(earlystop_previousval)))) ||
                (!earlystop_relative_changes &&
                 ((earlystop_currentval - earlystop_minval > earlystop_max_degradation)
                  || (earlystop_currentval < earlystop_min_value)
                  || (earlystop_previousval - earlystop_currentval <
                      earlystop_min_improvement))) ||
                (earlystop_max_degraded_steps >= 0 &&
                 (step - best_step >= earlystop_max_degraded_steps) &&
                 (earlystop_minval < FLT_MAX)))
            {
                if (earlystop_save_best)
                {
                    string fname = basename() + ".psave";
                    vlog << "Met early-stopping condition!" << endl;
                    vlog << "earlystop_currentval = " << earlystop_currentval << endl;
                    vlog << "earlystop_minval = " << earlystop_minval << endl;
                    vlog << "threshold = " << earlystop_max_degradation * earlystop_minval << endl;
                    vlog << "STOPPING (reloading best model)" << endl;
                    if (expdir.empty())
                        load();
                    else
                        PLearn::load(fname, *this);
                }
                else
                    cout << "Result for benchmark is: " << test_results << endl;
                muststop = true;
            }
            else
            {
                earlystop_previousval = earlystop_currentval;
                if (PLMPI::rank == 0 && earlystop_save_best
                    && (earlystop_currentval < earlystop_minval))
                {
                    string fname = basename() + ".psave";
                    vlog << "saving model in " << fname << " because of earlystopping / improvement: " << endl;
                    vlog << "earlystop_currentval = " << earlystop_currentval << endl;
                    vlog << "earlystop_minval = " << earlystop_minval << endl;
                    PLearn::save(fname, *this);
                    if ((!PLMPI::synchronized && each_cpu_saves_its_errors) || PLMPI::rank == 0)
                        for (int n = 0; n < ntestsets; n++)
                            outputResultLineToFile(basename() + "." + test_sets[n]->getAlias() + ".results",
                                                   test_results[n], false, join(testResultsNames(), " "));
                    cout << "Result for benchmark is: " << test_results << endl;
                }
            }
            if (earlystop_currentval < earlystop_minval)
            {
                earlystop_minval = earlystop_currentval;
                best_step = step;
                if (PLMPI::rank == 0)
                    vlog << "currently best step at " << best_step << " with " << earlystop_currentval
                         << " " << test_results << endl;
            }
        }
        else
            if ((!PLMPI::synchronized && each_cpu_saves_its_errors) || PLMPI::rank == 0)
                for (int n = 0; n < ntestsets; n++)
                    outputResultLineToFile(basename() + "." + test_sets[n]->getAlias() + ".results",
                                           test_results[n], false, join(testResultsNames(), " "));
    }

    for (int i = 0; i < measurers.size(); i++)
        muststop = muststop || measurers[i]->measure(step, costs);

    ++epoch_;

    return muststop;
}
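
// Usage sketch (illustrative): a subclass's train() would typically call
// measure(...) once per epoch with its current training costs and stop when it
// returns true (`nepochs` and the cost computation are hypothetical):
//
//     for (int step = 1; step <= nepochs; step++)
//     {
//         Vec train_costs = ...; // one pass over the training set
//         if (measure(step, train_costs))
//             break;             // early-stopping condition met
//     }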

void Learner::apply(const VMat& data, VMat outputs)
{
    int n = data.length();
    Vec data_row(data.width());
    Vec input = data_row.subVec(0, inputsize());
    Vec output(outputsize());
    for (int i = 0; i < n; i++)
    {
        data->getRow(i, data_row);
        use(input, output);
        outputs->putRow(i, output);
    }
}

void Learner::computeCosts(const VMat& data, VMat costs)
{
    int n = data.length();
    int ncostfuncs = costsize();
    Vec output_row(outputsize());
    Vec cost(ncostfuncs);
    cout << ncostfuncs << endl;
    for (int i = 0; i * minibatch_size < n; i++)
    {
        useAndCostOnTestVec(data, i * minibatch_size, output_row, cost);
        costs->putRow(i, cost);
    }
}

void Learner::computeLeaveOneOutCosts(const VMat& data, VMat costsmat)
{
    Vec output(outputsize());
    Vec cost(costsize());
    for (int i = 0; i < data.length(); i++)
    {
        // Retrain on all rows except i, then evaluate on the left-out row.
        train(removeRow(data, i));
        useAndCostOnTestVec(data, i, output, cost);
        costsmat->putRow(i, cost);
        vlog << '.' << flush;
        if (i % 100 == 0)
            vlog << '\n' << i << flush;
    }
}

void Learner::computeLeaveOneOutCosts(const VMat& data, VMat costsmat, CostFunc costf)
{
    if ((costsmat.length() != data.length()) || (costsmat.width() != 1))
        PLERROR("In Learner::computeLeaveOneOutCosts bad dimensions for costsmat VMat");
    Vec testsample(inputsize() + targetsize());
    Vec testinput = testsample.subVec(0, inputsize());
    Vec testtarget = testsample.subVec(inputsize(), targetsize());
    Vec output(outputsize());
    VMat subset;
    for (int i = 0; i < data.length(); i++)
    {
        data->getRow(i, testsample);
        train(removeRow(data, i));
        use(testinput, output);
        costsmat->put(i, 0, costf(output, testtarget));
        vlog << '.' << flush;
        if (i % 100 == 0)
            vlog << '\n' << i << flush;
    }
}
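
// Usage sketch (illustrative): leave-one-out retrains the learner once per
// left-out row, so the total cost scales as data.length() times one training
// run. `some_costfunc` is a hypothetical CostFunc:
//
//     Mat loo(data.length(), 1);                              // one cost per row
//     learner->computeLeaveOneOutCosts(data, VMat(loo), some_costfunc);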

void Learner::applyAndComputeCosts(const VMat& data, VMat outputs, VMat costs)
{
    int n = data.length();
    int ncostfuncs = costsize();
    Vec output_row(outputsize() * minibatch_size);
    Vec costs_row(ncostfuncs);
    for (int i = 0; i * minibatch_size < n; i++)
    {
        useAndCostOnTestVec(data, i * minibatch_size, output_row, costs_row);
        for (int k = 0; k < minibatch_size; k++)
        {
            outputs->putRow(i + k, output_row.subVec(k * outputsize(), outputsize()));
        }
        costs->putRow(i, costs_row);
    }
}

Vec Learner::computeTestStatistics(const VMat& costs)
{
    return concat(test_statistics.computeStats(costs));
}

Vec Learner::test(VMat test_set, const string& save_test_outputs, const string& save_test_costs)
{
    int ncostfuncs = costsize();

    Vec output(outputsize() * minibatch_size);
    Vec cost(ncostfuncs);
    Mat output_block(minibatch_size, outputsize());
    Mat cost_block(minibatch_size, outputsize());
    if (minibatch_size > 1)
        cost_block.resize(minibatch_size, costsize());

    Vec result;

    VMat outputs;
    VMat costs;
    if (PLMPI::rank == 0 && !save_test_outputs.empty())
        outputs = new FileVMatrix(save_test_outputs, test_set.length(), outputsize());

    if (PLMPI::rank == 0 && !save_test_costs.empty())
        costs = new FileVMatrix(save_test_costs, test_set.length(), ncostfuncs);

    int l = test_set.length();
    ProgressBar progbar(vlog, "Testing " + test_set->getAlias(), l);

    bool multipass = test_statistics.requiresMultiplePasses();

    // If the statistics require multiple passes over the costs, they must be
    // stored: in memory, or in a temporary file if they would be too big.
    if (PLMPI::rank == 0 && save_test_costs.empty() && multipass)
    {
        TmpFilenames tmpfile(1);
        bool save_on_file = ncostfuncs * test_set.length() > use_file_if_bigger;
        if (save_on_file)
            costs = new FileVMatrix(tmpfile.addFilename(), test_set.length(), ncostfuncs);
        else
            costs = Mat(test_set.length(), ncostfuncs);
    }

    if (!multipass)
        test_statistics.init(ncostfuncs);

    if (USING_MPI && PLMPI::synchronized && !dont_parallelize && PLMPI::size > 1)
    {
#if USING_MPI
        PLMPI::synchronized = false;
        if (PLMPI::rank == 0)
        {
            // Process 0 collects outputs and costs computed by the workers.
            MPIStreams mpistreams(200, 200);
            for (int i = 0; i < l; i++)
            {
                int pnum = 1 + i % (PLMPI::size - 1);
                if (!save_test_outputs.empty())
                {
                    PLearn::binread(mpistreams[pnum], output);
                    outputs->putRow(i, output);
                }
                PLearn::binread(mpistreams[pnum], cost);
                if (costs)
                    costs->putRow(i, cost);
                if (!multipass)
                    test_statistics.update(cost);
                progbar(i);
            }
        }
        else
        {
            // Workers process every (PLMPI::size-1)-th row and send results to process 0.
            MPIStream mpistream(0, 200, 200);
            int step = PLMPI::size - 1;
            for (int i = PLMPI::rank - 1; i < l; i += step)
            {
                useAndCostOnTestVec(test_set, i, output, cost);
                if (!save_test_outputs.empty())
                    PLearn::binwrite(mpistream, output);
                PLearn::binwrite(mpistream, cost);
            }
        }

        int result_len;
        if (PLMPI::rank == 0)
        {
            if (!multipass)
            {
                test_statistics.finish();
                result = concat(test_statistics.getResults());
            }
            else
                result = concat(test_statistics.computeStats(costs));
            result_len = result.length();
        }
        MPI_Bcast(&result_len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        result.resize(result_len);
        MPI_Bcast(result.data(), result.length(), PLMPI_REAL, 0, MPI_COMM_WORLD);
        PLMPI::synchronized = true;
#endif
    }
    else
    {
        for (int i = 0; i < l; i++)
        {
            if (i % 10000 < minibatch_size)
                stop_if_wanted();
            if (minibatch_size > 1 && i + minibatch_size < l)
            {
                applyAndComputeCostsOnTestMat(test_set, i, output_block, cost_block);
                i += minibatch_size;
                if (outputs)
                    outputs->putMat(i, 0, output_block);
                if (costs)
                    costs->putMat(i, 0, cost_block);
                if (!multipass)
                    test_statistics.update(cost_block);
            }
            else
            {
                useAndCostOnTestVec(test_set, i, output, cost);
                if (outputs)
                    outputs->putRow(i, output);
                if (costs)
                    costs->putRow(i, cost);
                if (!multipass)
                    test_statistics.update(cost);
            }
            progbar(i);
        }

        if (!multipass)
        {
            test_statistics.finish();
            result = concat(test_statistics.getResults());
        }
        else
            result = concat(test_statistics.computeStats(costs));
    }

    return result;
}
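
// Usage sketch (illustrative; the file names are hypothetical): compute the
// configured test statistics on a held-out set, optionally dumping per-example
// outputs and costs to disk:
//
//     Vec stats = learner->test(testset, "exp1/test_outputs.pmat",
//                               "exp1/test_costs.pmat");
//     // stats is ordered as testResultsNames(), i.e. one entry per
//     // (cost function, statistic) pair.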

void Learner::applyAndComputeCostsOnTestMat(const VMat& test_set, int i,
                                            const Mat& output_block,
                                            const Mat& cost_block)
{
    applyAndComputeCosts(test_set.subMatRows(i, output_block.length()), output_block, cost_block);
}

void Learner::setModel(const Vec& options)
{
    PLERROR("setModel: method not implemented for this Learner (and DEPRECATED!!! DON'T IMPLEMENT IT, DON'T CALL IT. SEE setOption INSTEAD)");
}

int Learner::costsize() const
{
    return test_costfuncs.size();
}

Array<string> Learner::costNames() const
{
    Array<string> cost_names(test_costfuncs.size());
    for (int i = 0; i < cost_names.size(); i++)
        cost_names[i] = space_to_underscore(test_costfuncs[i]->info());
    return cost_names;
}

Array<string> Learner::testResultsNames() const
{
    Array<string> cost_names = costNames();
    Array<string> names(test_statistics.size() * cost_names.size());
    int k = 0;
    for (int i = 0; i < test_statistics.size(); i++)
    {
        string stati = test_statistics[i]->info();
        for (int j = 0; j < cost_names.size(); j++)
            names[k++] = space_to_underscore(cost_names[j] + "." + stati);
    }
    return names;
}

Array<string> Learner::trainObjectiveNames() const
{
    return testResultsNames();
}

void Learner::oldwrite(ostream& out) const
{
    writeHeader(out, "Learner", 1);
    writeField(out, "inputsize", inputsize_);
    writeField(out, "outputsize", outputsize_);
    writeField(out, "targetsize", targetsize_);
    writeField(out, "test_every", test_every);
    writeField(out, "earlystop_testsetnum", earlystop_testsetnum);
    writeField(out, "earlystop_testresultindex", earlystop_testresultindex);
    writeField(out, "earlystop_max_degradation", earlystop_max_degradation);
    writeField(out, "earlystop_min_value", earlystop_min_value);
    writeField(out, "earlystop_min_improvement", earlystop_min_improvement);
    writeField(out, "earlystop_relative_changes", earlystop_relative_changes);
    writeField(out, "earlystop_save_best", earlystop_save_best);
    writeField(out, "earlystop_max_degraded_steps", earlystop_max_degraded_steps);
    writeField(out, "save_at_every_epoch", save_at_every_epoch);
    writeField(out, "experiment_name", experiment_name);
    writeField(out, "test_costfuncs", test_costfuncs);
    writeField(out, "test_statistics", test_statistics);
    writeFooter(out, "Learner");
}

void Learner::oldread(istream& in)
{
    int version = readHeader(in, "Learner");
    if (version >= 2)
    {
        readField(in, "expdir", expdir);
        readField(in, "epoch", epoch_);
    }
    readField(in, "inputsize", inputsize_);
    readField(in, "outputsize", outputsize_);
    readField(in, "targetsize", targetsize_);
    readField(in, "test_every", test_every);
    readField(in, "earlystop_testsetnum", earlystop_testsetnum);
    readField(in, "earlystop_testresultindex", earlystop_testresultindex);
    readField(in, "earlystop_max_degradation", earlystop_max_degradation);
    readField(in, "earlystop_min_value", earlystop_min_value);
    readField(in, "earlystop_min_improvement", earlystop_min_improvement);
    readField(in, "earlystop_relative_changes", earlystop_relative_changes);
    readField(in, "earlystop_save_best", earlystop_save_best);
    if (version >= 1)
        readField(in, "earlystop_max_degraded_steps", earlystop_max_degraded_steps);
    else
        earlystop_max_degraded_steps = -1;
    readField(in, "save_at_every_epoch", save_at_every_epoch);
    readField(in, "experiment_name", experiment_name);
    readField(in, "test_costfuncs", test_costfuncs);
    readField(in, "test_statistics", test_statistics);
    readFooter(in, "Learner");
}

void Learner::save(const string& filename) const
{
#if USING_MPI
    if (PLMPI::rank != 0 && !force_saving_on_all_processes)
        return;
#endif
    if (!filename.empty())
        Object::save(filename);
    else if (!experiment_name.empty())
        Object::save(experiment_name);
    else
        PLERROR("Called Learner::save with an empty filename, while experiment_name is also empty. What file name am I supposed to use???? Anyway this method is DEPRECATED, you should call directly function PLearn::save(whatever_filename_you_want, the_object) ");
}

void Learner::load(const string& filename)
{
    if (!filename.empty())
        Object::load(filename);
    else if (!experiment_name.empty())
        Object::load(experiment_name);
    else
        PLERROR("Called Learner::load with an empty filename, while experiment_name is also empty. What file name am I supposed to use???? Anyway this method is DEPRECATED, you should call directly function PLearn::load(whatever_filename_you_want, the_object) ");
}

void Learner::stop_if_wanted()
{
    string stopping_filename = basename() + ".stop";
    if (file_exists(stopping_filename.c_str()))
    {
#ifdef PROFILE
        string profile_report_name = basename();
#if USING_MPI
        profile_report_name += "_r" + tostring(PLMPI::rank);
#endif
        profile_report_name += ".profile";
        ofstream profile_report(profile_report_name.c_str());
        Profiler::report(profile_report);
#endif
#if USING_MPI
        MPI_Barrier(MPI_COMM_WORLD);
        if (PLMPI::rank == 0)
        {
            string fname = basename() + ".stopped.psave";
            PLearn::save(fname, *this);
            vlog << "saving and quitting because of stop signal" << endl;
            unlink(stopping_filename.c_str());
        }
        exit(0);
#else
        unlink(stopping_filename.c_str());
        exit(0);
#endif
    }
}

void Learner::computeOutput(const VVec& input, Vec& output)
{
    tmp_input.resize(input.length());
    tmp_input << input;
    use(tmp_input, output);
}

void Learner::computeCostsFromOutputs(const VVec& input, const Vec& output,
                                      const VVec& target, const VVec& weight,
                                      Vec& costs)
{
    tmp_input.resize(input.length());
    tmp_input << input;
    tmp_target.resize(target.length());
    tmp_target << target;
    computeCost(tmp_input, tmp_target, output, costs);

    int nw = weight.length();
    if (nw > 0)
    {
        tmp_weight.resize(nw);
        tmp_weight << weight;
        if (nw == 1)
            costs *= tmp_weight[0];
        else if (nw == costs.length())
            costs *= tmp_weight;
        else
            PLERROR("In computeCostsFromOutputs: don't know how to handle cost-weight vector of length %d while output vector has length %d", nw, output.length());
    }
}

void Learner::computeOutputAndCosts(const VVec& input, VVec& target, const VVec& weight,
                                    Vec& output, Vec& costs)
{
    computeOutput(input, output);
    computeCostsFromOutputs(input, output, target, weight, costs);
}

void Learner::computeCosts(const VVec& input, VVec& target, VVec& weight, Vec& costs)
{
    tmp_output.resize(outputsize());
    computeOutputAndCosts(input, target, weight, tmp_output, costs);
}

void Learner::newtrain(VecStatsCollector& stats)
{
    PLERROR("newtrain not yet implemented for this learner");
}

void Learner::newtest(VMat testset, VecStatsCollector& test_stats,
                      VMat testoutputs, VMat testcosts)
{
    PLERROR("Learner::newtest not yet implemented");
}

} // end of namespace PLearn