PLearn: SequentialModelSelector.cc Source File

00001 // -*- C++ -*- 00002 00003 // SequentialModelSelector.cc 00004 // 00005 // Copyright (C) 2003 Rejean Ducharme, Yoshua Bengio 00006 // Copyright (C) 2003 Pascal Vincent 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 #include "SequentialModelSelector.h" 00039 00040 //#define DEBUG 00041 00042 namespace PLearn { 00043 using namespace std; 00044 00045 00046 PLEARN_IMPLEMENT_OBJECT(SequentialModelSelector, "ONE LINE DESCR", "NO HELP"); 00047 00048 SequentialModelSelector::SequentialModelSelector(): 00049 report_paired_T_tests(false), 00050 stepwise_save(true), 00051 comparison_type(1), 00052 comparison_window(-1) 00053 {} 00054 00055 void SequentialModelSelector::setExperimentDirectory(const string& _expdir) 00056 { 00057 int nb_models = models.length(); 00058 checkModelNames(); 00059 00060 PLearner::setExperimentDirectory(_expdir); 00061 string model_m; 00062 for(int m=0; m < nb_models; m++) 00063 { 00064 model_m = models[m]->getExperimentDirectory(); 00065 if(model_m == "") 00066 model_m = model_names[m]; 00067 models[m]->setExperimentDirectory(append_slash(_expdir)+ model_m); 00068 } 00069 } 00070 00071 void SequentialModelSelector::build_() 00072 { 00073 // Precondition check on init_train_size 00074 if(init_train_size < horizon) 00075 { 00076 PLWARNING("The init_train_size provided by the user was %d. However since the horizon is %d\n" 00077 "the init_train_size will be modified to be %d, the horizon value. This modification is\n" 00078 "necessary because, within a model selection context, a first internal test is needed to\n" 00079 "determine the first best model (see the SequentialModelSelector::train method\n" 00080 "implementation). If you want to avoid getting this warning, you should set init_train_size\n" 00081 "to horizon (%d) directly.", 00082 init_train_size, horizon, horizon, horizon); 00083 init_train_size = horizon; 00084 } 00085 00086 int nb_models = models.length(); 00087 if(nb_models < 1) 00088 PLERROR("The 'models' option is mandatory in a SequentialModelSelector."); 00089 00090 int nb_common_costs = common_costs.length(); 00091 if(nb_common_costs < 1) 00092 PLERROR("The 'common_costs' requires at least 1 cost from which models will\n" 00093 "be compared to choose the best model."); 00094 00095 common_cost_indices.resize(nb_models, nb_common_costs); 00096 for(int m=0; m < nb_models; m++) 00097 for(int c=0; c < nb_common_costs; c++) 00098 common_cost_indices(m, c) = models[m]->getTestCostIndex( common_costs[c] ); 00099 00100 best_model.resize(max_seq_len); 00101 sequence_costs.resize(nb_models); 00102 00103 forget(); 00104 } 00105 00106 void SequentialModelSelector::build() 00107 { 00108 inherited::build(); 00109 build_(); 00110 } 00111 00112 void SequentialModelSelector::declareOptions(OptionList& ol) 00113 { 00114 declareOption(ol, "stepwise_save", &SequentialModelSelector::stepwise_save, OptionBase::buildoption, 00115 "Does the model selector hass to save errors at each step.\n" 00116 "Default: true."); 00117 00118 declareOption(ol, "models", &SequentialModelSelector::models, 00119 OptionBase::buildoption, "List of all the models.\n"); 00120 00121 declareOption(ol, "model_names", &SequentialModelSelector::model_names, OptionBase::buildoption, 00122 "If the user desires to provide a name for each model instead of model_i."); 00123 00124 declareOption(ol, "common_costs", &SequentialModelSelector::common_costs, OptionBase::buildoption, 00125 "The names of costs that are common to all models and that the user wishes the model\n" 00126 "selector to keep track of. The first one is considered to be the main cost, the one\n" 00127 "from which models will be compared to choose the best model."); 00128 00129 declareOption(ol, "report_paired_T_tests", &SequentialModelSelector::report_paired_T_tests, OptionBase::buildoption, 00130 "If true, the model selector will report as costs the paired T tests on common_cost_indices[0] for\n" 00131 "models[0] against each other model. The model selector will only report T tests once,\n" 00132 "at t=(max_seq_len-1)\n" 00133 "\n" 00134 "Default: false."); 00135 00136 declareOption(ol, "comparison_type", &SequentialModelSelector::comparison_type, 00137 OptionBase::buildoption, 00138 "From the common_costs list, the first cost is the one from which models will be compared\n" 00139 "to choose the best model. But should the best model be chosen according to the\n" 00140 "\n" 00141 "max/min\n" 00142 " +/- 1: Mean\n" 00143 " +/- 2: Mean / Variance\n" 00144 " +/- 3: more to come.\n" 00145 "\n" 00146 "of the cost realizations. \n" 00147 "Default: 1.\n"); 00148 00149 declareOption(ol, "comparison_window", &SequentialModelSelector::comparison_window, OptionBase::buildoption, 00150 "If positive, the comparison performed on the basis of common_cost[0] will be applyed only\n" 00151 "the comparison_window last elements of the cost sequence.\n" 00152 "Default: -1. (No window)"); 00153 00154 inherited::declareOptions(ol); 00155 } 00156 00157 real SequentialModelSelector::sequenceCost(const Vec& sequence_errors) 00158 { 00159 int seq_len = sequence_errors.length(); 00160 if(seq_len == 0) 00161 return MISSING_VALUE; 00162 00163 int window_start = seq_len - comparison_window; 00164 Vec windowed; 00165 if(comparison_window < 1 || window_start < 0) 00166 windowed = sequence_errors; // The option is inactive or the sequence is not yet comparison_window long 00167 else 00168 windowed = sequence_errors.subVec(window_start, comparison_window); 00169 00170 int type = abs(comparison_type); 00171 real mean_ = mean(windowed); 00172 if (type == 1) 00173 return mean_; 00174 00175 if (type == 2) 00176 return mean_ / variance(windowed, mean_); 00177 00178 PLERROR("Invalid comparison type %d!", comparison_type); 00179 return MISSING_VALUE; 00180 } 00181 00182 real SequentialModelSelector::paired_t_test(const int& m1, const int& m2, int cc /*0*/) const 00183 { 00184 Vec u = remove_missing(models[m1]->errors.column( common_cost_indices(m1, cc) ).toVecCopy()); 00185 Vec v = remove_missing(models[m2]->errors.column( common_cost_indices(m2, cc) ).toVecCopy()); 00186 00187 int n = u.length(); 00188 if( v.length() != n ) 00189 PLERROR("SequentialModelSelector::paired_t_test -- Can't make a paired t-test on to unequally lengthed vectors (%d != %d).", 00190 n, v.length()); 00191 00192 real ubar = mean(u); 00193 real vbar = mean(v); 00194 Vec u2 = u - ubar; 00195 Vec v2 = v - vbar; 00196 00197 return (ubar - vbar) * sqrt( n*(n-1) / sumsquare(u2-v2)); 00198 } 00199 00200 void SequentialModelSelector::forget() 00201 { 00202 best_model.resize(max_seq_len); 00203 best_model.fill(-1); // by default 00204 for (int i=0; i<models.size(); i++) 00205 models[i]->forget(); 00206 00207 inherited::forget(); 00208 } 00209 00210 void SequentialModelSelector::train() 00211 { 00221 if(last_call_train_t == -1) 00222 for (int i=0; i<models.size(); i++) 00223 { 00224 models[i]->horizon = horizon; 00225 models[i]->init_train_size = init_train_size-horizon; 00226 } 00227 00228 last_call_train_t = train_set.length()-1; 00229 00230 // The init_train_size option is the train_set size at which we should start training. 00231 // The corresponding time index is (init_train_size-1). See also the build_ precondition 00232 // check on init_train_size. 00233 int init_train_t = init_train_size-1; 00234 int start_t = MAX(init_train_t, last_train_t+train_step); 00235 if( start_t < last_test_t ) 00236 PLERROR("SequentialModelSelector::train -- start_t = %d < %d = last_test_t", start_t, last_test_t); 00237 00238 if( last_call_train_t < start_t ) 00239 return; 00240 00241 ProgressBar* pb = NULL; 00242 if (report_progress) 00243 pb = new ProgressBar("Training SequentialModelSelector learner",train_set.length()); 00244 00245 TVec< PP<VecStatsCollector> > dummy_stats(models.length()); 00246 for (int t=start_t; t < train_set.length(); t++) 00247 { 00248 // The time t index is equal to a set last row index. Its length will therefore be: 00249 int set_length = t+1; 00250 00251 #ifdef DEBUG 00252 cout << "SequentialModelSelector::train() -- sub_train.length = " << set_length-horizon << " et sub_test.length = " << set_length << endl; 00253 #endif 00254 00255 VMat sub_train = train_set.subMatRows(0,set_length-horizon); // last training pair is (t-2*horizon,t-horizon) 00256 sub_train->defineSizes(train_set->inputsize(), train_set->targetsize(), train_set->weightsize()); 00257 00258 VMat sub_test = train_set.subMatRows(0,set_length); // last test pair is (t-horizon,t) 00259 sub_test->defineSizes(train_set->inputsize(), train_set->targetsize(), train_set->weightsize()); 00260 00261 for (int i=0; i < models.size(); i++) 00262 { 00263 if(t == start_t) 00264 dummy_stats[i] = new VecStatsCollector(); 00265 00266 models[i]->setTrainingSet(sub_train, false); 00267 models[i]->train(); 00268 models[i]->test(sub_test, dummy_stats[i]); // last cost computed goes at t-1, last prediction at t-1-horizon 00269 00270 Vec sequence_errors = remove_missing(models[i]->errors.column( common_cost_indices(i, 0) ).toVecCopy()); 00271 00272 #ifdef DEBUG 00273 cout << "models["<<i<<"]->getTestCostNames()[common_cost_indices(i, 0)]: " 00274 << models[i]->getTestCostNames()[common_cost_indices(i, 0)] << endl; 00275 PLWARNING("models[%d]->errors.subMat(0,%d,%d,1)", i, common_cost_indices(i, 0), t); 00276 cout << remove_missing( models[i]->errors.subMat(0,common_cost_indices(i,0),set_length,1).toVecCopy() ) << endl; 00277 cout << "---\nOR\n---\n" << sequence_errors << endl; 00278 #endif 00279 00280 sequence_costs[i] = sequenceCost(sequence_errors); 00281 } 00282 00283 // we set the best model for this time step 00284 if(comparison_type > 0) 00285 best_model[t] = argmax(sequence_costs, true); 00286 else 00287 best_model[t] = argmin(sequence_costs, true); 00288 00289 if(best_model[t] == -1) 00290 best_model[t] = 0; // All models provided only MISSING_VALUE as common_costs[0] !!! 00291 00292 #ifdef DEBUG 00293 cout << "sequence_costs: " << sequence_costs << endl; 00294 cout << "SequentialModelSelector::train() -- t = " << t << " et best_model = " << best_model[t] << endl; 00295 #endif 00296 00297 if(predictions(t-horizon).hasMissing()) 00298 predictions(t-horizon) << models[best_model[t]]->predictions(t-horizon); 00299 00300 #ifdef DEBUG 00301 cout << "SequentialModelSelector::train() -- train_set.length = " << set_length << endl; 00302 #endif 00303 if (pb) 00304 pb->update(t); 00305 } 00306 00307 // Now train with everything that is available -- last training pair is (t-horizon,t) 00308 Vec best_model_costs; 00309 for (int i=0; i<models.size(); i++) 00310 { 00311 models[i]->setTrainingSet(train_set, false); 00312 models[i]->train(); 00313 00314 if(i == best_model[last_call_train_t]) 00315 best_model_costs = models[i]->errors(last_call_train_t); 00316 } 00317 00318 if(train_stats) 00319 { 00320 Vec update = best_model_costs( common_cost_indices(best_model[last_call_train_t]) ); 00321 train_stats->update( update ); 00322 } 00323 00324 predictions(last_call_train_t) << models[ best_model[last_call_train_t] ]->predictions(last_call_train_t); 00325 00326 if(pb) 00327 pb->close(); 00328 00329 00330 last_train_t = last_call_train_t; 00331 #ifdef DEBUG 00332 cout << "SequentialModelSelector.last_train_t = " << last_train_t << endl; 00333 #endif 00334 00335 00336 if(stepwise_save) 00337 { 00338 string s1 = append_slash(expdir) + "predictions_train_t=" + tostring(last_train_t); //"seq_model/predictions_train_t=" + tostring(last_train_t); 00339 saveAsciiWithoutSize(s1, predictions); 00340 00341 string s2 = append_slash(expdir) + "errors_train_t=" + tostring(last_train_t); 00342 saveAsciiWithoutSize(s2, errors); 00343 } 00344 } 00345 00346 void SequentialModelSelector::test(VMat test_set, PP<VecStatsCollector> test_stats, 00347 VMat testoutputs, VMat testcosts) const 00348 { 00349 // Since model selection train method may not train before (init_train_size+horizon) 00350 // we have to verify if a first train was actually done prior to entering the 00351 // test core 00352 if(last_train_t == -1) 00353 { 00354 PLWARNING("SequentialModelSelector::test -- Skipped because there were no prior train."); 00355 return; 00356 } 00357 00358 int start_t = MAX(last_train_t+1,last_test_t+1); 00359 if( test_set.length()-1 < start_t ) 00360 return; 00361 00362 ProgressBar* pb = NULL; 00363 if (report_progress) 00364 pb = new ProgressBar("Testing SequentialModelSelector learner",test_set.length()); 00365 00366 TVec< PP<VecStatsCollector> > dummy_stats(models.length()); 00367 for (int t=start_t; t < test_set.length(); t++) 00368 { 00369 Vec best_model_costs; 00370 Vec models_update; 00371 00372 for (int i=0; i<models.size(); i++) 00373 { 00374 if(t == start_t) 00375 dummy_stats[i] = new VecStatsCollector(); 00376 00377 models[i]->test(test_set, dummy_stats[i]); 00378 00379 if(i == best_model[last_train_t]) 00380 { 00381 best_model_costs = models[i]->errors(t); 00382 models_update.append( best_model_costs ); 00383 } 00384 else 00385 models_update.append( models[i]->errors(t) ); 00386 } 00387 00388 // The update is composed of the common_costs and the modelwise costs 00389 Vec update = best_model_costs( common_cost_indices(best_model[last_train_t]) ); 00390 update.append(models_update); 00391 00392 // Report T test processing: only reports T tests once, at t=(max_seq_len-1) 00393 if( report_paired_T_tests ) 00394 for(int m=1; m < models.length(); m++) 00395 { 00396 if( test_set.length() == max_seq_len ) 00397 update.append( paired_t_test(0, m) ); 00398 else 00399 update.append( MISSING_VALUE ); 00400 } 00401 00402 test_stats->update( update ); 00403 00404 predictions(t) << models[best_model[last_train_t]]->predictions(t); 00405 errors(t) << update; 00406 if (testoutputs) testoutputs->appendRow( predictions(t) ); 00407 if (testcosts) testcosts->appendRow(update); 00408 00409 #ifdef DEBUG 00410 cout << "predictions(" << t << "): " << predictions(t) << endl 00411 << "errors(" << t << "): " << errors(t) << endl; 00412 #endif 00413 00414 if(pb) 00415 pb->update(t); 00416 } 00417 if(pb) 00418 pb->close(); 00419 00420 00421 last_test_t = test_set.length()-1; 00422 #ifdef DEBUG 00423 cout << "SequentialModelSelector.last_test_t = " << last_test_t << endl; 00424 #endif 00425 00426 00427 if(stepwise_save) 00428 { 00429 string s1 = append_slash(expdir) + "predictions_test_t=" + tostring(last_test_t); 00430 saveAsciiWithoutSize(s1, predictions); 00431 00432 string s2 = append_slash(expdir) + "errors_test_t=" + tostring(last_test_t); 00433 saveAsciiWithoutSize(s2, errors); 00434 } 00435 } 00436 00437 void SequentialModelSelector::computeOutput(const Vec& input, Vec& output) const 00438 { 00439 models[best_model[last_train_t]]->computeOutput(input, output); 00440 } 00441 00442 void SequentialModelSelector::computeCostsFromOutputs(const Vec& input, 00443 const Vec& output, const Vec& target, Vec& costs) const 00444 { 00445 models[best_model[last_train_t]]->computeCostsFromOutputs(input, output, target, costs); 00446 } 00447 00448 void SequentialModelSelector::computeOutputAndCosts(const Vec& input, 00449 const Vec& target, Vec& output, Vec& costs) const 00450 { 00451 models[best_model[last_train_t]]->computeOutputAndCosts(input, target, output, costs); 00452 } 00453 00454 void SequentialModelSelector::computeCostsOnly(const Vec& input, 00455 const Vec& target, Vec& costs) const 00456 { 00457 models[best_model[last_train_t]]->computeCostsOnly(input, target, costs); 00458 } 00459 00460 void SequentialModelSelector::checkModelNames() const 00461 { 00462 int nb_models = models.length(); 00463 int nb_names = model_names.length(); 00464 if(nb_names == 0) 00465 { 00466 model_names.resize(nb_models); 00467 for(int m=0; m < nb_models; m++) 00468 model_names[m] = "Model_"+tostring(m); 00469 } 00470 else if(nb_names != nb_models) 00471 PLERROR("Names must be provided for all models (%d = nb_names != nb_models = %d).", nb_names, nb_models); 00472 } 00473 00474 TVec<string> SequentialModelSelector::getTestCostNames() const 00475 { 00476 TVec<string> tcnames = common_costs; 00477 00478 int nb_models = models.length(); 00479 checkModelNames(); 00480 00481 for(int m=0; m < nb_models; m++) 00482 { 00483 TVec<string> tcm = models[m]->getTestCostNames(); 00484 for(int c=0; c < tcm.length(); c++) 00485 tcnames.append(model_names[m] + "::" + tcm[c]); 00486 } 00487 00488 if( report_paired_T_tests ) 00489 for(int m=1; m < nb_models; m++) 00490 tcnames.append("T-test_" + model_names[0] + "_vs_" + model_names[m]); 00491 00492 return tcnames; 00493 } 00494 00495 TVec<string> SequentialModelSelector::getTrainCostNames() const 00496 { return common_costs; } 00497 00498 void SequentialModelSelector::makeDeepCopyFromShallowCopy(CopiesMap& copies) 00499 { 00500 inherited::makeDeepCopyFromShallowCopy(copies); 00501 deepCopyField(models, copies); 00502 //deepCopyField(mean_costs, copies); 00503 } 00504 00505 void SequentialModelSelector::matlabSave(const string& matlab_subdir) 00506 { 00507 string save_dir = append_slash(getExperimentDirectory()) + matlab_subdir; 00508 Vec dummy, add(1); add[0] = 0; 00509 00510 TVec<string> legend(1, "ModelSelector"); 00511 legend.append(model_names); 00512 00513 int nb_models = models.length(); 00514 int nb_common_costs = common_costs.length(); 00515 for(int g=0; g < nb_common_costs; g++) 00516 { 00517 Vec cs; 00518 00519 Array<Mat> columns(nb_models+1); 00520 cs = getCostSequence(g); 00521 00522 // Since the model selector first runs tests in its train method, the models' sequence 00523 // length will probably be to long... 00524 int msel_cs_len = cs.length(); 00525 00526 columns[0] = Mat(cs.length(), 1, cs); 00527 for(int m=0; m < nb_models; m++) 00528 { 00529 cs = models[m]->getCostSequence( common_cost_indices(m, g) ); 00530 cs = cs.subVec(cs.length() - msel_cs_len, msel_cs_len); //... length correction (see the comment above) 00531 columns[m+1] = Mat(cs.length(), 1, cs); 00532 } 00533 00534 Mat concat = hconcat(columns); 00535 PLearn::matlabSave( save_dir, common_costs[g], 00536 concat, 00537 add, dummy, legend); 00538 } 00539 00540 for(int m=0; m < nb_models; m++) 00541 models[m]->matlabSave(matlab_subdir); 00542 } 00543 00544 } // end of namespace PLearn 00545