
AdaBoost.cc

// -*- C++ -*-

// AdaBoost.cc
//
// Copyright (C) 2003 Pascal Vincent
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: AdaBoost.cc,v 1.4 2004/07/21 16:30:55 chrish42 Exp $
 ******************************************************* */

// Authors: Yoshua Bengio

#include "AdaBoost.h"
#include <plearn/math/pl_math.h>
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/vmat/SelectRowsVMatrix.h>
#include <plearn/math/random.h>

namespace PLearn {
using namespace std;

AdaBoost::AdaBoost()
    : target_error(0.5), output_threshold(0.5), compute_training_error(1),
      pseudo_loss_adaboost(1), weight_by_resampling(1), early_stopping(1),
      save_often(0)
{ }

PLEARN_IMPLEMENT_OBJECT(
    AdaBoost,
    "AdaBoost boosting algorithm for TWO-CLASS classification",
    "Given a classification weak learner, this algorithm \"boosts\" it in\n"
    "order to obtain a much more powerful classification algorithm.\n"
    "The classifier is two-class, returning 0 or 1, or a number between 0 and 1\n"
    "(in that case the user can set the 'pseudo_loss_adaboost' option, which\n"
    "computes a more precise notion of error taking into account the precise\n"
    "value output by the soft classifier).\n"
    "The nstages option from PLearner is used to specify the desired\n"
    "number of boosting rounds (but the algorithm can stop earlier if\n"
    "the next weak learner is unable to make significant progress).\n");
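// Summary of the decision rule implemented further down in this file:
// for an input x, the trained model computes the weighted vote
//
//     F(x) = ( sum_t alpha_t * f_t(x) ) / ( sum_t |alpha_t| )
//
// where f_t is the t-th weak learner and alpha_t its voting weight
// (see computeOutput below); the predicted class is 1 when
// F(x) > output_threshold and 0 otherwise.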
void AdaBoost::declareOptions(OptionList& ol)
{
    declareOption(ol, "weak_learners", &AdaBoost::weak_learners,
                  OptionBase::learntoption,
                  "The vector of learned weak learners");

    declareOption(ol, "voting_weights", &AdaBoost::voting_weights,
                  OptionBase::learntoption,
                  "Weights given to the weak learners (their output is linearly combined with these weights\n"
                  "to form the output of the AdaBoost learner).\n");

    declareOption(ol, "initial_sum_weights", &AdaBoost::initial_sum_weights,
                  OptionBase::learntoption,
                  "Initial sum of weights on the examples. Do not tamper with it.\n");

    declareOption(ol, "weak_learner_template", &AdaBoost::weak_learner_template,
                  OptionBase::buildoption,
                  "Template for the regression weak learner to be boosted into a classifier");

    declareOption(ol, "target_error", &AdaBoost::target_error,
                  OptionBase::buildoption,
                  "The target average weighted error below which each weak learner's\n"
                  "error must fall after its training (ordinary AdaBoost: target_error=0.5).");

    declareOption(ol, "pseudo_loss_adaboost", &AdaBoost::pseudo_loss_adaboost,
                  OptionBase::buildoption,
                  "Whether to use a variant of AdaBoost which is appropriate for soft classifiers\n"
                  "whose output is between 0 and 1 rather than being either 0 or 1.\n");

    declareOption(ol, "weight_by_resampling", &AdaBoost::weight_by_resampling,
                  OptionBase::buildoption,
                  "Whether to train the weak learner using resampling to represent the weighting\n"
                  "given to examples. If false then give these weights explicitly in the training set\n"
                  "of the weak learner (note that some learners can accommodate weights well, others not).\n");

    declareOption(ol, "output_threshold", &AdaBoost::output_threshold,
                  OptionBase::buildoption,
                  "To interpret the output of the learner as a class, it is compared to this\n"
                  "threshold: class 1 if greater than output_threshold, class 0 otherwise.\n");

    declareOption(ol, "provide_learner_expdir", &AdaBoost::provide_learner_expdir,
                  OptionBase::buildoption,
                  "If true, each weak learner to be trained will have its experiment directory set to WeakLearner#kExpdir/");

    declareOption(ol, "early_stopping", &AdaBoost::early_stopping,
                  OptionBase::buildoption,
                  "If true, then boosting stops when the next weak learner is too weak (avg error > target_error - 0.01)\n");

    declareOption(ol, "save_often", &AdaBoost::save_often,
                  OptionBase::buildoption,
                  "If true, then save the model after training each weak learner, under <expdir>/model.psave\n");

    declareOption(ol, "compute_training_error", &AdaBoost::compute_training_error,
                  OptionBase::buildoption,
                  "Whether to compute training error at each stage.\n");

    // Now call the parent class' declareOptions
    inherited::declareOptions(ol);
}

void AdaBoost::build_()
{
}

// ### Nothing to add here, simply calls build_
void AdaBoost::build()
{
    inherited::build();
    build_();
}


void AdaBoost::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);

    deepCopyField(learners_error, copies);
    deepCopyField(example_weights, copies);
    deepCopyField(voting_weights, copies);
    deepCopyField(weak_learners, copies);
    deepCopyField(weak_learner_template, copies);
}


int AdaBoost::outputsize() const
{
    // Outputsize is always 1, since this is a 0-1 classifier
    return 1;
}
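// Note on the incremental training protocol inherited from PLearner:
// train() runs boosting rounds from the current 'stage' up to 'nstages',
// and forget() resets stage to 0 so training restarts from scratch.
// Calling train() again with a larger nstages therefore resumes boosting
// while keeping the weak learners already built.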
void AdaBoost::forget()
{
    stage = 0;
    learners_error.resize(0, nstages);
    weak_learners.resize(0, nstages);
    voting_weights.resize(0, nstages);
    sum_voting_weights = 0;
    if (seed_ >= 0)
        manual_seed(seed_);
    else
        PLearn::seed();
}

void AdaBoost::train()
{
    if(!train_set)
        PLERROR("In AdaBoost::train, you did not setTrainingSet");

    if(!train_stats)
        PLERROR("In AdaBoost::train, you did not setTrainStatsCollector");

    if (train_set->targetsize()!=1)
        PLERROR("In AdaBoost::train, targetsize should be 1, found %d",
                train_set->targetsize());

    if (nstages < stage)
        forget();

    static Vec input;
    static Vec output;
    static Vec target;
    real weight;

    static Vec examples_error;

    const int n = train_set.length();
    static TVec<int> train_indices;
    static Vec pseudo_loss;

    input.resize(inputsize());
    output.resize(1);
    target.resize(targetsize());
    examples_error.resize(n);

    if (stage==0)
    {
        example_weights.resize(n);
        if (train_set->weightsize()>0)
        {
            ProgressBar pb("AdaBoost round " + tostring(stage) +
                           ": extracting initial weights", n);
            initial_sum_weights=0;
            for (int i=0; i<n; ++i) {
                pb.update(i);
                train_set->getExample(i, input, target, weight);
                example_weights[i]=weight;
                initial_sum_weights += weight;
            }
            example_weights *= 1.0/initial_sum_weights;
        }
        else example_weights.fill(1.0/n);
        sum_voting_weights = 0;
        voting_weights.resize(0,nstages);
    }

    VMat unweighted_data = train_set.subMatColumns(0, inputsize()+1);
    learners_error.resize(nstages);
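    // Each boosting round below proceeds in five steps:
    //   1. build a training set for the new weak learner, either by
    //      weighted resampling or by appending the example weights as a
    //      weight column;
    //   2. deep-copy weak_learner_template and train it on that set;
    //   3. compute the weak learner's weighted training error e_t;
    //   4. stop early (if enabled) when e_t is 0 or too close to
    //      target_error;
    //   5. compute the learner's voting weight, then update and
    //      renormalize the example weights.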
    for ( ; stage < nstages ; ++stage)
    {
        VMat weak_learner_training_set;
        {
            ProgressBar pb("AdaBoost round " + tostring(stage) +
                           ": making training set for weak learner", n);
            // We shall now construct a training set for the new weak learner:
            if (weight_by_resampling)
            {
                // Use a "smart" resampling that approximates sampling with
                // replacement with the probabilities given by example_weights.
                map<real,int> indices;
                for (int i=0; i<n; ++i) {
                    pb.update(i);
                    real p_i = example_weights[i];
                    // randomly choose how many repeats of example i
                    int n_samples_of_row_i =
                        int(rint(gaussian_mu_sigma(n*p_i, sqrt(n*p_i*(1-p_i)))));
                    for (int j=0;j<n_samples_of_row_i;j++)
                    {
                        if (j==0)
                            indices[i]=i;
                        else
                        {
                            real k=n*uniform_sample(); // put the others in random places
                            indices[k]=i;              // while avoiding collisions
                        }
                    }
                }
                train_indices.resize(0,n);
                map<real,int>::iterator it = indices.begin();
                map<real,int>::iterator last = indices.end();
                for (;it!=last;++it)
                    train_indices.push_back(it->second);
                weak_learner_training_set =
                    new SelectRowsVMatrix(unweighted_data, train_indices);
                weak_learner_training_set->defineSizes(inputsize(), 1, 0);
            }
            else
            {
                Mat data_weights_column = example_weights.toMat(n,1).copy();
                // bring the weights to the same average level as the original ones
                data_weights_column *= initial_sum_weights;
                VMat data_weights = VMat(data_weights_column);
                weak_learner_training_set =
                    new ConcatColumnsVMatrix(unweighted_data, data_weights);
                weak_learner_training_set->defineSizes(inputsize(), 1, 1);
            }
        }

        // Create new weak learner and train it
        PP<PLearner> new_weak_learner = ::PLearn::deepCopy(weak_learner_template);
        new_weak_learner->setTrainingSet(weak_learner_training_set);
        new_weak_learner->setTrainStatsCollector(new VecStatsCollector);

        if(expdir!="" && provide_learner_expdir)
            new_weak_learner->setExperimentDirectory(
                append_slash(expdir+"WeakLearner" + tostring(stage) + "Expdir"));

        new_weak_learner->train();

        // calculate its weighted training error
        {
            ProgressBar pb("computing weighted training error of weak learner",n);
            learners_error[stage] = 0;
            for (int i=0; i<n; ++i) {
                pb.update(i);
                train_set->getExample(i, input, target, weight);
                new_weak_learner->computeOutput(input,output);
                real y_i=target[0];
                real f_i=output[0];
                if (pseudo_loss_adaboost) // an error between 0 and 1 (before weighting)
                {
                    examples_error[i] = 0.5*(f_i+y_i-2*f_i*y_i);
                    learners_error[stage] += example_weights[i]*examples_error[i];
                }
                else
                {
                    if (y_i==1)
                    {
                        if (f_i<output_threshold)
                        {
                            learners_error[stage] += example_weights[i];
                            examples_error[i]=1;
                        }
                        else examples_error[i] = 0;
                    }
                    else
                    {
                        if (f_i>=output_threshold) {
                            learners_error[stage] += example_weights[i];
                            examples_error[i]=1;
                        }
                        else examples_error[i]=0;
                    }
                }
            }
        }

        if (verbosity>1)
            cout << "weak learner at stage " << stage
                 << " has average loss = " << learners_error[stage] << endl;

        // stopping criterion (in addition to nstages)
        if (early_stopping &&
            (learners_error[stage] == 0 ||
             learners_error[stage] > target_error - 0.01))
        {
            nstages = stage;
            cout << "AdaBoost::train early stopping because learner's loss at stage "
                 << stage << " is " << learners_error[stage] << endl;
            break;
        }

        weak_learners.push_back(new_weak_learner);

        if (save_often && expdir!="")
            PLearn::save(append_slash(expdir)+"model.psave", *this);

        // compute the new learner's weight
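        // The voting weight below is
        //   alpha_t = log( (1-e_t)*target_error / (e_t*(1-target_error)) )
        // with e_t = learners_error[stage]; for the default
        // target_error = 0.5 this reduces to the classical AdaBoost
        // weight log((1-e_t)/e_t). Each example's weight is then
        // multiplied by exp(-alpha_t*(1-examples_error[i])), so after
        // renormalization correctly classified examples lose weight
        // relative to misclassified ones.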
        voting_weights.push_back(
            safeflog(((1-learners_error[stage])*target_error)
                     /(learners_error[stage]*(1-target_error))));
        sum_voting_weights += fabs(voting_weights[stage]);

        // update the example weights
        real sum_w=0;
        for (int i=0;i<n;i++)
        {
            example_weights[i] *= exp(-voting_weights[stage]*(1-examples_error[i]));
            sum_w += example_weights[i];
        }
        example_weights *= 1.0/sum_w;

        if (compute_training_error)
        {
            {
                ProgressBar pb("computing weighted training error of whole model",n);
                train_stats->forget();
                static Vec err(1);
                for (int i=0;i<n;i++)
                {
                    pb.update(i);
                    train_set->getExample(i, input, target, weight);
                    computeCostsOnly(input,target,err);
                    train_stats->update(err);
                }
                train_stats->finalize();
            }
            if (verbosity>2)
                cout << "At stage " << stage
                     << " boosted (weighted) classification error on training set = "
                     << train_stats->getMean() << endl;
        }
    }
}


void AdaBoost::computeOutput(const Vec& input, Vec& output) const
{
    output.resize(1);
    real sum_out=0;
    static Vec weak_learner_output(1);
    for (int i=0;i<voting_weights.length();i++)
    {
        weak_learners[i]->computeOutput(input,weak_learner_output);
        sum_out += weak_learner_output[0]*voting_weights[i];
    }
    output[0] = sum_out/sum_voting_weights;
}

void AdaBoost::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                       const Vec& target, Vec& costs) const
{
    costs.resize(1);

    // The only cost is the 0/1 classification error: output[0], the score
    // of class 1, is thresholded at output_threshold and compared to the
    // 0/1 target.
    if (target.size() > 1)
        PLERROR("AdaBoost::computeCostsFromOutputs: target must contain "
                "one element only: the 0/1 class");
    if (target[0] == 0) {
        costs[0] = output[0] >= output_threshold;
    }
    else if (target[0] == 1) {
        costs[0] = output[0] < output_threshold;
    }
    else PLERROR("AdaBoost::computeCostsFromOutputs: target must be "
                 "either 0 or 1; current target=%f", target[0]);
}

TVec<string> AdaBoost::getTestCostNames() const
{
    return getTrainCostNames();
}

TVec<string> AdaBoost::getTrainCostNames() const
{
    TVec<string> costs(1);
    costs[0] = "class_error";
    return costs;
}

} // end of namespace PLearn
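For orientation, here is a minimal usage sketch of the class defined above. It is not part of AdaBoost.cc: `weak` and `trainset` are hypothetical (any regression-capable PLearner subclass with outputs in [0,1], and a VMat whose input/target/weight sizes are already defined), and options are set by direct member access, following the PLearn convention that options are public fields. Only calls that appear in the listing above are used.

// Minimal usage sketch -- illustrative only, not from AdaBoost.cc.
#include "AdaBoost.h"
using namespace PLearn;

void train_booster(VMat trainset, PP<PLearner> weak)
{
    PP<AdaBoost> booster = new AdaBoost();
    booster->weak_learner_template = weak;
    booster->nstages = 50;               // desired number of boosting rounds
    booster->pseudo_loss_adaboost = 1;   // weak learner outputs are in [0,1]
    booster->build();
    booster->setTrainingSet(trainset);
    booster->setTrainStatsCollector(new VecStatsCollector());
    booster->train();                    // may stop early (see early_stopping)

    Vec input(trainset->inputsize());    // some input to classify
    Vec output(1);
    booster->computeOutput(input, output);
    // predicted class is 1 iff output[0] > booster->output_threshold
}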
