
AddCostToLearner.cc

// -*- C++ -*-

// AddCostToLearner.cc
//
// Copyright (C) 2004 Olivier Delalleau
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: AddCostToLearner.cc,v 1.12 2004/08/13 13:54:57 tihocan Exp $
 ******************************************************* */

// Authors: Olivier Delalleau

#include "AddCostToLearner.h"
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/var/CrossEntropyVariable.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/var/SumOverBagsVariable.h>
#include <plearn/var/VarArray.h>
#include <plearn/var/VecElementVariable.h>

namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(AddCostToLearner,
    "A PLearner that just adds additional costs to another PLearner.",
    "In addition, this learner can be used to compute costs on bags instead of\n"
    "individual samples, using the option 'compute_costs_on_bags'.\n"
    "\n"
    "Feel free to make this class evolve by adding new costs, or rewriting it\n"
    "in a better fashion, because this one is certainly not perfect.\n"
    "To use the lift cost, do the following:\n"
    " (1) add a cost of type 1 to this object's option 'costs'\n"
    " (2) replace the template_stats_collector of your PTester with one like this:\n"
    "   template_stats_collector =\n"
    "     LiftStatsCollector (\n"
    "       lift_fraction = 0.1 ;\n"
    "       output_column = \"lift_output\" ;\n"
    "       opposite_lift = 0 ; # to set to 1 if we want to optimize it\n"
    "       sign_trick = 1 ;\n"
    "     )\n"
    " (3) ask for the lift in the stats:\n"
    "   statnames = [\n"
    "     \"E[test1.LIFT]\"\n"
    "     \"E[test1.LIFT_MAX]\"\n"
    "   ];" );
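// --- Editor's note: illustrative sketch, not part of the original file. ---
// A minimal, hypothetical script-level configuration wrapping an existing
// sub-learner. The option names come from declareOptions() below; the
// surrounding script syntax merely follows the style of the LiftStatsCollector
// example in the help text above:
//
//   AddCostToLearner (
//     sub_learner = SomeBaseLearner ( ... ) ;  # the learner being wrapped
//     costs = [ "lift_output" ] ;              # extra costs to append
//     rescale_output = 1 ;                     # map outputs to [to_min, to_max]
//   )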
//////////////////////
// AddCostToLearner //
//////////////////////
AddCostToLearner::AddCostToLearner()
    : bag_size(0),
      check_output_consistency(1),
      combine_bag_outputs_method(1),
      compute_costs_on_bags(0),
      from_max(1),
      from_min(-1),
      rescale_output(0),
      rescale_target(0),
      to_max(1),
      to_min(0)
{}

////////////////////
// declareOptions //
////////////////////
void AddCostToLearner::declareOptions(OptionList& ol)
{
    declareOption(ol, "check_output_consistency", &AddCostToLearner::check_output_consistency, OptionBase::buildoption,
        "If set to 1, additional checks will be performed to make sure the output\n"
        "is compatible with the costs to be computed. This may slow down the costs\n"
        "computation, but is also safer.");

    declareOption(ol, "combine_bag_outputs_method", &AddCostToLearner::combine_bag_outputs_method, OptionBase::buildoption,
        "The method used to combine the individual outputs of the sub_learner to\n"
        "obtain a global output on the bag (irrelevant if 'compute_costs_on_bags' == 0):\n"
        " - 1 : o = 1 - (1 - o_1) * (1 - o_2) * .... * (1 - o_n)\n"
        " - 2 : o = max(o_1, o_2, ..., o_n)");
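    // --- Editor's note: illustrative worked example, not part of the original file. ---
    // For a bag whose per-sample outputs are o_1 = 0.2 and o_2 = 0.5:
    //   method 1: o = 1 - (1 - 0.2) * (1 - 0.5) = 1 - 0.4 = 0.6
    //   method 2: o = max(0.2, 0.5) = 0.5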
    declareOption(ol, "compute_costs_on_bags", &AddCostToLearner::compute_costs_on_bags, OptionBase::buildoption,
        "If set to 1, then the costs will be computed on bags, but the sub_learner will\n"
        "be trained without the bag information (see SumOverBagsVariable for info on bags).");

    declareOption(ol, "costs", &AddCostToLearner::costs, OptionBase::buildoption,
        "The costs to be added:\n"
        " - 'lift_output': used to compute the lift cost\n"
        " - 'cross_entropy': t*log(o) + (1-t)*log(1-o)\n"
        " - 'mse': the mean squared error (o - t)^2\n"
        " - 'squared_norm_reconstruction_error': | ||i||^2 - ||o||^2 |");

    declareOption(ol, "force_output_to_target_interval", &AddCostToLearner::force_output_to_target_interval, OptionBase::buildoption,
        "If set to 1 and 'rescale_output' is also set to 1, then the scaled output\n"
        "will be forced to belong to [to_min, to_max], which may not be the case otherwise\n"
        "if the output doesn't originate from [from_min, from_max].");

    declareOption(ol, "rescale_output", &AddCostToLearner::rescale_output, OptionBase::buildoption,
        "If set to 1, then the output will be rescaled before computing the costs, according\n"
        "to the values of from_min, from_max, to_min, to_max. This means it will map\n"
        "[from_min, from_max] to [to_min, to_max].");

    declareOption(ol, "rescale_target", &AddCostToLearner::rescale_target, OptionBase::buildoption,
        "Same as 'rescale_output', but for the target.");

    declareOption(ol, "from_max", &AddCostToLearner::from_max, OptionBase::buildoption,
        "Upper bound of the source interval [from_min, from_max] (used in rescaling).");

    declareOption(ol, "from_min", &AddCostToLearner::from_min, OptionBase::buildoption,
        "Lower bound of the source interval [from_min, from_max] (used in rescaling).");

    declareOption(ol, "to_max", &AddCostToLearner::to_max, OptionBase::buildoption,
        "Upper bound of the destination interval [to_min, to_max] (used in rescaling).");

    declareOption(ol, "to_min", &AddCostToLearner::to_min, OptionBase::buildoption,
        "Lower bound of the destination interval [to_min, to_max] (used in rescaling).");

    declareOption(ol, "sub_learner", &AddCostToLearner::sub_learner, OptionBase::buildoption,
        "The learner to which we add the costs.");

    // Now call the parent class' declareOptions
    inherited::declareOptions(ol);
}

///////////
// build //
///////////
void AddCostToLearner::build()
{
    inherited::build();
    build_();
}

////////////
// build_ //
////////////
void AddCostToLearner::build_()
{
    // Give a default size to bag_outputs.
    bag_outputs.resize(10, 1);
    // Make sure all costs are valid.
    int n = costs.length();
    int min_verb = 2;
    bool display = (verbosity >= min_verb);
    int os = sub_learner->outputsize();
    sub_learner_output.resize(os);
    desired_target.resize(os);
    if (rescale_output || rescale_target) {
        real from_fac = from_max - from_min;
        real to_fac = to_max - to_min;
        fac = to_fac / from_fac;
    }
    output_min = -REAL_MAX;
    output_max = REAL_MAX;
    if (n > 0 && display) {
        cout << "Additional costs computed: ";
    }
    for (int i = 0; i < n; i++) {
        string c = costs[i];
        if (display) cout << c << " ";
        if (c == "lift_output") {
            // Output should be positive.
            output_min = max(output_min, real(0));
        } else if (c == "cross_entropy") {
            // Output should be in [0,1].
            output_min = max(output_min, real(0));
            output_max = min(output_max, real(1));
            {
                Var zero = var(0);
                output_var = accessElement(sub_learner_output, zero);
                target_var = accessElement(desired_target, zero);
                cross_entropy_var = cross_entropy(output_var, target_var);
                cross_entropy_prop = propagationPath(cross_entropy_var);
            }
        } else if (c == "mse") {
        } else if (c == "squared_norm_reconstruction_error") {
        } else {
            PLERROR("In AddCostToLearner::build_ - Invalid cost requested (make sure you are using the new costs syntax)");
        }
    }
    if (n > 0 && display) {
        cout << endl;
    }
}
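// --- Editor's note: illustrative worked example, not part of the original file. ---
// With the default bounds set in the constructor (from_min = -1, from_max = 1,
// to_min = 0, to_max = 1), build_() computes
//   fac = (to_max - to_min) / (from_max - from_min) = 1 / 2 = 0.5,
// so the rescaling in computeCostsFromOutputs() below maps an output of 0.4 to
//   (0.4 - (-1)) * 0.5 + 0 = 0.7.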
//////////////////////////////
// computeCostsFromOutputs //
//////////////////////////////
void AddCostToLearner::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                               const Vec& target, Vec& costs) const
{
    int n_original_costs = sub_learner->nTestCosts();
    // We give only costs.subVec to the sub_learner because it may want to resize it.
    Vec sub_costs = costs.subVec(0, n_original_costs);
    if (compute_costs_on_bags) {
        sub_learner->computeCostsFromOutputs(input, output, target.subVec(0, target.length() - 1), sub_costs);
    } else {
        sub_learner->computeCostsFromOutputs(input, output, target, sub_costs);
    }

    if (compute_costs_on_bags) {
        // We only need to compute the costs when the whole bag has been seen,
        // otherwise we just store the outputs of each sample in the bag and fill
        // the cost with MISSING_VALUE.
        int bag_signal = int(target[target.length() - 1]);
        if (bag_signal & SumOverBagsVariable::TARGET_COLUMN_FIRST) {
            // Beginning of the bag.
            bag_size = 0;
        }
        if (bag_outputs.width() != output.length()) {
            // Need to resize bag_outputs.
            bag_outputs.resize(bag_outputs.length(), output.length());
        }
        if (bag_outputs.length() <= bag_size) {
            // Need to resize bag_outputs.
            bag_outputs.resize(bag_outputs.length() * 2, bag_outputs.width());
        }
        bag_outputs(bag_size) << output;
        bag_size++;
        if (bag_signal & SumOverBagsVariable::TARGET_COLUMN_LAST) {
            // Reached the end of the bag: we can compute the output for the bag.
            bag_outputs.resize(bag_size, bag_outputs.width());
            combined_output.resize(output.length());
            switch (combine_bag_outputs_method) {
            case 1: // o = 1 - (1 - o_1) * (1 - o_2) * .... * (1 - o_n)
                {
                    real prod;
                    for (int j = 0; j < bag_outputs.width(); j++) {
                        prod = 1;
                        for (int i = 0; i < bag_outputs.length(); i++) {
                            prod = prod * (1 - bag_outputs(i, j));
                        }
                        combined_output[j] = 1 - prod;
                    }
                }
                break;
            case 2: // o = max(o_1, o_2, ..., o_n)
                {
                    for (int j = 0; j < bag_outputs.width(); j++) {
                        combined_output[j] = max(bag_outputs.column(j));
                    }
                }
                break;
            default:
                PLERROR("In AddCostToLearner::computeCostsFromOutputs - Unknown value for 'combine_bag_outputs_method'");
            }
            // We re-compute the sub_learner's costs with the brand new combined bag output.
            sub_learner->computeCostsFromOutputs(input, combined_output, target.subVec(0, target.length() - 1), sub_costs);
        } else {
            costs.fill(MISSING_VALUE);
            return;
        }
    } else {
        combined_output = output;
    }

    Vec the_target;
    if (compute_costs_on_bags) {
        the_target = target.subVec(0, target.length() - 1);
    } else {
        the_target = target;
    }

    // Optional rescaling.
    if (!rescale_output) {
        sub_learner_output << combined_output;
    } else {
        int n = output.length();
        real scaled_output;
        for (int i = 0; i < n; i++) {
            scaled_output = (combined_output[i] - from_min) * fac + to_min;
            if (force_output_to_target_interval) {
                if (scaled_output > to_max) {
                    scaled_output = to_max;
                } else if (scaled_output < to_min) {
                    scaled_output = to_min;
                }
            }
            sub_learner_output[i] = scaled_output;
        }
    }
    if (!rescale_target) {
        desired_target << the_target;
    } else {
        int n = output.length();
        for (int i = 0; i < n; i++) {
            desired_target[i] = (the_target[i] - from_min) * fac + to_min;
        }
    }

    if (check_output_consistency) {
        real out;
        for (int i = 0; i < sub_learner_output.length(); i++) {
            out = sub_learner_output[i];
            if (out < output_min) {
                PLERROR("In AddCostToLearner::computeCostsFromOutputs - Sub-learner output (%f) is lower than %f", out, output_min);
            }
            if (out > output_max) {
                PLERROR("In AddCostToLearner::computeCostsFromOutputs - Sub-learner output (%f) is higher than %f", out, output_max);
            }
        }
    }

    for (int i = 0; i < this->costs.length(); i++) {
        string c = this->costs[i];
        int ind_cost = i + n_original_costs;
        if (c == "lift_output") {
            // TODO Using a LiftOutputVariable would be better.
#ifdef BOUNDCHECK
            if (sub_learner_output.length() != 1 || desired_target.length() != 1) {
                PLERROR("In AddCostToLearner::computeCostsFromOutputs - Lift cost is only meant to be used with one-dimensional output and target");
            }
#endif
            {
                // The 'lift cost', which actually isn't a cost, is the output when
                // the target is 1, and -output when the target is 0.
#ifdef BOUNDCHECK
                if (desired_target[0] != 0 && desired_target[0] != 1) {
                    // Invalid target.
                    PLERROR("In AddCostToLearner::computeCostsFromOutputs - Target isn't compatible with lift");
                }
#endif
                if (desired_target[0] == 1) {
                    costs[ind_cost] = sub_learner_output[0];
                } else {
                    costs[ind_cost] = - sub_learner_output[0];
                }
            }
        } else if (c == "cross_entropy") {
#ifdef BOUNDCHECK
            if (desired_target[0] != 0 && desired_target[0] != 1) {
                // Invalid target.
                PLERROR("In AddCostToLearner::computeCostsFromOutputs - Target isn't compatible with cross_entropy");
            }
#endif
            cross_entropy_prop.fprop();
            costs[ind_cost] = cross_entropy_var->valuedata[0];
        } else if (c == "mse") {
            costs[ind_cost] = powdistance(desired_target, sub_learner_output);
        } else if (c == "squared_norm_reconstruction_error") {
            PLWARNING("In AddCostToLearner::computeCostsFromOutputs - 'squared_norm_reconstruction_error'"
                      " has not been tested yet, please remove this warning if it works correctly");
            costs[ind_cost] = abs(pownorm(input, 2) - pownorm(sub_learner_output, 2));
        } else {
            PLERROR("In AddCostToLearner::computeCostsFromOutputs - Unknown cost");
        }
    }
}
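// --- Editor's note: illustrative worked example, not part of the original file. ---
// The loop above appends each extra cost after the sub-learner's own test
// costs (ind_cost = i + n_original_costs). For instance, if the sub-learner
// reports 2 test costs and 'costs' is [ "mse" ], the costs vector has length 3
// and is laid out as [ sub_cost_0, sub_cost_1, mse ].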
///////////////////
// computeOutput //
///////////////////
void AddCostToLearner::computeOutput(const Vec& input, Vec& output) const
{
    sub_learner->computeOutput(input, output);
}

////////////
// forget //
////////////
void AddCostToLearner::forget()
{
    sub_learner->forget();
    bag_size = 0;
}

//////////////////////
// getTestCostNames //
//////////////////////
TVec<string> AddCostToLearner::getTestCostNames() const
{
    // Return the names of the costs computed by computeCostsFromOutputs
    // (these may or may not be exactly the same as what's returned by getTrainCostNames).
    TVec<string> sub_costs = sub_learner->getTestCostNames();
    for (int i = 0; i < this->costs.length(); i++) {
        sub_costs.append(costs[i]);
    }
    return sub_costs;
}

///////////////////////
// getTrainCostNames //
///////////////////////
TVec<string> AddCostToLearner::getTrainCostNames() const
{
    // The lift is only a test cost.
    return sub_learner->getTrainCostNames();
}

extern void varDeepCopyField(Var& field, CopiesMap& copies);

/////////////////////////////////
// makeDeepCopyFromShallowCopy //
/////////////////////////////////
void AddCostToLearner::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(combined_output, copies);
    deepCopyField(bag_outputs, copies);
    deepCopyField(cross_entropy_prop, copies);
    varDeepCopyField(cross_entropy_var, copies);
    deepCopyField(desired_target, copies);
    varDeepCopyField(output_var, copies);
    deepCopyField(sub_learner_output, copies);
    deepCopyField(sub_input, copies);
    varDeepCopyField(target_var, copies);
    deepCopyField(costs, copies);
    deepCopyField(sub_learner, copies);
}

////////////////
// outputsize //
////////////////
int AddCostToLearner::outputsize() const
{
    return sub_learner->outputsize();
}

////////////////////
// setTrainingSet //
////////////////////
void AddCostToLearner::setTrainingSet(VMat training_set, bool call_forget) {
    if (compute_costs_on_bags) {
        // We need to remove the bag information (assumed to be in the last column
        // of the target) when giving the training set to the sub learner.
        // TODO Write a SubTargetVMatrix to make it easier.
        if (training_set->inputsize() < 0 || training_set->targetsize() < 0) {
            PLERROR("In AddCostToLearner::setTrainingSet - The inputsize and / or targetsize of the training set isn't specified");
        }
        VMat sub_training_set;
        if (training_set->weightsize() > 0) {
            sub_training_set = new ConcatColumnsVMatrix(
                new SubVMatrix(training_set, 0, 0, training_set->length(), training_set->inputsize() + training_set->targetsize() - 1),
                new SubVMatrix(training_set, 0, training_set->inputsize() + training_set->targetsize(), training_set->length(), training_set->weightsize())
            );
        } else {
            sub_training_set = new SubVMatrix(training_set, 0, 0, training_set->length(), training_set->width() - 1);
        }
        sub_training_set->defineSizes(training_set->inputsize(), training_set->targetsize() - 1, training_set->weightsize());
        sub_learner->setTrainingSet(sub_training_set, call_forget);
    } else {
        sub_learner->setTrainingSet(training_set, call_forget);
    }
    inherited::setTrainingSet(training_set, call_forget);
}
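// --- Editor's note: illustrative worked example, not part of the original file. ---
// Suppose the training set has inputsize = 3, targetsize = 2 (one real target
// plus the trailing bag-signal column) and weightsize = 1, i.e. 6 columns in
// total. setTrainingSet() above then hands the sub_learner a view keeping
// columns 0-3 (input + first target) concatenated with column 5 (weight), and
// declares its sizes as (inputsize = 3, targetsize = 1, weightsize = 1).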
///////////
// train //
///////////
void AddCostToLearner::train()
{
    sub_learner->train();
}

} // end of namespace PLearn
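Editor's note: the following is an illustrative usage sketch, not part of the
original file. It assumes (as is conventional for PLearn build options) that
'sub_learner', 'costs' and 'rescale_output' are publicly accessible members,
and that 'base_learner' and 'trainset' already exist; treat it as a sketch
under those assumptions rather than a verified example.

    #include "AddCostToLearner.h"
    using namespace PLearn;

    void wrapWithExtraCosts(PP<PLearner> base_learner, VMat trainset)
    {
        PP<AddCostToLearner> learner = new AddCostToLearner();
        learner->sub_learner = base_learner;   // the learner being wrapped
        learner->costs.append("mse");          // extra cost appended to the test costs
        learner->rescale_output = 1;           // map outputs from [-1,1] to [0,1] (defaults)
        learner->build();                      // finalize options, as in build_() above
        learner->setTrainingSet(trainset, true);
        learner->train();                      // simply delegates to the sub_learner
    }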
