
PDistribution.cc

// -*- C++ -*-

// PDistribution.cc
//
// Copyright (C) 2003 Pascal Vincent
// Copyright (C) 2004 Université de Montréal
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: PDistribution.cc,v 1.21 2004/07/21 20:24:07 tihocan Exp $
 ******************************************************* */

#include "PDistribution.h"

namespace PLearn {
using namespace std;

///////////////////
// PDistribution //
///////////////////
PDistribution::PDistribution()
    : n_input(0),
      n_margin(0),
      n_target(0),
      full_joint_distribution(true),
      need_set_input(true),
      n_curve_points(-1),
      outputs_def("l")
{}

PLEARN_IMPLEMENT_OBJECT(PDistribution,
    "PDistribution is the base class for distributions.\n",
    "PDistributions derive from PLearner (as some of them may be fitted to data with train()),\n"
    "but they have additional methods allowing, for example, to compute the density or to generate data points.\n"
    "The default implementations of the learner-type methods for computing outputs and costs work as follows:\n"
    "  - the outputs_def option selects which outputs are produced\n"
    "  - the cost is a vector of size 1 containing only the negative log-likelihood (NLL), i.e. -log_density.\n"
    "A PDistribution may be conditional P(Y|X) if the 'conditional_flags' option is set. In that case,\n"
    "the input should always contain both the 'input' part (X) and the 'target' part (Y), even if the\n"
    "output does not need the Y part. The exception is when computeOutput() is called\n"
    "successively with the same value of X: after a first call with both X and Y, one may\n"
    "provide only Y as input, and X will be assumed to be unchanged.\n"
);

////////////////////
// declareOptions //
////////////////////
void PDistribution::declareOptions(OptionList& ol)
{

    // Build options.
    declareOption(ol, "outputs_def", &PDistribution::outputs_def, OptionBase::buildoption,
        "Defines what will be given in output. This is a string where the characters\n"
        "have the following meaning:\n"
        "'l' -> log_density, 'd' -> density, 'c' -> cdf, 's' -> survival_fn,\n"
        "'e' -> expectation, 'v' -> variance.\n"
        "In lower case they give the value associated with a given observation.\n"
        "In upper case, a curve is evaluated at regular intervals and produced in\n"
        "output (as a histogram). For 'L', 'D', 'C', 'S', it is the target part that\n"
        "varies, while for 'E' and 'V' it is the input part (for conditional distributions).\n"
        "The number of curve points is determined by the 'n_curve_points' option.\n"
        "Note that the upper case letters only work for SCALAR variables."
    );

    declareOption(ol, "conditional_flags", &PDistribution::conditional_flags, OptionBase::buildoption,
        "This vector should be set for conditional distributions. It indicates what\n"
        "each input variable corresponds to:\n"
        " - 0 = it is marginalized (it does not appear in the distribution Y|X)\n"
        " - 1 = it is an input (the X in Y|X)\n"
        " - 2 = it is a target (the Y in Y|X)\n"
        "If this vector is empty, then all variables are considered targets (thus\n"
        "it is an unconditional distribution)."
    );

    declareOption(ol, "provide_input", &PDistribution::provide_input, OptionBase::buildoption,
        "If provided, then setInput() will be called at build time with this input\n"
        "(this defines the input part for conditional distributions).");

    declareOption(ol, "n_curve_points", &PDistribution::n_curve_points, OptionBase::buildoption,
        "The number of points for which the output is evaluated when outputs_def\n"
        "is upper case (produces a histogram).\n"
        "The lower_bound and upper_bound options specify where the curve begins and ends.\n"
        "Note that these options (upper case letters) only work for SCALAR variables."
    );

    declareOption(ol, "lower_bound", &PDistribution::lower_bound, OptionBase::buildoption,
        "The lower bound of scalar Y values to compute a histogram of the distribution\n"
        "when an upper case outputs_def is specified.\n");

    declareOption(ol, "upper_bound", &PDistribution::upper_bound, OptionBase::buildoption,
        "The upper bound of scalar Y values to compute a histogram of the distribution\n"
        "when an upper case outputs_def is specified.\n");

    // Learnt options.

    declareOption(ol, "cond_sort", &PDistribution::cond_sort, OptionBase::learntoption,
        "A vector containing the indices of the variables, so that they are ordered like\n"
        "this: input, target, margin.");

    declareOption(ol, "n_input", &PDistribution::n_input, OptionBase::learntoption,
        "The size of the input x in p(y|x).");

    declareOption(ol, "n_target", &PDistribution::n_target, OptionBase::learntoption,
        "The size of the target y in p(y|x).");

    declareOption(ol, "n_margin", &PDistribution::n_margin, OptionBase::learntoption,
        "The size of the variables that are marginalized in p(y|x). E.g., if the whole\n"
        "input contains (x,y,z), and we want to compute p(y|x), then n_margin = z.length().");

    // Now call the parent class' declareOptions.
    inherited::declareOptions(ol);
}

///////////
// build //
///////////
void PDistribution::build()
{
    inherited::build();
    build_();
}

////////////
// build_ //
////////////
void PDistribution::build_()
{
    if (n_curve_points > 0) {
        delta_curve = (upper_bound - lower_bound) / real(n_curve_points);
    }
    // Precompute the data associated with the conditional flags.
    setConditionalFlagsWithoutUpdate(conditional_flags);
}

///////////////////
// computeOutput //
///////////////////
void PDistribution::computeOutput(const Vec& input, Vec& output) const
{
    need_set_input = splitCond(input);
    if (need_set_input) {
        // There is an input part, and it is not the same as in the previous call.
        setInput(input_part);
    }
    int l = (int) outputs_def.length();
    int k = 0;
    for (int i = 0; i < l; i++)
    {
        switch (outputs_def[i])
        {
        case 'l':
            output[k++] = log_density(target_part);
            break;
        case 'd':
            output[k++] = density(target_part);
            break;
        case 'c':
            output[k++] = cdf(target_part);
            break;
        case 's':
            output[k++] = survival_fn(target_part);
            break;
        case 'e':
            store_expect = output.subVec(k, n_target);
            expectation(store_expect);
            k += n_target;
            break;
        case 'v':
            store_cov = output.subVec(k, square(n_target)).toMat(n_target, n_target);
            variance(store_cov);
            k += square(n_target);
            break;
        case 'E':
        case 'V':
            if (n_target > 1)
                PLERROR("In PDistribution::computeOutput - Can only plot histogram of expectation or variance for one-dimensional target");
            if (n_target == 0)
                PLERROR("In PDistribution::computeOutput - Cannot plot histogram of expectation or variance for unconditional distributions");
            // Fall through: the curve is computed the same way as for 'L', 'D', 'C', 'S'.
        case 'L':
        case 'D':
        case 'C':
        case 'S':
            real t;
            store_result.resize(1);
            store_result[0] = lower_bound;
            for (int j = 0; j < n_curve_points; j++) {
                switch (outputs_def[i]) {
                case 'L':
                    t = log_density(store_result);
                    break;
                case 'D':
                    t = density(store_result);
                    break;
                case 'C':
                    t = cdf(store_result);
                    break;
                case 'S':
                    t = survival_fn(store_result);
                    break;
                case 'E':
                    setInput(store_result);
                    expectation(store_expect);
                    t = store_expect[0];
                    break;
                case 'V':
                    setInput(store_result);
                    store_cov = store_expect.toMat(1, 1);
                    variance(store_cov);
                    t = store_expect[0];
                    break;
                default:
                    PLERROR("In PDistribution::computeOutput - This should never happen");
                    t = 0; // To make the compiler happy.
                }
                output[j + k] = t;
                store_result[0] += delta_curve;
            }
            k += n_curve_points;
            break;
        default:
            PLERROR("In PDistribution::computeOutput - Unrecognized outputs_def character");
        }
    }
}

/////////////////////////////
// computeCostsFromOutputs //
/////////////////////////////
void PDistribution::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                            const Vec& target, Vec& costs) const
{
    if (outputs_def[0] != 'l')
        PLERROR("In PDistribution::computeCostsFromOutputs - Currently can only compute the\n"
                "negative log-likelihood from the log-likelihood returned as first output\n");
    costs.resize(1);
    costs[0] = -output[0];
}

//////////////////////////////////
// ensureFullJointDistribution //
//////////////////////////////////
bool PDistribution::ensureFullJointDistribution(TVec<int>& old_flags) {
    bool restore_flags = false;
    if (!full_joint_distribution) {
        // Backup flags.
        restore_flags = true;
        old_flags.resize(conditional_flags.length());
        old_flags << conditional_flags;
        // Set flags to compute the full joint distribution.
        TVec<int> tmp;
        setConditionalFlags(tmp);
    } else {
        old_flags.resize(0);
    }
    return restore_flags;
}

////////////////////////////
// finishConditionalBuild //
////////////////////////////
void PDistribution::finishConditionalBuild() {
    updateFromConditionalSorting();
    // Set the input part for a conditional distribution, if provided.
    if (provide_input.isNotEmpty() && provide_input.length() == n_input) {
        input_part << provide_input;
        setInput(input_part);
    }
}

//////////////////////
// getTestCostNames //
//////////////////////
TVec<string> PDistribution::getTestCostNames() const
{
    return TVec<string>(1, "NLL");
}

///////////////////////
// getTrainCostNames //
///////////////////////
TVec<string> PDistribution::getTrainCostNames() const
{
    // Default = no train cost computed. This may be overridden in subclasses.
    TVec<string> c;
    return c;
}

///////////////
// generateN //
///////////////
void PDistribution::generateN(const Mat& Y) const
{
    Vec v;
    if (Y.width() != inputsize())
        PLERROR("In PDistribution::generateN matrix width (%d) differs from inputsize() (%d)", Y.width(), inputsize());
    int N = Y.length();
    for (int i = 0; i < N; i++)
    {
        v = Y(i);
        generate(v);
    }
}

//////////////////////////////////
// makeDeepCopyFromShallowCopy //
//////////////////////////////////
void PDistribution::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(store_expect, copies);
    deepCopyField(store_result, copies);
    deepCopyField(store_cov, copies);
    deepCopyField(cond_sort, copies);
    deepCopyField(cond_swap, copies);
    deepCopyField(input_part, copies);
    deepCopyField(target_part, copies);
    deepCopyField(conditional_flags, copies);
    deepCopyField(provide_input, copies);
}

////////////////
// outputsize //
////////////////
int PDistribution::outputsize() const
{
    int l = 0;
    for (unsigned int i = 0; i < outputs_def.length(); i++) {
        if (outputs_def[i] == 'L' || outputs_def[i] == 'D' || outputs_def[i] == 'C' || outputs_def[i] == 'S'
            || outputs_def[i] == 'E' || outputs_def[i] == 'V')
            l += n_curve_points;
        else if (outputs_def[i] == 'e')
            l += n_target;
        else if (outputs_def[i] == 'v') // By default assume variance is a full n x n matrix.
            l += n_target * n_target;
        else
            l++;
    }
    return l;
}

/////////////////
// resizeParts //
/////////////////
void PDistribution::resizeParts() {
    input_part.resize(n_input);
    target_part.resize(n_target);
}

/////////////////////////
// setConditionalFlags //
/////////////////////////
void PDistribution::setConditionalFlags(TVec<int>& flags) {
    // Update the conditional flags.
    setConditionalFlagsWithoutUpdate(flags);
    // And call the method that updates the internal Vec and Mat given the new
    // sorting (this method should be written in subclasses).
    updateFromConditionalSorting();
}

//////////////////////////////////////
// setConditionalFlagsWithoutUpdate //
//////////////////////////////////////
void PDistribution::setConditionalFlagsWithoutUpdate(TVec<int>& flags) {
    static TVec<int> input;
    static TVec<int> target;
    static TVec<int> margin;
    if (inputsize_ <= 0) {
        // No dataset has been specified yet.
        return;
    }
    int is = inputsize();
    input.resize(0);
    target.resize(0);
    margin.resize(0);
    if (flags.isEmpty()) {
        // No flags: everything is target.
        for (int i = 0; i < is; i++) {
            target.append(i);
        }
    } else {
        for (int i = 0; i < flags.length(); i++) {
            switch (flags[i]) {
            case 0:
                margin.append(i);
                break;
            case 1:
                input.append(i);
                break;
            case 2:
                target.append(i);
                break;
            default:
                PLERROR("In PDistribution::setConditionalFlagsWithoutUpdate - Unknown flag value");
            }
        }
    }
    // Update the sizes.
    n_input = input.length();
    n_target = target.length();
    n_margin = margin.length();
    resizeParts();
    if (n_input == 0 && n_margin == 0) {
        // Only full joint distribution.
        full_joint_distribution = true;
    } else {
        full_joint_distribution = false;
    }
    // Fill the new vector of sorted indices.
    TVec<int> new_cond_sort(is);
    new_cond_sort.subVec(0, n_input) << input;
    new_cond_sort.subVec(n_input, n_target) << target;
    new_cond_sort.subVec(n_input + n_target, n_margin) << margin;
    // Check whether we are in the 'easy' case where input, target and margin
    // are correctly sorted.
    if ((n_input == 0 || max(input) == n_input - 1) &&
        (n_target == 0 || max(target) == n_target + n_input - 1)) {
        already_sorted = true;
    } else {
        already_sorted = false;
    }
    // Deduce the indices to be swapped compared to the previous sorting.
    bool found;
    int j;
    int index;
    cond_swap.resize(0);
    if (cond_sort.length() != is) {
        // The previous cond_sort is not valid anymore; we probably
        // have a new training set.
        cond_sort = TVec<int>(0, is - 1, 1);
    }
    for (int i = 0; i < is; i++) {
        found = false;
        j = 0;
        index = new_cond_sort[i];
        while (!found) {
            if (cond_sort[j] == index) {
                found = true;
                if (i != j) {
                    // There is really a need to swap the indices.
                    cond_swap.append(i);
                    cond_swap.append(j);
                }
            } else {
                j++;
            }
        }
    }
    // Copy the new vector of sorted indices.
    cond_sort << new_cond_sort;
    // Copy the new flags.
    conditional_flags.resize(flags.length());
    conditional_flags << flags;
}

//////////////
// setInput //
//////////////
void PDistribution::setInput(const Vec& input) const {
    // Default behavior: only fill input_part with input.
    input_part << input;
}

////////////////////
// setTrainingSet //
////////////////////
void PDistribution::setTrainingSet(VMat training_set, bool call_forget) {
    inherited::setTrainingSet(training_set, call_forget);
    // Update internal data according to conditional_flags.
    setConditionalFlags(conditional_flags);
}

///////////////////
// sortFromFlags //
///////////////////
void PDistribution::sortFromFlags(Vec& v) {
    static Vec tmp_copy;
    tmp_copy.resize(v.length());
    tmp_copy << v;
    // cond_swap stores (destination, source) index pairs. The two indices are
    // read in separate expressions: the previous form
    // "v[cond_swap[i++]] = tmp_copy[cond_swap[i++]]" had two unsequenced
    // increments of i in one expression, which is undefined behavior in C++.
    for (int i = 0; i < cond_swap.length(); i += 2) {
        v[cond_swap[i]] = tmp_copy[cond_swap[i + 1]];
    }
}

void PDistribution::sortFromFlags(Mat& m, bool sort_columns, bool sort_rows) {
    static int j, k;
    static Mat tmp_copy;
    static Vec row;
    if (sort_columns) {
        for (int r = 0; r < m.length(); r++) {
            row = m(r);
            sortFromFlags(row);
        }
    }
    if (sort_rows && m.length() > 0 && m.width() > 0) {
        tmp_copy.resize(m.length(), m.width());
        tmp_copy << m;
        for (int i = 0; i < cond_swap.length();) {
            j = cond_swap[i++];
            k = cond_swap[i++];
            // The new j-th row is the old k-th row.
            m(j) << tmp_copy(k);
        }
    }
}

///////////////
// splitCond //
///////////////
bool PDistribution::splitCond(const Vec& input) const {
    if (n_input > 0 && input.length() == n_target + n_margin) {
        // No input part provided: this means it is the same as before.
        if (already_sorted) {
            target_part << input.subVec(0, n_target);
        } else {
            // A bit messy here. It probably won't happen, so it's not implemented
            // for now (but wouldn't be that hard to do it).
            PLERROR("In PDistribution::splitCond - You'll need to implement this case!");
        }
        return false;
    }
    if (already_sorted) {
        input_part << input.subVec(0, n_input);
        target_part << input.subVec(n_input, n_target);
    } else {
        for (int i = 0; i < n_input; i++) {
            input_part[i] = input[cond_sort[i]];
        }
        for (int i = 0; i < n_target; i++) {
            target_part[i] = input[cond_sort[i + n_input]];
        }
    }
    return true;
}

////////////////////
// subclass stuff //
////////////////////

void PDistribution::forget() {
    PLERROR("forget not implemented for this PDistribution");
}

real PDistribution::log_density(const Vec& y) const
{ PLERROR("log_density not implemented for this PDistribution"); return 0; }

real PDistribution::density(const Vec& y) const
{ return exp(log_density(y)); }

real PDistribution::survival_fn(const Vec& y) const
{ PLERROR("survival_fn not implemented for this PDistribution"); return 0; }

real PDistribution::cdf(const Vec& y) const
{ PLERROR("cdf not implemented for this PDistribution"); return 0; }

void PDistribution::expectation(Vec& mu) const
{ PLERROR("expectation not implemented for this PDistribution"); }

void PDistribution::variance(Mat& covar) const
{ PLERROR("variance not implemented for this PDistribution"); }

void PDistribution::resetGenerator(long g_seed) const
{ PLERROR("resetGenerator not implemented for this PDistribution"); }

void PDistribution::generate(Vec& y) const
{ PLERROR("generate not implemented for this PDistribution"); }

void PDistribution::train()
{ PLERROR("train not implemented for this PDistribution"); }

//////////////////////////////////
// updateFromConditionalSorting //
//////////////////////////////////
void PDistribution::updateFromConditionalSorting() {
    // Default implementation does nothing.
    return;
}

} // end of namespace PLearn

Generated on Tue Aug 17 16:01:12 2004 for PLearn by doxygen 1.3.7