#include "PDistribution.h"

namespace PLearn {
using namespace std;

PDistribution::PDistribution()
    : n_input(0),
      n_margin(0),
      n_target(0),
      full_joint_distribution(true),
      need_set_input(true),
      n_curve_points(-1),
      outputs_def("l")
{}
PLEARN_IMPLEMENT_OBJECT(
    PDistribution,
    "PDistribution is the base class for distributions.\n",
    "PDistributions derive from PLearner (as some of them may be fitted to data with train()),\n"
    "but they have additional methods allowing, for example, to compute the density or to generate data points.\n"
    "The default implementations of the learner-type methods for computing outputs and costs work as follows:\n"
    "  - the outputs_def option allows one to choose which outputs are produced\n"
    "  - cost is a vector of size 1 containing only the negative log-likelihood (NLL), i.e. -log_density.\n"
    "A PDistribution may be conditional P(Y|X) if the option 'conditional_flags' is set. In that case,\n"
    "the input should always be made of both the 'input' part (X) and the 'target' part (Y), even if the\n"
    "output may not need the Y part. The exception is when computeOutput() needs to be called\n"
    "successively with the same value of X: in this case, after a first call with both X and Y, one may\n"
    "provide only Y as input, and X will be assumed to be unchanged.\n"
);
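
// Usage sketch (illustrative only; the concrete subclass and option values are
// hypothetical). For a conditional subclass with conditional_flags = [ 1, 2 ]
// (first variable is X, second is Y), the first call to computeOutput() must
// receive the full (X, Y) vector, and outputs_def selects what is produced:
//
//     PP<PDistribution> dist = ...;      // some trained concrete subclass
//     dist->outputs_def = "le";          // log-density followed by E[Y|X]
//     dist->build();
//     Vec output(dist->outputsize());
//     dist->computeOutput(xy, output);   // xy holds X then Y
//     real nll = -output[0];             // as in computeCostsFromOutputs()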

void PDistribution::declareOptions(OptionList& ol)
{
    // Build options.

    declareOption(ol, "outputs_def", &PDistribution::outputs_def, OptionBase::buildoption,
        "Defines what will be given in output. This is a string where the characters\n"
        "have the following meaning:\n"
        "'l' -> log_density, 'd' -> density, 'c' -> cdf, 's' -> survival_fn,\n"
        "'e' -> expectation, 'v' -> variance.\n"
        "In lower case they give the value associated with a given observation.\n"
        "In upper case, a curve is evaluated at regular intervals and produced in\n"
        "output (as a histogram). For 'L', 'D', 'C', 'S', it is the target part that\n"
        "varies, while for 'E' and 'V' it is the input part (for conditional distributions).\n"
        "The number of curve points is determined by the 'n_curve_points' option.\n"
        "Note that the upper case letters only work for SCALAR variables.");

    declareOption(ol, "conditional_flags", &PDistribution::conditional_flags, OptionBase::buildoption,
        "This vector should be set for conditional distributions. It indicates what\n"
        "each input variable corresponds to:\n"
        " - 0 = it is marginalized (it does not appear in the distribution Y|X)\n"
        " - 1 = it is an input (the X in Y|X)\n"
        " - 2 = it is a target (the Y in Y|X)\n"
        "If this vector is empty, then all variables are considered targets (thus\n"
        "it is an unconditional distribution).");

    declareOption(ol, "provide_input", &PDistribution::provide_input, OptionBase::buildoption,
        "If provided, then setInput() will be called at build time with this input\n"
        "(this defines the input part for conditional distributions).");

    declareOption(ol, "n_curve_points", &PDistribution::n_curve_points, OptionBase::buildoption,
        "The number of points for which the output is evaluated when outputs_def\n"
        "is upper case (produces a histogram).\n"
        "The lower_bound and upper_bound options specify where the curve begins and ends.\n"
        "Note that these options (upper case letters) only work for SCALAR variables.");

    declareOption(ol, "lower_bound", &PDistribution::lower_bound, OptionBase::buildoption,
        "The lower bound of scalar Y values used to compute a histogram of the distribution\n"
        "when an upper case outputs_def is specified.\n");

    declareOption(ol, "upper_bound", &PDistribution::upper_bound, OptionBase::buildoption,
        "The upper bound of scalar Y values used to compute a histogram of the distribution\n"
        "when an upper case outputs_def is specified.\n");

    // Learnt options.

    declareOption(ol, "cond_sort", &PDistribution::cond_sort, OptionBase::learntoption,
        "A vector containing the indices of the variables, so that they are ordered like\n"
        "this: input, target, margin.");

    declareOption(ol, "n_input", &PDistribution::n_input, OptionBase::learntoption,
        "The size of the input x in p(y|x).");

    declareOption(ol, "n_target", &PDistribution::n_target, OptionBase::learntoption,
        "The size of the target y in p(y|x).");

    declareOption(ol, "n_margin", &PDistribution::n_margin, OptionBase::learntoption,
        "The size of the variables that are marginalized in p(y|x). E.g., if the whole\n"
        "input contains (x,y,z) and we want to compute p(y|x), then n_margin = z.length().");

    inherited::declareOptions(ol);
}
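
// Example (illustrative): with inputsize() == 3 and conditional_flags = [ 1, 0, 2 ],
// the first column of the data is the input X, the second is marginalized out and
// the third is the target Y, so that after build() we get n_input == 1,
// n_margin == 1, n_target == 1, and the distribution models P(Y | X).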

void PDistribution::build()
{
    inherited::build();
    build_();
}

void PDistribution::build_()
{
    if (n_curve_points > 0) {
        delta_curve = (upper_bound - lower_bound) / real(n_curve_points);
    }
    setConditionalFlagsWithoutUpdate(conditional_flags);
}

void PDistribution::computeOutput(const Vec& input, Vec& output) const
{
    need_set_input = splitCond(input);
    if (need_set_input) {
        // A new input part was given: store it.
        setInput(input_part);
    }
    int l = (int) outputs_def.length();
    int k = 0;
    for (int i = 0; i < l; i++)
    {
        switch (outputs_def[i])
        {
        case 'l':
            output[k++] = log_density(target_part);
            break;
        case 'd':
            output[k++] = density(target_part);
            break;
        case 'c':
            output[k++] = cdf(target_part);
            break;
        case 's':
            output[k++] = survival_fn(target_part);
            break;
        case 'e':
            store_expect = output.subVec(k, n_target);
            expectation(store_expect);
            k += n_target;
            break;
        case 'v':
            store_cov = output.subVec(k, square(n_target)).toMat(n_target, n_target);
            variance(store_cov);
            k += square(n_target);
            break;
        case 'E':
        case 'V':
            if (n_target > 1)
                PLERROR("In PDistribution::computeOutput - Can only plot histogram of expectation or variance for one-dimensional target");
            if (n_target == 0)
                PLERROR("In PDistribution::computeOutput - Cannot plot histogram of expectation or variance for unconditional distributions");
            // Intentional fall-through: 'E' and 'V' use the same curve-plotting loop.
        case 'L':
        case 'D':
        case 'C':
        case 'S':
            real t;
            store_result.resize(1);
            store_result[0] = lower_bound;
            for (int j = 0; j < n_curve_points; j++) {
                switch (outputs_def[i]) {
                case 'L':
                    t = log_density(store_result);
                    break;
                case 'D':
                    t = density(store_result);
                    break;
                case 'C':
                    t = cdf(store_result);
                    break;
                case 'S':
                    t = survival_fn(store_result);
                    break;
                case 'E':
                    setInput(store_result);
                    expectation(store_expect);
                    t = store_expect[0];
                    break;
                case 'V':
                    setInput(store_result);
                    // View store_expect's single element as a 1x1 matrix, so that
                    // the variance ends up in store_expect[0].
                    store_cov = store_expect.toMat(1, 1);
                    variance(store_cov);
                    t = store_expect[0];
                    break;
                default:
                    PLERROR("In PDistribution::computeOutput - This should never happen");
                    t = 0;
                }
                output[j + k] = t;
                store_result[0] += delta_curve;
            }
            k += n_curve_points;
            break;
        default:
            PLERROR("In PDistribution::computeOutput - Unrecognized outputs_def character");
        }
    }
}
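
// Output layout sketch (illustrative): for a conditional distribution with
// n_target == 2 and outputs_def = "le", computeOutput() fills
//     output[0]    with log_density(target_part)
//     output[1..2] with the expectation vector E[Y|X]
// so that outputsize() returns 1 + n_target == 3.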

void PDistribution::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                            const Vec& target, Vec& costs) const
{
    if (outputs_def[0] != 'l')
        PLERROR("In PDistribution::computeCostsFromOutputs - Currently, the negative log-likelihood\n"
                "can only be computed from the log-likelihood returned as the first output\n");
    costs.resize(1);
    costs[0] = -output[0];
}

bool PDistribution::ensureFullJointDistribution(TVec<int>& old_flags) {
    bool restore_flags = false;
    if (!full_joint_distribution) {
        // Save the current flags and switch to the full joint distribution.
        restore_flags = true;
        old_flags.resize(conditional_flags.length());
        old_flags << conditional_flags;
        TVec<int> tmp;
        setConditionalFlags(tmp);
    } else {
        old_flags.resize(0);
    }
    return restore_flags;
}

void PDistribution::finishConditionalBuild() {
    updateFromConditionalSorting();
    // If an input part was given through the 'provide_input' option, set it now.
    if (provide_input.isNotEmpty() && provide_input.length() == n_input) {
        input_part << provide_input;
        setInput(input_part);
    }
}

TVec<string> PDistribution::getTestCostNames() const
{
    return TVec<string>(1, "NLL");
}

TVec<string> PDistribution::getTrainCostNames() const
{
    TVec<string> c;
    return c;
}

void PDistribution::generateN(const Mat& Y) const
{
    Vec v;
    if (Y.width() != inputsize())
        PLERROR("In PDistribution::generateN matrix width (%d) differs from inputsize() (%d)",
                Y.width(), inputsize());
    int N = Y.length();
    for (int i = 0; i < N; i++)
    {
        v = Y(i);
        generate(v);
    }
}
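
// Usage sketch (illustrative): drawing 100 samples from a distribution; each
// row of the matrix is filled in place by a call to generate().
//
//     Mat samples(100, dist->inputsize());
//     dist->generateN(samples);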

void PDistribution::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(store_expect, copies);
    deepCopyField(store_result, copies);
    deepCopyField(store_cov, copies);
    deepCopyField(cond_sort, copies);
    deepCopyField(cond_swap, copies);
    deepCopyField(input_part, copies);
    deepCopyField(target_part, copies);
    deepCopyField(conditional_flags, copies);
    deepCopyField(provide_input, copies);
}

int PDistribution::outputsize() const
{
    int l = 0;
    for (unsigned int i = 0; i < outputs_def.length(); i++) {
        if (outputs_def[i] == 'L' || outputs_def[i] == 'D' || outputs_def[i] == 'C' || outputs_def[i] == 'S'
            || outputs_def[i] == 'E' || outputs_def[i] == 'V')
            l += n_curve_points;
        else if (outputs_def[i] == 'e')
            l += n_target;
        else if (outputs_def[i] == 'v')
            l += n_target * n_target;
        else
            l++;
    }
    return l;
}

void PDistribution::resizeParts() {
    input_part.resize(n_input);
    target_part.resize(n_target);
}

void PDistribution::setConditionalFlags(TVec<int>& flags) {
    setConditionalFlagsWithoutUpdate(flags);
    updateFromConditionalSorting();
}

void PDistribution::setConditionalFlagsWithoutUpdate(TVec<int>& flags) {
    static TVec<int> input;
    static TVec<int> target;
    static TVec<int> margin;
    if (inputsize_ <= 0) {
        // The input size is not defined yet: nothing can be sorted.
        return;
    }
    int is = inputsize();
    input.resize(0);
    target.resize(0);
    margin.resize(0);
    if (flags.isEmpty()) {
        // By default, all variables are targets.
        for (int i = 0; i < is; i++) {
            target.append(i);
        }
    } else {
        for (int i = 0; i < flags.length(); i++) {
            switch (flags[i]) {
            case 0:
                margin.append(i);
                break;
            case 1:
                input.append(i);
                break;
            case 2:
                target.append(i);
                break;
            default:
                PLERROR("In PDistribution::setConditionalFlagsWithoutUpdate - Unknown flag value");
            }
        }
    }
    n_input = input.length();
    n_target = target.length();
    n_margin = margin.length();
    resizeParts();
    if (n_input == 0 && n_margin == 0) {
        // No input and no marginalized variable: this is the full joint distribution.
        full_joint_distribution = true;
    } else {
        full_joint_distribution = false;
    }
    // Build the new sorted order: input, then target, then margin.
    TVec<int> new_cond_sort(is);
    new_cond_sort.subVec(0, n_input) << input;
    new_cond_sort.subVec(n_input, n_target) << target;
    new_cond_sort.subVec(n_input + n_target, n_margin) << margin;
    // The variables are already sorted if the input indices come first,
    // immediately followed by the target indices.
    if ((n_input == 0 || max(input) == n_input - 1) &&
        (n_target == 0 || max(target) == n_target + n_input - 1)) {
        already_sorted = true;
    } else {
        already_sorted = false;
    }
    // Compute the swaps needed to go from the previous ordering to the new one.
    bool found;
    int j;
    int index;
    cond_swap.resize(0);
    if (cond_sort.length() != is) {
        // The previous ordering is not valid anymore: start from the identity.
        cond_sort = TVec<int>(0, is - 1, 1);
    }
    for (int i = 0; i < is; i++) {
        found = false;
        j = 0;
        index = new_cond_sort[i];
        while (!found) {
            if (cond_sort[j] == index) {
                found = true;
                if (i != j) {
                    // The variable at position j must move to position i.
                    cond_swap.append(i);
                    cond_swap.append(j);
                }
            } else {
                j++;
            }
        }
    }
    // Remember the new ordering and the flags.
    cond_sort << new_cond_sort;
    conditional_flags.resize(flags.length());
    conditional_flags << flags;
}
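
// Worked example (illustrative): with inputsize() == 4 and flags = [ 2, 1, 0, 2 ],
// the input indices are { 1 }, the target indices { 0, 3 } and the margin index { 2 },
// so n_input == 1, n_target == 2, n_margin == 1 and cond_sort == [ 1, 0, 3, 2 ].
// Since the input index does not come first, already_sorted is false. On a first
// call (previous ordering = identity), cond_swap == [ 0,1, 1,0, 2,3, 3,2 ],
// i.e. every position is remapped.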

void PDistribution::setInput(const Vec& input) const {
    // Default behavior: simply store the given input part.
    input_part << input;
}

void PDistribution::setTrainingSet(VMat training_set, bool call_forget) {
    inherited::setTrainingSet(training_set, call_forget);
    // Now that the input size is known, the conditional flags can be applied.
    setConditionalFlags(conditional_flags);
}

void PDistribution::sortFromFlags(Vec& v) {
    static Vec tmp_copy;
    tmp_copy.resize(v.length());
    tmp_copy << v;
    for (int i = 0; i < cond_swap.length(); ) {
        // Each pair in cond_swap is (destination position, source position).
        int dst = cond_swap[i++];
        int src = cond_swap[i++];
        v[dst] = tmp_copy[src];
    }
}

void PDistribution::sortFromFlags(Mat& m, bool sort_columns, bool sort_rows) {
    static int j, k;
    static Mat tmp_copy;
    static Vec row;
    if (sort_columns) {
        for (int r = 0; r < m.length(); r++) {
            row = m(r);
            sortFromFlags(row);
        }
    }
    if (sort_rows && m.length() > 0 && m.width() > 0) {
        tmp_copy.resize(m.length(), m.width());
        tmp_copy << m;
        for (int i = 0; i < cond_swap.length(); ) {
            j = cond_swap[i++];
            k = cond_swap[i++];
            // Row j of the sorted matrix comes from row k of the original one.
            m(j) << tmp_copy(k);
        }
    }
}
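
// Example (illustrative), continuing the flags = [ 2, 1, 0, 2 ] case above with
// cond_swap == [ 0,1, 1,0, 2,3, 3,2 ] (previous ordering being the identity):
// sortFromFlags() turns a vector (a, b, c, d), given in the original variable
// order, into (b, a, d, c), i.e. input first (b), then the targets (a, d),
// then the margin (c).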

bool PDistribution::splitCond(const Vec& input) const {
    if (n_input > 0 && input.length() == n_target + n_margin) {
        // No input part is provided: the input part is assumed to be unchanged
        // since the last call, so setInput() does not need to be called again.
        if (already_sorted) {
            target_part << input.subVec(0, n_target);
        } else {
            // The case where the variables need to be reordered is not handled yet.
            PLERROR("In PDistribution::splitCond - You'll need to implement this case!");
        }
        return false;
    }
    if (already_sorted) {
        input_part << input.subVec(0, n_input);
        target_part << input.subVec(n_input, n_target);
    } else {
        for (int i = 0; i < n_input; i++) {
            input_part[i] = input[cond_sort[i]];
        }
        for (int i = 0; i < n_target; i++) {
            target_part[i] = input[cond_sort[i + n_input]];
        }
    }
    return true;
}
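
// Call pattern sketch (illustrative): for a conditional distribution with
// n_input == 2, n_target == 1 and n_margin == 0, a first call to computeOutput()
// passes the full 3-element (X, Y) vector; subsequent calls that keep X fixed
// may pass only the 1-element Y part, in which case splitCond() returns false
// and the stored input_part is reused without calling setInput() again.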

// Default implementations of the distribution-specific methods; subclasses are
// expected to override the ones they support.

void PDistribution::forget() {
    PLERROR("forget not implemented for this PDistribution");
}

real PDistribution::log_density(const Vec& y) const
{
    PLERROR("log_density not implemented for this PDistribution");
    return 0;
}

real PDistribution::density(const Vec& y) const
{
    return exp(log_density(y));
}

real PDistribution::survival_fn(const Vec& y) const
{
    PLERROR("survival_fn not implemented for this PDistribution");
    return 0;
}

real PDistribution::cdf(const Vec& y) const
{
    PLERROR("cdf not implemented for this PDistribution");
    return 0;
}

void PDistribution::expectation(Vec& mu) const
{
    PLERROR("expectation not implemented for this PDistribution");
}

void PDistribution::variance(Mat& covar) const
{
    PLERROR("variance not implemented for this PDistribution");
}

void PDistribution::resetGenerator(long g_seed) const
{
    PLERROR("resetGenerator not implemented for this PDistribution");
}

void PDistribution::generate(Vec& y) const
{
    PLERROR("generate not implemented for this PDistribution");
}

void PDistribution::train()
{
    PLERROR("train not implemented for this PDistribution");
}

void PDistribution::updateFromConditionalSorting() {
    // Default implementation: nothing to update.
    return;
}

} // end of namespace PLearn