00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
#include "Func.h"
00045
#include <plearn/math/random.h>
00046
#include <plearn/math/TMat_maths.h>
00047
#include "Var.h"
00048
#include "Var_operators.h"
00049
#include "TimesConstantVariable.h"
00050
#include <plearn/display/DisplayUtils.h>
00051
00052
namespace PLearn {
00053
using namespace std;
00054
00057 Func::Func()
00058 {}
00059
00060 Func::Func(
Function* f)
00061 :
PP<
Function>(f)
00062 {}
00063
00064 Func::Func(
const VarArray& the_inputs,
const VarArray& parameters_to_optimize,
const VarArray& the_outputs)
00065 :
PP<
Function>(new
Function(the_inputs,parameters_to_optimize,the_outputs))
00066 {}
00067 Func::Func(
const VarArray& the_inputs,
const VarArray& the_outputs)
00068 :
PP<
Function>(new
Function(the_inputs, the_outputs))
00069 {}
00070
00071
00072
00073
00074
00075
00076
00077 Vec Func::operator()(
const Vec& input)
const
00078
{
return ptr->operator()(input); }
00079
00080 real Func::operator()(
const Vec& input1,
const Vec& input2)
const
00081
{
return ptr->operator()(input1, input2); }
00082
00083 VarArray Func::operator()(
const VarArray& new_inputs)
const
00084
{
return ptr->operator()(new_inputs); }
00085
00086 Func operator/(
Func f,
real value)
00087 {
00088
if(value==1.0)
00089
return f;
00090
else
00091 {
00092
int nouts = f->outputs.size();
00093
VarArray outs(nouts);
00094
for(
int i=0; i<nouts; i++)
00095 outs[i] = f->outputs[i]/value;
00096
return Func(f->inputs, outs);
00097 }
00098 }
00099
00100
00103 Function::Function()
00104 :inputsize(-1), outputsize(-1)
00105 {}
00106
00107
00108 Function::Function(
const VarArray& the_inputs,
const VarArray& the_outputs)
00109 :inputs(the_inputs), outputs(the_outputs)
00110 {
00111
build_();
00112 }
00113
00114 Function::Function(
const VarArray& the_inputs,
const VarArray& parameters_to_optimize,
const VarArray& the_outputs)
00115 : inputs(the_inputs), parameters(parameters_to_optimize), outputs(the_outputs)
00116 {
00117
build_();
00118 }
00119
00120
00121
00122
00123
00124
00125
00126
// Registers Function with PLearn's object factory / serialization machinery.
PLEARN_IMPLEMENT_OBJECT(
Function,
"Implements a function defined as a var graph",
"NO HELP");
00127
00128 void Function::declareOptions(
OptionList& ol)
00129 {
00130
declareOption(ol,
"inputs", &Function::inputs, OptionBase::buildoption,
00131
"The list of input variabes of this function");
00132
declareOption(ol,
"parameters", &Function::parameters, OptionBase::buildoption,
00133
"The list of parameters to optimize");
00134
declareOption(ol,
"outputs", &Function::outputs, OptionBase::buildoption,
00135
"The list of output variables of this function");
00136
00137
00138 inherited::declareOptions(ol);
00139 }
00140
00141 void Function::build_()
00142 {
00143
if(
parameters.
isEmpty())
00144
parameters =
nonInputSources(
inputs,
outputs);
00145
00146
inputsize =
inputs.
nelems();
00147
outputsize =
outputs.
nelems();
00148
00149
fproppath =
propagationPath(
inputs,
outputs);
00150
if (
fproppath.
length()==0)
00151
00152 {
00153
fproppath =
propagationPath(
inputs &
parameters,
outputs);
00154
bproppath =
propagationPath(
inputs & parameters,
outputs);
00155 }
00156
else
00157
bproppath =
propagationPath(
inputs,
outputs);
00158
00159
parentspath =
propagationPathToParentsOfPath(
inputs,
outputs);
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179 }
00180
00181 void Function::build()
00182 {
00183 inherited::build();
00184
build_();
00185 }
00186
00187 void Function::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00188 {
00189 Object::makeDeepCopyFromShallowCopy(copies);
00190
deepCopyField(
inputs, copies);
00191
deepCopyField(
outputs, copies);
00192
deepCopyField(
fproppath, copies);
00193
deepCopyField(
bproppath, copies);
00194
deepCopyField(
parentspath, copies);
00195
deepCopyField(
df, copies);
00196
deepCopyField(
parameters, copies);
00197 }
00198
00199 void Function::fprop(
const Vec& in,
const Vec& out)
const
00200
{
00201
inputs << in;
00202
fproppath.
fprop();
00203
outputs >> out;
00204 }
00205
00206 void Function::fprop(
const Array<Vec>& in,
const Array<Vec>& out)
const
00207
{
00208
inputs << in;
00209
fproppath.
fprop();
00210
outputs >> out;
00211 }
00212
00213 real Function::operator()(
const Vec& input1,
const Vec& input2)
const
00214
{
00215
if(
inputs.
size()!=2 ||
outputsize!=1)
00216
PLERROR(
"You can only call real Function::operator()(const Vec& input1, const Vec& input2) for a function that has 2 input Vars and a single scalar output Var");
00217
inputs[0]->
copyFrom(input1);
00218
inputs[1]->
copyFrom(input2);
00219
fproppath.
fprop();
00220
return outputs[0]->value[0];
00221 }
00222
00223 void Function::fbprop(
const Vec& in,
const Vec& out,
const Vec& input_gradient,
const Vec& output_gradient)
00224 {
00225
inputs << in;
00226
inputs.
clearGradient();
00227
fproppath.
clearGradient();
00228
outputs.
copyGradientFrom(output_gradient);
00229
fproppath.
fbprop();
00230
outputs >> out;
00231
inputs.
copyGradientTo(input_gradient);
00232
00233
#ifdef BOUNDCHECK
00234
if (out.hasMissing())
00235
PLERROR(
"Function::fbprop: detected MISSING_VALUE in function output!");
00236
00237
static bool displayvargraph=
false;
00238
if (displayvargraph)
00239
displayVarGraph(
outputs,
true);
00240
#endif
00241
}
00242
00243 void Function::fbprop(
const Array<Vec>& in,
const Array<Vec>& out,
const Array<Vec>& input_gradient,
const Array<Vec>& output_gradient)
00244 {
00245
inputs << in;
00246
inputs.
clearGradient();
00247
fproppath.
clearGradient();
00248
outputs.
copyGradientFrom(output_gradient);
00249
fproppath.
fbprop();
00250
outputs >> out;
00251
inputs.
copyGradientTo(input_gradient);
00252
00253
#ifdef BOUNDCHECK
00254
if (out.hasMissing())
00255
PLERROR(
"Function::fbprop: detected MISSING_VALUE in function output!");
00256
#endif
00257
}
00258
00259 void Function::fbbprop(
const Vec& in,
const Vec& output,
const Vec& gradient,
const Mat& hessian)
00260 {
00261
if(
df==0)
00262
df =
differentiate();
00263
00264
inputs << in;
00265
fproppath.
fprop();
00266
outputs >> output;
00267
df->fproppath.fprop();
00268
df->outputs >> gradient;
00269
00270
df->outputs.clearGradient();
00271
int pos = 0;
00272
for(
int varnum=0; varnum<
df->outputs.size(); varnum++)
00273 {
00274
Var& outputvar =
df->outputs[varnum];
00275
for(
int i=0; i<outputvar->nelems(); i++)
00276 {
00277
df->inputs.clearGradient();
00278
df->bproppath.clearGradient();
00279 outputvar->gradient[i] = 1.0;
00280
df->bproppath.bprop();
00281
Vec hessian_row = hessian(pos++);
00282
df->inputs.copyGradientTo(hessian_row);
00283 outputvar->gradient[i] = 0.0;
00284 }
00285 }
00286 }
00287
00288 void Function::fbbpropAcc(
const Vec& in,
const Vec& output,
const Vec& gradient,
const Mat& hessian)
00289 {
00290
if(
df==0)
00291
df =
differentiate();
00292
00293
inputs << in;
00294
fproppath.
fprop();
00295
outputs.
accumulateTo(output);
00296
df->fproppath.fprop();
00297
df->outputs.accumulateTo(gradient);
00298
00299
df->outputs.clearGradient();
00300
int pos = 0;
00301
for(
int varnum=0; varnum<
df->outputs.size(); varnum++)
00302 {
00303
Var& outputvar =
df->outputs[varnum];
00304
for(
int i=0; i<outputvar->nelems(); i++)
00305 {
00306
df->inputs.clearGradient();
00307
df->bproppath.clearGradient();
00308 outputvar->gradient[i] = 1.0;
00309
df->bproppath.bprop();
00310
Vec hessian_row = hessian(pos++);
00311
df->inputs.accumulateGradientTo(hessian_row);
00312 outputvar->gradient[i] = 0.0;
00313 }
00314 }
00315 }
00316
00317 void Function::rfprop(
const Vec& in,
const Vec& out,
const Vec& input_rvalue,
const Vec& output_rvalue,
bool do_fprop)
00318 {
00319
if (do_fprop)
fprop(in,out);
00320
00321
inputs.
copyRValueFrom(input_rvalue);
00322
fproppath.
rfprop();
00323
outputs.
copyRValueTo(output_rvalue);
00324 }
00325
00326 void Function::recomputeParents()
00327 {
parentspath.
fprop(); }
00328
00329 Func Function::differentiate()
00330 {
00331
if (
outputs.
size()>1)
00332
PLERROR(
"In Function::differentiate cannot differentiate function with more than one output variable");
00333
Var output =
outputs[0];
00334
if(
df==0)
00335 {
00336 output->g =
Var(1,
"output->g");
00337 output->g = 1.0;
00338
fproppath.
symbolicBprop();
00339
00340
for(
int i=0; i<
fproppath.
size(); i++)
00341 {
00342
if(!
fproppath[i]->g)
00343 {
00344
string name =
"gr_" +
fproppath[i]->getName();
00345 fproppath[i]->g->setName(name);
00346 }
00347 }
00348
for(
int i=0; i<
inputs.
size(); i++)
00349 {
00350
if(
inputs[i]->g.
isNull())
00351
inputs[i]->g =
Var(
inputs[i]->length(),
inputs[i]->width());
00352
string name =
"gr_" +
inputs[i]->getName();
00353 inputs[i]->g->setName(name);
00354 }
00355
VarArray dinputs =
inputs.
symbolicGradient();
00356
00357
if(dinputs.
nelems() !=
inputs.
nelems())
00358
PLERROR(
"Problem in Function::differentiate() please send a bug report to vincentp@iro.umontreal.ca");
00359
00360 cerr <<
"i0: " <<
inputs[0]->classname() <<
endl;
00361 cerr <<
"i1: " << inputs[1]->classname() <<
endl;
00362 cerr <<
"di0: " << dinputs[0]->classname() <<
endl;
00363 cerr <<
"di1: " << dinputs[1]->classname() <<
endl;
00364 dinputs.resizeRValue();
00365 cerr <<
"di0 = " << dinputs[0]->rvaluedata <<
endl;
00366
df =
Func(inputs, dinputs);
00367
df->fproppath =
propagationPath(
fproppath.
parents() & (
VarArray)output->g, dinputs);
00368
fproppath.
clearSymbolicGradient();
00369 }
00370
return df;
00371 }
00372
00373 Vec Function::operator()(
const Vec& input)
const
00374
{
00375
Vec output(
outputsize);
00376
fprop(input,output);
00377
return output;
00378 }
00379
00380
00381
00382 VarArray Function::operator()(
const VarArray& new_inputs)
const
00383
{
00384
CopiesMap copies;
00385
00386
00387
for(
int i=0; i<
inputs.
size(); i++)
00388 {
00389
if(new_inputs[i]->
length()!=
inputs[i]->
length() || new_inputs[i]->width()!=
inputs[i]->width())
00390
PLERROR(
"In Function::operator()(const VarArray& new_inputs) dimensions of variables in new_inputs and inputs do not match");
00391 copies[(
Variable*)
inputs[i]] = (
Variable*)new_inputs[i];
00392
if (!new_inputs[i]->nameIsSet() &&
inputs[i]->nameIsSet())
00393 new_inputs[i]->setName(
inputs[i]->getName());
00394 }
00395
00396
00397
00398
00399
00400
VarArray parofpath =
nonInputParentsOfPath(
inputs,
outputs);
00401
for(
int i=0; i<parofpath.
size(); i++)
00402 copies[(
Variable*)parofpath[i]] = (
Variable*)parofpath[i];
00403
00404
00405
VarArray new_outputs =
outputs;
00406 new_outputs.
makeDeepCopyFromShallowCopy(copies);
00407
00408
return new_outputs;
00409 }
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439 void Function::verifyHessian(
const Vec& input,
real step)
00440 {
00441
00442
00443
00444
00445
if(
outputsize!=1)
00446
PLERROR(
"In Function::verifyHessian(...) Can verify hessian only for output of size 1");
00447
real out1,out2,out3,out4;
00448
real doublestep = 2*step;
00449
Vec output(1);
00450
Vec gradient(
inputsize);
00451
Mat hessian(
inputsize,
inputsize);
00452
fbbprop(input, output, gradient, hessian);
00453 cerr <<
"** Verifying hessian computation **" <<
endl;
00454 cerr <<
"Input: " << input;
00455 cerr <<
"Output: " << output;
00456 cerr <<
"Computed hessian: " << hessian;
00457
00458
00459
00460
00461
00462
00463
Vec newinput1 = input.
copy();
00464
Vec newinput2 = input.copy();
00465
Vec newinput3 = input.copy();
00466
Vec newinput4 = input.copy();
00467
Mat finitediffhessian(
inputsize,
inputsize);
00468
Mat rel(
inputsize,
inputsize);
00469
double h,f;
00470
for(
int i=0; i<
inputsize; i++)
00471 {
00472
for(
int j=0; j<inputsize; j++)
00473 {
00474 newinput1[i] = newinput1[i]-step;
00475 newinput1[j] = newinput1[j]-step;
00476 newinput2[i] = newinput2[i]+step;
00477 newinput2[j] = newinput2[j]-step;
00478 newinput3[i] = newinput3[i]-step;
00479 newinput3[j] = newinput3[j]+step;
00480 newinput4[i] = newinput4[i]+step;
00481 newinput4[j] = newinput4[j]+step;
00482
fprop(newinput1,output);
00483 out1 = output[0];
00484
fprop(newinput2,output);
00485 out2 = output[0];
00486
fprop(newinput3,output);
00487 out3 = output[0];
00488
fprop(newinput4,output);
00489 out4 = output[0];
00490 finitediffhessian(i,j) = ((out4-out3)/doublestep-(out2-out1)/doublestep)/doublestep;
00491 newinput1[i] = input[i];
00492 newinput1[j] = input[j];
00493 newinput2[i] = input[i];
00494 newinput2[j] = input[j];
00495 newinput3[i] = input[i];
00496 newinput3[j] = input[j];
00497 newinput4[i] = input[i];
00498 newinput4[j] = input[j];
00499 }
00500 }
00501 cerr <<
"Estimated hessian: " << finitediffhessian;
00502 cerr <<
"-------------------" <<
endl;
00503
for (
int i=0; i<inputsize; i++)
00504 {
00505
for(
int j=0; j<inputsize; j++)
00506 {
00507 h = hessian(i,j);
00508 f = finitediffhessian(i,j);
00509 rel(i,j) = 2*fabs(h-f)/(fabs(h)+fabs(f));
00510 }
00511 }
00512 cerr <<
"relative difference: " << rel <<
endl;
00513 cerr <<
"-------------------" <<
endl;
00514 cerr <<
"max relative difference: " <<
max(rel) <<
endl;
00515 }
00516
00517
00518
00519 void Function::verifyGradient(
const Vec& input,
real step)
00520 {
00521
if(
outputsize!=1)
00522
PLWARNING(
"In Function::verifyGradient(...) Will verify gradient only for the first output");
00523
Vec output(
outputsize);
00524
Vec output_gradient(
outputsize);
00525 output_gradient[0]=1.0;
00526
Vec gradient(
inputsize);
00527
fbprop(input, output, gradient,output_gradient);
00528 cerr <<
"** Verifying gradient computation **" <<
endl;
00529 cerr <<
"Input: " << input <<
endl;
00530 cerr <<
"Output: " << output[0] <<
endl;
00531 cerr <<
"Computed gradient: " << gradient <<
endl;
00532
00533
00534
Vec newinput = input.
copy();
00535
Vec finitediffgradient(
inputsize);
00536
double doublestep = step+step;
00537
for(
int i=0; i<
inputsize; i++)
00538 {
00539
real in = input[i];
00540 newinput[i] = in+step;
00541
fprop(newinput,output);
00542
real out1 = output[0];
00543 newinput[i] = in-step;
00544
fprop(newinput,output);
00545
real out2 = output[0];
00546 finitediffgradient[i] = (out1-out2)/doublestep;
00547 newinput[i] = input[i] = in;
00548 }
00549
00550
fprop(newinput,output);
00551 cerr <<
"Estimated gradient: " << finitediffgradient;
00552 cerr <<
"-------------------" <<
endl;
00553
00554 cerr <<
"relative difference: " <<
00555
apply(gradient - finitediffgradient,
FABS)/
00556 (
real(0.5)*
apply(gradient + finitediffgradient,
FABS));
00557
00558 cerr <<
"-------------------" <<
endl;
00559 cerr <<
"max relative difference: " <<
00560
max(
apply(gradient - finitediffgradient,(
tRealFunc)
FABS)/
00561 (
real(0.5)*
apply(gradient + finitediffgradient,(
tRealFunc)
FABS)))
00562 <<
endl;
00563 cerr <<
"cos(angle) : " <<
dot(gradient,finitediffgradient)/(
norm(gradient)*
norm(finitediffgradient))
00564 <<
endl;
00565 cerr <<
"angle : " << acos(
dot(gradient,finitediffgradient)/(
norm(gradient)*
norm(finitediffgradient)))
00566 <<
endl;
00567 }
00568
00569 void Function::verifyGradient(
real minval,
real maxval,
real step)
00570 {
00571
Vec input(
inputsize);
00572
fill_random_uniform(input,minval, maxval);
00573
verifyGradient(input, step);
00574 }
00575
00576 void Function::verifyGradient(
real step)
00577 {
00578
Vec input(
inputsize);
00579
inputs >> input;
00580
verifyGradient(input, step);
00581 }
00582
00583 void Function::verifySymbolicGradient(
const Vec& in)
00584 {
00585
if(in.
length()!=
inputsize)
00586
PLERROR(
"In Function::verifySymbolicGradient(const Vec& in) in does not have the size that this function expects");
00587
Vec out(
outputsize);
00588
Vec output_gradient(
outputsize,1.0);
00589
Vec gradient1(
inputsize);
00590
fbprop(in,out,gradient1,output_gradient);
00591 cout <<
"Bprop computed gradient: " << gradient1 <<
endl;
00592
00593
displayFunction(
this,
true,
false);
00594
00595
Func df =
differentiate();
00596
00597
Vec gradient2 = df(in);
00598
displayFunction(df,
true,
false);
00599 cout <<
"Symbolically computed gradient: " << gradient2 <<
endl;
00600 }
00601
00602 void Function::verifyrfprop(
const Vec& in,
real step)
00603 {
00604
00605
00606
Vec gradient(
inputsize);
00607
Vec rfpropRgradient(
inputsize);
00608
Vec fbbRgradient(
inputsize);
00609
Mat hessian(
inputsize,
inputsize);
00610
Vec rel(
inputsize);
00611
Vec out(
outputsize);
00612
real b,r;
00613
00614
if(
df==0)
00615
df =
differentiate();
00616
00617
fbbprop(in, out, gradient, hessian);
00618 fbbRgradient =
transposeProduct(hessian, gradient);
00619
00620
df->inputs.copyRValueFrom(gradient);
00621
df->fproppath.rfprop();
00622
df->outputs.copyRValueTo(rfpropRgradient);
00623
00624
for (
int i=0; i<
inputsize; i++)
00625 {
00626 b = fbbRgradient[i];
00627 r = rfpropRgradient[i];
00628
if (b==0 && r==0)
00629 rel[i] = 0.0;
00630
else rel[i] = fabs(b-r)/(fabs(b)+fabs(r));
00631 }
00632 cerr <<
"max relative difference of H*g between rfprop and fbbprop: " <<
max(rel) <<
endl;
00633
00634
00635 }
00636
00637
template <>
00638 void deepCopyField(
Func& field, CopiesMap& copies)
00639 {
00640
if (field)
00641 field = static_cast<Function*>(field->deepCopy(copies));
00642 }
00643
00644
00645 }
00646