
Func.cc

// -*- C++ -*-

// PLearn (A C++ Machine Learning Library)
// Copyright (C) 1998 Pascal Vincent
// Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal
//

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org


/* *******************************************************
 * $Id: Func.cc,v 1.19 2004/07/21 16:30:54 chrish42 Exp $
 * This file is part of the PLearn library.
 ******************************************************* */

#include "Func.h"
#include <plearn/math/random.h>
#include <plearn/math/TMat_maths.h>
#include "Var.h"
#include "Var_operators.h"
#include "TimesConstantVariable.h"
#include <plearn/display/DisplayUtils.h>

namespace PLearn {
using namespace std;


Func::Func()
{}

Func::Func(Function* f)
    : PP<Function>(f)
{}

Func::Func(const VarArray& the_inputs, const VarArray& parameters_to_optimize, const VarArray& the_outputs)
    : PP<Function>(new Function(the_inputs, parameters_to_optimize, the_outputs))
{}

Func::Func(const VarArray& the_inputs, const VarArray& the_outputs)
    : PP<Function>(new Function(the_inputs, the_outputs))
{}

/*void Func::bprop(VarArray& parameters_to_optimize)
{
    ptr->bprop(parameters_to_optimize);
}
*/

Vec Func::operator()(const Vec& input) const
{ return ptr->operator()(input); }

real Func::operator()(const Vec& input1, const Vec& input2) const
{ return ptr->operator()(input1, input2); }

VarArray Func::operator()(const VarArray& new_inputs) const
{ return ptr->operator()(new_inputs); }

Func operator/(Func f, real value)
{
    if(value==1.0)
        return f;
    else
    {
        int nouts = f->outputs.size();
        VarArray outs(nouts);
        for(int i=0; i<nouts; i++)
            outs[i] = f->outputs[i]/value;
        return Func(f->inputs, outs);
    }
}


Function::Function()
    : inputsize(-1), outputsize(-1)
{}


Function::Function(const VarArray& the_inputs, const VarArray& the_outputs)
    : inputs(the_inputs), outputs(the_outputs)
{
    build_();
}

Function::Function(const VarArray& the_inputs, const VarArray& parameters_to_optimize, const VarArray& the_outputs)
    : inputs(the_inputs), parameters(parameters_to_optimize), outputs(the_outputs)
{
    build_();
}

/*void Function::bprop(VarArray& parameters_to_optimize)
{
    //bproppath = propagationPath(inputs, parameters_to_optimize,outputs);
}
*/

PLEARN_IMPLEMENT_OBJECT(Function, "Implements a function defined as a var graph", "NO HELP");

void Function::declareOptions(OptionList& ol)
{
    declareOption(ol, "inputs", &Function::inputs, OptionBase::buildoption,
                  "The list of input variables of this function");
    declareOption(ol, "parameters", &Function::parameters, OptionBase::buildoption,
                  "The list of parameters to optimize");
    declareOption(ol, "outputs", &Function::outputs, OptionBase::buildoption,
                  "The list of output variables of this function");

    // Now call the parent class' declareOptions
    inherited::declareOptions(ol);
}

void Function::build_()
{
    if(parameters.isEmpty())
        parameters = nonInputSources(inputs, outputs);

    inputsize = inputs.nelems();
    outputsize = outputs.nelems();

    fproppath = propagationPath(inputs, outputs);
    if (fproppath.length()==0) // to handle the weird case in which there is no path from inputs to outputs
                               // but outputs still depend on parameters and we want to represent that dependency
    {
        fproppath = propagationPath(inputs & parameters, outputs);
        bproppath = propagationPath(inputs & parameters, outputs);
    }
    else
        bproppath = propagationPath(inputs, outputs);

    parentspath = propagationPathToParentsOfPath(inputs, outputs);

    //parameters_to_optimize.printNames();
    //cout<<"**************Func::printInfo(inputs, outputs);"<<endl;
    //printInfo(inputs, outputs);
    //cout<<"**************Func::printInfo(parameters_to_optimize, outputs);"<<endl;
    //printInfo(parameters_to_optimize,outputs);
    //displayVarGraph(fproppath,true, 333, "ffpp", false);
    //displayVarGraph(bproppath,true, 333, "fbpp", false);


    // Let's see if getting everything in a single chunk of memory will improve efficiency...
    // Hmm, doesn't seem to.
    /*
    VarArray criticalvars = the_inputs & fproppath;
    int n = criticalvars.nelems();
    Vec data(2*n);
    criticalvars.makeSharedValue(data);
    criticalvars.makeSharedGradient(data,n);
    */
}

void Function::build()
{
    inherited::build();
    build_();
}

void Function::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    Object::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(inputs, copies);
    deepCopyField(outputs, copies);
    deepCopyField(fproppath, copies);
    deepCopyField(bproppath, copies);
    deepCopyField(parentspath, copies);
    deepCopyField(df, copies);
    deepCopyField(parameters, copies);
}

void Function::fprop(const Vec& in, const Vec& out) const
{
    inputs << in;
    fproppath.fprop();
    outputs >> out;
}

void Function::fprop(const Array<Vec>& in, const Array<Vec>& out) const
{
    inputs << in;
    fproppath.fprop();
    outputs >> out;
}

real Function::operator()(const Vec& input1, const Vec& input2) const
{
    if(inputs.size()!=2 || outputsize!=1)
        PLERROR("You can only call real Function::operator()(const Vec& input1, const Vec& input2) for a function that has 2 input Vars and a single scalar output Var");
    inputs[0]->copyFrom(input1);
    inputs[1]->copyFrom(input2);
    fproppath.fprop();
    return outputs[0]->value[0];
}

void Function::fbprop(const Vec& in, const Vec& out, const Vec& input_gradient, const Vec& output_gradient)
{
    inputs << in;
    inputs.clearGradient();
    fproppath.clearGradient();
    outputs.copyGradientFrom(output_gradient);
    fproppath.fbprop();
    outputs >> out;
    inputs.copyGradientTo(input_gradient);

#ifdef BOUNDCHECK
    if (out.hasMissing())
        PLERROR("Function::fbprop: detected MISSING_VALUE in function output!");

    static bool displayvargraph=false;
    if (displayvargraph)
        displayVarGraph(outputs,true);
#endif
}

void Function::fbprop(const Array<Vec>& in, const Array<Vec>& out, const Array<Vec>& input_gradient, const Array<Vec>& output_gradient)
{
    inputs << in;
    inputs.clearGradient();
    fproppath.clearGradient();
    outputs.copyGradientFrom(output_gradient);
    fproppath.fbprop();
    outputs >> out;
    inputs.copyGradientTo(input_gradient);

#ifdef BOUNDCHECK
    if (out.hasMissing())
        PLERROR("Function::fbprop: detected MISSING_VALUE in function output!");
#endif
}

void Function::fbbprop(const Vec& in, const Vec& output, const Vec& gradient, const Mat& hessian)
{
    if(df==0)
        df = differentiate();

    inputs << in; // inputs and df->inputs are supposed to be the same...
    fproppath.fprop();
    outputs >> output;
    df->fproppath.fprop();
    df->outputs >> gradient;

    df->outputs.clearGradient();
    int pos = 0;
    for(int varnum=0; varnum<df->outputs.size(); varnum++)
    {
        Var& outputvar = df->outputs[varnum];
        for(int i=0; i<outputvar->nelems(); i++)
        {
            df->inputs.clearGradient();
            df->bproppath.clearGradient();
            outputvar->gradient[i] = 1.0;
            df->bproppath.bprop();
            Vec hessian_row = hessian(pos++);
            df->inputs.copyGradientTo(hessian_row);
            outputvar->gradient[i] = 0.0;
        }
    }
}

void Function::fbbpropAcc(const Vec& in, const Vec& output, const Vec& gradient, const Mat& hessian)
{
    if(df==0)
        df = differentiate();

    inputs << in; // inputs and df->inputs are supposed to be the same...
    fproppath.fprop();
    outputs.accumulateTo(output);
    df->fproppath.fprop();
    df->outputs.accumulateTo(gradient);

    df->outputs.clearGradient();
    int pos = 0;
    for(int varnum=0; varnum<df->outputs.size(); varnum++)
    {
        Var& outputvar = df->outputs[varnum];
        for(int i=0; i<outputvar->nelems(); i++)
        {
            df->inputs.clearGradient();
            df->bproppath.clearGradient();
            outputvar->gradient[i] = 1.0;
            df->bproppath.bprop();
            Vec hessian_row = hessian(pos++);
            df->inputs.accumulateGradientTo(hessian_row);
            outputvar->gradient[i] = 0.0;
        }
    }
}

void Function::rfprop(const Vec& in, const Vec& out, const Vec& input_rvalue, const Vec& output_rvalue, bool do_fprop)
{
    if (do_fprop) fprop(in,out);

    inputs.copyRValueFrom(input_rvalue);
    fproppath.rfprop();
    outputs.copyRValueTo(output_rvalue);
}

void Function::recomputeParents()
{ parentspath.fprop(); }

Func Function::differentiate()
{
    if (outputs.size()>1)
        PLERROR("In Function::differentiate cannot differentiate function with more than one output variable");
    Var output = outputs[0];
    if(df==0)
    {
        output->g = Var(1,"output->g");
        output->g = 1.0; // fill gradient
        fproppath.symbolicBprop();
        // Give the symbolic gradient vars reasonable names
        for(int i=0; i<fproppath.size(); i++)
        {
            if(!fproppath[i]->g)
            {
                string name = "gr_" + fproppath[i]->getName();
                fproppath[i]->g->setName(name);
            }
        }
        for(int i=0; i<inputs.size(); i++)
        {
            if(inputs[i]->g.isNull()) // must create it, even though it will remain 0
                inputs[i]->g = Var(inputs[i]->length(), inputs[i]->width());
            string name = "gr_" + inputs[i]->getName();
            inputs[i]->g->setName(name);
        }
        VarArray dinputs = inputs.symbolicGradient();
        // Sanity check:
        if(dinputs.nelems() != inputs.nelems())
            PLERROR("Problem in Function::differentiate() please send a bug report to vincentp@iro.umontreal.ca");

        cerr << "i0: " << inputs[0]->classname() << endl;
        cerr << "i1: " << inputs[1]->classname() << endl;
        cerr << "di0: " << dinputs[0]->classname() << endl;
        cerr << "di1: " << dinputs[1]->classname() << endl;
        dinputs.resizeRValue();
        cerr << "di0 = " << dinputs[0]->rvaluedata << endl;
        df = Func(inputs, dinputs);
        df->fproppath = propagationPath(fproppath.parents() & (VarArray)output->g, dinputs);
        fproppath.clearSymbolicGradient();
    }
    return df;
}

Vec Function::operator()(const Vec& input) const
{
    Vec output(outputsize);
    fprop(input,output);
    return output;
}

// new version that uses the new deepCopy system

VarArray Function::operator()(const VarArray& new_inputs) const
{
    CopiesMap copies;

    // make sure the clones of the old inputs are the new inputs
    for(int i=0; i<inputs.size(); i++)
    {
        if(new_inputs[i]->length()!=inputs[i]->length() || new_inputs[i]->width()!=inputs[i]->width())
            PLERROR("In Function::operator()(const VarArray& new_inputs) dimensions of variables in new_inputs and inputs do not match");
        copies[(Variable*)inputs[i]] = (Variable*)new_inputs[i];
        if (!new_inputs[i]->nameIsSet() && inputs[i]->nameIsSet())
            new_inputs[i]->setName(inputs[i]->getName());
    }

    // make sure that only the vars on the direct path from inputs to outputs
    // get cloned but the clones should have the same parents as the
    // originals so that gradients can be accumulated in these originals and
    // then back propagated to shared sources.
    VarArray parofpath = nonInputParentsOfPath(inputs, outputs);
    for(int i=0; i<parofpath.size(); i++)
        copies[(Variable*)parofpath[i]] = (Variable*)parofpath[i];

    // do the deep copying
    VarArray new_outputs = outputs;
    new_outputs.makeDeepCopyFromShallowCopy(copies);

    return new_outputs;
}


// Old Version that uses the old clone system
/*
VarArray Function::operator()(const VarArray& new_inputs) const
{
    for(int i=0; i<inputs.size(); i++)
    {
        if(new_inputs[i]->length()!=inputs[i]->length() || new_inputs[i]->width()!=inputs[i]->width())
            PLERROR("In Function::operator()(const VarArray& new_inputs) dimensions of variables in new_inputs and inputs do not match");
        inputs[i]->clone_ = new_inputs[i];
    }

    VarArray clones(fproppath.size());
    for(int i=0; i<fproppath.size(); i++)
        clones[i] = fproppath[i]->clone();

    VarArray new_outputs(outputs.size());
    for(int i=0; i<outputs.size(); i++)
        new_outputs[i] = outputs[i]->clone();

    inputs.clearClone();
    fproppath.clearClone();
    outputs.clearClone();

    return new_outputs;
}
*/

void Function::verifyHessian(const Vec& input, real step)
{
    // Job for Charles...
    // Note: using the -DUSEDOUBLE option in Makefile_option
    // avoids some numerical approximation problems
    // and thus allows using smaller step values
    if(outputsize!=1)
        PLERROR("In Function::verifyHessian(...) Can verify hessian only for output of size 1");
    real out1,out2,out3,out4;
    real doublestep = 2*step;
    Vec output(1);
    Vec gradient(inputsize);
    Mat hessian(inputsize,inputsize);
    fbbprop(input, output, gradient, hessian);
    cerr << "** Verifying hessian computation **" << endl;
    cerr << "Input: " << input;
    cerr << "Output: " << output;
    cerr << "Computed hessian: " << hessian;
    // Now computing the hessian by finite difference
    //
    // f(x1+dx1,x2+dx2)-f(x1-dx1,x2+dx2)-f(x1+dx1,x2-dx2)+f(x1-dx1,x2-dx2)
    // ------------------------------------------------------------------
    //                       2 * dx1 * 2 * dx2
    //
    Vec newinput1 = input.copy();
    Vec newinput2 = input.copy();
    Vec newinput3 = input.copy();
    Vec newinput4 = input.copy();
    Mat finitediffhessian(inputsize,inputsize);
    Mat rel(inputsize,inputsize);
    double h,f;
    for(int i=0; i<inputsize; i++)
    {
        for(int j=0; j<inputsize; j++)
        {
            newinput1[i] = newinput1[i]-step;
            newinput1[j] = newinput1[j]-step;
            newinput2[i] = newinput2[i]+step;
            newinput2[j] = newinput2[j]-step;
            newinput3[i] = newinput3[i]-step;
            newinput3[j] = newinput3[j]+step;
            newinput4[i] = newinput4[i]+step;
            newinput4[j] = newinput4[j]+step;
            fprop(newinput1,output);
            out1 = output[0];
            fprop(newinput2,output);
            out2 = output[0];
            fprop(newinput3,output);
            out3 = output[0];
            fprop(newinput4,output);
            out4 = output[0];
            finitediffhessian(i,j) = ((out4-out3)/doublestep-(out2-out1)/doublestep)/doublestep;
            newinput1[i] = input[i];
            newinput1[j] = input[j];
            newinput2[i] = input[i];
            newinput2[j] = input[j];
            newinput3[i] = input[i];
            newinput3[j] = input[j];
            newinput4[i] = input[i];
            newinput4[j] = input[j];
        }
    }
    cerr << "Estimated hessian: " << finitediffhessian;
    cerr << "-------------------" << endl;
    for (int i=0; i<inputsize; i++)
    {
        for(int j=0; j<inputsize; j++)
        {
            h = hessian(i,j);
            f = finitediffhessian(i,j);
            rel(i,j) = 2*fabs(h-f)/(fabs(h)+fabs(f));
        }
    }
    cerr << "relative difference: " << rel << endl;
    cerr << "-------------------" << endl;
    cerr << "max relative difference: " << max(rel) << endl;
}



void Function::verifyGradient(const Vec& input, real step)
{
    if(outputsize!=1)
        PLWARNING("In Function::verifyGradient(...) Will verify gradient only for the first output");
    Vec output(outputsize);
    Vec output_gradient(outputsize);
    output_gradient[0]=1.0;
    Vec gradient(inputsize);
    fbprop(input, output, gradient,output_gradient);
    cerr << "** Verifying gradient computation **" << endl;
    cerr << "Input: " << input << endl;
    cerr << "Output: " << output[0] << endl;
    cerr << "Computed gradient: " << gradient << endl;
    //displayFunction(this,true);
    // Now computing the gradient by finite difference
    Vec newinput = input.copy();
    Vec finitediffgradient(inputsize);
    double doublestep = step+step;
    for(int i=0; i<inputsize; i++)
    {
        real in = input[i];
        newinput[i] = in+step;
        fprop(newinput,output);
        real out1 = output[0];
        newinput[i] = in-step;
        fprop(newinput,output);
        real out2 = output[0];
        finitediffgradient[i] = (out1-out2)/doublestep;
        newinput[i] = input[i] = in;
    }
    // copy the original input into the VarArray
    fprop(newinput,output);
    cerr << "Estimated gradient: " << finitediffgradient;
    cerr << "-------------------" << endl;

    cerr << "relative difference: " <<
        apply(gradient - finitediffgradient,FABS)/
        (real(0.5)*apply(gradient + finitediffgradient,FABS));
    // apply(gradient - finitediffgradient,(tRealFunc)fabs)/(0.5*apply(gradient + finitediffgradient,(tRealFunc)fabs));
    cerr << "-------------------" << endl;
    cerr << "max relative difference: " <<
        max(apply(gradient - finitediffgradient,(tRealFunc)FABS)/
            (real(0.5)*apply(gradient + finitediffgradient,(tRealFunc)FABS)))
         << endl;
    cerr << "cos(angle) : " << dot(gradient,finitediffgradient)/(norm(gradient)*norm(finitediffgradient))
         << endl;
    cerr << "angle : " << acos(dot(gradient,finitediffgradient)/(norm(gradient)*norm(finitediffgradient)))
         << endl;
}

void Function::verifyGradient(real minval, real maxval, real step)
{
    Vec input(inputsize);
    fill_random_uniform(input,minval, maxval);
    verifyGradient(input, step);
}

void Function::verifyGradient(real step)
{
    Vec input(inputsize);
    inputs >> input;
    verifyGradient(input, step);
}

void Function::verifySymbolicGradient(const Vec& in)
{
    if(in.length()!=inputsize)
        PLERROR("In Function::verifySymbolicGradient(const Vec& in) in does not have the size that this function expects");
    Vec out(outputsize);
    Vec output_gradient(outputsize,1.0);
    Vec gradient1(inputsize);
    fbprop(in,out,gradient1,output_gradient);
    cout << "Bprop computed gradient: " << gradient1 << endl;
    //cout << "Display f proppath" << endl;
    displayFunction(this, true, false);

    Func df = differentiate();
    //cout << "Display df proppath" << endl;
    Vec gradient2 = df(in);
    displayFunction(df, true, false);
    cout << "Symbolically computed gradient: " << gradient2 << endl;
}

void Function::verifyrfprop(const Vec& in, real step)
{
    // This check was developed to make sure that the rfprop code is correct.

    Vec gradient(inputsize);
    Vec rfpropRgradient(inputsize);
    Vec fbbRgradient(inputsize);
    Mat hessian(inputsize,inputsize);
    Vec rel(inputsize);
    Vec out(outputsize);
    real b,r;

    if(df==0)
        df = differentiate();

    fbbprop(in, out, gradient, hessian);
    fbbRgradient = transposeProduct(hessian, gradient);

    df->inputs.copyRValueFrom(gradient);
    df->fproppath.rfprop();
    df->outputs.copyRValueTo(rfpropRgradient);

    for (int i=0; i<inputsize; i++)
    {
        b = fbbRgradient[i];
        r = rfpropRgradient[i];
        if (b==0 && r==0)
            rel[i] = 0.0;
        else rel[i] = fabs(b-r)/(fabs(b)+fabs(r));
    }
    cerr << "max relative difference of H*g between rfprop and fbbprop: " << max(rel) << endl;
    //cerr << "max & min of rfprop rgradient: " << max(rfpropRgradient) << " " << min(rfpropRgradient) << endl;
    //cerr << "max & min of fbb rgradient: " << max(fbbRgradient) << " " << min(fbbRgradient) << endl;
}

template <>
void deepCopyField(Func& field, CopiesMap& copies)
{
    if (field)
        field = static_cast<Function*>(field->deepCopy(copies));
}


} // end of namespace PLearn
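
For reference, the snippet below is a minimal usage sketch of the Func/Function API defined above; it is not part of Func.cc. It assumes that a dot(Var,Var) builder returning a 1x1 Var is available from the Var operator headers included by this file, that a single Var converts implicitly to a one-element VarArray, and that "Var_all.h" is the aggregate header declaring such builders; these details may differ between PLearn versions.

// Hypothetical usage sketch -- not part of Func.cc.
// Assumptions: dot(Var,Var) builds a 1x1 Var, a single Var converts to a
// VarArray, and "Var_all.h" aggregates the Var operator declarations.
#include "Func.h"
#include "Var_all.h"                  // assumption: declares dot() and other Var builders
#include <plearn/math/random.h>       // for fill_random_uniform, as used above

using namespace PLearn;

int main()
{
    Var x(3, "x");                    // input variable of length 3 (Var(int, name) as used above)
    Var w(3, "w");                    // parameter variable of length 3
    Var y = dot(x, w);                // assumed scalar-producing builder

    Func f(x, w, y);                  // function of x, with w as the parameter to optimize

    Vec in(3);
    fill_random_uniform(in, -1.0, 1.0);
    Vec out = f(in);                  // Func::operator()(const Vec&) -> Function::fprop

    // Check the analytic gradient against the finite-difference estimate
    // computed by Function::verifyGradient(const Vec&, real).
    f->verifyGradient(in, 1e-6);

    // Remap the same computation onto a fresh input variable; only the vars on
    // the path from inputs to outputs are cloned (see operator()(const VarArray&)).
    Var x2(3, "x2");
    VarArray new_outputs = f(x2);
    return 0;
}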
