// -*- C++ -*-

// PLearn (A C++ Machine Learning Library)
// Copyright (C) 2003 Olivier Delalleau
//

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org


/* *******************************************************
 * $Id: ConjGradientOptimizer.h,v 1.35 2004/04/30 13:12:46 tihocan Exp $
 * This file is part of the PLearn library.
 ******************************************************* */


#ifndef CONJGRADIENTOPTIMIZER_INC
#define CONJGRADIENTOPTIMIZER_INC

#include "Optimizer.h"

namespace PLearn {
using namespace std;


/*
 * CLASS CONJGRADIENTOPTIMIZER
 *
 * An optimizer using the conjugate gradient method.
 * It can be configured to use various algorithms.
 *
 */
class ConjGradientOptimizer : public Optimizer {

    typedef Optimizer inherited;

public:

    // General options (also available through setOption)

    int compute_cost;

    int line_search_algo;

    int find_new_direction_formula;

    real starting_step_size;  // initial step for line search
    real restart_coeff;       // we restart when:
                              // abs(g(n).g(n-1)) > restart_coeff*norm(g(n))^2

    // GSearch specific options
    real epsilon;             // gradient resolution

    // FletcherSearch specific options
    real sigma;               // in the constraint: abs(f'(m)) < -sigma.f'(0)
    real rho;                 // in the constraint: f(m) < f(0) + m.rho.f'(0)
    real fmax;                // we stop if we reach cost <= fmax (usually fmax = 0)
    real stop_epsilon;        // we stop when (a-alpha).f'(a) < stop_epsilon (Fletcher)
    real tau1, tau2, tau3;    // bracketing parameters

    // NewtonSearch specific options (see the options description)
    int max_steps;
    real initial_step;
    real low_enough;

protected:

    Vec meancost;  // used to store the cost, for display purposes

private:

    // Internal data
    Vec current_opp_gradient;  // current opposite gradient value
    Vec search_direction;      // current search direction for the line search
    Vec tmp_storage;           // used for temporary storage of data
    Vec delta;                 // temporary storage of the gradient
    real last_improvement;     // cost improvement during the last iteration
    real last_cost;            // last cost computed
    real current_step_size;    // current step size for line search

public:

    // Constructors and other usual stuff
    ConjGradientOptimizer(
        real the_starting_step_size = 0.01,
        real the_restart_coeff = 0.2,
        real the_epsilon = 0.01,
        real the_sigma = 0.01,
        real the_rho = 0.005,
        real the_fmax = -1e8,
        real the_stop_epsilon = 0.0001,
        real the_tau1 = 9,
        real the_tau2 = 0.1,
        real the_tau3 = 0.5,
        int n_updates = 1, const string& filename = "",
        int every_iterations = 1);

    ConjGradientOptimizer(
        VarArray the_params,
        Var the_cost,
        real the_starting_step_size = 0.01,
        real the_restart_coeff = 0.2,
        real the_epsilon = 0.01,
        real the_sigma = 0.01,
        real the_rho = 0.005,
        real the_fmax = 0,
        real the_stop_epsilon = 0.0001,
        real the_tau1 = 9,
        real the_tau2 = 0.1,
        real the_tau3 = 0.5,
        int n_updates = 1, const string& filename = "",
        int every_iterations = 1);

    ConjGradientOptimizer(
        VarArray the_params,
        Var the_cost,
        VarArray the_update_for_measure,
        real the_starting_step_size = 0.01,
        real the_restart_coeff = 0.2,
        real the_epsilon = 0.01,
        real the_sigma = 0.01,
        real the_rho = 0.005,
        real the_fmax = 0,
        real the_stop_epsilon = 0.0001,
        real the_tau1 = 9,
        real the_tau2 = 0.1,
        real the_tau3 = 0.5,
        int n_updates = 1, const string& filename = "",
        int every_iterations = 1);

    PLEARN_DECLARE_OBJECT(ConjGradientOptimizer);

    virtual void makeDeepCopyFromShallowCopy(CopiesMap& copies)
    { inherited::makeDeepCopyFromShallowCopy(copies); }

    virtual void build()
    {
        inherited::build();
        build_();
    }

private:

    void build_();

public:

    virtual real optimize();  // Deprecated, use optimizeN.
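
    // Illustrative sketch only (not part of the class interface): one
    // conceptual iteration of the conjugate gradient loop performed by
    // optimizeN() through findDirection() and lineSearch(). The variable
    // names g, g_old, h, params, n and the helpers find_new_direction_gamma()
    // and line_search_along() are hypothetical; plain loops are used instead
    // of PLearn's Vec operations.
    //
    //   // Restart test: if consecutive gradients are far from orthogonal,
    //   // fall back to steepest descent (gamma = 0).
    //   real dot_gg = 0, norm2_g = 0;
    //   for (int i = 0; i < n; i++) {
    //       dot_gg  += g[i] * g_old[i];
    //       norm2_g += g[i] * g[i];
    //   }
    //   real gamma = (fabs(dot_gg) > restart_coeff * norm2_g)
    //       ? 0 : find_new_direction_gamma();    // hypothetical helper
    //   for (int i = 0; i < n; i++)              // h(n) = -g(n) + gamma * h(n-1)
    //       h[i] = -g[i] + gamma * h[i];
    //   real step = line_search_along(h);        // hypothetical helper
    //   for (int i = 0; i < n; i++)
    //       params[i] += step * h[i];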
    virtual bool optimizeN(VecStatsCollector& stat_coll);

    virtual void reset();

protected:

    static void declareOptions(OptionList& ol);

    virtual void printStep(ostream& ostr, int step, real mean_cost, string sep="\t")
    { ostr << step << sep << meancost << endl; }

private:

    bool findDirection();

    bool lineSearch();

    void updateSearchDirection(real gamma);

    //----------------------- CONJUGATE GRADIENT FORMULAS ----------------------
    //
    // A conjugate gradient formula finds the new search direction, given
    // the current gradient, the previous one, and the current search direction.
    // It returns a constant gamma, which will be used in:
    //   h(n) = -g(n) + gamma * h(n-1)

    // The CONJPOMDP algorithm as described in
    // "Direct Gradient-Based Reinforcement Learning:
    //  II. Gradient Ascent Algorithms and Experiments"
    // by J. Baxter, L. Weaver, P. Bartlett.
    // Actually this is almost the same as the Polak-Ribiere formula.
    static real conjpomdp (
        // The given grad function needs to compute the gradient
        // (or the opposite of the gradient if we seek the minimum, as the
        // algorithm tries to find the maximum)
        void (*grad)(Optimizer*, const Vec& gradient),
        ConjGradientOptimizer* opt);

    // The Dai-Yuan formula proposed in
    // "A nonlinear conjugate gradient method with a strong global convergence
    //  property" by Dai, Yuan (1999)
    static real daiYuan (
        void (*grad)(Optimizer*, const Vec&),
        ConjGradientOptimizer* opt);

    // The Fletcher-Reeves formula used to find the new direction
    //   h(n) = -g(n) + norm2(g(n)) / norm2(g(n-1)) * h(n-1)
    static real fletcherReeves (
        void (*grad)(Optimizer*, const Vec&),
        ConjGradientOptimizer* opt);

    // The Hestenes-Stiefel formula used to find the new direction
    //   h(n) = -g(n) + dot(g(n), g(n)-g(n-1)) / dot(h(n-1), g(n)-g(n-1)) * h(n-1)
    static real hestenesStiefel (
        void (*grad)(Optimizer*, const Vec&),
        ConjGradientOptimizer* opt);

    // The Polak-Ribiere formula used to find the new direction
    //   h(n) = -g(n) + dot(g(n), g(n)-g(n-1)) / norm2(g(n-1)) * h(n-1)
    static real polakRibiere (
        void (*grad)(Optimizer*, const Vec&),
        ConjGradientOptimizer* opt);
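
    // Illustrative sketch only (not part of the class interface): the
    // Fletcher-Reeves, Polak-Ribiere and Hestenes-Stiefel coefficients
    // described above, written with plain loops over the coordinates of the
    // current gradient g, the previous gradient g_old and the previous
    // direction h (hypothetical names; the actual methods operate on the
    // optimizer's internal Vecs).
    //
    //   real num_fr = 0, num_pr = 0, den_hs = 0, norm2_old = 0;
    //   for (int i = 0; i < n; i++) {
    //       real diff = g[i] - g_old[i];
    //       num_fr    += g[i] * g[i];            // norm2(g(n))
    //       num_pr    += g[i] * diff;            // dot(g(n), g(n)-g(n-1))
    //       den_hs    += h[i] * diff;            // dot(h(n-1), g(n)-g(n-1))
    //       norm2_old += g_old[i] * g_old[i];    // norm2(g(n-1))
    //   }
    //   real gamma_fletcher_reeves  = num_fr / norm2_old;
    //   real gamma_polak_ribiere    = num_pr / norm2_old;
    //   real gamma_hestenes_stiefel = num_pr / den_hs;
    //   // In every case the new direction is h(n) = -g(n) + gamma * h(n-1).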
    //------------------------- LINE SEARCH ALGORITHMS -------------------------
    //
    // A line search algorithm moves "params" to the value minimizing "cost",
    // when moving in the direction "search_direction".
    // It must not update "current_opp_gradient" (that is done in the conjugate
    // gradient formulas).
    // It must return the optimal step found to minimize the cost along that
    // direction.

    // The GSearch algorithm as described in
    // "Direct Gradient-Based Reinforcement Learning:
    //  II. Gradient Ascent Algorithms and Experiments"
    // by J. Baxter, L. Weaver, P. Bartlett.
    real gSearch(void (*grad)(Optimizer*, const Vec&));

    // The line search algorithm described in
    // "Practical Methods of Optimization, 2nd Ed", by Fletcher (1987)
    // (this function actually just calls fletcherSearchMain)
    real fletcherSearch(real mu = FLT_MAX);

    // To be used when the cost is quadratic in the search direction: the
    // minimum will then (hopefully) be found much faster.
    real newtonSearch(
        int max_steps,
        real initial_step,
        real low_enough);

    //--------------------------- UTILITY FUNCTIONS ----------------------------

private:

    // Return cost->value() after an update of params with step size alpha
    // in the current search direction,
    // i.e. f(x) = cost(params + x*search_direction) at x = alpha
    static real computeCostValue(real alpha, ConjGradientOptimizer* opt);

    // Return the derivative of the function
    //   f(x) = cost(params + x*search_direction)
    // at x = alpha
    static real computeDerivative(real alpha, ConjGradientOptimizer* opt);

    // Same as the two functions above combined.
    // The result is returned in the cost and derivative parameters.
    static void computeCostAndDerivative(
        real alpha, ConjGradientOptimizer* opt, real& cost, real& derivative);

    // Put in a, b, c, d the coefficients of the cubic interpolation
    // given the values of f and g=df/dx at 2 points (0 and 1)
    static void cubicInterpol(
        real f0, real f1, real g0, real g1,
        real& a, real& b, real& c, real& d);


    // The main function for the Dai-Yuan formula
    // (see the conjugate gradient formulas)
    static real daiYuanMain (
        Vec new_gradient,
        Vec old_gradient,
        Vec old_search_direction,
        Vec tmp_storage);

    // Find the minimum of the cubic interpolation of function f, with g=df/dx,
    // in the interval [mini, maxi], given values at points p1 and p2
    static real findMinWithCubicInterpol (
        real p1,
        real p2,
        real mini,
        real maxi,
        real f0,
        real f1,
        real g0,
        real g1);

    // Find the minimum of the quadratic interpolation of the function, given
    // the first n values c[i] and derivatives g[i] at the points x[i]
    static real findMinWithQuadInterpol(
        int q, real sum_x, real sum_x_2, real sum_x_3, real sum_x_4,
        real sum_c_x_2, real sum_g_x, real sum_c_x, real sum_c, real sum_g);

    // The main function for Fletcher's line search algorithm.
    // We keep all the parameters, so that it can be used separately
    // (without a real ConjGradientOptimizer object)
    static real fletcherSearchMain (
        real (*f)(real, ConjGradientOptimizer* opt),
        real (*g)(real, ConjGradientOptimizer* opt),
        ConjGradientOptimizer* opt,
        real sigma,
        real rho,
        real fmax,
        real epsilon,
        real tau1 = 9,
        real tau2 = 0.1,
        real tau3 = 0.5,
        real alpha1 = FLT_MAX,  // if FLT_MAX, then let the algo find a value
        real mu = FLT_MAX);     // same remark as for alpha1

    // Find the minimum of the cubic a.x^3 + b.x^2 + c.x
    // in the range [mini, maxi]
    static real minCubic(
        real a, real b, real c,
        real mini = -FLT_MAX, real maxi = FLT_MAX);

    // Find the minimum of the function a.x^2 + b.x
    // in the range [mini, maxi]
    static real minQuadratic(
        real a, real b,
        real mini = -FLT_MAX, real maxi = FLT_MAX);
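
    // Illustrative sketch only (not part of the class interface): the
    // quadratic interpolation below fits q(x) = a*x^2 + b*x + c to
    // f(0) = f0, f'(0) = g0 and f(1) = f1, which gives c = f0, b = g0 and
    // a = f1 - f0 - g0.  Since the constant c does not affect the location of
    // the minimum, minQuadratic only needs a and b; when a > 0 the
    // unconstrained minimum lies at x* = -b / (2*a), clipped to [mini, maxi].
    //
    //   real a = f1 - f0 - g0;
    //   real b = g0;
    //   real x_star;
    //   if (a > 0) {
    //       x_star = -b / (2 * a);
    //       if (x_star < mini) x_star = mini;
    //       if (x_star > maxi) x_star = maxi;
    //   } else {
    //       // Concave or linear case: the minimum over [mini, maxi] is at
    //       // one of the endpoints.
    //       x_star = (a * mini * mini + b * mini < a * maxi * maxi + b * maxi)
    //           ? mini : maxi;
    //   }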
    // Put in a, b, c the coefficients of the quadratic interpolation
    // given the values of f at 2 points (0 and 1) and g=df/dx at 0
    static void quadraticInterpol(
        real f0, real f1, real g0,
        real& a, real& b, real& c);

};

} // end of namespace PLearn

#endif