00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00047
#ifndef GRADIENTOPTIMIZER_INC
00048
#define GRADIENTOPTIMIZER_INC
00049
00050
#include "Optimizer.h"
00051
00052
namespace PLearn {
00053
using namespace std;
00054
00055
00056 class GradientOptimizer :
public Optimizer
00057 {
00058 typedef Optimizer inherited;
00059
00060
00062
00063
00064
protected:
00065
public:
00068 real learning_rate;
00069
00070
00071 real start_learning_rate;
00072 real decrease_constant;
00073
00074
00075
00076
00077
00078 Mat lr_schedule;
00079
00080
GradientOptimizer(
real the_start_learning_rate=0.01,
00081
real the_decrease_constant=0,
00082
int n_updates=1,
const string& filename=
"",
00083
int every_iterations=1);
00084
GradientOptimizer(
VarArray the_params,
Var the_cost,
00085
real the_start_learning_rate=0.01,
00086
real the_decrease_constant=0,
00087
int n_updates=1,
const string& filename=
"",
00088
int every_iterations=1);
00089
GradientOptimizer(
VarArray the_params,
Var the_cost,
00090
VarArray update_for_measure,
00091
real the_start_learning_rate=0.01,
00092
real the_decrease_constant=0,
00093
int n_updates=1,
const string& filename=
"",
00094
int every_iterations=1);
00095
00096
00097
PLEARN_DECLARE_OBJECT(
GradientOptimizer);
00098 virtual void makeDeepCopyFromShallowCopy(
CopiesMap& copies) { inherited::makeDeepCopyFromShallowCopy(copies); }
00099
00100 virtual void build()
00101 {
00102 inherited::build();
00103
build_();
00104 }
00105
private:
00106 void build_()
00107 {}
00108
00109
public:
00110
00111
00112
00113
virtual real optimize();
00114
virtual bool optimizeN(
VecStatsCollector& stats_coll);
00115
00116
protected:
00117
static void declareOptions(
OptionList& ol);
00118 };
00119
00120
// Companion macro to PLEARN_DECLARE_OBJECT inside the class above.
// NOTE(review): the macro is defined outside this file (presumably reached
// through Optimizer.h); confirm what it declares for GradientOptimizer.
DECLARE_OBJECT_PTR(GradientOptimizer);
00121
00122 class ScaledGradientOptimizer :
public Optimizer
00123 {
00124
protected:
00125 Vec gradient;
00127 real short_time_mac;
00129 real long_time_mac;
00131 Vec short_time_ma;
00133 Vec long_time_ma;
00135 Vec long_time_mv;
00137 Vec long_time_md;
00138
00139
public:
00142 real start_learning_rate;
00143 real decrease_constant;
00144 real init_learning_rate;
00145 real learning_rate;
00146 Vec eps_scale;
00147
00148
protected:
00150 real regularizer;
00151
00152
public:
00154 ScaledGradientOptimizer(
VarArray the_params,
Var the_cost,
00155
real the_start_learning_rate=0.01,
00156
real the_decrease_constant=0.01,
00157
real the_init_learning_rate=0.003,
00158
int n_updates=1,
00159
real short_time_moving_avg_coef=0.01,
00160
real long_time_moving_avg_coef=0.001,
00161
real the_regularizer=1.0,
00162
const string& filename=
"",
00163
int every_iterations=1)
00164 :
Optimizer(the_params,the_cost, n_updates, filename, every_iterations),
00165
gradient(the_params.nelems()),
00166
short_time_mac(short_time_moving_avg_coef),
00167
long_time_mac(long_time_moving_avg_coef),
00168
short_time_ma(the_params.nelems()),
00169
long_time_ma(the_params.nelems()),
00170
long_time_mv(the_params.nelems()),
00171
long_time_md(the_params.nelems()),
00172
start_learning_rate(the_start_learning_rate),
00173
decrease_constant(the_decrease_constant),
00174
init_learning_rate(the_init_learning_rate),
00175
eps_scale(the_params.nelems()),
00176
regularizer(the_regularizer) {}
00177
00178
00179 ScaledGradientOptimizer(
VarArray the_params,
Var the_cost,
00180
real the_start_learning_rate=0.01,
00181
real the_decrease_constant=0.01,
00182
int n_updates=1,
00183
real short_time_moving_avg_coef=0.01,
00184
real long_time_moving_avg_coef=0.001,
00185
real the_regularizer=1.0,
00186
const string& filename=
"",
00187
int every_iterations=1)
00188 :
Optimizer(the_params,the_cost,
00189 n_updates, filename, every_iterations),
00190
gradient(the_params.nelems()),
00191
short_time_mac(short_time_moving_avg_coef),
00192
long_time_mac(long_time_moving_avg_coef),
00193
short_time_ma(the_params.nelems()),
00194
long_time_ma(the_params.nelems()),
00195
long_time_mv(the_params.nelems()),
00196
long_time_md(the_params.nelems()),
00197
start_learning_rate(the_start_learning_rate),
00198
decrease_constant(the_decrease_constant),
00199
eps_scale(the_params.nelems()),
00200
regularizer(the_regularizer)
00201 {
00202
eps_scale.
fill(1.0);
00203 }
00204
00205
virtual real optimize();
00206 };
00207
00208
00209
00210 }
00211
00212
#endif
00213