#include <plearn/var/AffineTransformVariable.h>
#include <plearn/var/AffineTransformWeightPenalty.h>
#include <plearn/var/BinaryClassificationLossVariable.h>
#include <plearn/var/ClassificationLossVariable.h>
#include <plearn/var/ConcatColumnsVariable.h>
#include <plearn/var/CrossEntropyVariable.h>
#include <plearn/var/ExpVariable.h>
#include <plearn/var/LiftOutputVariable.h>
#include <plearn/var/LogSoftmaxVariable.h>
#include <plearn/var/MarginPerceptronCostVariable.h>
#include <plearn/var/MulticlassLossVariable.h>
#include <plearn/var/NegCrossEntropySigmoidVariable.h>
#include <plearn/var/OneHotSquaredLoss.h>
#include <plearn/var/SigmoidVariable.h>
#include <plearn/var/SoftmaxVariable.h>
#include <plearn/var/SoftplusVariable.h>
#include <plearn/var/SumVariable.h>
#include <plearn/var/SumAbsVariable.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/SumSquareVariable.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/TransposeProductVariable.h>
#include <plearn/var/UnaryHardSlopeVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/var/Var_utils.h>

#include <plearn/vmat/ConcatColumnsVMatrix.h>

#include "NNet.h"
#include <plearn/math/random.h>
#include <plearn/vmat/SubVMatrix.h>

namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(NNet,
                        "Ordinary Feedforward Neural Network with 1 or 2 hidden layers",
                        "Neural network with many bells and whistles...");

NNet::NNet()
    : nhidden(0),
      nhidden2(0),
      noutputs(0),
      weight_decay(0),
      bias_decay(0),
      layer1_weight_decay(0),
      layer1_bias_decay(0),
      layer2_weight_decay(0),
      layer2_bias_decay(0),
      output_layer_weight_decay(0),
      output_layer_bias_decay(0),
      direct_in_to_out_weight_decay(0),
      classification_regularizer(0),
      margin(1),
      fixed_output_weights(0),
      rbf_layer_size(0),
      first_class_is_junk(1),
      L1_penalty(false),
      input_reconstruction_penalty(0),
      direct_in_to_out(false),
      output_transfer_func(""),
      hidden_transfer_func("tanh"),
      interval_minval(0), interval_maxval(1),
      batch_size(1),
      initialization_method("normal_linear")
{}

NNet::~NNet()
{
}

void NNet::declareOptions(OptionList& ol)
{
    declareOption(ol, "nhidden", &NNet::nhidden, OptionBase::buildoption,
                  "    number of hidden units in first hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "nhidden2", &NNet::nhidden2, OptionBase::buildoption,
                  "    number of hidden units in second hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "noutputs", &NNet::noutputs, OptionBase::buildoption,
                  "    number of output units. This gives this learner its outputsize.\n"
                  "    It is typically of the same dimensionality as the target for regression problems,\n"
                  "    but for classification problems where the target is just the class number, noutputs is\n"
                  "    usually the number of classes (as we want to output a score or probability\n"
                  "    vector, one per class).\n");

    declareOption(ol, "weight_decay", &NNet::weight_decay, OptionBase::buildoption,
                  "    global weight decay for all layers\n");

    declareOption(ol, "bias_decay", &NNet::bias_decay, OptionBase::buildoption,
                  "    global bias decay for all layers\n");

    declareOption(ol, "layer1_weight_decay", &NNet::layer1_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the first hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer1_bias_decay", &NNet::layer1_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the first hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "layer2_weight_decay", &NNet::layer2_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the second hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer2_bias_decay", &NNet::layer2_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the second hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "output_layer_weight_decay", &NNet::output_layer_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the output layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "output_layer_bias_decay", &NNet::output_layer_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the output layer. Is added to 'bias_decay'.\n");

    declareOption(ol, "direct_in_to_out_weight_decay", &NNet::direct_in_to_out_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the direct in-to-out layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "L1_penalty", &NNet::L1_penalty, OptionBase::buildoption,
                  "    should we use L1 penalty instead of the default L2 penalty on the weights?\n");

    declareOption(ol, "fixed_output_weights", &NNet::fixed_output_weights, OptionBase::buildoption,
                  "    If true then the output weights are not learned. They are initialized to +1 or -1 randomly.\n");

    declareOption(ol, "input_reconstruction_penalty", &NNet::input_reconstruction_penalty, OptionBase::buildoption,
                  "    if >0 then a set of weights will be added from a hidden layer to predict (reconstruct) the inputs\n"
                  "    and the total loss will include an extra term that is the squared input reconstruction error,\n"
                  "    multiplied by the input_reconstruction_penalty factor.\n");

    declareOption(ol, "direct_in_to_out", &NNet::direct_in_to_out, OptionBase::buildoption,
                  "    should we include direct input to output connections?\n");

    declareOption(ol, "rbf_layer_size", &NNet::rbf_layer_size, OptionBase::buildoption,
                  "    If non-zero, add an extra layer which computes N(h(x);mu_i,sigma_i) (Gaussian density) for the\n"
                  "    i-th output unit with mu_i a free vector and sigma_i a free scalar, and h(x) the vector of\n"
                  "    activations of the 'representation' output, i.e. what would be the output layer otherwise. The\n"
                  "    given non-zero value is the number of these 'representation' outputs. Typically this\n"
                  "    makes sense for classification problems, with a softmax output_transfer_func. If the\n"
                  "    first_class_is_junk option is set then the first output (first class) does not get a\n"
                  "    Gaussian density but just a 'pseudo-uniform' density (the single free parameter is the\n"
                  "    value of that density), and in a softmax this makes sure that when h(x) is far from the\n"
                  "    centers mu_i of all the other classes, the first (junk) class gets the strongest posterior probability.\n");

    declareOption(ol, "first_class_is_junk", &NNet::first_class_is_junk, OptionBase::buildoption,
                  "    This option is used only when rbf_layer_size>0. If true then the first class is\n"
                  "    treated differently and gets a pre-transfer-function value that is a learned constant, whereas\n"
                  "    the others get a normal centered at mu_i.\n");

    declareOption(ol, "output_transfer_func", &NNet::output_transfer_func, OptionBase::buildoption,
                  "    what transfer function to use for the output layer? \n"
                  "    one of: tanh, sigmoid, exp, softplus, softmax, log_softmax \n"
                  "    or interval(<minval>,<maxval>), which stands for\n"
                  "    <minval>+(<maxval>-<minval>)*sigmoid(.).\n"
                  "    An empty string or \"none\" means no output transfer function.\n");

    declareOption(ol, "hidden_transfer_func", &NNet::hidden_transfer_func, OptionBase::buildoption,
                  "    what transfer function to use for hidden units? \n"
                  "    one of: linear, tanh, sigmoid, exp, softplus, softmax, log_softmax, hard_slope or symm_hard_slope\n");

    declareOption(ol, "cost_funcs", &NNet::cost_funcs, OptionBase::buildoption,
                  "    a list of cost functions to use\n"
                  "    in the form \"[ cf1; cf2; cf3; ... ]\" where each function is one of: \n"
                  "      mse (for regression)\n"
                  "      mse_onehot (for classification)\n"
                  "      NLL (negative log likelihood -log(p[c]) for classification) \n"
                  "      class_error (classification error) \n"
                  "      binary_class_error (classification error for a 0-1 binary classifier)\n"
                  "      multiclass_error\n"
                  "      cross_entropy (for binary classification)\n"
                  "      stable_cross_entropy (more accurate backprop and possible regularization, for binary classification)\n"
                  "      margin_perceptron_cost (a hard version of the cross_entropy, uses the 'margin' option)\n"
                  "      lift_output (not a real cost function, just the output for lift computation)\n"
                  "    The first function of the list will be used as \n"
                  "    the objective function to optimize \n"
                  "    (possibly with an added weight decay penalty) \n");

    declareOption(ol, "classification_regularizer", &NNet::classification_regularizer, OptionBase::buildoption,
                  "    used only in the stable_cross_entropy cost function, to fight overfitting (0<=r<1)\n");

    declareOption(ol, "margin", &NNet::margin, OptionBase::buildoption,
                  "    margin requirement, used only with the margin_perceptron_cost cost function.\n"
                  "    It should be positive, and larger values regularize more.\n");

    declareOption(ol, "optimizer", &NNet::optimizer, OptionBase::buildoption,
                  "    specify the optimizer to use\n");

    declareOption(ol, "batch_size", &NNet::batch_size, OptionBase::buildoption,
                  "    how many samples to use to estimate the average gradient before updating the weights\n"
                  "    0 is equivalent to specifying training_set->length() \n");

    declareOption(ol, "initialization_method", &NNet::initialization_method, OptionBase::buildoption,
                  "    The method used to initialize the weights:\n"
                  "     - normal_linear = a normal law with variance 1 / n_inputs\n"
                  "     - normal_sqrt   = a normal law with variance 1 / sqrt(n_inputs)\n");

    declareOption(ol, "paramsvalues", &NNet::paramsvalues, OptionBase::learntoption,
                  "    The learned parameter vector\n");

    inherited::declareOptions(ol);
}
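
// A minimal sketch of how these build options might be set from a PLearn
// serialization script, assuming the usual "ClassName( option = value ; ... )"
// syntax and that the chosen Optimizer subclass (here GradientOptimizer, with a
// start_learning_rate option) is available in the build:
//
//     NNet(
//         nhidden = 50 ;
//         noutputs = 3 ;
//         output_transfer_func = "softmax" ;
//         cost_funcs = [ "NLL" ; "class_error" ] ;
//         weight_decay = 1e-5 ;
//         batch_size = 32 ;
//         optimizer = GradientOptimizer( start_learning_rate = 0.01 ) ;
//         nstages = 100 ;
//     )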

void NNet::build()
{
    inherited::build();
    build_();
}

void NNet::build_()
{
    // The Var graph can only be built once the learner's sizes are known.
    if(inputsize_>=0 && targetsize_>=0 && weightsize_>=0)
    {
        input = Var(inputsize(), "input");
        output = input;
        params.resize(0);
        Var hidden_layer;

        // First hidden layer.
        if(nhidden>0)
        {
            w1 = Var(1+inputsize(), nhidden, "w1");
            hidden_layer = affine_transform(output, w1);
            params.append(w1);
            if(hidden_transfer_func=="linear")
                output = hidden_layer;
            else if(hidden_transfer_func=="tanh")
                output = tanh(hidden_layer);
            else if(hidden_transfer_func=="sigmoid")
                output = sigmoid(hidden_layer);
            else if(hidden_transfer_func=="softplus")
                output = softplus(hidden_layer);
            else if(hidden_transfer_func=="exp")
                output = exp(hidden_layer);
            else if(hidden_transfer_func=="softmax")
                output = softmax(hidden_layer);
            else if(hidden_transfer_func=="log_softmax")
                output = log_softmax(hidden_layer); // applied to the hidden activations, consistently with the other branches
            else if(hidden_transfer_func=="hard_slope")
                output = unary_hard_slope(hidden_layer,0,1);
            else if(hidden_transfer_func=="symm_hard_slope")
                output = unary_hard_slope(hidden_layer,-1,1);
            else
                PLERROR("In NNet::build_() unknown hidden_transfer_func option: %s", hidden_transfer_func.c_str());
        }
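
        // With nhidden > 0, the graph built above computes
        //     hidden_layer = affine_transform(input, w1)   (first row of w1 acting as the bias)
        //     output       = g(hidden_layer)
        // where g is the chosen hidden_transfer_func (tanh by default), applied
        // componentwise except for softmax / log_softmax, which normalize over
        // the whole layer.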

        // Second hidden layer.
        if(nhidden2>0)
        {
            w2 = Var(1+nhidden, nhidden2, "w2");
            output = affine_transform(output, w2);
            params.append(w2);
            if(hidden_transfer_func=="linear")
                output = output; // no-op: output already holds the affine transform
            else if(hidden_transfer_func=="tanh")
                output = tanh(output);
            else if(hidden_transfer_func=="sigmoid")
                output = sigmoid(output);
            else if(hidden_transfer_func=="softplus")
                output = softplus(output);
            else if(hidden_transfer_func=="exp")
                output = exp(output);
            else if(hidden_transfer_func=="softmax")
                output = softmax(output);
            else if(hidden_transfer_func=="log_softmax")
                output = log_softmax(output);
            else if(hidden_transfer_func=="hard_slope")
                output = unary_hard_slope(output,0,1);
            else if(hidden_transfer_func=="symm_hard_slope")
                output = unary_hard_slope(output,-1,1);
            else
                PLERROR("In NNet::build_() unknown hidden_transfer_func option: %s", hidden_transfer_func.c_str());
        }

        if (nhidden2>0 && nhidden==0)
            PLERROR("NNet:: can't have nhidden2 (=%d) > 0 while nhidden=0", nhidden2);

        if (rbf_layer_size>0)
        {
            if (first_class_is_junk)
            {
                rbf_centers = Var(outputsize()-1, rbf_layer_size, "rbf_centers");
                rbf_sigmas = Var(outputsize()-1, "rbf_sigmas");
                PLERROR("In NNet.cc, the code needs to be completed, rbf_layer isn't declared and thus it doesn't compile with the line below");
                params.append(junk_prob);
            }
            else
            {
                rbf_centers = Var(outputsize(), rbf_layer_size, "rbf_centers");
                rbf_sigmas = Var(outputsize(), "rbf_sigmas");
                PLERROR("In NNet.cc, the code needs to be completed, rbf_layer isn't declared and thus it doesn't compile with the line below");
            }
            params.append(rbf_centers);
            params.append(rbf_sigmas);
        }

        wout = Var(1+output->size(), outputsize(), "wout");
        output = affine_transform(output, wout);
        if (!fixed_output_weights)
            params.append(wout);
        else
        {
            outbias = Var(output->size(), "outbias");
            output = output + outbias;
            params.append(outbias);
        }

        if(direct_in_to_out)
        {
            wdirect = Var(inputsize(), outputsize(), "wdirect");
            output += transposeProduct(wdirect, input);
            params.append(wdirect);
        }
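
        // At this point the pre-transfer-function output is
        //     output = affine_transform(h, wout)  (+ wdirect^T * input  if direct_in_to_out)
        // where h is the last hidden representation (or the raw input when nhidden == 0)
        // and wout's first row plays the role of the output bias. When fixed_output_weights
        // is set, wout is excluded from the learned params and a separate trainable
        // outbias is added instead.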

        Var before_transfer_func = output;

        size_t p=0;
        if(output_transfer_func!="" && output_transfer_func!="none")
        {
            if(output_transfer_func=="tanh")
                output = tanh(output);
            else if(output_transfer_func=="sigmoid")
                output = sigmoid(output);
            else if(output_transfer_func=="softplus")
                output = softplus(output);
            else if(output_transfer_func=="exp")
                output = exp(output);
            else if(output_transfer_func=="softmax")
                output = softmax(output);
            else if(output_transfer_func=="log_softmax")
                output = log_softmax(output);
            else if((p=output_transfer_func.find("interval"))!=string::npos)
            {
                // Parse "interval(<minval>,<maxval>)": skip the "interval(" prefix
                // (p+9, so that atof sees the number and not the keyword).
                size_t q = output_transfer_func.find(",");
                interval_minval = atof(output_transfer_func.substr(p+9,q-(p+9)).c_str());
                size_t r = output_transfer_func.find(")");
                interval_maxval = atof(output_transfer_func.substr(q+1,r-(q+1)).c_str());
                output = interval_minval + (interval_maxval - interval_minval)*sigmoid(output);
            }
            else
                PLERROR("In NNet::build_() unknown output_transfer_func option: %s", output_transfer_func.c_str());
        }
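
        // Example of the interval(...) case above: with
        //     output_transfer_func = "interval(-1,1)"
        // the parsing yields interval_minval = -1 and interval_maxval = 1, so the
        // final output is -1 + 2*sigmoid(a), squashed into (-1, 1), where a is the
        // pre-transfer-function activation.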

        target = Var(targetsize(), "target");

        if(weightsize_>0)
        {
            if (weightsize_!=1)
                PLERROR("NNet: expected weightsize to be 1 or 0 (or unspecified = -1, meaning 0), got %d",weightsize_);
            sampleweight = Var(1, "weight");
        }

        int ncosts = cost_funcs.size();
        if(ncosts<=0)
            PLERROR("In NNet::build_() Empty cost_funcs : must at least specify the cost function to optimize!");
        costs.resize(ncosts);

        for(int k=0; k<ncosts; k++)
        {
            if(cost_funcs[k]=="mse")
                costs[k]= sumsquare(output-target);
            else if(cost_funcs[k]=="mse_onehot")
                costs[k] = onehot_squared_loss(output, target);
            else if(cost_funcs[k]=="NLL")
            {
                if (output->size() == 1) {
                    costs[k] = cross_entropy(output, target);
                } else {
                    if (output_transfer_func == "log_softmax")
                        costs[k] = -output[target];
                    else
                        costs[k] = neg_log_pi(output, target);
                }
            }
            else if(cost_funcs[k]=="class_error")
                costs[k] = classification_loss(output, target);
            else if(cost_funcs[k]=="binary_class_error")
                costs[k] = binary_classification_loss(output, target);
            else if(cost_funcs[k]=="multiclass_error")
                costs[k] = multiclass_loss(output, target);
            else if(cost_funcs[k]=="cross_entropy")
                costs[k] = cross_entropy(output, target);
            else if (cost_funcs[k]=="stable_cross_entropy") {
                Var c = stable_cross_entropy(before_transfer_func, target);
                costs[k] = c;
                if (classification_regularizer) {
                    dynamic_cast<NegCrossEntropySigmoidVariable*>((Variable*) c)->
                        setRegularizer(classification_regularizer);
                }
            }
            else if (cost_funcs[k]=="margin_perceptron_cost")
                costs[k] = margin_perceptron_cost(output,target,margin);
            else if (cost_funcs[k]=="lift_output")
                costs[k] = lift_output(output, target);
            else
            {
                costs[k]= dynamic_cast<Variable*>(newObject(cost_funcs[k]));
                if(costs[k].isNull())
                    PLERROR("In NNet::build_() unknown cost_func option: %s",cost_funcs[k].c_str());
                costs[k]->setParents(output & target);
                costs[k]->build();
            }
        }
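
        // Example: with cost_funcs = [ NLL; class_error ], costs[0] (NLL) becomes the
        // training objective (plus any weight-decay penalties added below), while both
        // NLL and class_error are computed and reported as test costs. Any name that is
        // not recognized above is assumed to be the class name of a Variable that
        // newObject() can instantiate.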

        penalties.resize(0);
        if(w1 && ((layer1_weight_decay + weight_decay)!=0 || (layer1_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w1, (layer1_weight_decay + weight_decay),
                                                             (layer1_bias_decay + bias_decay), L1_penalty));
        if(w2 && ((layer2_weight_decay + weight_decay)!=0 || (layer2_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w2, (layer2_weight_decay + weight_decay),
                                                             (layer2_bias_decay + bias_decay), L1_penalty));
        if(wout && ((output_layer_weight_decay + weight_decay)!=0 || (output_layer_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(wout, (output_layer_weight_decay + weight_decay),
                                                             (output_layer_bias_decay + bias_decay), L1_penalty));
        if(wdirect && (direct_in_to_out_weight_decay + weight_decay) != 0)
        {
            if (L1_penalty)
                penalties.append(sumabs(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
            else
                penalties.append(sumsquare(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
        }

        if (input_reconstruction_penalty>0)
        {
            wrec = Var(hidden_layer->size(), inputsize(), "wrec");
            predicted_input = transposeProduct(wrec, hidden_layer);
            params.append(wrec);
            penalties.append(input_reconstruction_penalty*sumsquare(predicted_input - input));
        }
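
        // For a weight matrix such as w1, affine_transform_weight_penalty is expected
        // to contribute (up to its exact convention for the bias row) a term of the form
        //     (layer1_weight_decay + weight_decay) * sum_ij w_ij^2
        //   + (layer1_bias_decay   + bias_decay)   * sum_j  b_j^2
        // with the squares replaced by absolute values when L1_penalty is true. The
        // direct in-to-out weights wdirect have no bias row, hence the explicit
        // sumabs/sumsquare above.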

        test_costs = hconcat(costs);

        if(penalties.size() != 0) {
            if (weightsize_>0)
                training_cost = hconcat(sampleweight*sum(hconcat(costs[0] & penalties))
                                        & (test_costs*sampleweight));
            else {
                training_cost = hconcat(sum(hconcat(costs[0] & penalties)) & test_costs);
            }
        }
        else {
            if(weightsize_>0) {
                training_cost = hconcat(costs[0]*sampleweight & test_costs*sampleweight);
            } else {
                training_cost = hconcat(costs[0] & test_costs);
            }
        }
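
        // Resulting layout: training_cost = [ objective, cf1, cf2, ... ] where the
        // objective is costs[0] plus the summed penalties (when there are any), and
        // every entry is multiplied by the sample weight when weightsize_ > 0;
        // test_costs holds one entry per element of cost_funcs.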

        training_cost->setName("training_cost");
        test_costs->setName("test_costs");
        output->setName("output");

        if((bool)paramsvalues && (paramsvalues.size() == params.nelems()))
            params << paramsvalues;
        else
        {
            paramsvalues.resize(params.nelems());
            initializeParams();
        }
        params.makeSharedValue(paramsvalues);

        invars.resize(0);
        VarArray outvars;
        VarArray testinvars;
        if(input)
        {
            invars.push_back(input);
            testinvars.push_back(input);
        }
        if(output)
            outvars.push_back(output);
        if(target)
        {
            invars.push_back(target);
            testinvars.push_back(target);
            outvars.push_back(target);
        }
        if(sampleweight)
        {
            invars.push_back(sampleweight);
        }

        f = Func(input, output);
        test_costf = Func(testinvars, output&test_costs);
        test_costf->recomputeParents();
        output_and_target_to_cost = Func(outvars, test_costs);
        output_and_target_to_cost->recomputeParents();
    }
}
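
// Funcs built above:
//     f                         : input            -> output
//     test_costf                : (input, target)  -> (output, test_costs)
//     output_and_target_to_cost : (output, target) -> test_costs
// These are the functions used by computeOutput, computeOutputAndCosts and
// computeCostsFromOutputs below.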

int NNet::outputsize() const
{ return noutputs; }

TVec<string> NNet::getTrainCostNames() const
{
    return (cost_funcs[0]+"+penalty") & cost_funcs;
}

TVec<string> NNet::getTestCostNames() const
{
    return cost_funcs;
}

void NNet::train()
{
    if(!train_set)
        PLERROR("In NNet::train, you did not setTrainingSet");

    if(!train_stats)
        PLERROR("In NNet::train, you did not setTrainStatsCollector");

    int l = train_set->length();

    if(f.isNull()) // the Var graph has not been built yet
        build();

    // Number of samples over which each gradient estimate is averaged.
    int nsamples = batch_size>0 ? batch_size : l;
    Func paramf = Func(invars, training_cost);
    Var totalcost = meanOf(train_set, paramf, nsamples);
    if(optimizer)
    {
        optimizer->setToOptimize(params, totalcost);
        optimizer->build();
    }
    else PLERROR("NNet::train can't train without setting an optimizer first!");

    // Number of optimizer stages corresponding to one learner stage (one epoch).
    int optstage_per_lstage = l/nsamples;

    ProgressBar* pb = 0;
    if(report_progress)
        pb = new ProgressBar("Training NNet from stage " + tostring(stage) + " to " + tostring(nstages), nstages-stage);

    int initial_stage = stage;
    bool early_stop=false;
    while(stage<nstages && !early_stop)
    {
        optimizer->nstages = optstage_per_lstage;
        train_stats->forget();
        optimizer->early_stop = false;
        optimizer->optimizeN(*train_stats);
        train_stats->finalize();
        if(verbosity>2)
            cout << "Epoch " << stage << " train objective: " << train_stats->getMean() << endl;
        ++stage;
        if(pb)
            pb->update(stage-initial_stage);
    }
    if(verbosity>1)
        cout << "EPOCH " << stage << " train objective: " << train_stats->getMean() << endl;

    if(pb)
        delete pb;

    output_and_target_to_cost->recomputeParents();
    test_costf->recomputeParents();
}
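
// Bookkeeping of the loop above: one learner stage is one pass (epoch) over the
// training set, and each pass runs l / nsamples optimizer updates, where nsamples
// is batch_size (or the whole set when batch_size == 0). For example, with 1000
// training samples and batch_size = 10, each epoch performs 100 parameter updates,
// each on the mean training_cost over 10 consecutive samples.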

void NNet::computeOutput(const Vec& inputv, Vec& outputv) const
{
    f->fprop(inputv,outputv);
}

void NNet::computeOutputAndCosts(const Vec& inputv, const Vec& targetv,
                                 Vec& outputv, Vec& costsv) const
{
    test_costf->fprop(inputv&targetv, outputv&costsv);
}

void NNet::computeCostsFromOutputs(const Vec& inputv, const Vec& outputv,
                                   const Vec& targetv, Vec& costsv) const
{
    output_and_target_to_cost->fprop(outputv&targetv, costsv);
}

void NNet::initializeParams()
{
    if (seed_>=0)
        manual_seed(seed_);
    else
        PLearn::seed();

    real delta = 0;
    if (initialization_method == "normal_linear") {
        delta = 1.0 / inputsize();
    } else if (initialization_method == "normal_sqrt") {
        delta = 1.0 / sqrt(real(inputsize()));
    } else {
        PLERROR("In NNet::initializeParams - Unknown value for 'initialization_method'");
    }

    if(nhidden>0)
    {
        fill_random_uniform(w1->value, -delta, +delta);
        if(direct_in_to_out)
        {
            fill_random_uniform(wdirect->value, -delta, +delta);
            wdirect->matValue(0).clear();
        }
        if (initialization_method == "normal_linear") {
            delta = 1.0 / real(nhidden);
        } else if (initialization_method == "normal_sqrt") {
            delta = 1.0 / sqrt(real(nhidden));
        }
        w1->matValue(0).clear();
    }
    if(nhidden2>0)
    {
        fill_random_uniform(w2->value, -delta, +delta);
        if (initialization_method == "normal_linear") {
            delta = 1.0 / real(nhidden2);
        } else if (initialization_method == "normal_sqrt") {
            delta = 1.0 / sqrt(real(nhidden2));
        }
        w2->matValue(0).clear();
    }
    if (fixed_output_weights)
    {
        static Vec values;
        if (values.size()==0)
        {
            values.resize(2);
            values[0]=-1;
            values[1]=1;
        }
        fill_random_discrete(wout->value, values);
        wout->matValue(0).clear();
    }
    else
    {
        fill_random_uniform(wout->value, -delta, +delta);
        wout->matValue(0).clear();
    }

    if(optimizer)
        optimizer->reset();
}
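
// Note on initializeParams(): despite the "normal_*" names documented for
// initialization_method, the weights are filled uniformly in [-delta, +delta]
// (fill_random_uniform), with delta derived from the fan-in of each layer, and
// every bias row (matValue(0)) is cleared to zero. When fixed_output_weights is
// set, the output weights are instead drawn from {-1, +1}.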

void NNet::forget()
{
    if (train_set)
        initializeParams();
    stage = 0;
}

extern void varDeepCopyField(Var& field, CopiesMap& copies);

void NNet::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    varDeepCopyField(input, copies);
    varDeepCopyField(target, copies);
    varDeepCopyField(sampleweight, copies);
    varDeepCopyField(w1, copies);
    varDeepCopyField(w2, copies);
    varDeepCopyField(wout, copies);
    varDeepCopyField(outbias, copies);
    varDeepCopyField(wdirect, copies);
    varDeepCopyField(wrec, copies);
    varDeepCopyField(rbf_centers, copies);
    varDeepCopyField(rbf_sigmas, copies);
    varDeepCopyField(junk_prob, copies);
    varDeepCopyField(output, copies);
    varDeepCopyField(predicted_input, copies);
    deepCopyField(costs, copies);
    deepCopyField(penalties, copies);
    varDeepCopyField(training_cost, copies);
    varDeepCopyField(test_costs, copies);
    deepCopyField(invars, copies);
    deepCopyField(params, copies);
    deepCopyField(paramsvalues, copies);
    deepCopyField(f, copies);
    deepCopyField(test_costf, copies);
    deepCopyField(output_and_target_to_cost, copies);
    deepCopyField(optimizer, copies);
}

} // end of namespace PLearn