#include <plearn/var/AffineTransformVariable.h>
#include <plearn/var/AffineTransformWeightPenalty.h>
#include <plearn/var/BinaryClassificationLossVariable.h>
#include <plearn/var/ClassificationLossVariable.h>
#include <plearn/var/ConcatColumnsVariable.h>
#include <plearn/var/CrossEntropyVariable.h>
#include <plearn/var/ExpVariable.h>
#include <plearn/var/IfThenElseVariable.h>
#include <plearn/var/IsMissingVariable.h>
#include <plearn/var/LiftOutputVariable.h>
#include <plearn/var/LogSoftmaxVariable.h>
#include <plearn/var/MulticlassLossVariable.h>
#include <plearn/var/NegCrossEntropySigmoidVariable.h>
#include <plearn/var/OneHotSquaredLoss.h>
#include <plearn/var/SemiSupervisedProbClassCostVariable.h>
#include <plearn/var/SigmoidVariable.h>
#include <plearn/var/SoftmaxVariable.h>
#include <plearn/var/SoftplusVariable.h>
#include <plearn/var/SourceVariable.h>
#include <plearn/var/SubMatVariable.h>
#include <plearn/var/SumVariable.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/SumSquareVariable.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/TransposeProductVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/var/Var_utils.h>
#include <plearn/var/WeightedSumSquareVariable.h>

#include <plearn/math/random.h>

#include "NeuralNet.h"
namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(NeuralNet,
                        "DEPRECATED: Use NNet instead",
                        "NO HELP");
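// NeuralNet builds, as a graph of PLearn Var nodes, a feedforward network:
// the input (optionally standardized with the 'normalization' matrices) goes
// through up to two tanh hidden layers of sizes 'nhidden' and 'nhidden2',
// then through an affine output layer, optionally followed by an output
// transfer function (tanh, sigmoid, softplus, exp, softmax or log_softmax).
// Direct input-to-output connections and per-layer weight decay penalties
// can also be added.  See build_() below for the details.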
NeuralNet::NeuralNet()
    : nhidden(0),
      nhidden2(0),
      weight_decay(0),
      bias_decay(0),
      layer1_weight_decay(0),
      layer1_bias_decay(0),
      layer2_weight_decay(0),
      layer2_bias_decay(0),
      output_layer_weight_decay(0),
      output_layer_bias_decay(0),
      direct_in_to_out_weight_decay(0),
      direct_in_to_out(false),
      output_transfer_func(""),
      iseed(-1),
      semisupervised_flatten_factor(1),
      batch_size(1),
      nepochs(10000),
      saveparams("")
{}

NeuralNet::~NeuralNet()
{
}
void NeuralNet::declareOptions(OptionList& ol)
{
    declareOption(ol, "nhidden", &NeuralNet::nhidden, OptionBase::buildoption,
                  "    number of hidden units in first hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "nhidden2", &NeuralNet::nhidden2, OptionBase::buildoption,
                  "    number of hidden units in second hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "weight_decay", &NeuralNet::weight_decay, OptionBase::buildoption,
                  "    global weight decay for all layers\n");

    declareOption(ol, "bias_decay", &NeuralNet::bias_decay, OptionBase::buildoption,
                  "    global bias decay for all layers\n");

    declareOption(ol, "layer1_weight_decay", &NeuralNet::layer1_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the first hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer1_bias_decay", &NeuralNet::layer1_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the first hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "layer2_weight_decay", &NeuralNet::layer2_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the second hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer2_bias_decay", &NeuralNet::layer2_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the second hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "output_layer_weight_decay", &NeuralNet::output_layer_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the output layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "output_layer_bias_decay", &NeuralNet::output_layer_bias_decay, OptionBase::buildoption,
                  "    Additional bias decay for the output layer. Is added to 'bias_decay'.\n");

    declareOption(ol, "direct_in_to_out_weight_decay", &NeuralNet::direct_in_to_out_weight_decay, OptionBase::buildoption,
                  "    Additional weight decay for the direct in-to-out layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "direct_in_to_out", &NeuralNet::direct_in_to_out, OptionBase::buildoption,
                  "    should we include direct input to output connections?\n");

    declareOption(ol, "output_transfer_func", &NeuralNet::output_transfer_func, OptionBase::buildoption,
                  "    what transfer function to use for the output layer? \n"
                  "    one of: tanh, sigmoid, softplus, exp, softmax, log_softmax \n"
                  "    an empty string means no output transfer function \n");

    declareOption(ol, "seed", &NeuralNet::iseed, OptionBase::buildoption,
                  "    Seed for the random number generator used to initialize parameters. If -1 then use time of day.\n");

    declareOption(ol, "cost_funcs", &NeuralNet::cost_funcs, OptionBase::buildoption,
                  "    a list of cost functions to use\n"
                  "    in the form \"[ cf1; cf2; cf3; ... ]\" where each function is one of: \n"
                  "      mse (for regression)\n"
                  "      mse_onehot (for classification)\n"
                  "      NLL (negative log likelihood -log(p[c]) for classification) \n"
                  "      class_error (classification error) \n"
                  "      multiclass_error \n"
                  "      cross_entropy \n"
                  "      semisupervised_prob_class\n"
                  "    The first function of the list will be used as \n"
                  "    the objective function to optimize \n"
                  "    (possibly with an added weight decay penalty) \n"
                  "    If semisupervised_prob_class is chosen, then the options\n"
                  "    semisupervised_{flatten_factor,prior} will be used. Note that\n"
                  "    the output_transfer_func should be the softmax, in that case.\n");

    declareOption(ol, "semisupervised_flatten_factor", &NeuralNet::semisupervised_flatten_factor, OptionBase::buildoption,
                  "    Hyper-parameter of the semi-supervised criterion for probabilistic classifiers\n");

    declareOption(ol, "semisupervised_prior", &NeuralNet::semisupervised_prior, OptionBase::buildoption,
                  "    Hyper-parameter of the semi-supervised criterion = prior classes probabilities\n");

    declareOption(ol, "optimizer", &NeuralNet::optimizer, OptionBase::buildoption,
                  "    specify the optimizer to use\n");

    declareOption(ol, "batch_size", &NeuralNet::batch_size, OptionBase::buildoption,
                  "    how many samples to use to estimate the average gradient before updating the weights\n"
                  "    0 is equivalent to specifying training_set->length() \n"
                  "    NOTE: this overrides the optimizer's 'n_updates' and 'every_iterations'.\n");

    declareOption(ol, "nepochs", &NeuralNet::nepochs, OptionBase::buildoption,
                  "    how many times the optimizer gets to see the whole training set.\n");

    declareOption(ol, "paramsvalues", &NeuralNet::paramsvalues, OptionBase::learntoption,
                  "    The learned parameter vector\n");

    declareOption(ol, "saveparams", &NeuralNet::saveparams, OptionBase::learntoption,
                  "    This string, if not empty, indicates where in the expdir directory\n"
                  "    to save the final paramsvalues\n");

    declareOption(ol, "normalization", &NeuralNet::normalization, OptionBase::buildoption,
                  "    The normalization to be applied to the data\n");

    inherited::declareOptions(ol);
}
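// For example, a typical classification setup (following the help strings
// above) would use output_transfer_func = "softmax" together with
// cost_funcs = [ NLL; class_error ]: NLL is then the optimized objective
// (plus any weight decay penalty) and class_error is only reported.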
void NeuralNet::build()
{
    inherited::build();
    build_();
}
void NeuralNet::build_()
{
    // Input variable, optionally standardized with the 'normalization' matrices.
    input = Var(inputsize(), "input");
    if (normalization.length()) {
        Var means(normalization[0]);
        Var stddevs(normalization[1]);
        output = (input - means) / stddevs;
    }
    else
        output = input;
    params.resize(0);

    // First hidden layer.
    if(nhidden>0)
    {
        w1 = Var(1+inputsize(), nhidden, "w1");
        output = tanh(affine_transform(output, w1));
        params.append(w1);
    }

    // Second hidden layer.
    if(nhidden2>0)
    {
        w2 = Var(1+nhidden, nhidden2, "w2");
        output = tanh(affine_transform(output, w2));
        params.append(w2);
    }

    // Output layer.
    wout = Var(1+output->size(), outputsize(), "wout");
    output = affine_transform(output, wout);
    params.append(wout);

    // Optional direct input-to-output connections.
    if(direct_in_to_out)
    {
        wdirect = Var(inputsize(), outputsize(), "wdirect");
        output += transposeProduct(wdirect, input);
        params.append(wdirect);
    }

    // Output transfer function.
    if(output_transfer_func!="")
    {
        if(output_transfer_func=="tanh")
            output = tanh(output);
        else if(output_transfer_func=="sigmoid")
            output = sigmoid(output);
        else if(output_transfer_func=="softplus")
            output = softplus(output);
        else if(output_transfer_func=="exp")
            output = exp(output);
        else if(output_transfer_func=="softmax")
            output = softmax(output);
        else if(output_transfer_func=="log_softmax")
            output = log_softmax(output);
        else
            PLERROR("In NeuralNet::build_() unknown output_transfer_func option: %s",
                    output_transfer_func.c_str());
    }
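    // The target part of each training row may be followed by weightsize()
    // cost weights; the checks and SubMatVariables below split the
    // 'target_and_weights' Var accordingly.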
    if(weightsize() != 0 && weightsize() != 1 && targetsize()/2 != weightsize())
        PLERROR("In NeuralNet::build_() weightsize must be either:\n"
                "\t0: no weights on costs\n"
                "\t1: single weight applied on total cost\n"
                "\ttargetsize/2: vector of weights applied individually to each component of the cost\n"
                "weightsize= %d; targetsize= %d.", weightsize(), targetsize());

    target_and_weights = Var(targetsize(), "target_and_weights");
    target = new SubMatVariable(target_and_weights, 0, 0, targetsize()-weightsize(), 1);
    target->setName("target");
    if(0 < weightsize())
    {
        costweights = new SubMatVariable(target_and_weights, targetsize()-weightsize(), 0, weightsize(), 1);
        costweights->setName("costweights");
    }

    int ncosts = cost_funcs.size();
    if(ncosts<=0)
        PLERROR("In NeuralNet::build_() Empty cost_funcs : must at least specify the cost function to optimize!");
    costs.resize(ncosts);

    for(int k=0; k<ncosts; k++)
    {
        bool handles_missing_target = false;
        if(cost_funcs[k]=="mse")
        {
            if(weightsize() < 2)
                costs[k] = sumsquare(output-target);
            else
                costs[k] = weighted_sumsquare(output-target, costweights);
        }
        else if(cost_funcs[k]=="mse_onehot")
            costs[k] = onehot_squared_loss(output, target);
        else if(cost_funcs[k]=="NLL")
        {
            if(output_transfer_func=="log_softmax")
                costs[k] = -output[target];
            else
                costs[k] = neg_log_pi(output, target);
        }
        else if(cost_funcs[k]=="class_error")
            costs[k] = classification_loss(output, target);
        else if(cost_funcs[k]=="multiclass_error")
        {
            if(weightsize() < 2)
                costs[k] = multiclass_loss(output, target);
            else
                PLERROR("In NeuralNet::build() weighted multiclass error cost not implemented.");
        }
        else if(cost_funcs[k]=="cross_entropy")
        {
            if(weightsize() < 2)
                costs[k] = cross_entropy(output, target);
            else
                PLERROR("In NeuralNet::build() weighted cross entropy cost not implemented.");
        }
        else if(cost_funcs[k]=="semisupervised_prob_class")
        {
            if(output_transfer_func!="softmax")
                PLWARNING("To properly use the semisupervised_prob_class criterion, the transfer function "
                          "should probably be a softmax, to guarantee positive probabilities summing to 1");
            if(semisupervised_prior.length()==0)
            {
                semisupervised_prior.resize(outputsize());
                semisupervised_prior.fill(1.0);
            }
            costs[k] = new SemiSupervisedProbClassCostVariable(output, target,
                                                               new SourceVariable(semisupervised_prior),
                                                               semisupervised_flatten_factor);
            handles_missing_target = true;
        }
        else
        {
            costs[k] = dynamic_cast<Variable*>(newObject(cost_funcs[k]));
            if(costs[k].isNull())
                PLERROR("In NeuralNet::build_() unknown cost_func option: %s", cost_funcs[k].c_str());
            if(weightsize() < 2)
                costs[k]->setParents(output & target);
            else
                costs[k]->setParents(output & target & costweights);
            costs[k]->build();
        }

        if(1 == weightsize())
            costs[k] = costs[k] * costweights;

        // Unless the cost handles missing targets itself, a missing target
        // yields a missing cost.
        if(!handles_missing_target)
            costs[k] = ifThenElse(isMissing(target), var(MISSING_VALUE), costs[k]);
    }
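    // Weight decay penalties: each layer gets the global decay plus its own
    // additional decay.  The final 'cost' Var puts the training objective
    // (first declared cost plus penalties) in its first element, followed by
    // every declared cost.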
    VarArray penalties;
    if(w1 && ((layer1_weight_decay + weight_decay)!=0 || (layer1_bias_decay + bias_decay)!=0))
        penalties.append(affine_transform_weight_penalty(w1, (layer1_weight_decay + weight_decay),
                                                         (layer1_bias_decay + bias_decay)));
    if(w2 && ((layer2_weight_decay + weight_decay)!=0 || (layer2_bias_decay + bias_decay)!=0))
        penalties.append(affine_transform_weight_penalty(w2, (layer2_weight_decay + weight_decay),
                                                         (layer2_bias_decay + bias_decay)));
    if(wout && ((output_layer_weight_decay + weight_decay)!=0 || (output_layer_bias_decay + bias_decay)!=0))
        penalties.append(affine_transform_weight_penalty(wout, (output_layer_weight_decay + weight_decay),
                                                         (output_layer_bias_decay + bias_decay)));
    if(wdirect && (direct_in_to_out_weight_decay + weight_decay) != 0)
        penalties.append(sumsquare(wdirect)*(direct_in_to_out_weight_decay + weight_decay));

    if(penalties.size() != 0)
        cost = hconcat(sum(hconcat(costs[0] & penalties)) & costs);
    else
        cost = hconcat(costs[0] & costs);

    cost->setName("cost");
    output->setName("output");

    // Reuse saved parameter values if they match the current architecture,
    // otherwise (re)initialize the parameters.
    if((bool)(paramsvalues) && (paramsvalues.size() == params.nelems()))
    {
        params << paramsvalues;
        initial_paramsvalues.resize(paramsvalues.length());
        initial_paramsvalues << paramsvalues;
    }
    else
    {
        paramsvalues.resize(params.nelems());
        initializeParams();
    }
    params.makeSharedValue(paramsvalues);

    // Functions mapping input -> output, (input, target) -> (output, cost)
    // and (output, target) -> cost.
    f = Func(input, output);
    costf = Func(input & target_and_weights, output & cost);
    costf->recomputeParents();
    output_and_target_to_cost = Func(output & target_and_weights, cost);
    output_and_target_to_cost->recomputeParents();
}
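// Rough usage sketch (illustrative, not a tested recipe): create a NeuralNet,
// set its build options (nhidden, output_transfer_func, cost_funcs, optimizer,
// nepochs, ...), call build(), then call train() on a training VMat.
// Predictions and costs can then be obtained with use() / useAndCost() below.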
Array<string> NeuralNet::costNames() const
{
    return (cost_funcs[0]+"+penalty") & cost_funcs;
}

int NeuralNet::costsize() const
{ return cost->size(); }
void NeuralNet::train(VMat training_set)
{
    setTrainingSet(training_set);
    int l = training_set->length();
    int nsamples = batch_size>0 ? batch_size : l;
    Func paramf = Func(input & target_and_weights, cost);
    Var totalcost = meanOf(training_set, paramf, nsamples);
    optimizer->setToOptimize(params, totalcost);
    optimizer->nupdates = (nepochs*l)/nsamples;
    optimizer->every = l/nsamples;
    optimizer->addMeasurer(*this);
    optimizer->build();
    optimizer->optimize();

    output_and_target_to_cost->recomputeParents();
    costf->recomputeParents();

    setTrainCost(totalcost->value);
    if (saveparams!="")
        PLearn::save(expdir+saveparams, paramsvalues);
}
void NeuralNet::initializeParams()
{
    if (iseed<0)
        seed();
    else
        manual_seed(iseed);

    // Initialize weights with small random values whose scale shrinks with
    // the fan-in of each layer, and zero the bias rows (row 0 of each matrix).
    real delta = 1./inputsize();
    if(nhidden>0)
    {
        fill_random_normal(w1->value, 0, delta);
        if(direct_in_to_out)
        {
            fill_random_normal(wdirect->value, 0, delta);
            wdirect->matValue(0).clear();
        }
        delta = 1./nhidden;
        w1->matValue(0).clear();
    }
    if(nhidden2>0)
    {
        fill_random_normal(w2->value, 0, delta);
        delta = 1./nhidden2;
        w2->matValue(0).clear();
    }
    fill_random_normal(wout->value, 0, delta);
    wout->matValue(0).clear();
}
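// The remaining methods are thin wrappers around the Funcs built in build_():
// use() computes the network output for an input, useAndCost() additionally
// evaluates the costs, and computeCost() evaluates the costs from an already
// computed output.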
void NeuralNet::use(const Vec& in, Vec& prediction)
{
    f->fprop(in, prediction);
}

void NeuralNet::useAndCost(const Vec& inputvec, const Vec& targetvec, Vec outputvec, Vec costvec)
{
    costf->fprop(inputvec&targetvec, outputvec&costvec);
}

void NeuralNet::computeCost(const Vec& inputvec, const Vec& targetvec, const Vec& outputvec, const Vec& costvec)
{
    output_and_target_to_cost->fprop(outputvec&targetvec, costvec);
}

void NeuralNet::forget()
{
    if(initial_paramsvalues)
        params << initial_paramsvalues;
    else
        initializeParams();
    inherited::forget();
}

void NeuralNet::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(optimizer, copies);
}

} // end of namespace PLearn