#include <plearn/var/AffineTransformVariable.h>
#include <plearn/var/AffineTransformWeightPenalty.h>
#include <plearn/var/BinaryClassificationLossVariable.h>
#include <plearn/var/ClassificationLossVariable.h>
#include <plearn/var/ColumnSumVariable.h>
#include <plearn/var/ConcatColumnsVariable.h>
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/var/CrossEntropyVariable.h>
#include <plearn/var/DotProductVariable.h>
#include <plearn/var/ExpVariable.h>
#include <plearn/var/InvertElementsVariable.h>
#include <plearn/var/LogVariable.h>
#include <plearn/var/LiftOutputVariable.h>
#include <plearn/var/LogSoftmaxVariable.h>
#include <plearn/var/MinusVariable.h>
#include <plearn/var/MulticlassLossVariable.h>
#include <plearn/var/NegateElementsVariable.h>
#include <plearn/var/NegCrossEntropySigmoidVariable.h>
#include "NeighborhoodSmoothnessNNet.h"
#include <plearn/var/OneHotSquaredLoss.h>
#include <plearn/base/ProgressBar.h>
#include <plearn/math/random.h>
#include <plearn/var/SigmoidVariable.h>
#include <plearn/var/SoftmaxVariable.h>
#include <plearn/var/SoftplusVariable.h>
#include <plearn/var/SumVariable.h>
#include <plearn/var/SumAbsVariable.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/SumOverBagsVariable.h>
#include <plearn/var/SumSquareVariable.h>
#include <plearn/var/SubMatVariable.h>
#include <plearn/var/SubMatTransposeVariable.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/TimesVariable.h>
#include <plearn/var/TimesScalarVariable.h>
#include <plearn/var/TransposeProductVariable.h>
#include <plearn/var/UnfoldedFuncVariable.h>
#include <plearn/var/UnfoldedSumOfVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/var/Var_utils.h>

namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(
    NeighborhoodSmoothnessNNet,
    "Feedforward neural network whose hidden units are smoothed according to input neighborhood\n",
    "TODO");

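// In brief: this learner is a standard feedforward neural network (one or two
// hidden layers, optional direct input-to-output connections, the usual cost
// functions and weight decays) trained on "bags" of instances. For each bag,
// the kernel similarities between the first instance's hidden representation
// and those of the other instances are encouraged, through an SNE-style
// cross-entropy term weighted by sne_weight, to match the input-space
// neighborhood probabilities p_ij supplied with the data (see build_() below
// for the exact formulation).
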
NeighborhoodSmoothnessNNet::NeighborhoodSmoothnessNNet()
    : test_bag_size(0),
      max_n_instances(1),
      nhidden(0),
      nhidden2(0),
      noutputs(0),
      sigma_hidden(0.1),
      sne_weight(0),
      weight_decay(0),
      bias_decay(0),
      layer1_weight_decay(0),
      layer1_bias_decay(0),
      layer2_weight_decay(0),
      layer2_bias_decay(0),
      output_layer_weight_decay(0),
      output_layer_bias_decay(0),
      direct_in_to_out_weight_decay(0),
      L1_penalty(false),
      direct_in_to_out(false),
      output_transfer_func(""),
      interval_minval(0), interval_maxval(1),
      batch_size(1)
{}

NeighborhoodSmoothnessNNet::~NeighborhoodSmoothnessNNet()
{
}

void NeighborhoodSmoothnessNNet::declareOptions(OptionList& ol)
{
    declareOption(ol, "max_n_instances", &NeighborhoodSmoothnessNNet::max_n_instances, OptionBase::buildoption,
                  "  maximum number of instances (input vectors x_i) allowed\n");

    declareOption(ol, "nhidden", &NeighborhoodSmoothnessNNet::nhidden, OptionBase::buildoption,
                  "  number of hidden units in first hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "nhidden2", &NeighborhoodSmoothnessNNet::nhidden2, OptionBase::buildoption,
                  "  number of hidden units in second hidden layer (0 means no hidden layer)\n");

    declareOption(ol, "sne_weight", &NeighborhoodSmoothnessNNet::sne_weight, OptionBase::buildoption,
                  "  The weight of the SNE cost in the total cost optimized.\n");

    declareOption(ol, "sigma_hidden", &NeighborhoodSmoothnessNNet::sigma_hidden, OptionBase::buildoption,
                  "  The bandwidth of the Gaussian kernel used to compute the similarity\n"
                  "  between hidden layers.\n");

    declareOption(ol, "noutputs", &NeighborhoodSmoothnessNNet::noutputs, OptionBase::buildoption,
                  "  number of output units. This gives this learner its outputsize.\n"
                  "  It is typically of the same dimensionality as the target for regression problems,\n"
                  "  but for classification problems where the target is just the class number, noutputs\n"
                  "  is usually the number of classes (as we want to output a score or probability\n"
                  "  vector, one per class).\n");

    declareOption(ol, "weight_decay", &NeighborhoodSmoothnessNNet::weight_decay, OptionBase::buildoption,
                  "  global weight decay for all layers\n");

    declareOption(ol, "bias_decay", &NeighborhoodSmoothnessNNet::bias_decay, OptionBase::buildoption,
                  "  global bias decay for all layers\n");

    declareOption(ol, "layer1_weight_decay", &NeighborhoodSmoothnessNNet::layer1_weight_decay, OptionBase::buildoption,
                  "  Additional weight decay for the first hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer1_bias_decay", &NeighborhoodSmoothnessNNet::layer1_bias_decay, OptionBase::buildoption,
                  "  Additional bias decay for the first hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "layer2_weight_decay", &NeighborhoodSmoothnessNNet::layer2_weight_decay, OptionBase::buildoption,
                  "  Additional weight decay for the second hidden layer. Is added to weight_decay.\n");

    declareOption(ol, "layer2_bias_decay", &NeighborhoodSmoothnessNNet::layer2_bias_decay, OptionBase::buildoption,
                  "  Additional bias decay for the second hidden layer. Is added to bias_decay.\n");

    declareOption(ol, "output_layer_weight_decay", &NeighborhoodSmoothnessNNet::output_layer_weight_decay, OptionBase::buildoption,
                  "  Additional weight decay for the output layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "output_layer_bias_decay", &NeighborhoodSmoothnessNNet::output_layer_bias_decay, OptionBase::buildoption,
                  "  Additional bias decay for the output layer. Is added to 'bias_decay'.\n");

    declareOption(ol, "direct_in_to_out_weight_decay", &NeighborhoodSmoothnessNNet::direct_in_to_out_weight_decay, OptionBase::buildoption,
                  "  Additional weight decay for the direct in-to-out layer. Is added to 'weight_decay'.\n");

    declareOption(ol, "L1_penalty", &NeighborhoodSmoothnessNNet::L1_penalty, OptionBase::buildoption,
                  "  should we use L1 penalty instead of the default L2 penalty on the weights?\n");

    declareOption(ol, "direct_in_to_out", &NeighborhoodSmoothnessNNet::direct_in_to_out, OptionBase::buildoption,
                  "  should we include direct input to output connections?\n");

    declareOption(ol, "output_transfer_func", &NeighborhoodSmoothnessNNet::output_transfer_func, OptionBase::buildoption,
                  "  what transfer function to use for the output layer?\n"
                  "  one of: tanh, sigmoid, exp, softplus, softmax\n"
                  "  or interval(<minval>,<maxval>), which stands for\n"
                  "  <minval>+(<maxval>-<minval>)*sigmoid(.).\n"
                  "  An empty string or \"none\" means no output transfer function.\n");

    declareOption(ol, "cost_funcs", &NeighborhoodSmoothnessNNet::cost_funcs, OptionBase::buildoption,
                  "  a list of cost functions to use\n"
                  "  in the form \"[ cf1; cf2; cf3; ... ]\" where each function is one of:\n"
                  "    mse (for regression)\n"
                  "    mse_onehot (for classification)\n"
                  "    NLL (negative log likelihood -log(p[c]) for classification)\n"
                  "    class_error (classification error)\n"
                  "    binary_class_error (classification error for a 0-1 binary classifier)\n"
                  "    multiclass_error\n"
                  "    cross_entropy (for binary classification)\n"
                  "    stable_cross_entropy (more accurate backprop and possible regularization, for binary classification)\n"
                  "    lift_output (not a real cost function, just the output for lift computation)\n"
                  "  The first function of the list will be used as\n"
                  "  the objective function to optimize\n"
                  "  (possibly with an added weight decay penalty).\n");

    declareOption(ol, "classification_regularizer", &NeighborhoodSmoothnessNNet::classification_regularizer, OptionBase::buildoption,
                  "  used only in the stable_cross_entropy cost function, to fight overfitting (0<=r<1)\n");

    declareOption(ol, "optimizer", &NeighborhoodSmoothnessNNet::optimizer, OptionBase::buildoption,
                  "  specify the optimizer to use\n");

    declareOption(ol, "batch_size", &NeighborhoodSmoothnessNNet::batch_size, OptionBase::buildoption,
                  "  how many samples to use to estimate the average gradient before updating the weights\n"
                  "  0 is equivalent to specifying training_set->n_non_missing_rows()\n");

    declareOption(ol, "paramsvalues", &NeighborhoodSmoothnessNNet::paramsvalues, OptionBase::learntoption,
                  "  The learned parameter vector\n");

    inherited::declareOptions(ol);
}

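// A minimal, hypothetical usage sketch (not part of the library). Option
// values are arbitrary; 'trainset' is assumed to be a VMat already laid out
// in the bag format this learner expects (an extra p_ij input column and the
// bag tag in the last target column), and GradientOptimizer/VecStatsCollector
// are just one possible choice of PLearn optimizer and statistics collector:
//
//     PP<NeighborhoodSmoothnessNNet> net = new NeighborhoodSmoothnessNNet();
//     net->nhidden      = 20;
//     net->noutputs     = 1;
//     net->sne_weight   = 0.1;
//     net->sigma_hidden = 0.5;
//     net->cost_funcs   = TVec<string>(1, "stable_cross_entropy");
//     net->optimizer    = new GradientOptimizer();
//     net->nstages      = 50;
//     net->build();
//     net->setTrainingSet(trainset);
//     net->setTrainStatsCollector(new VecStatsCollector());
//     net->train();
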
void NeighborhoodSmoothnessNNet::build()
{
    inherited::build();
    build_();
}

void NeighborhoodSmoothnessNNet::build_()
{
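    // Each training example is a "bag" of up to max_n_instances rows.
    // bag_inputs is a (max_n_instances x (inputsize()+1)) matrix: the first
    // inputsize() columns hold the instances' input vectors, and the extra
    // last column holds the neighborhood probabilities p_ij of each neighbor
    // (rows 1..n-1) with respect to the first row, which is the instance
    // whose output and hidden representation are actually computed. The last
    // target column of the dataset carries the bag-delimiting tag consumed by
    // SumOverBagsVariable, which is why the 'target' Var below has
    // targetsize()-1 elements.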
    if(inputsize_>=0 && targetsize_>=0 && weightsize_>=0)
    {
        int true_inputsize = inputsize();
        bag_inputs = Var(max_n_instances, inputsize() + 1);
        Var input_and_pij = subMat(bag_inputs, 0, 0, 1, bag_inputs->width());
        input = new SubMatTransposeVariable(input_and_pij, 0, 0, 1, true_inputsize);
        output = input;
        params.resize(0);

        if(nhidden>0)
        {
            w1 = Var(1 + true_inputsize, nhidden, "w1");
            output = tanh(affine_transform(output, w1));
            params.append(w1);
            last_hidden = output;
        }

        if(nhidden2>0)
        {
            w2 = Var(1 + nhidden, nhidden2, "w2");
            output = tanh(affine_transform(output, w2));
            params.append(w2);
            last_hidden = output;
        }

        if (nhidden==0)
            PLERROR("NeighborhoodSmoothnessNNet: there must be hidden units!");

        wout = Var(1 + output->size(), outputsize(), "wout");
        output = affine_transform(output, wout);
        params.append(wout);

        if(direct_in_to_out)
        {
            wdirect = Var(true_inputsize, outputsize(), "wdirect");
            output += transposeProduct(wdirect, input);
            params.append(wdirect);
        }

        Var before_transfer_func = output;

        string::size_type p = 0;
        if(output_transfer_func!="" && output_transfer_func!="none")
        {
            if(output_transfer_func=="tanh")
                output = tanh(output);
            else if(output_transfer_func=="sigmoid")
                output = sigmoid(output);
            else if(output_transfer_func=="softplus")
                output = softplus(output);
            else if(output_transfer_func=="exp")
                output = exp(output);
            else if(output_transfer_func=="softmax")
                output = softmax(output);
            else if (output_transfer_func == "log_softmax")
                output = log_softmax(output);
            else if ((p=output_transfer_func.find("interval"))!=string::npos)
            {
                string::size_type q = output_transfer_func.find(",");
                interval_minval = atof(output_transfer_func.substr(p+9, q-(p+9)).c_str());
                string::size_type r = output_transfer_func.find(")");
                interval_maxval = atof(output_transfer_func.substr(q+1, r-(q+1)).c_str());
                output = interval_minval + (interval_maxval - interval_minval) * sigmoid(output);
            }
            else
                PLERROR("In NNet::build_() unknown output_transfer_func option: %s", output_transfer_func.c_str());
        }

        target = Var(targetsize()-1, "target");

        if(weightsize_>0)
        {
            if (weightsize_!=1)
                PLERROR("NeighborhoodSmoothnessNNet: expected weightsize to be 1 or 0 (or unspecified = -1, meaning 0), got %d", weightsize_);
            sampleweight = Var(1, "weight");
        }

        penalties.resize(0);
        if(w1 && ((layer1_weight_decay + weight_decay)!=0 || (layer1_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w1, (layer1_weight_decay + weight_decay), (layer1_bias_decay + bias_decay), L1_penalty));
        if(w2 && ((layer2_weight_decay + weight_decay)!=0 || (layer2_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w2, (layer2_weight_decay + weight_decay), (layer2_bias_decay + bias_decay), L1_penalty));
        if(wout && ((output_layer_weight_decay + weight_decay)!=0 || (output_layer_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(wout, (output_layer_weight_decay + weight_decay),
                                                             (output_layer_bias_decay + bias_decay), L1_penalty));
        if(wdirect && (direct_in_to_out_weight_decay + weight_decay) != 0)
        {
            if (L1_penalty)
                penalties.append(sumabs(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
            else
                penalties.append(sumsquare(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
        }

        if(paramsvalues && (paramsvalues.size() == params.nelems()))
            params << paramsvalues;
        else
        {
            paramsvalues.resize(params.nelems());
            initializeParams();
        }
        params.makeSharedValue(paramsvalues);

        output->setName("element output");

        f = Func(input, output);
        f_input_to_hidden = Func(input, last_hidden);

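        // SNE-like smoothness penalty. The hidden representation of every
        // instance in the bag is computed with f_input_to_hidden. Row 0's
        // hidden vector h_0 is compared to each neighbor's hidden vector h_j
        // through a Gaussian kernel
        //     k_j = exp(-||h_0 - h_j||^2 / sigma_hidden^2),
        // normalized into q_ij = k_j / sum_l k_l. The penalty added to the
        // training objective is  -sne_weight * sum_j p_ij * log(q_ij),
        // which pushes the hidden-layer neighborhoods q_ij towards the
        // input-space neighborhood probabilities p_ij supplied in the data.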
        bag_size = Var(1,1);
        bag_hidden = unfoldedFunc(subMat(bag_inputs, 0, 0, bag_inputs.length(), true_inputsize), f_input_to_hidden, false);
        p_ij = subMat(bag_inputs, 1, true_inputsize, bag_inputs->length() - 1, 1);

        Var hidden_0 = new SubMatTransposeVariable(bag_hidden, 0, 0, 1, bag_hidden->width());
        Var store_hidden(last_hidden.length(), last_hidden.width());
        Var hidden_0_minus_hidden = minus(hidden_0, store_hidden);
        Var k_hidden =
            exp(
                timesScalar(
                    dot(hidden_0_minus_hidden, hidden_0_minus_hidden),
                    var(- 1 / (sigma_hidden * sigma_hidden))
                )
            );
        Func f_hidden_to_k_hidden(store_hidden, k_hidden);
        Var k_hidden_all =
            unfoldedFunc(
                subMat(
                    bag_hidden, 1, 0, bag_hidden->length() - 1, bag_hidden->width()
                ),
                f_hidden_to_k_hidden,
                false
            );
        Var one_over_sum_of_k_hidden = invertElements(sum(k_hidden_all));
        Var log_q_ij = log(timesScalar(k_hidden_all, one_over_sum_of_k_hidden));
        Var minus_weight_sum_p_ij_log_q_ij =
            timesScalar(sum(times(p_ij, log_q_ij)), var(- sne_weight));

        int ncosts = cost_funcs.size();
        if(ncosts<=0)
            PLERROR("In NNet::build_() Empty cost_funcs : must at least specify the cost function to optimize!");
        costs.resize(ncosts);

        for(int k=0; k<ncosts; k++)
        {
            if(cost_funcs[k]=="mse")
                costs[k] = sumsquare(output - target);
            else if(cost_funcs[k]=="mse_onehot")
                costs[k] = onehot_squared_loss(output, target);
            else if(cost_funcs[k]=="NLL")
            {
                if (output->size() == 1) {
                    costs[k] = cross_entropy(output, target);
                } else {
                    if (output_transfer_func == "log_softmax")
                        costs[k] = -output[target];
                    else
                        costs[k] = neg_log_pi(output, target);
                }
            }
            else if(cost_funcs[k]=="class_error")
                costs[k] = classification_loss(output, target);
            else if(cost_funcs[k]=="binary_class_error")
                costs[k] = binary_classification_loss(output, target);
            else if(cost_funcs[k]=="multiclass_error")
                costs[k] = multiclass_loss(output, target);
            else if(cost_funcs[k]=="cross_entropy")
                costs[k] = cross_entropy(output, target);
            else if (cost_funcs[k]=="stable_cross_entropy") {
                Var c = stable_cross_entropy(before_transfer_func, target);
                costs[k] = c;
                if (classification_regularizer) {
                    dynamic_cast<NegCrossEntropySigmoidVariable*>((Variable*) c)->
                        setRegularizer(classification_regularizer);
                }
            }
            else if (cost_funcs[k]=="lift_output")
                costs[k] = lift_output(output, target);
            else
            {
                costs[k] = dynamic_cast<Variable*>(newObject(cost_funcs[k]));
                if(costs[k].isNull())
                    PLERROR("In NNet::build_() unknown cost_func option: %s", cost_funcs[k].c_str());
                costs[k]->setParents(output & target);
                costs[k]->build();
            }
        }

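        // The training cost vector assembled below: its first element is the
        // objective actually optimized (costs[0] plus any weight-decay
        // penalties, multiplied by the sample weight if one is provided, plus
        // the SNE smoothness term), and the remaining elements are the plain
        // test costs, kept for monitoring.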
        test_costs = hconcat(costs);

        Var test_costs_final = test_costs;
        Var first_cost_final = costs[0];
        if (penalties.size() != 0) {
            first_cost_final = sum(hconcat(first_cost_final & penalties));
        }
        if (weightsize_ > 0) {
            test_costs_final = sampleweight * test_costs;
            first_cost_final = sampleweight * first_cost_final;
        }

        first_cost_final = first_cost_final + minus_weight_sum_p_ij_log_q_ij;

        training_cost = hconcat(first_cost_final & test_costs_final);

        training_cost->setName("training_cost");
        test_costs->setName("test_costs");

        if (weightsize_ > 0) {
            invars = bag_inputs & bag_size & target & sampleweight;
        } else {
            invars = bag_inputs & bag_size & target;
        }
        invars_to_training_cost = Func(invars, training_cost);

        invars_to_training_cost->recomputeParents();

        VarArray outvars;
        VarArray testinvars;
        testinvars.push_back(input);
        outvars.push_back(output);
        testinvars.push_back(target);
        outvars.push_back(target);

        test_costf = Func(testinvars, output & test_costs);
        test_costf->recomputeParents();
        output_and_target_to_cost = Func(outvars, test_costs);
        output_and_target_to_cost->recomputeParents();
    }
}

int NeighborhoodSmoothnessNNet::outputsize() const
{
    return noutputs;
}

TVec<string> NeighborhoodSmoothnessNNet::getTrainCostNames() const
{
    return (cost_funcs[0] + "+penalty+SNE") & cost_funcs;
}

TVec<string> NeighborhoodSmoothnessNNet::getTestCostNames() const
{
    return cost_funcs;
}

void NeighborhoodSmoothnessNNet::setTrainingSet(VMat training_set, bool call_forget)
{
    bool training_set_has_changed =
        !train_set
        || train_set->width()      != training_set->width()
        || train_set->length()     != training_set->length()
        || train_set->inputsize()  != training_set->inputsize()
        || train_set->weightsize() != training_set->weightsize()
        || train_set->targetsize() != training_set->targetsize();
    train_set = training_set;

    if (training_set_has_changed && inputsize_<0)
    {
        // The last input column holds the p_ij probabilities, not a real input.
        inputsize_ = train_set->inputsize() - 1;
        targetsize_ = train_set->targetsize();
        weightsize_ = train_set->weightsize();
    }
    else if (train_set->inputsize() - 1 != inputsize_) {
        PLERROR("In NeighborhoodSmoothnessNNet::setTrainingSet - You can't change the inputsize of the training set");
    }

    if (training_set_has_changed || call_forget)
        build();
    if (call_forget)
        forget();
}

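// train() optimizes the per-bag training cost. Bags are delimited in the
// dataset by the bag-information tag stored in the last target column (see
// SumOverBagsVariable): a row whose tag has the TARGET_COLUMN_FIRST bit set
// starts a new bag. The cost of one bag is obtained by feeding all of its
// rows to invars_to_training_cost, and sumOverBags(...) accumulates it over
// batch_size bags (over all bags when batch_size <= 0) before each optimizer
// update; one learner stage corresponds to one pass over the bags.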
void NeighborhoodSmoothnessNNet::train()
{
    if(!train_set)
        PLERROR("In NeighborhoodSmoothnessNNet::train, you did not setTrainingSet");

    if(!train_stats)
        PLERROR("In NeighborhoodSmoothnessNNet::train, you did not setTrainStatsCollector");

    if(f.isNull())
        build();

    int n_bags = -1;
    {
        n_bags = 0;
        int l = train_set->length();
        ProgressBar* pb = 0;
        if(report_progress)
            pb = new ProgressBar("Counting nb bags in train_set for NeighborhoodSmoothnessNNet", l);
        Vec row(train_set->width());
        int tag_column = train_set->inputsize() + train_set->targetsize() - 1;
        for (int i=0; i<l; i++) {
            train_set->getRow(i, row);
            if (int(row[tag_column]) & SumOverBagsVariable::TARGET_COLUMN_FIRST) {
                n_bags++;
            }
            if(pb)
                pb->update(i);
        }
        if(pb)
            delete pb;
    }

    int true_batch_size = batch_size;
    if (true_batch_size <= 0) {
        true_batch_size = n_bags;
    }

    Var totalcost = sumOverBags(train_set, invars_to_training_cost, max_n_instances, true_batch_size, true);

    int optstage_per_lstage = 0;
    if (batch_size<=0) {
        optstage_per_lstage = 1;
    } else {
        optstage_per_lstage = n_bags/batch_size;
    }

    if(optimizer) {
        optimizer->setToOptimize(params, totalcost);
        optimizer->build();
    }

    ProgressBar* pb = 0;
    if(report_progress)
        pb = new ProgressBar("Training NeighborhoodSmoothnessNNet from stage " + tostring(stage) + " to " + tostring(nstages), nstages-stage);

    int initial_stage = stage;
    bool early_stop = false;
    while(stage<nstages && !early_stop)
    {
        optimizer->nstages = optstage_per_lstage;
        train_stats->forget();
        optimizer->early_stop = false;
        optimizer->optimizeN(*train_stats);
        train_stats->finalize();
        if(verbosity>2)
            cout << "Epoch " << stage << " train objective: " << train_stats->getMean() << endl;
        ++stage;
        if(pb)
            pb->update(stage-initial_stage);
    }
    if(verbosity>1)
        cout << "EPOCH " << stage << " train objective: " << train_stats->getMean() << endl;

    if(pb)
        delete pb;

    output_and_target_to_cost->recomputeParents();
    test_costf->recomputeParents();
}

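// The compute* methods below operate on a single instance (one input row,
// without the p_ij column), using the per-instance functions f and test_costf
// built in build_(); the bag structure and the SNE term are only used at
// train time.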
void NeighborhoodSmoothnessNNet::computeOutput(const Vec& inputv, Vec& outputv) const
{
    f->fprop(inputv, outputv);
}

void NeighborhoodSmoothnessNNet::computeOutputAndCosts(const Vec& inputv, const Vec& targetv,
                                                       Vec& outputv, Vec& costsv) const
{
    test_costf->fprop(inputv&targetv, outputv&costsv);
}

void NeighborhoodSmoothnessNNet::computeCostsFromOutputs(const Vec& inputv, const Vec& outputv,
                                                         const Vec& targetv, Vec& costsv) const
{
    output_and_target_to_cost->fprop(outputv&targetv, costsv);
}

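// Parameter initialization: each layer's weights are drawn from a zero-mean
// normal distribution whose scale is 1 over the layer's fan-in (delta = 1/n),
// the first row of each weight matrix is then cleared, and the optimizer, if
// any, is reset.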
void NeighborhoodSmoothnessNNet::initializeParams()
{
    if (seed_>=0)
        manual_seed(seed_);
    else
        PLearn::seed();

    real delta = 1. / inputsize();

    if(nhidden>0)
    {
        fill_random_normal(w1->value, 0, delta);
        if(direct_in_to_out)
        {
            fill_random_normal(wdirect->value, 0, 0.01*delta);
            wdirect->matValue(0).clear();
        }
        delta = 1. / nhidden;
        w1->matValue(0).clear();
    }
    if(nhidden2>0)
    {
        fill_random_normal(w2->value, 0, delta);
        delta = 1. / nhidden2;
        w2->matValue(0).clear();
    }
    fill_random_normal(wout->value, 0, delta);
    wout->matValue(0).clear();

    if(optimizer)
        optimizer->reset();
}

void NeighborhoodSmoothnessNNet::forget()
{
    if (train_set)
        initializeParams();
    stage = 0;
}

void NeighborhoodSmoothnessNNet::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(input, copies);
    deepCopyField(target, copies);
    deepCopyField(sampleweight, copies);
    deepCopyField(w1, copies);
    deepCopyField(w2, copies);
    deepCopyField(wout, copies);
    deepCopyField(wdirect, copies);
    deepCopyField(last_hidden, copies);
    deepCopyField(output, copies);
    deepCopyField(bag_size, copies);
    deepCopyField(bag_inputs, copies);
    deepCopyField(bag_output, copies);
    deepCopyField(bag_hidden, copies);
    deepCopyField(invars_to_training_cost, copies);

    deepCopyField(costs, copies);
    deepCopyField(penalties, copies);
    deepCopyField(training_cost, copies);
    deepCopyField(test_costs, copies);
    deepCopyField(invars, copies);
    deepCopyField(params, copies);
    deepCopyField(paramsvalues, copies);

    deepCopyField(p_ij, copies);

    deepCopyField(f, copies);
    deepCopyField(f_input_to_hidden, copies);
    deepCopyField(test_costf, copies);
    deepCopyField(output_and_target_to_cost, copies);

    deepCopyField(cost_funcs, copies);

    deepCopyField(optimizer, copies);
}

} // end of namespace PLearn