
NeighborhoodSmoothnessNNet.cc

// -*- C++ -*-

// NeighborhoodSmoothnessNNet.cc
// Copyright (c) 1998-2002 Pascal Vincent
// Copyright (C) 1999-2002 Yoshua Bengio and University of Montreal
// Copyright (c) 2002 Jean-Sebastien Senecal, Xavier Saint-Mleux, Rejean Ducharme
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org


/* *******************************************************
 * $Id: NeighborhoodSmoothnessNNet.cc,v 1.15 2004/07/21 16:30:56 chrish42 Exp $
 ******************************************************* */

#include <plearn/var/AffineTransformVariable.h>
#include <plearn/var/AffineTransformWeightPenalty.h>
#include <plearn/var/BinaryClassificationLossVariable.h>
#include <plearn/var/ClassificationLossVariable.h>
#include <plearn/var/ColumnSumVariable.h>
#include <plearn/var/ConcatColumnsVariable.h>
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/var/CrossEntropyVariable.h>
#include <plearn/var/DotProductVariable.h>
#include <plearn/var/ExpVariable.h>
#include <plearn/var/InvertElementsVariable.h>
#include <plearn/var/LogVariable.h>
#include <plearn/var/LiftOutputVariable.h>
#include <plearn/var/LogSoftmaxVariable.h>
#include <plearn/var/MinusVariable.h>
#include <plearn/var/MulticlassLossVariable.h>
#include <plearn/var/NegateElementsVariable.h>
#include <plearn/var/NegCrossEntropySigmoidVariable.h>
#include "NeighborhoodSmoothnessNNet.h"
#include <plearn/var/OneHotSquaredLoss.h>
#include <plearn/base/ProgressBar.h>
#include <plearn/math/random.h>
#include <plearn/var/SigmoidVariable.h>
#include <plearn/var/SoftmaxVariable.h>
#include <plearn/var/SoftplusVariable.h>
#include <plearn/var/SumVariable.h>
#include <plearn/var/SumAbsVariable.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/SumOverBagsVariable.h>
#include <plearn/var/SumSquareVariable.h>
#include <plearn/var/SubMatVariable.h>
#include <plearn/var/SubMatTransposeVariable.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/TimesVariable.h>
#include <plearn/var/TimesScalarVariable.h>
#include <plearn/var/TransposeProductVariable.h>
#include <plearn/var/UnfoldedFuncVariable.h>
#include <plearn/var/UnfoldedSumOfVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/var/Var_utils.h>

//#include "DisplayUtils.h"
//#include "GradientOptimizer.h"

namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(NeighborhoodSmoothnessNNet,
                        "Feedforward neural network whose hidden units are smoothed according to input neighborhood\n",
                        "TODO"
);

NeighborhoodSmoothnessNNet::NeighborhoodSmoothnessNNet() // DEFAULT VALUES FOR ALL OPTIONS
    :
    test_bag_size(0),
    max_n_instances(1),
    nhidden(0),
    nhidden2(0),
    noutputs(0),
    sigma_hidden(0.1),
    sne_weight(0),
    weight_decay(0),
    bias_decay(0),
    layer1_weight_decay(0),
    layer1_bias_decay(0),
    layer2_weight_decay(0),
    layer2_bias_decay(0),
    output_layer_weight_decay(0),
    output_layer_bias_decay(0),
    direct_in_to_out_weight_decay(0),
    L1_penalty(false),
    direct_in_to_out(false),
    output_transfer_func(""),
    interval_minval(0), interval_maxval(1),
    batch_size(1)
{}

NeighborhoodSmoothnessNNet::~NeighborhoodSmoothnessNNet()
{
}

void NeighborhoodSmoothnessNNet::declareOptions(OptionList& ol)
{
    declareOption(ol,
"max_n_instances", &NeighborhoodSmoothnessNNet::max_n_instances, OptionBase::buildoption, 00129 " maximum number of instances (input vectors x_i) allowed\n"); 00130 00131 declareOption(ol, "nhidden", &NeighborhoodSmoothnessNNet::nhidden, OptionBase::buildoption, 00132 " number of hidden units in first hidden layer (0 means no hidden layer)\n"); 00133 00134 declareOption(ol, "nhidden2", &NeighborhoodSmoothnessNNet::nhidden2, OptionBase::buildoption, 00135 " number of hidden units in second hidden layer (0 means no hidden layer)\n"); 00136 00137 declareOption(ol, "sne_weight", &NeighborhoodSmoothnessNNet::sne_weight, OptionBase::buildoption, 00138 " The weight of the SNE cost in the total cost optimized."); 00139 00140 declareOption(ol, "sigma_hidden", &NeighborhoodSmoothnessNNet::sigma_hidden, OptionBase::buildoption, 00141 " The bandwidth of the Gaussian kernel used to compute the similarity\n" 00142 " between hidden layers."); 00143 00144 declareOption(ol, "noutputs", &NeighborhoodSmoothnessNNet::noutputs, OptionBase::buildoption, 00145 " number of output units. This gives this learner its outputsize.\n" 00146 " It is typically of the same dimensionality as the target for regression problems \n" 00147 " But for classification problems where target is just the class number, noutputs is \n" 00148 " usually of dimensionality number of classes (as we want to output a score or probability \n" 00149 " vector, one per class)"); 00150 00151 declareOption(ol, "weight_decay", &NeighborhoodSmoothnessNNet::weight_decay, OptionBase::buildoption, 00152 " global weight decay for all layers\n"); 00153 00154 declareOption(ol, "bias_decay", &NeighborhoodSmoothnessNNet::bias_decay, OptionBase::buildoption, 00155 " global bias decay for all layers\n"); 00156 00157 declareOption(ol, "layer1_weight_decay", &NeighborhoodSmoothnessNNet::layer1_weight_decay, OptionBase::buildoption, 00158 " Additional weight decay for the first hidden layer. Is added to weight_decay.\n"); 00159 declareOption(ol, "layer1_bias_decay", &NeighborhoodSmoothnessNNet::layer1_bias_decay, OptionBase::buildoption, 00160 " Additional bias decay for the first hidden layer. Is added to bias_decay.\n"); 00161 00162 declareOption(ol, "layer2_weight_decay", &NeighborhoodSmoothnessNNet::layer2_weight_decay, OptionBase::buildoption, 00163 " Additional weight decay for the second hidden layer. Is added to weight_decay.\n"); 00164 00165 declareOption(ol, "layer2_bias_decay", &NeighborhoodSmoothnessNNet::layer2_bias_decay, OptionBase::buildoption, 00166 " Additional bias decay for the second hidden layer. Is added to bias_decay.\n"); 00167 00168 declareOption(ol, "output_layer_weight_decay", &NeighborhoodSmoothnessNNet::output_layer_weight_decay, OptionBase::buildoption, 00169 " Additional weight decay for the output layer. Is added to 'weight_decay'.\n"); 00170 00171 declareOption(ol, "output_layer_bias_decay", &NeighborhoodSmoothnessNNet::output_layer_bias_decay, OptionBase::buildoption, 00172 " Additional bias decay for the output layer. Is added to 'bias_decay'.\n"); 00173 00174 declareOption(ol, "direct_in_to_out_weight_decay", &NeighborhoodSmoothnessNNet::direct_in_to_out_weight_decay, OptionBase::buildoption, 00175 " Additional weight decay for the direct in-to-out layer. 
                  " Is added to 'weight_decay'.\n");

    declareOption(ol, "L1_penalty", &NeighborhoodSmoothnessNNet::L1_penalty, OptionBase::buildoption,
                  "    should we use L1 penalty instead of the default L2 penalty on the weights?\n");

    declareOption(ol, "direct_in_to_out", &NeighborhoodSmoothnessNNet::direct_in_to_out, OptionBase::buildoption,
                  "    should we include direct input to output connections?\n");

    declareOption(ol, "output_transfer_func", &NeighborhoodSmoothnessNNet::output_transfer_func, OptionBase::buildoption,
                  "    what transfer function to use for output layer? \n"
                  "    one of: tanh, sigmoid, exp, softplus, softmax \n"
                  "    or interval(<minval>,<maxval>), which stands for\n"
                  "    <minval>+(<maxval>-<minval>)*sigmoid(.).\n"
                  "    An empty string or \"none\" means no output transfer function \n");

    declareOption(ol, "cost_funcs", &NeighborhoodSmoothnessNNet::cost_funcs, OptionBase::buildoption,
                  "    a list of cost functions to use\n"
                  "    in the form \"[ cf1; cf2; cf3; ... ]\" where each function is one of: \n"
                  "      mse (for regression)\n"
                  "      mse_onehot (for classification)\n"
                  "      NLL (negative log likelihood -log(p[c]) for classification) \n"
                  "      class_error (classification error) \n"
                  "      binary_class_error (classification error for a 0-1 binary classifier)\n"
                  "      multiclass_error\n"
                  "      cross_entropy (for binary classification)\n"
                  "      stable_cross_entropy (more accurate backprop and possible regularization, for binary classification)\n"
                  "      lift_output (not a real cost function, just the output for lift computation)\n"
                  "    The first function of the list will be used as \n"
                  "    the objective function to optimize \n"
                  "    (possibly with an added weight decay penalty) \n");

    declareOption(ol, "classification_regularizer", &NeighborhoodSmoothnessNNet::classification_regularizer, OptionBase::buildoption,
                  "    used only in the stable_cross_entropy cost function, to fight overfitting (0<=r<1)\n");

    declareOption(ol, "optimizer", &NeighborhoodSmoothnessNNet::optimizer, OptionBase::buildoption,
                  "    specify the optimizer to use\n");

    declareOption(ol, "batch_size", &NeighborhoodSmoothnessNNet::batch_size, OptionBase::buildoption,
                  "    how many samples to use to estimate the average gradient before updating the weights\n"
                  "    0 is equivalent to specifying training_set->n_non_missing_rows() \n");
    // TODO Not really, since the matrix given typically has many more rows (KNNVMatrix) than input samples.

    declareOption(ol, "paramsvalues", &NeighborhoodSmoothnessNNet::paramsvalues, OptionBase::learntoption,
                  "    The learned parameter vector\n");

    inherited::declareOptions(ol);
}

///////////
// build //
///////////
void NeighborhoodSmoothnessNNet::build()
{
    inherited::build();
    build_();
}

////////////
// build_ //
////////////
void NeighborhoodSmoothnessNNet::build_()
{
    /*
     * Create Topology Var Graph
     */

    // Don't do anything if we don't have a train_set
    // It's the only one who knows the inputsize and targetsize anyway...

    if(inputsize_>=0 && targetsize_>=0 && weightsize_>=0)
    {
        // init. basic vars
        int true_inputsize = inputsize(); // inputsize is now true inputsize
        bag_inputs = Var(max_n_instances, inputsize() + 1);
        // The input (with pij) is the first column of the bag inputs.
        Var input_and_pij = subMat(bag_inputs, 0, 0, 1, bag_inputs->width());
        input = new SubMatTransposeVariable(input_and_pij, 0, 0, 1, true_inputsize);
        output = input;
        params.resize(0);

        // first hidden layer
        if(nhidden>0)
        {
            w1 = Var(1 + true_inputsize, nhidden, "w1");
            output = tanh(affine_transform(output,w1));
            params.append(w1);
            last_hidden = output;
        }

        // second hidden layer
        if(nhidden2>0)
        {
            w2 = Var(1+nhidden, nhidden2, "w2");
            output = tanh(affine_transform(output,w2));
            params.append(w2);
            last_hidden = output;
        }

        if (nhidden==0)
            PLERROR("NeighborhoodSmoothnessNNet:: there must be hidden units!");

        // output layer before transfer function
        wout = Var(1+output->size(), outputsize(), "wout");
        output = affine_transform(output,wout);
        params.append(wout);

        // direct in-to-out layer
        if(direct_in_to_out)
        {
            wdirect = Var(true_inputsize, outputsize(), "wdirect");
            output += transposeProduct(wdirect, input);
            params.append(wdirect);
        }

        Var before_transfer_func = output;

        /*
         * output_transfer_func
         */
        string::size_type p = 0;  // size_type so that the comparison with string::npos below is safe
        if(output_transfer_func!="" && output_transfer_func!="none")
        {
            if(output_transfer_func=="tanh")
                output = tanh(output);
            else if(output_transfer_func=="sigmoid")
                output = sigmoid(output);
            else if(output_transfer_func=="softplus")
                output = softplus(output);
            else if(output_transfer_func=="exp")
                output = exp(output);
            else if(output_transfer_func=="softmax")
                output = softmax(output);
            else if (output_transfer_func == "log_softmax")
                output = log_softmax(output);
            else if ((p=output_transfer_func.find("interval"))!=string::npos)
            {
                string::size_type q = output_transfer_func.find(",");
                interval_minval = atof(output_transfer_func.substr(p+1,q-(p+1)).c_str());
                string::size_type r = output_transfer_func.find(")");
                interval_maxval = atof(output_transfer_func.substr(q+1,r-(q+1)).c_str());
                output = interval_minval + (interval_maxval - interval_minval)*sigmoid(output);
            }
            else
                PLERROR("In NNet::build_() unknown output_transfer_func option: %s",output_transfer_func.c_str());
        }

        /*
         * target and weights
         */

        target = Var(targetsize()-1, "target");

        if(weightsize_>0)
        {
            if (weightsize_!=1)
                PLERROR("NeighborhoodSmoothnessNNet: expected weightsize to be 1 or 0 (or unspecified = -1, meaning 0), got %d",weightsize_);
            sampleweight = Var(1, "weight");
        }

        // create penalties
        penalties.resize(0);  // prevents penalties from being added twice by consecutive builds
        if(w1 && ((layer1_weight_decay + weight_decay)!=0 || (layer1_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w1, (layer1_weight_decay + weight_decay), (layer1_bias_decay + bias_decay), L1_penalty));
        if(w2 && ((layer2_weight_decay + weight_decay)!=0 || (layer2_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(w2, (layer2_weight_decay + weight_decay), (layer2_bias_decay + bias_decay), L1_penalty));
        if(wout && ((output_layer_weight_decay + weight_decay)!=0 || (output_layer_bias_decay + bias_decay)!=0))
            penalties.append(affine_transform_weight_penalty(wout,
                                                             (output_layer_weight_decay + weight_decay),
                                                             (output_layer_bias_decay + bias_decay), L1_penalty));
        if(wdirect && (direct_in_to_out_weight_decay + weight_decay) != 0)
        {
            if (L1_penalty)
                penalties.append(sumabs(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
            else
                penalties.append(sumsquare(wdirect)*(direct_in_to_out_weight_decay + weight_decay));
        }

        // Shared values hack...
        if(paramsvalues && (paramsvalues.size() == params.nelems()))
            params << paramsvalues;
        else
        {
            paramsvalues.resize(params.nelems());
            initializeParams();
        }
        params.makeSharedValue(paramsvalues);

        output->setName("element output");

        f = Func(input, output);
        f_input_to_hidden = Func(input, last_hidden);

        /*
         * costfuncs
         */

        bag_size = Var(1,1);
        bag_hidden = unfoldedFunc(subMat(bag_inputs, 0, 0, bag_inputs.length(), true_inputsize), f_input_to_hidden, false);
        p_ij = subMat(bag_inputs, 1, true_inputsize, bag_inputs->length() - 1, 1);

        // The q_ij function: instance 0 of the bag is compared to every other
        // instance through a Gaussian kernel on the hidden representations,
        //     k_j  = exp( -||h_0 - h_j||^2 / sigma_hidden^2 )
        //     q_ij = k_j / sum_l k_l
        // and the smoothness cost added to the training cost below is
        //     -sne_weight * sum_j p_ij * log(q_ij).
        Var hidden_0 = new SubMatTransposeVariable(bag_hidden, 0, 0, 1, bag_hidden->width());
        Var store_hidden(last_hidden.length(), last_hidden.width());
        Var hidden_0_minus_hidden = minus(hidden_0, store_hidden);
        Var k_hidden =
            exp(
                timesScalar(
                    dot(hidden_0_minus_hidden, hidden_0_minus_hidden),
                    var(- 1 / (sigma_hidden * sigma_hidden))
                )
            );
        Func f_hidden_to_k_hidden(store_hidden, k_hidden);
        Var k_hidden_all =
            unfoldedFunc(
                subMat(
                    bag_hidden, 1, 0, bag_hidden->length() - 1, bag_hidden->width()
                ),
                f_hidden_to_k_hidden,
                false
            );
        Var one_over_sum_of_k_hidden = invertElements(sum(k_hidden_all));
        Var log_q_ij = log(timesScalar(k_hidden_all, one_over_sum_of_k_hidden));
        Var minus_weight_sum_p_ij_log_q_ij =
            timesScalar(sum(times(p_ij, log_q_ij)), var(-sne_weight));

        int ncosts = cost_funcs.size();
        if(ncosts<=0)
            PLERROR("In NNet::build_() Empty cost_funcs : must at least specify the cost function to optimize!");
        costs.resize(ncosts);

        for(int k=0; k<ncosts; k++)
        {
            // create costfuncs and apply individual weights if weightpart > 1
            if(cost_funcs[k]=="mse")
                costs[k]= sumsquare(output-target);
            else if(cost_funcs[k]=="mse_onehot")
                costs[k] = onehot_squared_loss(output, target);
            else if(cost_funcs[k]=="NLL")
            {
                if (output->size() == 1) {
                    // Assume sigmoid output here!
                    costs[k] = cross_entropy(output, target);
                } else {
                    if (output_transfer_func == "log_softmax")
                        costs[k] = -output[target];
                    else
                        costs[k] = neg_log_pi(output, target);
                }
            }
            else if(cost_funcs[k]=="class_error")
                costs[k] = classification_loss(output, target);
            else if(cost_funcs[k]=="binary_class_error")
                costs[k] = binary_classification_loss(output, target);
            else if(cost_funcs[k]=="multiclass_error")
                costs[k] = multiclass_loss(output, target);
            else if(cost_funcs[k]=="cross_entropy")
                costs[k] = cross_entropy(output, target);
            else if (cost_funcs[k]=="stable_cross_entropy") {
                Var c = stable_cross_entropy(before_transfer_func, target);
                costs[k] = c;
                if (classification_regularizer) {
                    // There is a regularizer to add to the cost function.
                    dynamic_cast<NegCrossEntropySigmoidVariable*>((Variable*) c)->
                        setRegularizer(classification_regularizer);
                }
            }
            else if (cost_funcs[k]=="lift_output")
                costs[k] = lift_output(output, target);
            else  // Assume we got a Variable name and its options
            {
                costs[k]= dynamic_cast<Variable*>(newObject(cost_funcs[k]));
                if(costs[k].isNull())
                    PLERROR("In NNet::build_() unknown cost_func option: %s",cost_funcs[k].c_str());
                costs[k]->setParents(output & target);
                costs[k]->build();
            }

            // take into account the sampleweight
            //if(sampleweight)
            //    costs[k]= costs[k] * sampleweight; // NO, because this is taken into account (more properly) in stats->update
        }

        test_costs = hconcat(costs);

        // Apply penalty to cost.
        // If there is no penalty, we still add costs[0] as the first cost, in
        // order to keep the same number of costs as if there was a penalty.
        Var test_costs_final = test_costs;
        Var first_cost_final = costs[0];
        if (penalties.size() != 0) {
            first_cost_final = sum(hconcat(first_cost_final & penalties));
        }
        if (weightsize_ > 0) {
            test_costs_final = sampleweight * test_costs;
            first_cost_final = sampleweight * first_cost_final;
        }
        // We add the SNE cost.
        // TODO Make sure we optimize the training cost.
        // TODO Actually maybe we should put this before multiplying by sampleweight.
        first_cost_final = first_cost_final + minus_weight_sum_p_ij_log_q_ij;

        training_cost = hconcat(first_cost_final & test_costs_final);

        /* if(penalties.size() != 0) {
            if (weightsize_>0)
                // only multiply by sampleweight if there are weights
                training_cost = hconcat(sampleweight*sum(hconcat(costs[0] & penalties))
                                        & (test_costs*sampleweight));
            else {
                training_cost = hconcat(sum(hconcat(costs[0] & penalties)) & test_costs);
            }
        }
        else {
            if(weightsize_>0) {
                // only multiply by sampleweight if there are weights
                training_cost = hconcat(costs[0]*sampleweight & test_costs*sampleweight);
            } else {
                training_cost = hconcat(costs[0] & test_costs);
            }
        } */

        training_cost->setName("training_cost");
        test_costs->setName("test_costs");

        if (weightsize_ > 0) {
            invars = bag_inputs & bag_size & target & sampleweight;
        } else {
            invars = bag_inputs & bag_size & target;
        }
        invars_to_training_cost = Func(invars, training_cost);

        invars_to_training_cost->recomputeParents();

        // Other funcs.
        VarArray outvars;
        VarArray testinvars;
        testinvars.push_back(input);
        outvars.push_back(output);
        testinvars.push_back(target);
        outvars.push_back(target);

        test_costf = Func(testinvars, output&test_costs);
        test_costf->recomputeParents();
        output_and_target_to_cost = Func(outvars, test_costs);
        output_and_target_to_cost->recomputeParents();
    }
}

////////////////
// outputsize //
////////////////
int NeighborhoodSmoothnessNNet::outputsize() const
{ return noutputs; }

///////////////////////
// getTrainCostNames //
///////////////////////
TVec<string> NeighborhoodSmoothnessNNet::getTrainCostNames() const
{
    return (cost_funcs[0]+"+penalty+SNE") & cost_funcs;
}

//////////////////////
// getTestCostNames //
//////////////////////
TVec<string> NeighborhoodSmoothnessNNet::getTestCostNames() const
{
    return cost_funcs;
}

void NeighborhoodSmoothnessNNet::setTrainingSet(VMat training_set, bool call_forget)
{
    // YB: I am not sure a build is really necessary if only the LENGTH of the train_set has changed?
    // Non-parametric methods that use the length should do their "resize" in train, not in build.
    bool training_set_has_changed =
        !train_set
        || train_set->width() != training_set->width()
        || train_set->length() != training_set->length()
        || train_set->inputsize() != training_set->inputsize()
        || train_set->weightsize() != training_set->weightsize()
        || train_set->targetsize() != training_set->targetsize();
    train_set = training_set;

    if (training_set_has_changed && inputsize_<0)
    {
        inputsize_ = train_set->inputsize()-1;
        targetsize_ = train_set->targetsize();
        weightsize_ = train_set->weightsize();
    } else if (train_set->inputsize() != training_set->inputsize()) {
        PLERROR("In NeighborhoodSmoothnessNNet::setTrainingSet - You can't change the inputsize of the training set");
    }
    if (training_set_has_changed || call_forget)
        build(); // CHANGE MADE BY YOSHUA: otherwise, after a setTrainingSet, the build is not complete in an NNet
    if (call_forget)
        forget();
}

///////////
// train //
///////////
void NeighborhoodSmoothnessNNet::train()
{
    // NeighborhoodSmoothnessNNet nstages is the number of epochs (whole passes through the training set)
    // while optimizer nstages is the number of weight updates.
    // So the relationship between the two depends on whether we are in stochastic, batch or minibatch mode.

    if(!train_set)
        PLERROR("In NeighborhoodSmoothnessNNet::train, you did not setTrainingSet");

    if(!train_stats)
        PLERROR("In NeighborhoodSmoothnessNNet::train, you did not setTrainStatsCollector");

    if(f.isNull()) // Net has not been properly built yet (because build was called before the learner had a proper training set)
        build();

    int n_bags = -1;
    // We must count the number of bags in the training set.
    {
        n_bags=0;
        int l = train_set->length();
        ProgressBar* pb = 0;
        if(report_progress)
            pb = new ProgressBar("Counting nb bags in train_set for NeighborhoodSmoothnessNNet", l);
        Vec row(train_set->width());
        int tag_column = train_set->inputsize() + train_set->targetsize() - 1;
        for (int i=0;i<l;i++) {
            train_set->getRow(i,row);
            if (int(row[tag_column]) & SumOverBagsVariable::TARGET_COLUMN_FIRST) {
                // Indicates the beginning of a new bag.
                n_bags++;
            }
            if(pb)
                pb->update(i);
        }
        if(pb)
            delete pb;
    }

    int true_batch_size = batch_size;
    if (true_batch_size <= 0) {
        // The real batch size is actually the number of bags in the training set.
        true_batch_size = n_bags;
    }

    // We can now compute the total cost.
    Var totalcost = sumOverBags(train_set, invars_to_training_cost, max_n_instances, true_batch_size, true);

    // Number of optimizer stages corresponding to one learner stage (one epoch).
    int optstage_per_lstage = 0;
    if (batch_size<=0) {
        optstage_per_lstage = 1;
    } else {
        optstage_per_lstage = n_bags/batch_size;
    }

    if(optimizer) {
        optimizer->setToOptimize(params, totalcost);
        optimizer->build();
    }

    ProgressBar* pb = 0;
    if(report_progress)
        pb = new ProgressBar("Training NeighborhoodSmoothnessNNet from stage " + tostring(stage) + " to " + tostring(nstages), nstages-stage);

    int initial_stage = stage;
    bool early_stop=false;
    while(stage<nstages && !early_stop)
    {
        optimizer->nstages = optstage_per_lstage;
        train_stats->forget();
        optimizer->early_stop = false;
        optimizer->optimizeN(*train_stats);
        train_stats->finalize();
        if(verbosity>2)
            cout << "Epoch " << stage << " train objective: " << train_stats->getMean() << endl;
        ++stage;
        if(pb)
            pb->update(stage-initial_stage);
    }
    if(verbosity>1)
        cout << "EPOCH " << stage << " train objective: " << train_stats->getMean() << endl;

    if(pb)
        delete pb;

    // TODO Not sure if this is needed, but just in case...
    output_and_target_to_cost->recomputeParents();
    test_costf->recomputeParents();
}

///////////////////
// computeOutput //
///////////////////
void NeighborhoodSmoothnessNNet::computeOutput(
    const Vec& inputv, Vec& outputv) const
{
    f->fprop(inputv,outputv);
}

///////////////////////////
// computeOutputAndCosts //
///////////////////////////
void NeighborhoodSmoothnessNNet::computeOutputAndCosts(
    const Vec& inputv, const Vec& targetv, Vec& outputv, Vec& costsv) const
{
    test_costf->fprop(inputv&targetv, outputv&costsv);
}

/////////////////////////////
// computeCostsFromOutputs //
/////////////////////////////
void NeighborhoodSmoothnessNNet::computeCostsFromOutputs(
    const Vec& inputv, const Vec& outputv, const Vec& targetv, Vec& costsv) const
{
    output_and_target_to_cost->fprop(outputv&targetv, costsv);
}

//////////////////////
// initializeParams //
//////////////////////
void NeighborhoodSmoothnessNNet::initializeParams()
{
    if (seed_>=0)
        manual_seed(seed_);
    else
        PLearn::seed();

    real delta = 1.
                 / inputsize();

    /*
    if(direct_in_to_out)
    {
        //fill_random_uniform(wdirect->value, -delta, +delta);
        fill_random_normal(wdirect->value, 0, delta);
        //wdirect->matValue(0).clear();
    }
    */
    if(nhidden>0)
    {
        //fill_random_uniform(w1->value, -delta, +delta);
        //delta = 1./sqrt(nhidden);
        fill_random_normal(w1->value, 0, delta);
        if(direct_in_to_out)
        {
            //fill_random_uniform(wdirect->value, -delta, +delta);
            fill_random_normal(wdirect->value, 0, 0.01*delta);
            wdirect->matValue(0).clear();
        }
        delta = 1./nhidden;
        w1->matValue(0).clear();
    }
    if(nhidden2>0)
    {
        //fill_random_uniform(w2->value, -delta, +delta);
        //delta = 1./sqrt(nhidden2);
        fill_random_normal(w2->value, 0, delta);
        delta = 1./nhidden2;
        w2->matValue(0).clear();
    }
    //fill_random_uniform(wout->value, -delta, +delta);
    fill_random_normal(wout->value, 0, delta);
    wout->matValue(0).clear();

    // Reset optimizer
    if(optimizer)
        optimizer->reset();
}

////////////
// forget //
////////////
void NeighborhoodSmoothnessNNet::forget()
{
    if (train_set) initializeParams();
    stage = 0;
}

//////////////////////////////////
// makeDeepCopyFromShallowCopy //
//////////////////////////////////
void NeighborhoodSmoothnessNNet::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(input, copies);
    deepCopyField(target, copies);
    deepCopyField(sampleweight, copies);
    deepCopyField(w1, copies);
    deepCopyField(w2, copies);
    deepCopyField(wout, copies);
    deepCopyField(wdirect, copies);
    deepCopyField(last_hidden, copies);
    deepCopyField(output, copies);
    deepCopyField(bag_size, copies);
    deepCopyField(bag_inputs, copies);
    deepCopyField(bag_output, copies);
    deepCopyField(bag_hidden, copies);
    deepCopyField(invars_to_training_cost, copies);

    deepCopyField(costs, copies);
    deepCopyField(penalties, copies);
    deepCopyField(training_cost, copies);
    deepCopyField(test_costs, copies);
    deepCopyField(invars, copies);
    deepCopyField(params, copies);
    deepCopyField(paramsvalues, copies);

    deepCopyField(p_ij, copies);

    deepCopyField(f, copies);
    deepCopyField(f_input_to_hidden, copies);
    deepCopyField(test_costf, copies);
    deepCopyField(output_and_target_to_cost, copies);

    deepCopyField(cost_funcs, copies);

    deepCopyField(optimizer, copies);
}

} // end of namespace PLearn
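
The listing ends here. As a quick orientation aid (not part of the original file), below is a minimal, hypothetical sketch of how this learner might be driven from C++ using only the methods defined above (build, setTrainingSet, train, computeOutput). The option-setting via setOption, the VecStatsCollector / setTrainStatsCollector calls, the header path, and the optimizer specification string are assumptions about the surrounding PLearn API rather than anything established by this file.

// Hypothetical usage sketch -- NOT part of NeighborhoodSmoothnessNNet.cc.
// Assumes the generic PLearn Object/PLearner API (setOption, setTrainStatsCollector,
// VecStatsCollector, PP smart pointers) and a training VMat already laid out in the
// bag format this learner expects (inputs, p_ij column, target, bag-tag column).

#include <plearn/math/VecStatsCollector.h>   // assumed header path
#include "NeighborhoodSmoothnessNNet.h"

using namespace PLearn;

void trainExample(VMat bag_train_set)
{
    PP<NeighborhoodSmoothnessNNet> net = new NeighborhoodSmoothnessNNet();

    // Build options declared in declareOptions() above (values are arbitrary).
    net->setOption("nhidden", "10");
    net->setOption("noutputs", "2");
    net->setOption("sigma_hidden", "0.1");
    net->setOption("sne_weight", "0.01");
    net->setOption("cost_funcs", "[ NLL ]");
    net->setOption("optimizer", "GradientOptimizer(start_learning_rate=0.01)"); // assumed serialization syntax
    net->setOption("nstages", "20");   // number of epochs, see train() above
    net->build();

    // Methods defined in the listing above.
    net->setTrainingSet(bag_train_set, true);
    net->setTrainStatsCollector(new VecStatsCollector()); // assumed PLearner API
    net->train();

    // One forward pass through the learned function f.
    Vec input(net->inputsize()), output(net->outputsize());
    bag_train_set->getSubRow(0, 0, input);   // assumed VMat accessor
    net->computeOutput(input, output);
}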
