// -*- C++ -*-

// NeuralNet.h
// Copyright (c) 1998-2002 Pascal Vincent
// Copyright (C) 1999-2002 Yoshua Bengio and University of Montreal
// Copyright (c) 2002 Jean-Sebastien Senecal, Xavier Saint-Mleux, Rejean Ducharme
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// 3. The name of the authors may not be used to endorse or promote
//    products derived from this software without specific prior written
//    permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org


/* *******************************************************
 * $Id: NeuralNet.h,v 1.10 2004/07/21 16:30:56 chrish42 Exp $
 ******************************************************* */

#ifndef NeuralNet_INC
#define NeuralNet_INC

#include "Learner.h"
#include <plearn/opt/Optimizer.h>
//#include "Var_all.h"

namespace PLearn {
using namespace std;

//! Feed-forward neural-network Learner: up to two hidden layers, an
//! optional direct input-to-output connection, per-layer weight/bias
//! decay options, and a user-selected set of cost functions minimized
//! by an external Optimizer.
class NeuralNet: public Learner
{
protected:
    // Computation-graph variables, built by build_():
    Var input;              // Var(inputsize())
    Var target;             // Var(targetsize()-weightsize())
    Var costweights;        // Var(weightsize())
    Var target_and_weights; // hconcat(target&costweights)
    Var w1;      // bias and weights of first hidden layer
    Var w2;      // bias and weights of second hidden layer
    Var wout;    // bias and weights of output layer
    Var wdirect; // bias and weights for direct in-to-out connection

    Var output;     // network output variable
    VarArray costs; // all costs of interest
    Var cost;       // cost for one (input,target)

    VarArray params; // all parameter input vars

    Vec paramsvalues; // values of all parameters
    Vec initial_paramsvalues; // NOTE(review): presumably a snapshot of the freshly
                              // initialized parameters (for resets) — confirm in the .cc

public:
    Func f;     // input -> output
    Func costf; // input & target -> output & cost
    Func output_and_target_to_cost; // output & target -> cost

public:

    typedef Learner inherited;

    // Build options inherited from learner:
    // inputsize, outputsize, targetsize, experiment_name, save_at_every_epoch

    // Build options:
    int nhidden;  // number of hidden units in first hidden layer (default:0)
    int nhidden2; // number of hidden units in second hidden layer (default:0)

    // Weight-decay (regularization) options; a MISSING_VALUE default means
    // "fall back to the generic weight_decay/bias_decay" for that layer.
    real weight_decay;        // default: 0
    real bias_decay;          // default: 0
    real layer1_weight_decay; // default: MISSING_VALUE
    real layer1_bias_decay;   // default: MISSING_VALUE
    real layer2_weight_decay; // default: MISSING_VALUE
    real layer2_bias_decay;   // default: MISSING_VALUE
    real output_layer_weight_decay;     // default: MISSING_VALUE
    real output_layer_bias_decay;       // default: MISSING_VALUE
    real direct_in_to_out_weight_decay; // default: MISSING_VALUE

    bool global_weight_decay; // default: false
    bool direct_in_to_out; // should we include direct input to output connections? default: false
    string output_transfer_func; // tanh, sigmoid, softplus, softmax (default: "" means no transfer function)
    int iseed; // seed for random nb generator, -1 = use time of day

    //! Names of the cost functions to optimize,
    // where the cost functions can be one of mse, mse_onehot, NLL,
    // class_error or multiclass_error (no default), or semisupervised_prob_class
    // (if the latter, then the semisupervised_flatten_factor option must be set).
    Array<string> cost_funcs;
    real semisupervised_flatten_factor; // see SemiSupervisedProbClassCostVariable for explanations
    Vec semisupervised_prior;           // see SemiSupervisedProbClassCostVariable for explanations

    // Build options related to the optimization:
    PP<Optimizer> optimizer; // the optimizer to use (no default)

    int batch_size; // how many samples to use to estimate gradient before an update
                    // 0 means the whole training set (default: 1)

    int nepochs; // how many times the optimizer gets to see the training set (default: 10000).

    string saveparams; // where to optionally save params after training

    Array<Vec> normalization; // NOTE(review): per-input normalization data;
                              // exact semantics defined in the .cc — confirm there
private:
    //! Object-specific build: constructs the network's computation graph
    //! from the current option values.
    void build_();

public:

    NeuralNet();
    virtual ~NeuralNet();
    PLEARN_DECLARE_OBJECT(NeuralNet);

    // Learner methods

    virtual void build();
    virtual void forget(); // simply calls initializeParams()
    void initializeParams();

    virtual void train(VMat training_set);
    virtual void use(const Vec& inputvec, Vec& prediction);

    virtual int costsize() const;
    virtual Array<string> costNames() const;
    virtual Array<string> testResultsNames() { return costNames(); }
    virtual void useAndCost(const Vec& inputvec, const Vec& targetvec,
                            Vec outputvec, Vec costvec);
    void computeCost(const Vec& inputvec, const Vec& targetvec,
                     const Vec& outputvec, const Vec& costvec);

    virtual void makeDeepCopyFromShallowCopy(CopiesMap &copies);
protected:
    static void declareOptions(OptionList& ol);

};

DECLARE_OBJECT_PTR(NeuralNet);

} // end of namespace PLearn

#endif