// -*- C++ -*-

// ConditionalDensityNet.h
//
// Copyright (C) 2004 Université de Montréal
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: ConditionalDensityNet.h,v 1.34 2004/07/21 16:30:55 chrish42 Exp $
 ******************************************************* */

// Authors: Yoshua Bengio

#ifndef ConditionalDensityNet_INC
#define ConditionalDensityNet_INC

#include "PDistribution.h"
#include <plearn/opt/Optimizer.h>

namespace PLearn {
using namespace std;

class ConditionalDensityNet: public PDistribution
{

private:

    typedef PDistribution inherited;

protected:

    // *********************
    // * protected options *
    // *********************

    Var input;        // Var(inputsize())
    Var target;       // Var(targetsize()-weightsize())
    Var sampleweight; // Var(1) if train_set->hasWeights()
    Var w1;           // bias and weights of first hidden layer
    Var w2;           // bias and weights of second hidden layer
    Var wout;         // bias and weights of output layer
    Var wdirect;      // bias and weights for direct input-to-output connection

    Var output;  // output layer: contains the parameters of the distribution
    Var outputs; // contains the result of computeOutput, e.g. the expectation or the cdf curve
    Var a, pos_a; // output parameter, scalar constant part
    Var b, pos_b; // output parameters, step height parameters
    Var c, pos_c; // output parameters, step smoothing parameters
    Var density;
    Var cumulative;
    Var expected_value;

    VarArray costs;    // all costs of interest
    VarArray penalties;
    Var training_cost; // weighted scalar costs[0] including penalties
    Var test_costs;    // hconcat(costs)

    VarArray invars;
    VarArray params;   // all parameter input vars

public:

    Vec paramsvalues;  // values of all parameters

protected:

    Var centers, centers_M, steps, steps_M, steps_0, steps_gradient,
        steps_integral, delta_steps, cum_numerator, cum_denominator;

    // The conditional distribution's multiplicative step parameters are relative
    // to the unconditional cdf step heights at the mu positions, which are stored
    // in this vector (computed at the beginning of training).
    Vec unconditional_cdf;

    // unconditional_cdf[i] - unconditional_cdf[i-1], used to scale the steps of the cdf
    Var unconditional_delta_cdf;

    // coefficients that scale pos_c: initial_hardness/(mu[i]-mu[i-1])
    Var initial_hardnesses;

    // for debugging
    Var prev_centers, prev_centers_M, scaled_prev_centers,
        scaled_prev_centers_M, minus_prev_centers_0, minus_scaled_prev_centers_0;

public:

    VarArray y_values; // values at which the output probability curve is sampled
    Var mu;            // output parameters, step location parameters
    mutable Func f;                         // input -> output
    mutable Func test_costf;                // input & target -> output & test_costs
    mutable Func output_and_target_to_cost; // output & target -> cost

    mutable Func cdf_f;      // target -> cumulative
    mutable Func mean_f;     // output -> expected value
    mutable Func density_f;  // target -> density
    mutable Func in2distr_f; // input -> parameters of the output distribution
    VarArray output_and_target;
    Vec output_and_target_values;
    Var totalcost;
    Var mass_cost;
    Var pos_y_cost;

    // ************************
    // * public build options *
    // ************************

    // ***** OPTIONS PASTED FROM NNET **************

    int nhidden;  // number of hidden units in first hidden layer (default: 0)
    int nhidden2; // number of hidden units in second hidden layer (default: 0)

    real weight_decay;                  // default: 0
    real bias_decay;                    // default: 0
    real layer1_weight_decay;           // default: MISSING_VALUE
    real layer1_bias_decay;             // default: MISSING_VALUE
    real layer2_weight_decay;           // default: MISSING_VALUE
    real layer2_bias_decay;             // default: MISSING_VALUE
    real output_layer_weight_decay;     // default: MISSING_VALUE
    real output_layer_bias_decay;       // default: MISSING_VALUE
    real direct_in_to_out_weight_decay; // default: MISSING_VALUE

    bool L1_penalty;       // default: false
    bool direct_in_to_out; // should we include direct input-to-output connections? (default: false)
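
    // Illustration (standalone sketch, not part of this class): the weight_decay
    // and L1_penalty options above correspond to the usual penalty terms added to
    // the training cost; assuming the standard definitions, the contribution of a
    // weight vector w would be computed along these lines:
    //
    //     #include <cmath>
    //     #include <vector>
    //
    //     // L2 penalty: lambda * sum_j w_j^2 ;  L1 penalty: lambda * sum_j |w_j|
    //     double weight_penalty(const std::vector<double>& w, double lambda, bool use_L1)
    //     {
    //         double p = 0;
    //         for (size_t j = 0; j < w.size(); ++j)
    //             p += use_L1 ? std::fabs(w[j]) : w[j] * w[j];
    //         return lambda * p;
    //     }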

    // Build options related to the optimization:
    PP<Optimizer> optimizer; // the optimizer to use (no default)

    int batch_size; // how many samples to use to estimate the gradient before an update;
                    // 0 means the whole training set (default: 1)

    // ***** OPTIONS SPECIFIC TO CONDITIONALDENSITYNET **************

    real c_penalization;

    // maximum value that Y can take (the minimum value is 0 by default)
    real maxY;

    // threshold value of Y for which we might want to compute P(Y>thresholdY), with outputs_def='t'
    real thresholdY;

    // This weight between 0 and 1 controls the balance of the cost function between
    // minimizing the negative log-likelihood and minimizing the squared error:
    // if 1, perform maximum likelihood; if 0, perform least-squares optimization.
    real log_likelihood_vs_squared_error_balance;

    // whether to model the mass point with a separate parameter
    bool separate_mass_point;

    // number of terms in the output density function
    int n_output_density_terms;

    real generate_precision;

    // The type of steps used to build the cumulative.
    // Allowed values are:
    //   - sigmoid_steps: g(y,theta,i) = sigmoid(s(c_i)*(y-mu_i))
    //   - sloped_steps:  g(y,theta,i) = s(s(c_i)*(mu_i-y)) - s(s(c_i)*(mu_{i+1}-y))
    string steps_type;

    // How to initialize the mu_i and how to select the curve points:
    //   - uniform:   at regular intervals in [0,maxY]
    //   - log-scale: as the exponential of values at regular intervals in log scale,
    //     using the formula (see the illustrative sketch below):
    //       i-th position = (exp(scale*(i+1-n_output_density_terms)/n_output_density_terms)-exp(-scale))/(1-exp(-scale))
    string centers_initialization;
    string curve_positions;
    real scale;

    // approximate unconditional probability of Y=0 (mass point), used
    // to initialize the parameters
    real unconditional_p0;

    // whether to learn the mu or keep them at their initial values
    bool mu_is_fixed;

    // initial value of softplus(c) (used only in initializeParams())
    real initial_hardness;
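
    // Illustration (standalone sketch, not part of this class): the log-scale
    // centers_initialization formula above can be evaluated directly; assuming the
    // index i runs from 0 to n-1, the last position is exactly 1 (before any
    // scaling to [0,maxY]):
    //
    //     #include <cmath>
    //
    //     // i-th relative center position, with n = n_output_density_terms
    //     double log_scale_position(int i, int n, double scale)
    //     {
    //         return (std::exp(scale * (i + 1 - n) / double(n)) - std::exp(-scale))
    //                / (1 - std::exp(-scale));
    //     }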

    // ****************
    // * Constructors *
    // ****************

    // Default constructor; the implementation in the .cc should initialize
    // all fields to reasonable default values.
    ConditionalDensityNet();


    // *************************
    // * PDistribution methods *
    // *************************

private:

    //! This does the actual building.
    void build_();

protected:

    //! Declares this class' options.
    static void declareOptions(OptionList& ol);

public:

    // ************************
    // **** Object methods ****
    // ************************

    //! Simply calls inherited::build() then build_().
    virtual void build();

    //! Transforms a shallow copy into a deep copy.
    virtual void makeDeepCopyFromShallowCopy(map<const void*, void*>& copies);

    // Declares other standard object methods.
    // If your class is not instantiable (it has pure virtual methods),
    // you should replace this by PLEARN_DECLARE_ABSTRACT_OBJECT_METHODS.
    PLEARN_DECLARE_OBJECT(ConditionalDensityNet);

    // ******************************
    // **** PDistribution methods ***
    // ******************************

    //! Set the input part (X=x) that conditions the distribution.
    virtual void setInput(const Vec& input) const;

    //! Return log of probability density log(p(y | x)).
    virtual real log_density(const Vec& x) const;

    //! Return survival function: P(Y>y | x).
    virtual real survival_fn(const Vec& x) const;

    //! Return cdf: P(Y<=y | x).
    virtual real cdf(const Vec& x) const;

    //! Return E[Y | x] in the vector mu.
    virtual void expectation(Vec& mu) const;

    //! Return Var[Y | x] in the matrix cov.
    virtual void variance(Mat& cov) const;

    //! Reset the random number generator used by generate() with the given seed.
    virtual void resetGenerator(long g_seed) const;

    //! Generate a pseudo-random sample x from the conditional distribution.
    virtual void generate(Vec& x) const;


    // *************************
    // **** Learner methods ****
    // *************************

    // Default version of inputsize returns learner->inputsize().
    // If this is not appropriate, you should uncomment this and define
    // it properly in the .cc.
    // virtual int inputsize() const;

    //! (Re-)initializes the learner in its fresh state, forgetting anything learnt.
    virtual void forget();

    /*
    virtual int outputsize() const;
    */

    void initializeParams();
    void initialize_mu(Vec& mu_);

    //! Train the network on the training set, using the optimizer.
    virtual void train();

    /*
    virtual void computeOutput(const Vec& input, Vec& output) const;
    */

    /*
    virtual void computeOutputAndCosts(const Vec& input, const Vec& target,
                                       Vec& output, Vec& costs) const;
    */

    virtual TVec<string> getTrainCostNames() const;
    // virtual TVec<string> getTestCostNames() const;
};

// Declares a few other classes and functions related to this class
DECLARE_OBJECT_PTR(ConditionalDensityNet);

} // end of namespace PLearn

#endif
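
// Illustration (standalone sketch, not PLearn code): from the member names above
// (a, b, c, mu, cum_numerator, cum_denominator) and the steps_type option, the
// conditional cumulative is presumably a positive-weighted sum of steps normalized
// by its value at maxY, which yields a monotone curve with F(maxY) = 1. A minimal
// sketch with sigmoid steps and softplus positivity (an assumption, not the exact
// PLearn formula):
//
//     #include <cmath>
//     #include <vector>
//
//     double sigmoid(double x)  { return 1.0 / (1.0 + std::exp(-x)); }
//     double softplus(double x) { return std::log(1.0 + std::exp(x)); }
//
//     // F(y) = (softplus(a) + sum_i softplus(b_i)*g_i(y)) / (same evaluated at maxY),
//     // with sigmoid steps g_i(y) = sigmoid(softplus(c_i)*(y - mu_i)).
//     double approx_cdf(double y, double a,
//                       const std::vector<double>& b,
//                       const std::vector<double>& c,
//                       const std::vector<double>& mu,
//                       double maxY)
//     {
//         double num = softplus(a), den = softplus(a);
//         for (size_t i = 0; i < b.size(); ++i) {
//             double s = softplus(c[i]);
//             num += softplus(b[i]) * sigmoid(s * (y    - mu[i]));
//             den += softplus(b[i]) * sigmoid(s * (maxY - mu[i]));
//         }
//         return num / den;
//     }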