#include "LinearRegressor.h"
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

LinearRegressor::LinearRegressor()
    : cholesky(true),
      weight_decay(0)
{}

PLEARN_IMPLEMENT_OBJECT(
    LinearRegressor,
    "Ordinary Least Squares and Ridge Regression, optionally weighted",
    "This class performs OLS (Ordinary Least Squares) and Ridge Regression, optionally on weighted\n"
    "data, by solving the linear equation (X'W X + weight_decay*n_examples*I) theta = X'W Y\n"
    "where X is the (n_examples x (1+inputsize)) matrix of extended inputs (with a 1 in the first column),\n"
    "Y is the (n_examples x targetsize) matrix of targets, W is a diagonal matrix of weights (one per example)\n"
    "{the identity matrix if weightsize()==0 in the training set}, and theta is the resulting\n"
    "set of parameters. W_{ii} is obtained from the weight column of the training set, if any.\n"
    "This column must have width 0 (no weight) or 1.\n"
    "A prediction (computeOutput) is obtained from an input vector as follows:\n"
    "   output = theta * (1,input)\n"
    "The criterion that is minimized by solving the above linear system is the squared loss\n"
    "plus squared norm penalty (weight_decay*sum_{ij} theta_{ij}^2) PER EXAMPLE. This class also measures\n"
    "the ordinary squared loss (||output-target||^2). The two costs are named 'mse+penalty' and 'mse' respectively.\n"
    "Training has two steps: (1) computing X'W X and X'W Y, (2) solving the linear system.\n"
    "The first step takes time O(n_examples*inputsize^2 + n_examples*inputsize*outputsize).\n"
    "The second step takes time O(inputsize^3).\n"
    "If train() is called repeatedly with different values of weight_decay, without intervening\n"
    "calls to forget(), then the first step will be done only once, and only the second step\n"
    "is repeated.\n");

void LinearRegressor::declareOptions(OptionList& ol)
{
    declareOption(ol, "cholesky", &LinearRegressor::cholesky, OptionBase::buildoption,
                  "Whether to use the Cholesky decomposition or not, when solving the linear system.");

    declareOption(ol, "weight_decay", &LinearRegressor::weight_decay, OptionBase::buildoption,
                  "The weight decay is the factor that multiplies the squared norm of the parameters in the loss function.\n");

    declareOption(ol, "weights", &LinearRegressor::weights, OptionBase::learntoption,
                  "The weight matrix, i.e. the parameters computed by training the regressor.\n");

    inherited::declareOptions(ol);
}
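
// With the declarations above, both build options can be set from a PLearn
// script; a hypothetical specification could look like:
//   LinearRegressor( weight_decay = 0.01; cholesky = 1 )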

void LinearRegressor::build_()
{
    // Nothing to do here: all buffers are sized lazily in train() and
    // computeOutput().
}

void LinearRegressor::build()
{
    inherited::build();
    build_();
}

void LinearRegressor::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(extendedinput, copies);
    deepCopyField(input, copies);
    deepCopyField(target, copies);
    deepCopyField(train_costs, copies);
    deepCopyField(XtX, copies);
    deepCopyField(XtY, copies);
    deepCopyField(weights, copies);
}

int LinearRegressor::outputsize() const
{
    int ts = targetsize();
    if (ts >= 0) {
        return ts;
    } else {
        // The targetsize is not yet known (no training set has been declared):
        // report 0 rather than a negative size.
        return 0;
    }
}

void LinearRegressor::forget()
{
    // Emptying XtX and XtY invalidates the cached sufficient statistics,
    // forcing the next train() to recompute X'W X and X'W Y from scratch.
    XtX.resize(0, XtX.width());
    XtY.resize(0, XtY.width());
    sum_squared_y = 0;
    sum_gammas = 0;
}

void LinearRegressor::train()
{
    // If XtX is empty (fresh learner or after forget()), the sufficient
    // statistics X'W X and X'W Y must be (re)computed in step (1).
    bool recompute_XXXY = (XtX.length()==0);
    extendedinput.resize(1+inputsize());
    input = extendedinput.subVec(1, inputsize());
    extendedinput[0] = 1.0;
    target.resize(targetsize());
    weights.resize(extendedinput.length(), target.length());
    if (recompute_XXXY)
    {
        XtX.resize(extendedinput.length(), extendedinput.length());
        XtY.resize(extendedinput.length(), target.length());
    }
    if (!train_stats)
        train_stats = new VecStatsCollector();
    train_stats->forget();

    real squared_error = 0;
    train_costs.resize(2);

    if (train_set->weightsize() <= 0)
    {
        squared_error =
            linearRegression(train_set.subMatColumns(0, inputsize()),
                             train_set.subMatColumns(inputsize(), outputsize()),
                             weight_decay*train_set.length(), weights,
                             !recompute_XXXY, XtX, XtY, sum_squared_y,
                             true, 0, cholesky);
    }
    else if (train_set->weightsize() == 1)
    {
        squared_error =
            weightedLinearRegression(train_set.subMatColumns(0, inputsize()),
                                     train_set.subMatColumns(inputsize(), outputsize()),
                                     train_set.subMatColumns(inputsize()+outputsize(), 1),
                                     weight_decay*train_set.length(), weights,
                                     !recompute_XXXY, XtX, XtY, sum_squared_y,
                                     sum_gammas, true, 0, cholesky);
    }
    else PLERROR("LinearRegressor: expected dataset's weightsize to be either 1 or 0, got %d\n",
                 train_set->weightsize());

    // The penalty is computed on the weights excluding the bias row.
    Mat weights_excluding_biases = weights.subMatRows(1, inputsize());
    weights_norm = dot(weights_excluding_biases, weights_excluding_biases);
    train_costs[0] = squared_error + weight_decay*weights_norm;
    train_costs[1] = squared_error;
    train_stats->update(train_costs);
    train_stats->finalize();
}
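
// Because XtX and XtY are cached above, sweeping the ridge penalty is cheap:
// as long as forget() is not called, each subsequent train() skips step (1)
// and only redoes the O(inputsize^3) solve. A hypothetical sketch (setup and
// names are assumed; kept out of the build with #if 0):
#if 0
void sweep_weight_decay(PP<LinearRegressor> regressor, VMat trainset)
{
    regressor->setTrainingSet(trainset);
    real decays[] = { 0, 1e-4, 1e-2, 1 };
    for (int i = 0; i < 4; i++)
    {
        regressor->weight_decay = decays[i];
        regressor->train();  // X'W X and X'W Y are reused after the first call
        // ... evaluate each fit on validation data here ...
    }
}
#endif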

void LinearRegressor::computeOutput(const Vec& actual_input, Vec& output) const
{
    int nout = outputsize();
    output.resize(nout);
    if (input.length()==0)
    {
        // Lazily set up the extended input (1,input) if train() has not
        // allocated it yet.
        extendedinput.resize(1+inputsize());
        input = extendedinput.subVec(1, inputsize());
        extendedinput[0] = 1;
    }
    input << actual_input;
    transposeProduct(output, weights, extendedinput);
}

void LinearRegressor::computeCostsFromOutputs(const Vec& actual_input, const Vec& output,
                                              const Vec& target, Vec& costs) const
{
    costs.resize(2);
    real squared_loss = powdistance(output, target);
    costs[0] = squared_loss + weight_decay*weights_norm;
    costs[1] = squared_loss;
}

TVec<string> LinearRegressor::getTestCostNames() const
{
    return getTrainCostNames();
}

TVec<string> LinearRegressor::getTrainCostNames() const
{
    // The first cost is the training criterion (squared loss plus weight-decay
    // penalty); the second is the plain squared loss.
    TVec<string> names(2);
    names[0] = "mse+penalty";
    names[1] = "mse";
    return names;
}

} // end of namespace PLearn