#include "LinearRegressor.h"
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;

LinearRegressor::LinearRegressor()
    : cholesky(true),
      weight_decay(0)
{}

PLEARN_IMPLEMENT_OBJECT(
    LinearRegressor,
    "Ordinary Least Squares and Ridge Regression, optionally weighted",
    "This class performs OLS (Ordinary Least Squares) and Ridge Regression, optionally on weighted\n"
    "data, by solving the linear equation (X'W X + weight_decay*n_examples*I) theta = X'W Y\n"
    "where X is the (n_examples x (1+inputsize)) matrix of extended inputs (with a 1 in the first column),\n"
    "Y is the (n_examples x targetsize) matrix of targets, W is a diagonal matrix of weights (one per example)\n"
    "{the identity matrix if weightsize()==0 in the training set}, and theta is the resulting\n"
    "set of parameters. W_{ii} is obtained from the weight column of the training set, if any.\n"
    "This column must have width 0 (no weight) or 1.\n"
    "A prediction (computeOutput) is obtained from an input vector as follows:\n"
    "   output = theta * (1,input)\n"
    "The criterion that is minimized by solving the above linear system is the squared loss\n"
    "plus squared norm penalty (weight_decay*sum_{ij} theta_{ij}^2) PER EXAMPLE. This class also measures\n"
    "the ordinary squared loss (||output-target||^2). The two costs are named 'mse+penalty' and 'mse' respectively.\n"
    "Training has two steps: (1) computing X'W X and X'W Y, (2) solving the linear system.\n"
    "The first step takes time O(n_examples*inputsize^2 + n_examples*inputsize*outputsize).\n"
    "The second step takes time O(inputsize^3).\n"
    "If train() is called repeatedly with different values of weight_decay, without intervening\n"
    "calls to forget(), then the first step will be done only once, and only the second step\n"
    "is repeated.\n");

void LinearRegressor::declareOptions(OptionList& ol)
{
    declareOption(ol, "cholesky", &LinearRegressor::cholesky, OptionBase::buildoption,
                  "Whether to use the Cholesky decomposition or not, when solving the linear system.");

    declareOption(ol, "weight_decay", &LinearRegressor::weight_decay, OptionBase::buildoption,
                  "The weight decay is the factor that multiplies the squared norm of the parameters in the loss function.\n");

    declareOption(ol, "weights", &LinearRegressor::weights, OptionBase::learntoption,
                  "The weight matrix, i.e. the parameters computed by training the regressor.\n");

    inherited::declareOptions(ol);
}
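
// With the declarations above, both build options can be set from a PLearn
// script; a hypothetical specification could look like:
//   LinearRegressor( weight_decay = 0.01; cholesky = 1 )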

void LinearRegressor::build_()
{
    // Nothing to do here: all buffers are sized lazily in train() and
    // computeOutput().
}

void LinearRegressor::build()
{
    inherited::build();
    build_();
}

void LinearRegressor::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(extendedinput, copies);
    deepCopyField(input, copies);
    deepCopyField(target, copies);
    deepCopyField(train_costs, copies);
    deepCopyField(XtX, copies);
    deepCopyField(XtY, copies);
    deepCopyField(weights, copies);
}

int LinearRegressor::outputsize() const
{
    int ts = targetsize();
    if (ts >= 0) {
        return ts;
    } else {
        // The targetsize is not yet known (no training set has been declared):
        // report 0 rather than a negative size.
        return 0;
    }
}

void LinearRegressor::forget()
{
    // Emptying XtX and XtY invalidates the cached sufficient statistics,
    // forcing the next train() to recompute X'W X and X'W Y from scratch.
    XtX.resize(0, XtX.width());
    XtY.resize(0, XtY.width());
    sum_squared_y = 0;
    sum_gammas = 0;
}

void LinearRegressor::train()
{
    // If XtX is empty (fresh learner or after forget()), the sufficient
    // statistics X'W X and X'W Y must be (re)computed in step (1).
    bool recompute_XXXY = (XtX.length()==0);
    extendedinput.resize(1+inputsize());
    input = extendedinput.subVec(1, inputsize());
    extendedinput[0] = 1.0;
    target.resize(targetsize());
    weights.resize(extendedinput.length(), target.length());
    if (recompute_XXXY)
    {
        XtX.resize(extendedinput.length(), extendedinput.length());
        XtY.resize(extendedinput.length(), target.length());
    }
    if (!train_stats)
        train_stats = new VecStatsCollector();
    train_stats->forget();

    real squared_error = 0;
    train_costs.resize(2);

    if (train_set->weightsize() <= 0)
    {
        squared_error =
            linearRegression(train_set.subMatColumns(0, inputsize()),
                             train_set.subMatColumns(inputsize(), outputsize()),
                             weight_decay*train_set.length(), weights,
                             !recompute_XXXY, XtX, XtY, sum_squared_y,
                             true, 0, cholesky);
    }
    else if (train_set->weightsize() == 1)
    {
        squared_error =
            weightedLinearRegression(train_set.subMatColumns(0, inputsize()),
                                     train_set.subMatColumns(inputsize(), outputsize()),
                                     train_set.subMatColumns(inputsize()+outputsize(), 1),
                                     weight_decay*train_set.length(), weights,
                                     !recompute_XXXY, XtX, XtY, sum_squared_y,
                                     sum_gammas, true, 0, cholesky);
    }
    else PLERROR("LinearRegressor: expected dataset's weightsize to be either 1 or 0, got %d\n",
                 train_set->weightsize());

    // The penalty is computed on the weights excluding the bias row.
    Mat weights_excluding_biases = weights.subMatRows(1, inputsize());
    weights_norm = dot(weights_excluding_biases, weights_excluding_biases);
    train_costs[0] = squared_error + weight_decay*weights_norm;
    train_costs[1] = squared_error;
    train_stats->update(train_costs);
    train_stats->finalize();
}
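
// Because XtX and XtY are cached above, sweeping the ridge penalty is cheap:
// as long as forget() is not called, each subsequent train() skips step (1)
// and only redoes the O(inputsize^3) solve. A hypothetical sketch (setup and
// names are assumed; kept out of the build with #if 0):
#if 0
void sweep_weight_decay(PP<LinearRegressor> regressor, VMat trainset)
{
    regressor->setTrainingSet(trainset);
    real decays[] = { 0, 1e-4, 1e-2, 1 };
    for (int i = 0; i < 4; i++)
    {
        regressor->weight_decay = decays[i];
        regressor->train();  // X'W X and X'W Y are reused after the first call
        // ... evaluate each fit on validation data here ...
    }
}
#endif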

void LinearRegressor::computeOutput(const Vec& actual_input, Vec& output) const
{
    int nout = outputsize();
    output.resize(nout);
    if (input.length()==0)
    {
        // Lazily set up the extended input (1,input) if train() has not
        // allocated it yet.
        extendedinput.resize(1+inputsize());
        input = extendedinput.subVec(1, inputsize());
        extendedinput[0] = 1;
    }
    input << actual_input;
    transposeProduct(output, weights, extendedinput);
}

void LinearRegressor::computeCostsFromOutputs(const Vec& actual_input, const Vec& output,
                                              const Vec& target, Vec& costs) const
{
    costs.resize(2);
    real squared_loss = powdistance(output, target);
    costs[0] = squared_loss + weight_decay*weights_norm;
    costs[1] = squared_loss;
}

TVec<string> LinearRegressor::getTestCostNames() const
{
    return getTrainCostNames();
}

TVec<string> LinearRegressor::getTrainCostNames() const
{
    // The first cost is the training criterion (squared loss plus weight-decay
    // penalty); the second is the plain squared loss.
    TVec<string> names(2);
    names[0] = "mse+penalty";
    names[1] = "mse";
    return names;
}

} // end of namespace PLearn