#include "GaussianProcessRegressor.h"
#include <plearn/math/pl_erf.h>
#include <plearn/math/plapack.h>

namespace PLearn {
using namespace std;

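// GaussianProcessRegressor performs Gaussian Process regression: train()
// builds (and optionally normalizes) the Gram matrix K of the training
// inputs, and predictions apply (K + noise_sd^2 I)^{-1} through the
// eigendecomposition of K (see inverseCovTimesVec and QFormInverse below).
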
GaussianProcessRegressor::GaussianProcessRegressor() :
    inherited(), Gram_matrix_normalization("none"),
    max_nb_evectors(-1)
{}

PLEARN_IMPLEMENT_OBJECT(
    GaussianProcessRegressor,
    "Basic version of Gaussian Process regression.",
    "NO HELP");

void GaussianProcessRegressor::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(kernel, copies);
    deepCopyField(noise_sd, copies);
    deepCopyField(alpha, copies);
    deepCopyField(Kxxi, copies);
    deepCopyField(Kxx, copies);
    deepCopyField(K, copies);
    deepCopyField(eigenvectors, copies);
    deepCopyField(eigenvalues, copies);
    deepCopyField(meanK, copies);
}

void GaussianProcessRegressor::setInput(const Vec& input)
{
    setInput_const(input);
}

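// Cache, for a test input x, the kernel values Kxxi[i] = K(x, x_i) against
// every training point and the self-kernel value Kxx = K(x, x), then apply
// the same Gram-matrix normalization used at train time so that test-time
// kernel values are consistent with the normalized training Gram matrix.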
void GaussianProcessRegressor::setInput_const(const Vec& input) const
{
    for (int i=0; i<Kxxi.length(); i++)
        Kxxi[i] = kernel->evaluate_x_i(input, i);
    Kxx = kernel->evaluate(input, input);

    if (Gram_matrix_normalization == "centering_a_dotproduct")
    {
        real kmean = mean(Kxxi);
        for (int i=0; i<Kxxi.length(); i++)
            Kxxi[i] = Kxxi[i] - kmean - meanK[i] + mean_allK;
        Kxx = Kxx - kmean - kmean + mean_allK;
    }
    else if (Gram_matrix_normalization == "centering_a_distance")
    {
        real kmean = mean(Kxxi);
        for (int i=0; i<Kxxi.length(); i++)
            Kxxi[i] = -0.5*(Kxxi[i] - kmean - meanK[i] + mean_allK);
        Kxx = -0.5*(Kxx - kmean - kmean + mean_allK);
    }
    else if (Gram_matrix_normalization == "divisive")
    {
        real kmean = mean(Kxxi);
        for (int i=0; i<Kxxi.length(); i++)
            Kxxi[i] = Kxxi[i] / sqrt(kmean*meanK[i]);
        Kxx = Kxx / kmean;
    }
}

void GaussianProcessRegressor::declareOptions(OptionList& ol)
{
    declareOption(ol, "kernel", &GaussianProcessRegressor::kernel, OptionBase::buildoption,
                  "The kernel (seen as a symmetric, two-argument function of a pair of input points)\n"
                  "that corresponds to the prior covariance on the function to be learned.\n");

    declareOption(ol, "noise_sd", &GaussianProcessRegressor::noise_sd, OptionBase::buildoption,
                  "Output noise standard deviation (one element per output).\n");

    declareOption(ol, "max_nb_evectors", &GaussianProcessRegressor::max_nb_evectors, OptionBase::buildoption,
                  "Maximum number of eigenvectors of the Gram matrix to compute (or -1 if all should be computed).\n");

    declareOption(ol, "Gram_matrix_normalization", &GaussianProcessRegressor::Gram_matrix_normalization,
                  OptionBase::buildoption,
                  "Normalization method to apply to the Gram matrix. Expected values are:\n"
                  "\"none\": no normalization\n"
                  "\"centering_a_dotproduct\": this is the kernel PCA centering\n"
                  "     K_{ij} <-- K_{ij} - mean_i(K_ij) - mean_j(K_ij) + mean_{ij}(K_ij)\n"
                  "\"centering_a_distance\": this is the MDS transformation of squared distances to dot products\n"
                  "     K_{ij} <-- -0.5(K_{ij} - mean_i(K_ij) - mean_j(K_ij) + mean_{ij}(K_ij))\n"
                  "\"divisive\": this is the spectral clustering and Laplacian eigenmaps normalization\n"
                  "     K_{ij} <-- K_{ij}/sqrt(mean_i(K_ij) mean_j(K_ij))\n");

    inherited::declareOptions(ol);
}

void GaussianProcessRegressor::build_()
{
    if (expdir != "")
    {
        if (!force_mkdir(expdir))
            PLERROR("In GaussianProcessRegressor: could not create experiment directory %s",
                    expdir.c_str());
        expdir = abspath(expdir);
    }

    if (train_set)
    {
        int l = train_set->length();
        // n_outputs must be set before sizing alpha, since the number of
        // coefficient rows (one per output) depends on it.
        n_outputs = train_set->targetsize();
        K.resize(l, l);
        Kxxi.resize(l);
        alpha.resize(n_outputs, l);
        meanK.resize(l);
    }
}

int GaussianProcessRegressor::outputsize() const
{
    int output_size = 0;
    if (outputs_def.find("e") != string::npos)
        output_size += n_outputs;
    if (outputs_def.find("v") != string::npos)
        output_size += n_outputs;
    return output_size;
}

void GaussianProcessRegressor::build()
{
    inherited::build();
    build_();
}

void GaussianProcessRegressor::forget()
{
    stage = 0;
}

GaussianProcessRegressor::~GaussianProcessRegressor()
{
}

TVec<string> GaussianProcessRegressor::getTrainCostNames() const
{
    TVec<string> names(2);
    names[0] = "log-likelihood";
    names[1] = "mse";
    return names;
}

TVec<string> GaussianProcessRegressor::getTestCostNames() const
{ return getTrainCostNames(); }

int GaussianProcessRegressor::getTestCostIndex(const string& costname) const
{
    TVec<string> costnames = getTestCostNames();
    for (int i=0; i<costnames.length(); i++)
        if (costnames[i] == costname)
            return i;
    return -1;
}

int GaussianProcessRegressor::getTrainCostIndex(const string& costname) const
{
    TVec<string> costnames = getTrainCostNames();
    for (int i=0; i<costnames.length(); i++)
        if (costnames[i] == costname)
            return i;
    return -1;
}

double GaussianProcessRegressor::log_density(const Vec& y) const
{
    PLERROR("GaussianProcessRegressor::log_density not implemented yet");
    return 0;
}

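// Posterior mean of each output at the input set by setInput_const:
//   E[y_i | x] = Kxxi . alpha(i),
// where alpha(i) = (K + noise_sd[i]^2 I)^{-1} targets_i was computed by train().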
void GaussianProcessRegressor::expectation(Vec expected_y) const
{
    for (int i=0; i<n_outputs; i++)
        expected_y[i] = dot(Kxxi, alpha(i));
}

Vec GaussianProcessRegressor::expectation() const
{
    static Vec expected_target;
    expected_target.resize(n_outputs);
    expectation(expected_target);
    return expected_target;
}

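// Diagonal of the posterior predictive covariance, one entry per output:
//   Var[y_i | x] = K(x,x) - Kxxi' (K + noise_sd[i]^2 I)^{-1} Kxxi.
// The observation noise variance is not added here; computeCostsFromOutputs
// adds it back when evaluating the log-likelihood.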
void GaussianProcessRegressor::variance(Vec diag_variances) const
{
    for (int i=0; i<n_outputs; i++)
    {
        real v = Kxx;
        v -= QFormInverse(noise_sd[i]*noise_sd[i], Kxxi);
        diag_variances[i] = v;
    }
}

Mat GaussianProcessRegressor::variance() const
{
    static Mat var;
    if (var.length() != n_outputs)
    {
        var.resize(n_outputs, n_outputs);
        var.clear();
    }
    for (int i=0; i<n_outputs; i++)
    {
        real v = Kxx;
        v -= QFormInverse(noise_sd[i]*noise_sd[i], Kxxi);
        var(i,i) = v;
    }
    return var;
}

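// The output vector concatenates the requested statistics: the n_outputs
// expectations if outputs_def contains "e", followed by the n_outputs
// variances if it contains "v".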
void GaussianProcessRegressor::computeOutput(const Vec& input, Vec& output) const
{
    setInput_const(input);
    int i0 = 0;
    if (outputs_def.find("e") != string::npos)
    {
        expectation(output.subVec(i0, n_outputs));
        i0 += n_outputs;
    }
    if (outputs_def.find("v") != string::npos)
    {
        variance(output.subVec(i0, n_outputs));
        i0 += n_outputs;
    }
}

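// Costs for one example: the log-likelihood of the target under the
// predictive distribution N(mu_i, var_i + noise_sd[i]^2) summed over
// outputs, and the squared error. The expectation and variance are read
// back from the output vector when present, and recomputed otherwise.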
void GaussianProcessRegressor::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                                       const Vec& target, Vec& costs) const
{
    Vec mu;
    static Vec var;
    int i0 = 0;
    if (outputs_def.find("e") != string::npos)
    {
        mu = output.subVec(i0, n_outputs);
        i0 += n_outputs;
    }
    else
        mu = expectation();
    if (outputs_def.find("v") != string::npos)
    {
        var = output.subVec(i0, n_outputs);
        i0 += n_outputs;
    }
    else
    {
        var.resize(n_outputs);
        variance(var);
    }
    real mse = 0;
    real logdensity = 0;
    for (int i=0; i<n_outputs; i++)
    {
        real diff = mu[i] - target[i];
        mse += diff*diff;
        logdensity += gauss_log_density_var(target[i], mu[i], var[i] + noise_sd[i]*noise_sd[i]);
    }
    // The order must match getTrainCostNames(): [0]="log-likelihood", [1]="mse".
    costs[0] = logdensity;
    costs[1] = mse;
}

void GaussianProcessRegressor::computeOutputAndCosts(const Vec& input, const Vec& target,
                                                     Vec& output, Vec& costs) const
{
    computeOutput(input, output);
    computeCostsFromOutputs(input, output, target, costs);
}

void GaussianProcessRegressor::computeCostsOnly(const Vec& input, const Vec& target, Vec& costs) const
{
    static Vec tmp_output;
    tmp_output.resize(outputsize());
    computeOutputAndCosts(input, target, tmp_output, costs);
}

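// Training: compute the Gram matrix K on the training inputs, normalize it
// in place if requested (filling both triangles by symmetry), eigendecompose
// it, and solve (K + noise_sd[i]^2 I) alpha(i) = targets_i for each output
// via inverseCovTimesVec.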
void GaussianProcessRegressor::train()
{
    int l = K.length();
    VMat input_rows = train_set.subMatColumns(0, inputsize());
    VMat target_rows = train_set.subMatColumns(inputsize(), targetsize());
    kernel->setDataForKernelMatrix(input_rows);
    kernel->computeGramMatrix(K);

    if (Gram_matrix_normalization == "centering_a_dotproduct")
    {
        columnMean(K, meanK);
        mean_allK = mean(meanK);
        int m = K.mod();
        for (int i=0; i<l; i++)
        {
            real* Ki = K[i];
            real* Kji_ = &K[0][i];
            for (int j=0; j<=i; j++, Kji_+=m)
            {
                real Kij = Ki[j] - meanK[i] - meanK[j] + mean_allK;
                Ki[j] = Kij;
                if (j<i)
                    *Kji_ = Kij;   // mirror into the upper triangle
            }
        }
    }
    else if (Gram_matrix_normalization == "centering_a_distance")
    {
        columnMean(K, meanK);
        mean_allK = mean(meanK);
        int m = K.mod();
        for (int i=0; i<l; i++)
        {
            real* Ki = K[i];
            real* Kji_ = &K[0][i];
            for (int j=0; j<=i; j++, Kji_+=m)
            {
                real Kij = -0.5*(Ki[j] - meanK[i] - meanK[j] + mean_allK);
                Ki[j] = Kij;
                if (j<i)
                    *Kji_ = Kij;
            }
        }
    }
    else if (Gram_matrix_normalization == "divisive")
    {
        columnMean(K, meanK);
        int m = K.mod();
        for (int i=0; i<l; i++)
        {
            real* Ki = K[i];
            real* Kji_ = &K[0][i];
            for (int j=0; j<=i; j++, Kji_+=m)
            {
                real Kij = Ki[j] / sqrt(meanK[i]*meanK[j]);
                Ki[j] = Kij;
                if (j<i)
                    *Kji_ = Kij;
            }
        }
    }

    // Eigendecompose the (normalized) Gram matrix, then solve
    // (K + noise_sd[i]^2 I) alpha(i) = targets_i for each output.
    int n_components = (max_nb_evectors < 0 || max_nb_evectors > l) ? l : max_nb_evectors;
    eigenVecOfSymmMat(K, n_components, eigenvalues, eigenvectors);

    for (int i=0; i<n_outputs; i++)
    {
        VMat target_column = target_rows.subMatColumns(i, 1);
        inverseCovTimesVec(noise_sd[i]*noise_sd[i], target_column.toMat().toVec(), alpha(i));
    }
}

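// Constant and log-determinant terms of the negative log marginal likelihood
// of the training targets,
//   0.5 * sum_i [ l*log(2*pi) + log det(K + noise_sd[i]^2 I) ],
// where log det is computed from the m available eigenvalues and the
// remaining l-m eigenvalues of K are taken to be 0. Note that the
// data-dependent quadratic term of the likelihood is not included here.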
real GaussianProcessRegressor::BayesianCost()
{
    int l = K.length();
    int m = eigenvectors.length();
    real nll = l * n_outputs * Log2Pi;
    for (int i=0; i<n_outputs; i++)
    {
        real sigma2_i = noise_sd[i]*noise_sd[i];
        // The l-m eigenvalues that were not computed are treated as 0,
        // so each contributes log(0 + sigma2_i).
        if (m < l)
            nll += (l-m) * safeflog(sigma2_i);
        for (int j=0; j<m; j++)
            nll += safeflog(eigenvalues[j] + sigma2_i);
    }
    nll *= 0.5;
    return nll;
}

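// Compute Cinv_u = (K + sigma2 I)^{-1} u from the eigendecomposition
// K = sum_i lambda_i v_i v_i' (the v_i are the rows of 'eigenvectors'):
//   C^{-1} u = u/sigma2 + sum_i (1/(lambda_i+sigma2) - 1/sigma2) (v_i.u) v_i
// This is exact when all eigenvectors are computed, and treats truncated
// directions as having eigenvalue 0 otherwise.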
void GaussianProcessRegressor::inverseCovTimesVec(real sigma2, Vec u, Vec Cinv_u) const
{
    int m = eigenvectors.length();
    real one_over_sigma2 = 1.0/sigma2;
    multiply(u, one_over_sigma2, Cinv_u);
    for (int i=0; i<m; i++)
    {
        Vec v_i = eigenvectors(i);
        real proj = dot(v_i, u);
        multiplyAdd(Cinv_u, v_i, proj*(1.0/(eigenvalues[i]+sigma2) - one_over_sigma2), Cinv_u);
    }
}

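// Quadratic form u' (K + sigma2 I)^{-1} u, using the same eigendecomposition
// identity as inverseCovTimesVec:
//   u' C^{-1} u = ||u||^2/sigma2 + sum_i (1/(lambda_i+sigma2) - 1/sigma2) (v_i.u)^2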
real GaussianProcessRegressor::QFormInverse(real sigma2, Vec u) const
{
    int m = eigenvectors.length();
    real one_over_sigma2 = 1.0/sigma2;
    real qf = pownorm(u)*one_over_sigma2;  // ||u||^2 / sigma2: the quadratic form needs the squared norm
    for (int i=0; i<m; i++)
    {
        Vec v_i = eigenvectors(i);
        real proj = dot(v_i, u);
        qf += (1.0/(eigenvalues[i]+sigma2) - one_over_sigma2) * proj*proj;
    }
    return qf;
}

} // end of namespace PLearn