// ProjectionErrorVariable.cc
//
// Part of the PLearn library. (This listing was recovered from a
// Doxygen-generated page; original license header not shown.)
#include "ProjectionErrorVariable.h"
#include "Var_operators.h"
#include <plearn/math/plapack.h>
namespace PLearn {
00049
using namespace std;
00050
00051
00054
PLEARN_IMPLEMENT_OBJECT(ProjectionErrorVariable,
00055
"Computes the projection error of a set of vectors on a non-orthogonal basis.\n",
00056
"The first input is a set of n_dim vectors (possibly seen as a single vector of their concatenation) f_i, each in R^n\n"
00057
"The second input is a set of T vectors (possibly seen as a single vector of their concatenation) t_j, each in R^n\n"
00058
"There are several options that control which kind of projection error is actually computed:\n"
00059
"If !use_subspace_distance {the recommended setting}, the output is\n"
00060
" sum_j min_w || t_j - sum_i w_i f_i ||^2 / ||t_j||^2\n"
00061
" where the denominator can be eliminated (not recommended) by turning off the\n"
00062
" normalize_by_neighbor_distance option. In this expression, w is a local\n"
00063
" n_dim-vector that is optmized analytically.\n"
00064
" If the 'ordered_vectors' is set, the gradient is not computed truthfully\n"
00065
" but in such a way as to induce a natural ordering among the vectors f_i.\n"
00066
" For each f_i, the above criterion is applied using a projection that\n"
00067
" involves only the first i vectors f_1...f_i. In this way the first vector f_1\n"
00068
" tries to *explain* the vectors t_j as well as possible with a single dimension,\n"
00069
" and the vector f_2 learns to *explain* what f_2 did not already predict, etc...\n"
00070
" When this option is set, we also choose the w_i in the same greedy way, starting\n"
00071
" from w_1 chosen to minimize the projection error wrt f_1, w_2 chosen to minimize the\n"
00072
" residual projection error left on f_2, etc... Hence the cost minimized wrt f_k on neighbor j is\n"
00073
" ||t_j - sum_{i<=k} w_i f_i||^2 / ||t_j||^2\n"
00074
" (this cost is minimized to choose w_k, and to get a gradient on f_k as well).\n"
00075
" In that case no SVD is used, instead one obtains an analytic solution for w_k:\n"
00076
" w_k = (t_j . f_k - sum_{i<k} w_i f_i . f_k)/||f_k||^2.\n"
00077
" The output produced by fprop is sum_j || t_j - sum_i w_i f_i ||^2 / ||t_j||^2\n"
00078
" where the w_i are chosen as in the previous equation.\n"
00079
"However, if use_subspace_distance (not recommended), the output is\n"
00080
" min_{w,u} || sum_i w_i f_i - sum_j u_j t_j ||^2 .\n"
00081
"In both cases, if norm_penalization>0, an extra term is added:\n"
00082
" norm_penalization * sum_i (||f_i||^2 - 1)^2.\n"
00083
"The 'epsilon' and 'regularization' options are used to regularize the SVD-based matrix\n"
00084
"inversion involved in minimizing for w: only the singular values of F' that are\n"
00085
"above 'epsilon' are inverted (and their singular vectors considered, and then they\n"
00086
"are incremented by 'regularization' before inverting.\n"
00087 );
00088
//! Constructor.
//! input1: the n_dim basis vectors f_i (matrix, or flattened concatenation).
//! input2: the T target vectors t_j (matrix, or flattened concatenation).
//! n_: dimension of the ambient space; may be -1 when the inputs are
//!     matrices (it is then inferred in build_ from input1->width()).
//! The remaining flags select the cost variant described in the class help.
//! The output of this variable is a 1x1 matrix (see inherited(...,1,1)).
ProjectionErrorVariable::ProjectionErrorVariable(
    Variable* input1, Variable* input2, int n_,
    bool normalize_by_neighbor_distance_,
    bool use_subspace_distance_,
    real norm_penalization_, real epsilon_,
    real regularization_, bool ordered_vectors_)
    : inherited(input1, input2, 1, 1), n(n_), use_subspace_distance(use_subspace_distance_),
      normalize_by_neighbor_distance(normalize_by_neighbor_distance_), norm_penalization(norm_penalization_),
      epsilon(epsilon_), regularization(regularization_), ordered_vectors(ordered_vectors_)
{
    build_();
}
//! Standard PLearn two-stage build: run the parent's build, then this
//! class's own build_() (which sizes all the work buffers).
void
ProjectionErrorVariable::build()
{
    inherited::build();
    build_();
}
void
00109 ProjectionErrorVariable::build_()
00110 {
00111
if (input1 && input2) {
00112
if ((input1->
length()==1 && input1->
width()>1) ||
00113 (input1->
width()==1 && input1->
length()>1))
00114 {
00115
if (
n<0)
PLERROR(
"ProjectionErrorVariable: Either the input should be matrices or n should be specified\n");
00116
n_dim = input1->size()/
n;
00117
if (
n_dim*
n != input1->size())
00118
PLERROR(
"ProjectErrorVariable: the first input size should be an integer multiple of n");
00119 }
00120
else
00121
n_dim = input1->
length();
00122
if ((input2->
length()==1 && input2->
width()>1) ||
00123 (input2->
width()==1 && input2->
length()>1))
00124 {
00125
if (
n<0)
PLERROR(
"ProjectionErrorVariable: Either the input should be matrices or n should be specified\n");
00126
T = input2->size()/
n;
00127
if (
T*
n != input2->size())
00128
PLERROR(
"ProjectErrorVariable: the second input size should be an integer multiple of n");
00129 }
00130
else
00131
T = input2->
length();
00132
00133
F = input1->value.toMat(
n_dim,
n);
00134
dF = input1->gradient.toMat(
n_dim,
n);
00135
TT = input2->value.toMat(
T,
n);
00136
if (
n<0)
n = input1->
width();
00137
if (input2->
width()!=
n)
00138
PLERROR(
"ProjectErrorVariable: the two arguments have inconsistant sizes");
00139
if (
n_dim>
n)
00140
PLERROR(
"ProjectErrorVariable: n_dim should be less than data dimension n");
00141
if (!
use_subspace_distance)
00142 {
00143
if (
ordered_vectors)
00144 {
00145
norm_f.
resize(
n_dim);
00146 }
00147
else
00148 {
00149
V.
resize(
n_dim,
n_dim);
00150
Ut.
resize(
n,
n);
00151
B.
resize(
n_dim,
n);
00152
VVt.
resize(
n_dim,
n_dim);
00153 }
00154
fw_minus_t.
resize(
T,
n);
00155
w.
resize(
T,
n_dim);
00156
one_over_norm_T.
resize(
T);
00157 }
00158
else
00159 {
00160
wwuu.
resize(
n_dim+
T);
00161
ww =
wwuu.
subVec(0,
n_dim);
00162
uu =
wwuu.
subVec(
n_dim,T);
00163
wwuuM =
wwuu.
toMat(1,
n_dim+T);
00164
rhs.
resize(
n_dim+T);
00165
rhs.
subVec(0,
n_dim).fill(-1.0);
00166
A.
resize(
n_dim+T,
n_dim+T);
00167
A11 =
A.
subMat(0,0,
n_dim,
n_dim);
00168
A12 =
A.
subMat(0,
n_dim,
n_dim,T);
00169
A21 =
A.
subMat(
n_dim,0,T,
n_dim);
00170
A22 =
A.
subMat(
n_dim,
n_dim,T,T);
00171
Tu.
resize(
n);
00172
FT.
resize(
n_dim+T,
n);
00173
FT1 =
FT.
subMat(0,0,
n_dim,
n);
00174
FT2 =
FT.
subMat(
n_dim,0,T,
n);
00175
Ut.
resize(
n,
n);
00176
V.
resize(
n_dim+T,
n_dim+T);
00177 }
00178
fw.
resize(
n);
00179
if (
norm_penalization>0)
00180
norm_err.
resize(
n_dim);
00181 }
00182 }
00183
00184
00185 void ProjectionErrorVariable::recomputeSize(
int& len,
int& wid)
const
00186
{
00187 len = 1;
00188 wid = 1;
00189 }
00190
//! Computes the projection-error cost described in the class help and
//! stores it, averaged over the T target vectors, in value[0].
//! Intermediate results (w, fw_minus_t, one_over_norm_T, norm_err) are
//! kept in member buffers because bprop() reuses them.
void ProjectionErrorVariable::fprop()
{
    real cost = 0;
    if (use_subspace_distance)
    {
        // Subspace-distance mode: minimize ||sum_i w_i f_i - sum_j u_j t_j||^2
        // jointly over (w,u), via the SVD of the stacked matrix [F; -TT].
        FT1 << F;                 // top block: the basis vectors f_i
        multiply(FT2, TT, -1.0);  // bottom block: the negated targets -t_j
        // NOTE(review): assumes PLearn's lapackSVD(A, Ut, S, V) output
        // convention -- confirm against plapack.h.
        lapackSVD(FT, Ut, S, V);
        wwuu.clear();
        for (int k=0; k<S.length(); k++)
        {
            real s_k = S[k];
            // Regularize before inverting; only singular values above
            // 'epsilon' contribute (see class help).
            real sv = s_k+regularization;
            real coef = 1/(sv * sv);
            if (s_k>epsilon)
            {
                real sum_first_elements = 0;
                for (int j=0; j<n_dim; j++)
                    sum_first_elements += V(j, k);
                for (int i=0; i<n_dim+T; i++)
                    wwuu[i] += V(i, k) * sum_first_elements * coef;
            }
        }

        // Sanity check of the normal equations; disabled by default,
        // meant to be flipped on in a debugger.
        static bool debugging = false;
        if (debugging)
        {
            productTranspose(A11, F, F);
            productTranspose(A12, F, TT);
            A12 *= -1.0;
            Vec res(ww.length());
            product(res, A11, ww);
            productAcc(res, A12, uu);
            res -= 1.0;
            cout << "norm of error in w equations: " << norm(res) << endl;
            Vec res2(uu.length());
            transposeProduct(res2, A12, ww);
            productTranspose(A22, TT, TT);
            productAcc(res2, A22, uu);
            cout << "norm of error in u equations: " << norm(res2) << endl;
        }

        // Rescale so the w part sums to 1 (rules out the trivial w=u=0
        // solution). NOTE(review): no guard against sum(ww) being ~0 here.
        real wnorm = sum(ww);
        wwuu *= 1.0/wnorm;

        // cost = || F'w - TT'u ||^2 ; fw is kept for bprop.
        transposeProduct(fw, F, ww);
        transposeProduct(Tu, TT, uu);
        fw -= Tu;
        cost = pownorm(fw);
    }
    else if (ordered_vectors)
    {
        // Greedy/ordered mode: each w_k is obtained in closed form,
        //   w_k = (t_j . f_k - sum_{i<k} w_i f_i . f_k)/||f_k||^2,
        // so no SVD is needed. Cache 1/||f_k||^2 first.
        for (int k=0; k<n_dim; k++)
        {
            Vec fk = F(k);
            norm_f[k] = 1.0/pownorm(fk);
        }
        for (int j=0; j<T; j++)
        {
            Vec tj = TT(j);
            Vec wj = w(j);
            // Solve greedily for wj, one coefficient at a time.
            for (int k=0; k<n_dim; k++)
            {
                Vec fk = F(k);
                real s = dot(tj, fk);
                for (int i=0; i<k; i++)
                    s -= wj[i] * dot(F(i), fk);
                wj[k] = s * norm_f[k];
            }
            transposeProduct(fw, F, wj);     // fw = F' wj (full projection)
            Vec fw_minus_tj = fw_minus_t(j);
            substract(fw, tj, fw_minus_tj);  // residual, reused in bprop
            if (normalize_by_neighbor_distance)
            {
                one_over_norm_T[j] = 1.0/pownorm(tj); // reused in bprop
                cost += sumsquare(fw_minus_tj)*one_over_norm_T[j];
            }
            else
                cost += sumsquare(fw_minus_tj);
        }
    }
    else
    {
        // Default mode: solve min_w ||t_j - F'w||^2 for every target via a
        // regularized SVD pseudo-inverse B of F, computed once per fprop.
        static Mat F_copy;  // lapackSVD destroys its input, so copy F first
        F_copy.resize(F.length(), F.width());
        F_copy << F;
        // NOTE(review): assumes lapackSVD(A, Ut, S, V) convention as above.
        lapackSVD(F_copy, Ut, S, V);
        B.clear();
        for (int k=0; k<S.length(); k++)
        {
            real s_k = S[k];
            if (s_k>epsilon)   // drop directions with tiny singular values
            {
                s_k += regularization;
                real coef = 1/s_k;
                // Accumulate B += v_k (1/s_k) u_k' , i.e. the pseudo-inverse.
                for (int i=0; i<n_dim; i++)
                {
                    real* Bi = B[i];
                    for (int j=0; j<n; j++)
                        Bi[j] += V(i, k)*Ut(k, j)*coef;
                }
            }
        }
        for (int j=0; j<T; j++)
        {
            Vec tj = TT(j);
            Vec wj = w(j);
            product(wj, B, tj);            // wj = B tj (least-squares weights)
            transposeProduct(fw, F, wj);   // fw = F' wj
            Vec fw_minus_tj = fw_minus_t(j);
            substract(fw, tj, fw_minus_tj);
            if (normalize_by_neighbor_distance)
            {
                one_over_norm_T[j] = 1.0/pownorm(tj);
                cost += sumsquare(fw_minus_tj)*one_over_norm_T[j];
            }
            else
                cost += sumsquare(fw_minus_tj);
        }
    }
    // Optional penalty pulling each ||f_i|| towards 1; norm_err is
    // reused by bprop.
    if (norm_penalization>0)
    {
        real penalization=0;
        for (int i=0; i<n_dim; i++)
        {
            Vec f_i = F(i);
            norm_err[i] = pownorm(f_i)-1;
            penalization += norm_err[i]*norm_err[i];
        }
        cost += norm_penalization*penalization;
    }
    value[0] = cost/real(T);  // average the total cost over the T targets
}
//! Accumulates into dF (the gradient on the basis vectors) the gradient of
//! the cost computed by fprop(), scaled by gradient[0]. The w_i weights are
//! treated as constants (their analytic optimality makes their own gradient
//! contribution vanish, per the class help). Relies on the buffers filled by
//! fprop(): w, fw_minus_t, one_over_norm_T, fw, norm_err.
void ProjectionErrorVariable::bprop()
{
    if (use_subspace_distance)
    {
        // d/dF ||F'w - TT'u||^2 accumulates as (outer product) w (fw)'.
        externalProductScaleAcc(dF, ww, fw, gradient[0]);
        if (norm_penalization>0)
            // NOTE(review): unlike the branch below, this gradient is not
            // divided by T -- confirm whether that asymmetry is intended.
            for (int i=0; i<n_dim; i++)
            {
                Vec df_i = dF(i);
                multiplyAcc(df_i, F(i), gradient[0]*norm_penalization*2*norm_err[i]);
            }
    }
    else if (ordered_vectors)
    {
        // Greedy mode: the gradient on f_k uses the *partial* residual
        // t_j - sum_{i<=k} w_i f_i, so fw is rebuilt incrementally.
        for (int j=0; j<T; j++)
        {
            fw.clear();
            Vec wj = w(j);
            Vec fw_minus_tj = fw_minus_t(j);
            Vec tj = TT(j);
            for (int k=0; k<n_dim; k++)
            {
                Vec f_k = F(k);
                Vec df_k = dF(k);
                multiplyAcc(fw, f_k, wj[k]);      // fw = sum_{i<=k} w_i f_i
                substract(fw, tj, fw_minus_tj);   // partial residual for f_k
                if (normalize_by_neighbor_distance)
                    // df_k += 2 w_k (fw - t_j) / (||t_j||^2 T) * gradient
                    multiplyAcc(df_k, fw_minus_tj, gradient[0] * wj[k] * 2 * one_over_norm_T[j]/real(T));
                else
                    multiplyAcc(df_k, fw_minus_tj, gradient[0] * wj[k] * 2/real(T));
            }
        }
    }
    else
    {
        // Default mode: residuals were fully computed in fprop, so just
        // accumulate df_i += 2 w_i (F'w - t_j) (normalized, averaged over T).
        for (int j=0; j<T; j++)
        {
            Vec fw_minus_tj = fw_minus_t(j);
            Vec wj = w(j);
            for (int i=0; i<n_dim; i++)
            {
                Vec df_i = dF(i);
                if (normalize_by_neighbor_distance)
                    multiplyAcc(df_i, fw_minus_tj, gradient[0] * wj[i]*2*one_over_norm_T[j]/real(T));
                else
                    multiplyAcc(df_i, fw_minus_tj, gradient[0] * wj[i]*2/real(T));
                if (norm_penalization>0)
                    // Gradient of norm_penalization * (||f_i||^2 - 1)^2.
                    multiplyAcc(df_i, F(i), gradient[0]*norm_penalization*2*norm_err[i]/real(T));
            }
        }
    }
}
//! Symbolic (graph-level) backprop is not supported for this variable;
//! any attempt to use it aborts with an error.
void ProjectionErrorVariable::symbolicBprop()
{
    PLERROR("Not implemented");
}
} // end of namespace PLearn