#include "TangentLearner.h"
#include <plearn/var/ProjectionErrorVariable.h>
#include <plearn/vmat/LocalNeighborsDifferencesVMatrix.h>
#include <plearn/var/ProductVariable.h>
#include <plearn/var/PlusVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/math/random.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/DiagonalizedFactorsProductVariable.h>
#include <plearn/math/plapack.h>

namespace PLearn {
using namespace std;

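// Note on the routine below (reading inferred from the code itself, not from
// separate documentation): for each of the n rows of the result, it fills a
// w x w matrix with w training rows drawn uniformly at random, regularizes
// that matrix, and solves the linear system temp * r = (c,...,c) for r. The
// resulting rows are used by initializeParams() to seed V when the
// smart_initialization option is set.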
Mat smartInitialization(VMat v, int n, real c, real regularization)
{
    int l = v->length();
    int w = v->width();

    Mat result(n,w);
    Mat temp(w,w);
    Vec b(w);
    b << c;

    int i,j;

    for (i=0;i<n;++i)
    {
        temp.clear();
        for (j=0;j<w;++j)
        {
            v->getRow(uniform_multinomial_sample(l), temp(j));
        }
        regularizeMatrix(temp, regularization);
        result(i) << solveLinearSystem(temp, b);
    }
    return result;
}

TangentLearner::TangentLearner()
    : training_targets("local_neighbors"), use_subspace_distance(false),
      normalize_by_neighbor_distance(true), ordered_vectors(false),
      smart_initialization(0), initialization_regularization(1e-3),
      n_neighbors(5), n_dim(1), architecture_type("single_neural_network"),
      output_type("tangent_plane"), n_hidden_units(-1), batch_size(1),
      norm_penalization(0), svd_threshold(1e-5),
      projection_error_regularization(0)
{
}

PLEARN_IMPLEMENT_OBJECT(
    TangentLearner,
    "Learns local tangent plane of the manifold near which the data lie.",
    "This learner models a manifold near which the data are supposed to lie.\n"
    "The manifold is represented by a function which predicts a basis for the\n"
    "tangent planes at each point x, given x in R^n. Let f_i(x) be the predicted i-th tangent\n"
    "vector (in R^n). Then we will optimize the parameters that define the d functions f_i by\n"
    "pushing the f_i so that they span the local tangent directions. Three criteria are\n"
    "possible, according to the 'training_targets', 'normalize_by_neighbor_distance' and\n"
    "'use_subspace_distance' options. The default criterion is the recommended one, with\n"
    "  training_targets='local_neighbors', normalize_by_neighbor_distance=1,\n"
    "and use_subspace_distance=0 (it really did not work well in our experiments with\n"
    "use_subspace_distance=1). This corresponds to the following cost function:\n"
    "   sum_x sum_j min_w ||t(x,j) - sum_i w_i f_i(x)||^2 / ||t(x,j)||^2\n"
    "where x is an example, t(x,j) is the difference vector between x and its j-th neighbor,\n"
    "and the w_i are chosen freely for each j and x and correspond to the weights given to\n"
    "each basis vector f_i(x) to obtain the projection of t(x,j) on the tangent plane.\n"
    "More generally, if use_subspace_distance,\n"
    "   criterion = min_{w,u} || sum_i w_i f_i - sum_j u_j t(x,j) ||^2\n"
    "               under the constraint that ||w||=1.\n"
    "else\n"
    "   criterion = sum_x sum_j min_w ||t(x,j) - sum_i w_i f_i(x)||^2 / ||t(x,j)||^2\n"
    "where the first sum is over training examples and w is a free d-vector,\n"
    "t(x,j) estimates local tangent directions based on near neighbors, and the denominator\n"
    "||t(x,j)||^2 is optional (normalize_by_neighbor_distance). t(x,j)\n"
    "is defined according to the training_targets option:\n"
    "  'local_evectors' : local principal components (based on n_neighbors of x)\n"
    "  'local_neighbors': difference between x and its n_neighbors.\n"
    "An additional criterion option that applies only to use_subspace_distance=0 is\n"
    "the ordered_vectors option, which applies a separate cost to each of the f_i:\n"
    "the f_1 vector tries to make the projection of t(x,j) on f_1 close to t(x,j), while\n"
    "the f_2 vector tries to make the projection of t(x,j) on the (f_1,f_2) basis close to t(x,j),\n"
    "etc... i.e. the gradient on f_i is computed based on a cost that involves only\n"
    "the projection on the first i vectors. This is analogous to principal component analysis:\n"
    "the first vector tries to capture as much as possible of the variance, the second as much\n"
    "as possible of the remaining variance, etc...\n"
    "Different architectures are possible for the f_i(x) (architecture_type option):\n"
    "   - multi_neural_network: one neural net per basis function\n"
    "   - single_neural_network: single neural network with matrix output (one row per basis vector)\n"
    "   - linear: F_{ij}(x) = sum_k A_{ijk} x_k\n"
    "   - embedding_neural_network: the embedding function e_k(x) (for k-th dimension)\n"
    "     is an ordinary neural network, and F_{ki}(x) = d(e_k(x))/d(x_i). This makes it possible to\n"
    "     output the embedding, instead of, or as well as, the tangent plane (output_type option).\n"
    "   - embedding_quadratic: the embedding function e_k(x) (for k-th dimension)\n"
    "     is a 2nd order polynomial of x, and F_{ki}(x) = d(e_k(x))/d(x_i). This makes it possible to\n"
    "     output the embedding, instead of, or as well as, the tangent plane (output_type option).\n"
    );
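
// Minimal usage sketch (illustrative only; the option values below are
// hypothetical and the surrounding setup code is assumed, not taken from any
// PLearn example -- GradientOptimizer is one of PLearn's Optimizer subclasses):
//
//     TangentLearner learner;
//     learner.n_dim = 2;                        // predict 2 tangent vectors
//     learner.n_neighbors = 10;                 // t(x,j) from 10 nearest neighbors
//     learner.architecture_type = "single_neural_network";
//     learner.n_hidden_units = 20;
//     learner.optimizer = new GradientOptimizer(); // an optimizer must be set
//     learner.setTrainingSet(trainset);         // some VMat of points in R^n
//     learner.nstages = 100;
//     learner.build();
//     learner.train();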

void TangentLearner::declareOptions(OptionList& ol)
{
    declareOption(ol, "training_targets", &TangentLearner::training_targets, OptionBase::buildoption,
                  "Specifies a strategy for training the tangent plane predictor. Possible values are the strings\n"
                  "   local_evectors  : local principal components (based on n_neighbors of x)\n"
                  "   local_neighbors : difference between x and its n_neighbors.\n");

    declareOption(ol, "smart_initialization", &TangentLearner::smart_initialization, OptionBase::buildoption,
                  "Whether to use smart initialization: a nonzero value activates it and is also\n"
                  "passed as the constant c to smartInitialization().\n");

    declareOption(ol, "initialization_regularization", &TangentLearner::initialization_regularization, OptionBase::buildoption,
                  "Regularization added to the linear systems solved during smart initialization.\n");

    declareOption(ol, "use_subspace_distance", &TangentLearner::use_subspace_distance, OptionBase::buildoption,
                  "Minimize distance between subspace spanned by f_i and by (x-neighbors), instead of between\n"
                  "the individual targets t_j and the subspace spanned by the f_i.\n");

    declareOption(ol, "normalize_by_neighbor_distance", &TangentLearner::normalize_by_neighbor_distance,
                  OptionBase::buildoption,
                  "Whether to normalize cost by distance of neighbor.\n");

    declareOption(ol, "ordered_vectors", &TangentLearner::ordered_vectors, OptionBase::buildoption,
                  "Whether to apply a differential cost to each f_i so as to\n"
                  "obtain an ordering similar to the one obtained with principal component analysis.\n");

    declareOption(ol, "n_neighbors", &TangentLearner::n_neighbors, OptionBase::buildoption,
                  "Number of nearest neighbors to consider.\n");

    declareOption(ol, "n_dim", &TangentLearner::n_dim, OptionBase::buildoption,
                  "Number of tangent vectors to predict.\n");

    declareOption(ol, "optimizer", &TangentLearner::optimizer, OptionBase::buildoption,
                  "Optimizer that optimizes the cost function.\n");

    declareOption(ol, "architecture_type", &TangentLearner::architecture_type, OptionBase::buildoption,
                  "For pre-defined tangent_predictor types: \n"
                  "   multi_neural_network : prediction[j] = b[j] + W[j]*tanh(c[j] + V[j]*x), where W[j] has n_hidden_units columns,\n"
                  "                          with a separate set of parameters for each of the n_dim tangent vectors to predict\n"
                  "   single_neural_network : prediction = b + W*tanh(c + V*x), where W has n_hidden_units columns\n"
                  "                          and the resulting vector is viewed as a n_dim by n matrix\n"
                  "   linear : prediction = b + W*x\n"
                  "   embedding_neural_network: prediction[k,i] = d(e[k])/d(x[i]), where e(x) is an ordinary neural\n"
                  "                          network representing the embedding function (see output_type option)\n"
                  "   embedding_quadratic: prediction[k,i] = d(e_k)/d(x_i) = (A_k x + b_k)_i, where e_k(x) is a quadratic\n"
                  "                          form in x, i.e. e_k = x' A_k x + b_k' x\n"
                  "   (empty string): specify explicitly the function with tangent_predictor option\n"
                  "where (b,W,c,V) are parameters to be optimized.\n");

    declareOption(ol, "n_hidden_units", &TangentLearner::n_hidden_units, OptionBase::buildoption,
                  "Number of hidden units (if architecture_type is some kind of neural network)\n");

    declareOption(ol, "output_type", &TangentLearner::output_type, OptionBase::buildoption,
                  "Default value (the only one considered if architecture_type != embedding_*) is\n"
                  "   tangent_plane: output the predicted tangent plane.\n"
                  "   embedding: output the embedding vector (only if architecture_type == embedding_*).\n"
                  "   tangent_plane+embedding: output both (in this order).\n");

    declareOption(ol, "batch_size", &TangentLearner::batch_size, OptionBase::buildoption,
                  "How many samples to use to estimate the average gradient before updating the weights;\n"
                  "0 is equivalent to specifying training_set->length().\n");

    declareOption(ol, "norm_penalization", &TangentLearner::norm_penalization, OptionBase::buildoption,
                  "Factor that multiplies an extra penalization of the norm of f_i so that ||f_i|| be close to 1.\n"
                  "The penalty is norm_penalization*sum_i (1 - ||f_i||^2)^2.\n");

    declareOption(ol, "svd_threshold", &TangentLearner::svd_threshold, OptionBase::buildoption,
                  "Threshold to accept singular values of F in solving for linear combination weights on tangent subspace.\n");

    declareOption(ol, "projection_error_regularization", &TangentLearner::projection_error_regularization, OptionBase::buildoption,
                  "Term added to the linear system matrix involved in fitting subspaces in the projection error computation.\n");

    declareOption(ol, "parameters", &TangentLearner::parameters, OptionBase::learntoption,
                  "Parameters of the tangent_predictor function.\n");

    inherited::declareOptions(ol);
}
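
// build_ wires the Var/Func graph for the chosen architecture_type. For
// "single_neural_network", for instance, the prediction is b + W*tanh(c + V*x)
// with V (n_hidden_units x n), c (n_hidden_units x 1),
// W (n_dim*n x n_hidden_units) and b (n_dim*n x 1); the flat output is then
// interpreted as an n_dim x n matrix whose rows are the predicted tangent
// vectors.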
void TangentLearner::build_()
{
    int n = PLearner::inputsize_;

    if (n>0)
    {
        if (architecture_type == "multi_neural_network")
        {
            if (n_hidden_units <= 0)
                PLERROR("TangentLearner::Number of hidden units should be positive, now %d\n", n_hidden_units);
        }
        if (architecture_type == "single_neural_network")
        {
            if (n_hidden_units <= 0)
                PLERROR("TangentLearner::Number of hidden units should be positive, now %d\n", n_hidden_units);
            Var x(n);
            b = Var(n_dim*n, 1, "b");
            W = Var(n_dim*n, n_hidden_units, "W");
            c = Var(n_hidden_units, 1, "c");
            V = Var(n_hidden_units, n, "V");
            tangent_predictor = Func(x, b & W & c & V, b + product(W, tanh(c + product(V, x))));
            output_f = tangent_predictor;
        }
        else if (architecture_type == "linear")
        {
            Var x(n);
            b = Var(n_dim*n, 1, "b");
            W = Var(n_dim*n, n, "W");
            tangent_predictor = Func(x, b & W, b + product(W, x));
            output_f = tangent_predictor;
        }
        else if (architecture_type == "embedding_neural_network")
        {
            if (n_hidden_units <= 0)
                PLERROR("TangentLearner::Number of hidden units should be positive, now %d\n", n_hidden_units);
            Var x(n);
            W = Var(n_dim, n_hidden_units, "W");
            c = Var(n_hidden_units, 1, "c");
            V = Var(n_hidden_units, n, "V");
            b = Var(n_dim, n, "b");
            Var a = tanh(c + product(V, x));
            Var tangent_plane = diagonalized_factors_product(W, 1-a*a, V);
            tangent_predictor = Func(x, W & c & V, tangent_plane);
            embedding = product(W, a);
            if (output_type=="tangent_plane")
                output_f = tangent_predictor;
            else if (output_type=="embedding")
                output_f = Func(x, embedding);
            else if (output_type=="tangent_plane+embedding")
                output_f = Func(x, tangent_plane & embedding);
        }
        else if (architecture_type == "embedding_quadratic")
        {
            Var x(n);
            b = Var(n_dim, n, "b");
            W = Var(n_dim*n, n, "W");
            Var Wx = product(W, x);
            Var tangent_plane = Wx + b;
            tangent_predictor = Func(x, W & b, tangent_plane);
            embedding = product(new PlusVariable(b, Wx), x);
            if (output_type=="tangent_plane")
                output_f = tangent_predictor;
            else if (output_type=="embedding")
                output_f = Func(x, embedding);
            else if (output_type=="tangent_plane+embedding")
                output_f = Func(x, tangent_plane & embedding);
        }
        else if (architecture_type != "")
            PLERROR("TangentLearner::build, unknown architecture_type option %s (should be "
                    "'multi_neural_network', 'single_neural_network', 'linear', "
                    "'embedding_neural_network', 'embedding_quadratic', or empty string '')\n",
                    architecture_type.c_str());

        if (parameters.size()>0 && parameters.nelems() == tangent_predictor->parameters.nelems())
            tangent_predictor->parameters.copyValuesFrom(parameters);
        parameters.resize(tangent_predictor->parameters.size());
        for (int i=0;i<parameters.size();i++)
            parameters[i] = tangent_predictor->parameters[i];
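
        // The training cost is the projection error of the predicted basis
        // onto the target directions t(x,j) described in the class help;
        // cost_of_one_example maps one (x, targets) pair to that scalar cost,
        // and train() averages it over (mini)batches with meanOf.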
        if (training_targets=="local_evectors")
            tangent_targets = Var(n_dim, n);
        else if (training_targets=="local_neighbors")
            tangent_targets = Var(n_neighbors, n);
        else PLERROR("TangentLearner::build, option training_targets is %s, should be 'local_evectors' or 'local_neighbors'.",
                     training_targets.c_str());

        Var proj_err = projection_error(tangent_predictor->outputs[0], tangent_targets, norm_penalization, n,
                                        normalize_by_neighbor_distance, use_subspace_distance, svd_threshold,
                                        projection_error_regularization, ordered_vectors);
        projection_error_f = Func(tangent_predictor->outputs[0] & tangent_targets, proj_err);
        cost_of_one_example = Func(tangent_predictor->inputs & tangent_targets, tangent_predictor->parameters, proj_err);
    }
}

void TangentLearner::build()
{
    inherited::build();
    build_();
}

extern void varDeepCopyField(Var& field, CopiesMap& copies);

void TangentLearner::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);

    deepCopyField(cost_of_one_example, copies);
    varDeepCopyField(b, copies);
    varDeepCopyField(W, copies);
    varDeepCopyField(c, copies);
    varDeepCopyField(V, copies);
    varDeepCopyField(tangent_targets, copies);
    deepCopyField(parameters, copies);
    deepCopyField(optimizer, copies);
    deepCopyField(tangent_predictor, copies);
}

int TangentLearner::outputsize() const
{
    return output_f->outputsize;
}

void TangentLearner::forget()
{
    if (train_set)
        initializeParams();
    stage = 0;
}
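
// train() pipeline: build a VMat of neighbor-difference targets, concatenate
// it column-wise with the inputs, wrap cost_of_one_example in a meanOf over
// batches of 'batch_size' samples, and hand the resulting Var to the
// optimizer for 'nstages' passes over the data.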
void TangentLearner::train()
{
    VMat train_set_with_targets;
    VMat targets_vmat;
    if (!cost_of_one_example)
        PLERROR("TangentLearner::train: build has not been run after setTrainingSet!");

    if (training_targets == "local_evectors")
    {
        PLERROR("local_evectors not yet implemented");
    }
    else if (training_targets == "local_neighbors")
    {
        targets_vmat = local_neighbors_differences(train_set, n_neighbors);
    }
    else PLERROR("TangentLearner::train, unknown training_targets option %s (should be 'local_evectors' or 'local_neighbors')\n",
                 training_targets.c_str());

    train_set_with_targets = hconcat(train_set, targets_vmat);
    train_set_with_targets->defineSizes(inputsize(), inputsize()*n_neighbors, 0);
    int l = train_set->length();
    int nsamples = batch_size>0 ? batch_size : l;
    Var totalcost = meanOf(train_set_with_targets, cost_of_one_example, nsamples);
    if (optimizer)
    {
        optimizer->setToOptimize(parameters, totalcost);
        optimizer->build();
    }
    else PLERROR("TangentLearner::train can't train without setting an optimizer first!");

    int optstage_per_lstage = l/nsamples;

    ProgressBar* pb = 0;
    if (report_progress>0)
        pb = new ProgressBar("Training TangentLearner from stage " + tostring(stage)
                             + " to " + tostring(nstages), nstages-stage);

    int initial_stage = stage;
    bool early_stop = false;
    while (stage<nstages && !early_stop)
    {
        optimizer->nstages = optstage_per_lstage;
        train_stats->forget();
        optimizer->early_stop = false;
        optimizer->optimizeN(*train_stats);
        train_stats->finalize();
        if (verbosity>2)
            cout << "Epoch " << stage << " train objective: " << train_stats->getMean() << endl;
        ++stage;
        if (pb)
            pb->update(stage-initial_stage);
    }
    if (verbosity>1)
        cout << "EPOCH " << stage << " train objective: " << train_stats->getMean() << endl;

    if (pb)
        delete pb;
}
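
// Parameter initialization: V and W get small uniform random values scaled by
// the fan-in (1/sqrt(inputsize) and 1/n_hidden_units respectively), while the
// bias-like parameters b and c start at zero; with smart_initialization set,
// V is instead seeded by solving random linear systems (smartInitialization).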
void TangentLearner::initializeParams()
{
    if (seed_>=0)
        manual_seed(seed_);
    else
        PLearn::seed();

    if (architecture_type=="single_neural_network")
    {
        if (smart_initialization)
        {
            V->matValue << smartInitialization(train_set, n_hidden_units, smart_initialization, initialization_regularization);
            W->value << (1/real(n_hidden_units));
            b->matValue.clear();
            c->matValue.clear();
        }
        else
        {
            real delta = 1.0 / sqrt(real(inputsize()));
            fill_random_uniform(V->value, -delta, delta);
            delta = 1.0 / real(n_hidden_units);
            fill_random_uniform(W->matValue, -delta, delta);
            c->matValue.clear();
        }
    }
    else if (architecture_type=="linear")
    {
        real delta = 1.0 / sqrt(real(inputsize()));
        b->matValue.clear();
        fill_random_uniform(W->matValue, -delta, delta);
    }
    else if (architecture_type=="embedding_neural_network")
    {
        real delta = 1.0 / sqrt(real(inputsize()));
        fill_random_uniform(V->value, -delta, delta);
        delta = 1.0 / real(n_hidden_units);
        fill_random_uniform(W->matValue, -delta, delta);
        c->value.clear();
        b->value.clear();
    }
    else if (architecture_type=="embedding_quadratic")
    {
        real delta = 1.0 / sqrt(real(inputsize()));
        fill_random_uniform(W->matValue, -delta, delta);
        b->value.clear();
    }
    else PLERROR("other types not handled yet!");

    if (optimizer)
        optimizer->reset();
}
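
// computeOutput simply evaluates output_f, so the output is whatever build_
// wired for output_type: the flattened n_dim x n tangent basis, the n_dim
// embedding vector, or both concatenated (in that order).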
void TangentLearner::computeOutput(const Vec& input, Vec& output) const
{
    int nout = outputsize();
    output.resize(nout);
    output << output_f(input);
}

void TangentLearner::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                             const Vec& target, Vec& costs) const
{
    PLERROR("TangentLearner::computeCostsFromOutputs not defined for this learner");
}

TVec<string> TangentLearner::getTestCostNames() const
{
    return getTrainCostNames();
}

TVec<string> TangentLearner::getTrainCostNames() const
{
    TVec<string> cost(1);
    cost[0] = "projection_error";
    return cost;
}

} // end of namespace PLearn