#include "GaussianContinuum.h"
#include <plearn/vmat/LocalNeighborsDifferencesVMatrix.h>
#include <plearn/var/ProductVariable.h>
#include <plearn/var/PlusVariable.h>
#include <plearn/var/SoftplusVariable.h>
#include <plearn/var/VarRowsVariable.h>
#include <plearn/var/VarRowVariable.h>
#include <plearn/var/SourceVariable.h>
#include <plearn/var/Var_operators.h>
#include <plearn/vmat/ConcatColumnsVMatrix.h>
#include <plearn/math/random.h>
#include <plearn/var/SumOfVariable.h>
#include <plearn/var/TanhVariable.h>
#include <plearn/var/NllSemisphericalGaussianVariable.h>
#include <plearn/var/DiagonalizedFactorsProductVariable.h>
#include <plearn/math/plapack.h>
#include <plearn/var/ColumnSumVariable.h>
#include <plearn/vmat/VMat_maths.h>
#include <plearn/vmat/ConcatRowsVMatrix.h>
#include <plearn/vmat/SubVMatrix.h>
#include <plearn/var/PDistributionVariable.h>
#include <plearn_learners/distributions/UniformDistribution.h>
#include <plearn/display/DisplayUtils.h>
#include <plearn/opt/GradientOptimizer.h>

namespace PLearn {
using namespace std;

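// smartInitialization: a data-dependent initialization helper (description
// inferred from the code below, not from original commentary). For each of
// the n requested rows it stacks w randomly drawn rows of v into a w-by-w
// matrix T, regularizes T via regularizeMatrix(), and solves T r = b with b
// the constant vector filled with c; the solutions r become the rows of the
// returned n-by-w matrix.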
Mat smartInitialization(VMat v, int n, real c, real regularization)
{
  int l = v->length();
  int w = v->width();

  Mat result(n,w);
  Mat temp(w,w);
  Vec b(w);
  b << c;

  int i,j;

  for (i=0;i<n;++i)
  {
    temp.clear();
    for (j=0;j<w;++j)
    {
      v->getRow(uniform_multinomial_sample(l),temp(j));
    }
    regularizeMatrix(temp,regularization);
    result(i) << solveLinearSystem(temp, b);
  }

  return result;
}

GaussianContinuum::GaussianContinuum()
  : use_noise(false), noise(-1), noise_type("uniform"),
    n_random_walk_step(0), n_random_walk_per_point(0),
    save_image_mat(false), min_sigma(0), min_diff(0),
    print_parameters(false), sm_bigger_than_sn(false), use_best_model(false),
    n_neighbors(5), n_neighbors_density(5), n_dim(1),
    compute_cost_every_n_epochs(5),
    architecture_type("single_neural_network"), output_type("tangent_plane"),
    n_hidden_units(-1), batch_size(1), norm_penalization(0),
    svd_threshold(1e-5), projection_error_regularization(0)
{
}

PLEARN_IMPLEMENT_OBJECT(GaussianContinuum, "Learns a continuous (uncountable) Gaussian mixture with non-local parametrization",
                        "This learner implicitly estimates the density of the data through\n"
                        "a generalization of the Gaussian mixture model and of the TangentLearner\n"
                        "algorithm (see help on that class). The density is the fixed point of\n"
                        "a random walk {z_t} that follows the following transition probabilities:\n"
                        "   z_{t+1} sampled from a Gaussian associated with z_t, centered\n"
                        "   at z_t + mu(z_t), with covariance matrix S(z_t).\n"
                        "The semantics of that random walk are as follows (and that is how\n"
                        "it will be estimated). Given a point z_t, the sample z_{t+1} represents\n"
                        "a 'near neighbor' of z_t. We assume that the density is smooth enough\n"
                        "that the cloud of 'near neighbors' around z_t can be modeled by a Gaussian.\n"
                        "The functions mu(.) and S(.) have globally estimated parameters (for example\n"
                        "using neural nets or linear functions of x, or linear functions of a basis).\n"
                        "Here we suppose that the eigenvalues of S(.) come from two groups:\n"
                        "the first group should correspond to locally estimated principal\n"
                        "directions of variation, and there are no constraints on these eigenvalues\n"
                        "(except that they are positive), while the second group should correspond\n"
                        "to 'noise' directions, which all have the same value sigma2_noise,\n"
                        "i.e. it is not necessary to explicitly model the directions of variation\n"
                        "(the eigenvectors) associated with the second group. In general we expect\n"
                        "sigma2_noise to be small compared to the first group's eigenvalues, which\n"
                        "means that the Gaussians are flat in the corresponding directions, and\n"
                        "that the first group's variations correspond to modeling a manifold near\n"
                        "which most of the data lie. Optionally, an embedding corresponding\n"
                        "to the variations associated with the first group of eigenvalues can be learnt\n"
                        "by choosing for the architecture_type option a value of the form embedding_*.\n"
                        "Although the density is not available in closed form, it is easy (but maybe slow)\n"
                        "to sample from it: pick one of the training examples at random and then\n"
                        "follow the random walk (ideally, for a long time). It is also possible in\n"
                        "principle to obtain a numerical estimate of the density at a point x,\n"
                        "by sampling enough random walk points around x.\n"
                        );
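
// A reading of the transition kernel as implemented below (an interpretation
// of the code, not stated in the original comments): with F(x) the n_dim x n
// matrix of predicted tangent vectors, sm(x) and sn(x) the two variance
// groups, and {v_k} an orthonormal basis of the row space of F(x) (obtained
// by SVD), the walk in make_random_walk() samples
//
//   z_{t+1} = z_t + mu(z_t) + sqrt(sm - sn) * sum_k v_k * eps_k
//                           + sqrt(sn) * eps,
//   with eps_k ~ N(0,1) and eps ~ N(0,I_n),
//
// which corresponds to the covariance
//
//   S(z_t) = sn * I_n + (sm - sn) * sum_k v_k v_k'.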
void GaussianContinuum::declareOptions(OptionList& ol)
{
  declareOption(ol, "n_neighbors", &GaussianContinuum::n_neighbors, OptionBase::buildoption,
                "Number of nearest neighbors to consider for gradient descent.\n");

  declareOption(ol, "n_neighbors_density", &GaussianContinuum::n_neighbors_density, OptionBase::buildoption,
                "Number of nearest neighbors to consider for p(x) density estimation.\n");

  declareOption(ol, "n_dim", &GaussianContinuum::n_dim, OptionBase::buildoption,
                "Number of tangent vectors to predict.\n");

  declareOption(ol, "compute_cost_every_n_epochs", &GaussianContinuum::compute_cost_every_n_epochs, OptionBase::buildoption,
                "Frequency of the computation of the cost on the training and validation sets.\n");

  declareOption(ol, "optimizer", &GaussianContinuum::optimizer, OptionBase::buildoption,
                "Optimizer that optimizes the cost function.\n");

  declareOption(ol, "variances_transfer_function", &GaussianContinuum::variances_transfer_function, OptionBase::buildoption,
                "Type of output transfer function for predicted variances, to force them to be >0:\n"
                "  square : take the square\n"
                "  exp : apply the exponential\n"
                "  softplus : apply the function log(1+exp(.))\n");

  declareOption(ol, "architecture_type", &GaussianContinuum::architecture_type, OptionBase::buildoption,
                "For pre-defined tangent_predictor types: \n"
                "   multi_neural_network : prediction[j] = b[j] + W[j]*tanh(c[j] + V[j]*x), where W[j] has n_hidden_units columns\n"
                "                          and there is a separate set of parameters for each of the n_dim tangent vectors to predict.\n"
                "   single_neural_network : prediction = b + W*tanh(c + V*x), where W has n_hidden_units columns\n"
                "                           and the resulting vector is viewed as an n_dim by n matrix\n"
                "   linear : prediction = b + W*x\n"
                "   embedding_neural_network: prediction[k,i] = d(e[k])/d(x[i]), where e(x) is an ordinary neural\n"
                "                             network representing the embedding function (see output_type option)\n"
                "   embedding_quadratic: prediction[k,i] = d(e_k)/d(x_i) = A_k x + b_k, where e_k(x) is a quadratic\n"
                "                        form in x, i.e. e_k = x' A_k x + b_k' x\n"
                "   (empty string): specify explicitly the function with the tangent_predictor option\n"
                "where (b,W,c,V) are parameters to be optimized.\n");

  declareOption(ol, "n_hidden_units", &GaussianContinuum::n_hidden_units, OptionBase::buildoption,
                "Number of hidden units (if architecture_type is some kind of neural network).\n");

  declareOption(ol, "output_type", &GaussianContinuum::output_type, OptionBase::buildoption,
                "Default value (the only one considered if architecture_type != embedding_*) is\n"
                "   tangent_plane: output the predicted tangent plane.\n"
                "   embedding: output the embedding vector (only if architecture_type == embedding_*).\n"
                "   tangent_plane+embedding: output both (in this order).\n");

  declareOption(ol, "batch_size", &GaussianContinuum::batch_size, OptionBase::buildoption,
                "How many samples to use to estimate the average gradient before updating the weights;\n"
                "0 is equivalent to specifying training_set->length().\n");

  declareOption(ol, "svd_threshold", &GaussianContinuum::svd_threshold, OptionBase::buildoption,
                "Threshold to accept singular values of F in solving for linear combination weights on the tangent subspace.\n");

  declareOption(ol, "print_parameters", &GaussianContinuum::print_parameters, OptionBase::buildoption,
                "Indication that the parameters should be printed for the training set points.\n");

  declareOption(ol, "sm_bigger_than_sn", &GaussianContinuum::sm_bigger_than_sn, OptionBase::buildoption,
                "Indication that sm should always be bigger than sn.\n");

  declareOption(ol, "use_best_model", &GaussianContinuum::use_best_model, OptionBase::buildoption,
                "Indication that the best model on the validation set should be used.\n");

  declareOption(ol, "save_image_mat", &GaussianContinuum::save_image_mat, OptionBase::buildoption,
                "Indication that a matrix corresponding to the probabilities of the points on a 2d grid should be created.\n");

  declareOption(ol, "upper_y", &GaussianContinuum::upper_y, OptionBase::buildoption,
                "Upper bound on the y (second) coordinate.\n");

  declareOption(ol, "upper_x", &GaussianContinuum::upper_x, OptionBase::buildoption,
                "Upper bound on the x (first) coordinate.\n");

  declareOption(ol, "lower_y", &GaussianContinuum::lower_y, OptionBase::buildoption,
                "Lower bound on the y (second) coordinate.\n");

  declareOption(ol, "lower_x", &GaussianContinuum::lower_x, OptionBase::buildoption,
                "Lower bound on the x (first) coordinate.\n");

  declareOption(ol, "points_per_dim", &GaussianContinuum::points_per_dim, OptionBase::buildoption,
                "Number of points per dimension on the grid.\n");

  declareOption(ol, "projection_error_regularization", &GaussianContinuum::projection_error_regularization, OptionBase::buildoption,
                "Term added to the linear system matrix involved in fitting subspaces in the projection error computation.\n");

  declareOption(ol, "parameters", &GaussianContinuum::parameters, OptionBase::learntoption,
                "Parameters of the tangent_predictor function.\n");

  declareOption(ol, "Bs", &GaussianContinuum::Bs, OptionBase::learntoption,
                "The B matrices for the training set.\n");

  declareOption(ol, "Fs", &GaussianContinuum::Fs, OptionBase::learntoption,
                "The F (tangent planes) matrices for the training set.\n");

  declareOption(ol, "mus", &GaussianContinuum::mus, OptionBase::learntoption,
                "The mu vectors for the training set.\n");

  declareOption(ol, "sms", &GaussianContinuum::sms, OptionBase::learntoption,
                "The sm values for the training set.\n");

  declareOption(ol, "sns", &GaussianContinuum::sns, OptionBase::learntoption,
                "The sn values for the training set.\n");

  declareOption(ol, "min_sigma", &GaussianContinuum::min_sigma, OptionBase::buildoption,
                "The minimum value for the noise and manifold sigmas.\n");

  declareOption(ol, "min_diff", &GaussianContinuum::min_diff, OptionBase::buildoption,
                "The minimum value for the difference between the manifold and noise sigmas.\n");

  declareOption(ol, "n_random_walk_step", &GaussianContinuum::n_random_walk_step, OptionBase::buildoption,
                "The number of random walk steps.\n");

  declareOption(ol, "n_random_walk_per_point", &GaussianContinuum::n_random_walk_per_point, OptionBase::buildoption,
                "The number of random walks per training set point.\n");

  declareOption(ol, "noise", &GaussianContinuum::noise, OptionBase::buildoption,
                "Noise parameter for the training data.\n");

  declareOption(ol, "noise_type", &GaussianContinuum::noise_type, OptionBase::buildoption,
                "Type of the noise (\"uniform\").\n");

  declareOption(ol, "use_noise", &GaussianContinuum::use_noise, OptionBase::buildoption,
                "Indication that the training should be done using noise on the training data.\n");

  // Now call the parent class' declareOptions
  inherited::declareOptions(ol);
}
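
// Note on the sm/sn options above (inferred from build_() below): sm is the
// variance along the n_dim predicted tangent directions (the 'manifold'
// variance) and sn is the shared variance of the remaining n - n_dim 'noise'
// directions; min_sigma and min_diff floor these predictions through the
// min_sig and min_d source variables.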

void GaussianContinuum::build_()
{
  n = PLearner::inputsize_;

  if (n>0)
  {
    Var log_n_examples(1,1,"log(n_examples)");

    if (architecture_type == "embedding_neural_network")
    {
      if (n_hidden_units <= 0)
        PLERROR("GaussianContinuum::Number of hidden units should be positive, now %d\n",n_hidden_units);

      x = Var(n);
      if(noise > 0)
      {
        if(noise_type == "uniform")
        {
          PP<UniformDistribution> temp = new UniformDistribution();
          Vec lower_noise(n);
          Vec upper_noise(n);
          for(int i=0; i<n; i++)
          {
            lower_noise[i] = -1*noise;
            upper_noise[i] = noise;
          }
          temp->min = lower_noise;
          temp->max = upper_noise;
          dist = temp;
        }
        else PLERROR("In GaussianContinuum::build_() : noise_type %s not defined",noise_type.c_str());
        noise_var = new PDistributionVariable(x,dist);
        noise_var->setName(noise_type);
      }
      else
      {
        noise_var = new SourceVariable(n,1);
        noise_var->setName("no noise");
        for(int i=0; i<n; i++)
          noise_var->value[i] = 0;
      }

      W = Var(n_dim,n_hidden_units,"W ");
      c = Var(n_hidden_units,1,"c ");
      V = Var(n_hidden_units,n,"V ");
      muV = Var(n,n_hidden_units,"muV ");
      smV = Var(1,n_hidden_units,"smV ");
      smb = Var(1,1,"smB ");
      snV = Var(1,n_hidden_units,"snV ");
      snb = Var(1,1,"snB ");

      Var a = tanh(c + product(V,x));
      tangent_plane = diagonalized_factors_product(W,1-a*a,V);
      embedding = product(W,a);
      mu = product(muV,a);
      Var min_sig = new SourceVariable(1,1);
      min_sig->value[0] = min_sigma;
      Var min_d = new SourceVariable(1,1);
      min_d->value[0] = min_diff;

      Var a_noisy = tanh(c + product(V,x+noise_var));
      mu_noisy = product(muV,a_noisy);

      if(sm_bigger_than_sn)
      {
        sn = softplus(snb + product(snV,a)) + min_sig;
        Var diff = softplus(smb + product(smV,a)) + min_d;
        sm = sn + diff;
      }
      else
      {
        sm = softplus(smb + product(smV,a)) + min_sig;
        sn = softplus(snb + product(snV,a)) + min_sig;
      }

      mu_noisy->setName("mu_noisy ");
      tangent_plane->setName("tangent_plane ");
      mu->setName("mu ");
      sm->setName("sm ");
      sn->setName("sn ");
      a_noisy->setName("a_noisy ");
      a->setName("a ");
      embedding->setName("embedding ");
      x->setName("x ");

      predictor = Func(x, W & c & V & muV & smV & smb & snV & snb, tangent_plane & mu & sm & sn);

      if (output_type=="tangent_plane")
        output_f = Func(x, tangent_plane);
      else if (output_type=="embedding")
        output_f = Func(x, embedding);
      else if (output_type=="tangent_plane+embedding")
        output_f = Func(x, tangent_plane & embedding);
      else if(output_type == "tangent_plane_variance_normalized")
        output_f = Func(x, tangent_plane & sm);
      else if(output_type == "semispherical_gaussian_parameters")
        output_f = Func(x, tangent_plane & mu & sm & sn);
      output_f_all = Func(x, tangent_plane & mu & sm & sn);
    }
    else
      PLERROR("GaussianContinuum::build, unknown architecture_type option %s (only 'embedding_neural_network' is implemented here)\n",
              architecture_type.c_str());

    if (parameters.size()>0 && parameters.nelems() == predictor->parameters.nelems())
      predictor->parameters.copyValuesFrom(parameters);
    parameters.resize(predictor->parameters.size());
    for (int i=0;i<parameters.size();i++)
      parameters[i] = predictor->parameters[i];

    tangent_targets = Var(n_neighbors,n);

    Var target_index = Var(1,1);
    Var neighbor_indexes = Var(n_neighbors,1);
    p_x = Var(train_set->length(),1);
    p_target = new VarRowsVariable(p_x,target_index);
    p_neighbors = new VarRowsVariable(p_x,neighbor_indexes);

    Var nll = nll_semispherical_gaussian(tangent_plane, mu, sm, sn, tangent_targets, p_target, p_neighbors, noise_var, mu_noisy,
                                         use_noise, svd_threshold);

    Var knn = new SourceVariable(1,1);
    knn->value[0] = n_neighbors;
    sum_nll = new ColumnSumVariable(nll) / knn;
    cost_of_one_example = Func(x & tangent_targets & target_index & neighbor_indexes, predictor->parameters, sum_nll);
    noisy_data = Func(x, x + noise_var);
    verify_gradient_func = Func(predictor->inputs & tangent_targets & target_index & neighbor_indexes, predictor->parameters & mu_noisy, sum_nll);

    if(validation_set == 0) validation_set = train_set;
    best_validation_cost = REAL_MAX;

    train_nearest_neighbors.resize(train_set.length(), n_neighbors_density);
    validation_nearest_neighbors.resize(validation_set.length(), n_neighbors_density);

    t_row.resize(n);
    Ut_svd.resize(n,n);
    V_svd.resize(n_dim,n_dim);
    z.resize(n);
    zm.resize(n);
    zn.resize(n);
    x_minus_neighbor.resize(n);
    neighbor_row.resize(n);
    w.resize(n_dim);

    Bs.resize(train_set.length());
    Fs.resize(train_set.length());
    mus.resize(train_set.length(), n);
    sms.resize(train_set.length());
    sns.resize(train_set.length());
  }
}
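
// The per-example cost wired above is (up to the 1/knn factor) the negative
// log of a 'semispherical' Gaussian. A sketch of the formula, restating what
// NllSemisphericalGaussianVariable and the exp(...) terms further below
// compute (see those for the authoritative version): with z = neighbor - x
// - mu(x), zm its projection onto the tangent plane and zn = z - zm,
//
//   log p(z) = -0.5*( |zm|^2/sm + |zn|^2/sn
//                     + n_dim*log(sm) + (n-n_dim)*log(sn) + n*log(2*pi) ).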

void GaussianContinuum::make_random_walk()
{
  if(n_random_walk_step < 1) PLERROR("The number of random walk steps should be at least one");
  if(n_random_walk_per_point < 1) PLERROR("The number of random walks per training set point should be at least one");
  ith_step_generated_set.resize(n_random_walk_step);

  Mat generated_set(train_set.length()*n_random_walk_per_point,n);
  for(int t=0; t<train_set.length(); t++)
  {
    train_set->getRow(t,t_row);
    output_f_all(t_row);

    real this_sm = sm->value[0];
    real this_sn = sn->value[0];
    Vec this_mu(n); this_mu << mu->value;
    static Mat this_F(n_dim,n); this_F << tangent_plane->matValue;

    // Orthonormalize the tangent plane (the rows of Ut_svd span it).
    lapackSVD(this_F, Ut_svd, S_svd, V_svd);

    for(int rwp=0; rwp<n_random_walk_per_point; rwp++)
    {
      TVec<real> z_m(n_dim);
      TVec<real> z(n);
      for(int i=0; i<n_dim; i++)
        z_m[i] = normal_sample();
      for(int i=0; i<n; i++)
        z[i] = normal_sample();

      Vec new_point = generated_set(t*n_random_walk_per_point+rwp);
      for(int j=0; j<n; j++)
      {
        new_point[j] = 0;
        for(int k=0; k<n_dim; k++)
          new_point[j] += Ut_svd(k,j)*z_m[k];
        new_point[j] *= sqrt(this_sm-this_sn);
        new_point[j] += z[j]*sqrt(this_sn);
      }
      new_point += this_mu + t_row;
    }
  }

  PLearn::save("gen_points_0.psave",generated_set);
  ith_step_generated_set[0] = VMat(generated_set);

  for(int step=1; step<n_random_walk_step; step++)
  {
    Mat generated_set(ith_step_generated_set[step-1].length(),n);
    for(int t=0; t<ith_step_generated_set[step-1].length(); t++)
    {
      ith_step_generated_set[step-1]->getRow(t,t_row);
      output_f_all(t_row);

      real this_sm = sm->value[0];
      real this_sn = sn->value[0];
      Vec this_mu(n); this_mu << mu->value;
      static Mat this_F(n_dim,n); this_F << tangent_plane->matValue;

      lapackSVD(this_F, Ut_svd, S_svd, V_svd);

      TVec<real> z_m(n_dim);
      TVec<real> z(n);
      for(int i=0; i<n_dim; i++)
        z_m[i] = normal_sample();
      for(int i=0; i<n; i++)
        z[i] = normal_sample();

      Vec new_point = generated_set(t);
      for(int j=0; j<n; j++)
      {
        new_point[j] = 0;
        for(int k=0; k<n_dim; k++)
          if(S_svd[k] > svd_threshold)
            new_point[j] += Ut_svd(k,j)*z_m[k];
        new_point[j] *= sqrt(this_sm-this_sn);
        new_point[j] += z[j]*sqrt(this_sn);
      }
      new_point += this_mu + t_row;
    }

    string path = "gen_points_" + tostring(step) + ".psave";
    PLearn::save(path,generated_set);
    ith_step_generated_set[step] = VMat(generated_set);
  }

  train_and_generated_set = vconcat(train_set & ith_step_generated_set);

  // Recompute F, B, mu, sm and sn for the training points and all generated points.
  Fs.resize(train_set.length() + train_set.length()*n_random_walk_per_point*n_random_walk_step);
  Bs.resize(train_set.length() + train_set.length()*n_random_walk_per_point*n_random_walk_step);
  mus.resize(train_set.length() + train_set.length()*n_random_walk_per_point*n_random_walk_step, n);
  sms.resize(train_set.length() + train_set.length()*n_random_walk_per_point*n_random_walk_step);
  sns.resize(train_set.length() + train_set.length()*n_random_walk_per_point*n_random_walk_step);

  for(int t=0; t<train_and_generated_set.length(); t++)
  {
    Fs[t].resize(tangent_plane.length(), tangent_plane.width());
    train_and_generated_set->getRow(t,t_row);
    predictor->fprop(t_row, Fs[t].toVec() & mus(t) & sms.subVec(t,1) & sns.subVec(t,1));

    // Compute B, the pseudo-inverse pieces of F, from its SVD.
    static Mat F_copy;
    F_copy.resize(Fs[t].length(),Fs[t].width());
    F_copy << Fs[t];
    lapackSVD(F_copy, Ut_svd, S_svd, V_svd);
    Bs[t].resize(n_dim,train_and_generated_set.width());
    Bs[t].clear();
    for (int k=0;k<S_svd.length();k++)
    {
      real s_k = S_svd[k];
      if (s_k>svd_threshold) // ignore components with too small a singular value
      {
        real coef = 1/s_k;
        for (int i=0;i<n_dim;i++)
        {
          real* Bi = Bs[t][i];
          for (int j=0;j<n;j++)
            Bi[j] += V_svd(i,k)*Ut_svd(k,j)*coef;
        }
      }
    }
  }
}
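
// Sampling sketch (inferred from the code above, not an official API): to
// draw an approximate sample from the learned density, pick a training row
// and repeatedly apply
//   x <- x + mu(x) + sqrt(sm-sn)*[tangent basis]*eps_m + sqrt(sn)*eps.
// make_random_walk() materializes exactly this, saving each step's point set
// to gen_points_<step>.psave.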

void GaussianContinuum::get_image_matrix(Mat& image, VMat image_points_vmat, int begin, string file_path)
{
  VMat reference_set = new SubVMatrix(train_and_generated_set,begin,0,train_and_generated_set.length()-begin,n);
  cout << "Creating image matrix: " << file_path << endl;
  image.resize(points_per_dim,points_per_dim);
  image_nearest_neighbors.resize(points_per_dim*points_per_dim,n_neighbors_density);

  // Find the nearest neighbors of every grid point in the reference set.
  for(int t=0; t<image_points_vmat.length(); t++)
  {
    image_points_vmat->getRow(t,t_row);
    TVec<int> nn = image_nearest_neighbors(t);
    computeNearestNeighbors(reference_set, t_row, nn);
  }

  for(int t=0; t<image_points_vmat.length(); t++)
  {
    image_points_vmat->getRow(t,t_row);
    real this_p_x = 0;
    // Accumulate the Gaussian components of the nearest neighbors.
    for(int neighbor=0; neighbor<n_neighbors_density; neighbor++)
    {
      train_and_generated_set->getRow(begin+image_nearest_neighbors(t,neighbor),neighbor_row);
      substract(t_row,neighbor_row,x_minus_neighbor);
      substract(x_minus_neighbor,mus(begin+image_nearest_neighbors(t,neighbor)),z);
      product(w,Bs[begin+image_nearest_neighbors(t,neighbor)],z);
      transposeProduct(zm,Fs[begin+image_nearest_neighbors(t,neighbor)],w);
      substract(z,zm,zn);
      this_p_x += exp(-0.5*(pownorm(zm,2)/sms[begin+image_nearest_neighbors(t,neighbor)] + pownorm(zn,2)/sns[begin+image_nearest_neighbors(t,neighbor)]
                            + n_dim*log(sms[begin+image_nearest_neighbors(t,neighbor)]) + (n-n_dim)*log(sns[begin+image_nearest_neighbors(t,neighbor)])) - n/2.0 * Log2Pi);
    }

    this_p_x /= reference_set.length();
    int y_coord = t/points_per_dim;
    int x_coord = t%points_per_dim;
    image(points_per_dim - y_coord - 1,x_coord) = this_p_x;
  }
  PLearn::save(file_path,image);
}
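
// An observation on the estimator above (not from the original comments):
// the sum runs only over the n_neighbors_density nearest neighbors, yet it
// is normalized by the full reference set length, so the grid values are a
// lower bound on the mixture density; the omitted, distant components would
// contribute almost nothing.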

void GaussianContinuum::compute_train_and_validation_costs()
{
  // Recompute the Fs, Bs, mus, sms and sns on the training set.
  t_row.resize(train_set.width());
  for(int t=0; t<train_set.length(); t++)
  {
    Fs[t].resize(tangent_plane.length(), tangent_plane.width());
    train_set->getRow(t,t_row);
    predictor->fprop(t_row, Fs[t].toVec() & mus(t) & sms.subVec(t,1) & sns.subVec(t,1));

    // Compute B from the SVD of F.
    static Mat F_copy;
    F_copy.resize(Fs[t].length(),Fs[t].width());
    F_copy << Fs[t];
    lapackSVD(F_copy, Ut_svd, S_svd, V_svd);
    Bs[t].resize(n_dim,train_set.width());
    Bs[t].clear();
    for (int k=0;k<S_svd.length();k++)
    {
      real s_k = S_svd[k];
      if (s_k>svd_threshold)
      {
        real coef = 1/s_k;
        for (int i=0;i<n_dim;i++)
        {
          real* Bi = Bs[t][i];
          for (int j=0;j<n;j++)
            Bi[j] += V_svd(i,k)*Ut_svd(k,j)*coef;
        }
      }
    }
  }

  // NLL estimate on the training set.
  real nll_train = 0;

  for(int t=0; t<train_set.length(); t++)
  {
    train_set->getRow(t,t_row);
    p_x->value[t] = 0;
    for(int neighbor=0; neighbor<n_neighbors_density; neighbor++)
    {
      train_set->getRow(train_nearest_neighbors(t,neighbor),neighbor_row);
      substract(t_row,neighbor_row,x_minus_neighbor);
      substract(x_minus_neighbor,mus(train_nearest_neighbors(t,neighbor)),z);
      product(w,Bs[train_nearest_neighbors(t,neighbor)],z);
      transposeProduct(zm,Fs[train_nearest_neighbors(t,neighbor)],w);
      substract(z,zm,zn);
      p_x->value[t] += exp(-0.5*(pownorm(zm,2)/sms[train_nearest_neighbors(t,neighbor)] + pownorm(zn,2)/sns[train_nearest_neighbors(t,neighbor)]
                                 + n_dim*log(sms[train_nearest_neighbors(t,neighbor)]) + (n-n_dim)*log(sns[train_nearest_neighbors(t,neighbor)])) - n/2.0 * Log2Pi);
    }
    p_x->value[t] /= train_set.length();
    nll_train -= log(p_x->value[t]);

    if(print_parameters)
    {
      output_f_all(t_row);
      cout << "data point = " << x->value << " parameters = " << tangent_plane->value << " " << mu->value << " " << sm->value << " " << sn->value << " p(x) = " << p_x->value[t] << endl;
    }
  }

  nll_train /= train_set.length();

  cout << "NLL train = " << nll_train << endl;

  // NLL estimate on the validation set.
  real nll_validation = 0;

  for(int t=0; t<validation_set.length(); t++)
  {
    validation_set->getRow(t,t_row);
    real this_p_x = 0;
    for(int neighbor=0; neighbor<n_neighbors_density; neighbor++)
    {
      train_set->getRow(validation_nearest_neighbors(t,neighbor),neighbor_row);
      substract(t_row,neighbor_row,x_minus_neighbor);
      substract(x_minus_neighbor,mus(validation_nearest_neighbors(t,neighbor)),z);
      product(w,Bs[validation_nearest_neighbors(t,neighbor)],z);
      transposeProduct(zm,Fs[validation_nearest_neighbors(t,neighbor)],w);
      substract(z,zm,zn);
      this_p_x += exp(-0.5*(pownorm(zm,2)/sms[validation_nearest_neighbors(t,neighbor)] + pownorm(zn,2)/sns[validation_nearest_neighbors(t,neighbor)]
                            + n_dim*log(sms[validation_nearest_neighbors(t,neighbor)]) + (n-n_dim)*log(sns[validation_nearest_neighbors(t,neighbor)])) - n/2.0 * Log2Pi);
    }

    this_p_x /= train_set.length();
    nll_validation -= log(this_p_x);
  }

  nll_validation /= validation_set.length();

  cout << "NLL validation = " << nll_validation << endl;

  if(use_best_model && nll_validation < best_validation_cost)
  {
    best_validation_cost = nll_validation;
    PLearn::save("temp_learner.psave",*this);
  }
}

void GaussianContinuum::build()
{
  inherited::build();
  build_();
}

extern void varDeepCopyField(Var& field, CopiesMap& copies);

void GaussianContinuum::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
  inherited::makeDeepCopyFromShallowCopy(copies);

  deepCopyField(cost_of_one_example, copies);
  varDeepCopyField(b, copies);
  varDeepCopyField(W, copies);
  varDeepCopyField(c, copies);
  varDeepCopyField(V, copies);
  varDeepCopyField(tangent_targets, copies);
  varDeepCopyField(muV, copies);
  varDeepCopyField(smV, copies);
  varDeepCopyField(smb, copies);
  varDeepCopyField(snV, copies);
  varDeepCopyField(snb, copies);
  varDeepCopyField(mu, copies);
  varDeepCopyField(sm, copies);
  varDeepCopyField(sn, copies);
  deepCopyField(parameters, copies);
  deepCopyField(optimizer, copies);
  deepCopyField(predictor, copies);
}

int GaussianContinuum::outputsize() const
{
  if(output_type == "tangent_plane_variance_normalized")
    return output_f->outputsize-1;
  else
    return output_f->outputsize;
}

void GaussianContinuum::forget()
{
  if (train_set) initializeParams();
  stage = 0;
}
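
// train() below proceeds in two phases (a reading of the code, not original
// commentary): first a hand-rolled per-example loop that fbprops the NLL and
// applies updates scaled by the GradientOptimizer's start_learning_rate,
// then an optimizer-driven phase minimizing
// meanOf(train_set_with_targets, cost_of_one_example, nsamples). Training
// and validation NLLs are recomputed every compute_cost_every_n_epochs
// epochs, and diagnostic matrices are saved when save_image_mat is set.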
void GaussianContinuum::train()
{
  // Create the grid points for image matrix creation.
  if(save_image_mat)
  {
    if(n != 2) PLERROR("In GaussianContinuum::train(): Image matrix creation is only implemented for 2d problems");

    real step_x = (upper_x-lower_x)/(points_per_dim-1);
    real step_y = (upper_y-lower_y)/(points_per_dim-1);
    image_points_mat.resize(points_per_dim*points_per_dim,n);
    for(int i=0; i<points_per_dim; i++)
      for(int j=0; j<points_per_dim; j++)
      {
        image_points_mat(i*points_per_dim + j,0) = lower_x + j*step_x;
        image_points_mat(i*points_per_dim + j,1) = lower_y + i*step_y;
      }

    image_points_vmat = VMat(image_points_mat);
    image_nearest_neighbors.resize(image_points_mat.length(),n_neighbors_density);
    for(int t=0; t<image_points_vmat.length(); t++)
    {
      image_points_vmat->getRow(t,t_row);
      TVec<int> nn = image_nearest_neighbors(t);
      computeNearestNeighbors(train_set, t_row, nn);
    }
  }

  // Find the nearest neighbors of the training set points.
  for(int t=0; t<train_set.length(); t++)
  {
    train_set->getRow(t,t_row);
    TVec<int> nn = train_nearest_neighbors(t);
    computeNearestNeighbors(train_set, t_row, nn, t);
  }

  // Find the nearest neighbors of the validation set points.
  for(int t=0; t<validation_set.length(); t++)
  {
    validation_set->getRow(t,t_row);
    TVec<int> nn = validation_nearest_neighbors(t);
    computeNearestNeighbors(train_set, t_row, nn);
  }

  VMat train_set_with_targets;
  VMat targets_vmat;
  if (!cost_of_one_example)
    PLERROR("GaussianContinuum::train: build has not been run after setTrainingSet!");

  targets_vmat = local_neighbors_differences(train_set, n_neighbors, false, true);

  train_set_with_targets = hconcat(train_set, targets_vmat);
  train_set_with_targets->defineSizes(inputsize()+inputsize()*n_neighbors+1+n_neighbors,0);
  int l = train_set->length();

  int nsamples = batch_size>0 ? batch_size : l;

  // Phase 1: hand-rolled stochastic gradient, one fbprop/update per example.
  while(stage<nstages)
  {
    for(int t=0; t<train_set.length(); t++)
    {
      t_row.resize(train_set_with_targets.width());
      train_set_with_targets->getRow(t,t_row);
      cost_of_one_example->inputs << t_row;
      cost_of_one_example->bproppath.clearGradient();
      sum_nll->gradient[0] = -1*(dynamic_cast<GradientOptimizer*>((Optimizer*)optimizer))->start_learning_rate;
      cost_of_one_example->bproppath.fbprop();

      cost_of_one_example->parameters.updateAndClear();
      cost_of_one_example->bproppath.clearGradient();
    }
    if(stage != 0 && stage%compute_cost_every_n_epochs == 0)
      compute_train_and_validation_costs();
    stage++;
  }
  Var totalcost = meanOf(train_set_with_targets, cost_of_one_example, nsamples);
  cout << "----------------" << endl;

  stage = 0;

  // Phase 2: optimizer-driven training.
  if(optimizer)
  {
    optimizer->setToOptimize(parameters, totalcost);
    optimizer->build();
  }
  else PLERROR("GaussianContinuum::train can't train without setting an optimizer first!");

  // Number of optimizer stages corresponding to one learner stage (one epoch).
  int optstage_per_lstage = l/nsamples;

  ProgressBar* pb = 0;
  if(report_progress>0)
    pb = new ProgressBar("Training GaussianContinuum from stage " + tostring(stage) + " to " + tostring(nstages), nstages-stage);

  t_row.resize(train_set.width());

  int initial_stage = stage;
  bool early_stop=false;
  while(stage<nstages && !early_stop)
  {
    optimizer->nstages = optstage_per_lstage;
    train_stats->forget();
    optimizer->early_stop = false;
    optimizer->optimizeN(*train_stats);
    train_stats->finalize();
    if(verbosity>2)
      cout << "Epoch " << stage << " train objective: " << train_stats->getMean() << endl;
    ++stage;
    if(pb)
      pb->update(stage-initial_stage);

    if(stage != 0 && stage%compute_cost_every_n_epochs == 0)
      compute_train_and_validation_costs();
  }
  if(verbosity>1)
    cout << "EPOCH " << stage << " train objective: " << train_stats->getMean() << endl;

  if(pb)
    delete pb;

  if(use_best_model)
    PLearn::load("temp_learner.psave",*this);

  // Save a noisy version of the validation set, for inspection.
  Mat noisy_data_set(validation_set.length(),n);
  for(int t=0; t<validation_set.length(); t++)
  {
    validation_set->getRow(t,t_row);
    Vec noisy_point = noisy_data_set(t);
    noisy_data(t_row);
    noisy_point << x->value;
  }
  PLearn::save("noisy_data.psave",noisy_data_set);

  if(save_image_mat)
  {
    cout << "Creating image matrix" << endl;
    image_prob_mat.resize(points_per_dim,points_per_dim);
    Mat image_points(points_per_dim*points_per_dim,2);
    Mat image_mu_vectors(points_per_dim*points_per_dim,2);
    Mat image_sigma_vectors(points_per_dim*points_per_dim,2);
    for(int t=0; t<image_points_vmat.length(); t++)
    {
      image_points_vmat->getRow(t,t_row);
      real this_p_x = 0;
      for(int neighbor=0; neighbor<n_neighbors_density; neighbor++)
      {
        train_set->getRow(image_nearest_neighbors(t,neighbor),neighbor_row);
        substract(t_row,neighbor_row,x_minus_neighbor);
        substract(x_minus_neighbor,mus(image_nearest_neighbors(t,neighbor)),z);
        product(w,Bs[image_nearest_neighbors(t,neighbor)],z);
        transposeProduct(zm,Fs[image_nearest_neighbors(t,neighbor)],w);
        substract(z,zm,zn);
        this_p_x += exp(-0.5*(pownorm(zm,2)/sms[image_nearest_neighbors(t,neighbor)] + pownorm(zn,2)/sns[image_nearest_neighbors(t,neighbor)]
                              + n_dim*log(sms[image_nearest_neighbors(t,neighbor)]) + (n-n_dim)*log(sns[image_nearest_neighbors(t,neighbor)])) - n/2.0 * Log2Pi);
      }

      this_p_x /= train_set.length();
      int y_coord = t/points_per_dim;
      int x_coord = t%points_per_dim;
      image_prob_mat(points_per_dim - y_coord - 1,x_coord) = this_p_x;

      output_f_all(t_row);

      image_points(t,0) = t_row[0];
      image_points(t,1) = t_row[1];

      image_mu_vectors(t) << mu->value;

      image_sigma_vectors(t,0) = sm->value[0];
      image_sigma_vectors(t,1) = sn->value[0];
    }
    PLearn::save("image.psave",image_prob_mat);
    PLearn::save("image_points.psave",image_points);
    PLearn::save("image_mu_vectors.psave",image_mu_vectors);
    PLearn::save("image_sigma_vectors.psave",image_sigma_vectors);

    if(n_random_walk_step > 0)
    {
      make_random_walk();

      string path = "image_rw_" + tostring(0) + ".psave";
      image_prob_mat.clear();
      get_image_matrix(image_prob_mat, image_points_vmat, 0, path);

      for(int i=0; i<n_random_walk_step; i++)
      {
        path = "image_rw_" + tostring(i+1) + ".psave";
        image_prob_mat.clear();
        get_image_matrix(image_prob_mat, image_points_vmat, train_set.length()*n_random_walk_per_point+train_set.length(),path);
      }
    }
  }
}

void GaussianContinuum::initializeParams()
{
  if (seed_>=0)
    manual_seed(seed_);
  else
    PLearn::seed();

  if (architecture_type=="embedding_neural_network")
  {
    real delta = 1.0 / sqrt(real(inputsize()));
    fill_random_uniform(V->value, -delta, delta);
    delta = 1.0 / real(n_hidden_units);
    fill_random_uniform(W->matValue, -delta, delta);
    c->value.clear();
    fill_random_uniform(smV->matValue, -delta, delta);
    smb->value.clear();
    snb->value.clear();
    fill_random_uniform(snV->matValue, -delta, delta);
    fill_random_uniform(muV->matValue, -delta, delta);
  }
  else PLERROR("other types not handled yet!");

  for(int i=0; i<p_x.length(); i++)
    p_x->value[i] = 1.0/p_x.length();

  if(optimizer)
    optimizer->reset();
}

void GaussianContinuum::computeOutput(const Vec& input, Vec& output) const
{
  if(output_type == "tangent_plane_variance_normalized")
  {
    int nout = outputsize()+1;
    Vec temp_output(nout);
    temp_output << output_f(input);
    Mat F = temp_output.subVec(0,temp_output.length()-1).toMat(n_dim,n);
    if(n_dim*n != temp_output.length()-1)
      PLERROR("In GaussianContinuum::computeOutput(): inconsistent output dimensions for the tangent plane");
    // Rescale each tangent vector so that its norm reflects the predicted sm.
    for(int i=0; i<F.length(); i++)
    {
      real norm = pownorm(F(i),1);
      F(i) *= sqrt(temp_output[temp_output.length()-1])/norm;
    }

    output.resize(temp_output.length()-1);
    output << temp_output.subVec(0,temp_output.length()-1);
  }
  else
  {
    int nout = outputsize();
    output.resize(nout);
    output << output_f(input);
  }
}

void GaussianContinuum::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                                const Vec& target, Vec& costs) const
{
}

TVec<string> GaussianContinuum::getTestCostNames() const
{
  return getTrainCostNames();
}

TVec<string> GaussianContinuum::getTrainCostNames() const
{
  TVec<string> cost(1); cost[0] = "projection_error";
  return cost;
}

} // end of namespace PLearn