00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "DistanceKernel.h"
00045
#include "DotProductKernel.h"
00046
#include <plearn/math/plapack.h>
00047
#include "ReconstructionWeightsKernel.h"
00048
00049
namespace PLearn {
00050
using namespace std;
00051
00053
00055 ReconstructionWeightsKernel::ReconstructionWeightsKernel()
00056 : build_in_progress(false),
00057 new_data(true),
00058 ignore_nearest(1),
00059 knn(5),
00060 regularizer(1e-6)
00061 {
00062 is_symmetric =
false;
00063
sub_data =
new SelectRowsVMatrix();
00064 }
00065
00066
PLEARN_IMPLEMENT_OBJECT(
ReconstructionWeightsKernel,
00067
"Computes the reconstruction weights of a point given its neighbors.",
00068
"K(x, x_i) = the weight of x_i in the reconstruction of x by its knn\n"
00069
"nearest neighbors. More precisely, we compute weights W_i such that\n"
00070
"|| x - \\sum_j W_i x_i ||^2 is minimized, and K(x,x_i) = W_i.\n"
00071
"If the second argument is not in the training set, K(x,y) will be 0.\n"
00072
"In order not to compute K(x_i, x_j) = delta_{ij} when applied on\n"
00073
"training points, one can set the 'ignore_nearest' option to 1 (or more),\n"
00074
"which will ensure we do not use x_i itself in its reconstruction by its\n"
00075
"nearest neighbors (however, the total number of neighbors computed,\n"
00076
"including x_i itself, will always stay equal to knn).\n"
00077
"Note that this is NOT a symmetric kernel!\n"
00078 );
00079
00081
00083 void ReconstructionWeightsKernel::declareOptions(
OptionList& ol)
00084 {
00085
00086
00087
declareOption(ol,
"knn", &ReconstructionWeightsKernel::knn, OptionBase::buildoption,
00088
"The number of nearest neighbors considered (including the point itself).");
00089
00090
declareOption(ol,
"regularizer", &ReconstructionWeightsKernel::regularizer, OptionBase::buildoption,
00091
"A factor multiplied by the trace of the local Gram matrix and added to\n"
00092
"the diagonal to ensure stability when solving the linear system.");
00093
00094
declareOption(ol,
"ignore_nearest", &ReconstructionWeightsKernel::ignore_nearest, OptionBase::buildoption,
00095
"The number of nearest neighbors to ignore when computing the reconstruction weights.");
00096
00097
declareOption(ol,
"distance_kernel", &ReconstructionWeightsKernel::distance_kernel, OptionBase::buildoption,
00098
"The kernel used to compute the distances.\n"
00099
"If not specified, then the usual Euclidean distance will be used.");
00100
00101
declareOption(ol,
"dot_product_kernel", &ReconstructionWeightsKernel::dot_product_kernel, OptionBase::buildoption,
00102
"The kernel used to compute dot products in the neighborhood of each data point.\n"
00103
"If not specified, then the usual Euclidean dot product will be used.");
00104
00105
00106 inherited::declareOptions(ol);
00107 }
00108
00110
00112 void ReconstructionWeightsKernel::build()
00113 {
00114
build_in_progress =
true;
00115 inherited::build();
00116
build_();
00117 }
00118
00120
00122 void ReconstructionWeightsKernel::build_()
00123 {
00124
if (
distance_kernel) {
00125
dist_ker =
distance_kernel;
00126 }
else {
00127
dist_ker =
new DistanceKernel(2);
00128
dist_ker->report_progress = this->report_progress;
00129 }
00130
00131
if (
dot_product_kernel) {
00132
dp_ker =
dot_product_kernel;
00133 }
else {
00134
dp_ker =
new DotProductKernel();
00135
dp_ker->build();
00136 }
00137
00138
if (
ignore_nearest >
knn)
00139
PLERROR(
"In ReconstructionWeightsKernel::build_ - You can't ignore more than 'knn' neighbors");
00140
build_in_progress =
false;
00141
00142
00143
if (specify_dataset) {
00144 this->
setDataForKernelMatrix(specify_dataset);
00145 }
00146 }
00147
00149
00151 void ReconstructionWeightsKernel::computeLLEMatrix(
const Mat& lle_mat)
const {
00152
if (lle_mat.
length() != n_examples || lle_mat.
width() != n_examples)
00153
PLERROR(
"In ReconstructionWeightsKernel::computeLLEMatrix - Wrong size for 'lle_mat'");
00154 lle_mat.
clear();
00155
ProgressBar* pb = 0;
00156
if (report_progress)
00157 pb =
new ProgressBar(
"Computing LLE matrix", n_examples);
00158
int neighb_j, neighb_k;
00159
real w_ij;
00160
for (
int i = 0; i < n_examples; i++) {
00161
for (
int j = 0; j <
knn - 1; j++) {
00162 neighb_j =
neighbors(i, j + 1);
00163 w_ij =
weights(i, j);
00164 lle_mat(i, neighb_j) += w_ij;
00165 lle_mat(neighb_j, i) += w_ij;
00166
for (
int k = 0;
k < knn - 1;
k++) {
00167 neighb_k = neighbors(i,
k + 1);
00168 lle_mat(neighb_j, neighb_k) -= w_ij * weights(i,
k);
00169 }
00170 }
00171
if (report_progress)
00172 pb->
update(i + 1);
00173 }
00174
if (pb)
00175
delete pb;
00176 }
00177
00179
00181 void ReconstructionWeightsKernel::computeWeights() {
00182
static Vec point_i;
00183
static Vec weights_i;
00184
if (!data)
00185
PLERROR(
"In ReconstructionWeightsKernel::computeWeights - Can only be called if 'data' has been set");
00186 point_i.
resize(data_inputsize);
00187
weights.
resize(n_examples,
knn -
ignore_nearest);
00188
00189
Mat distances(n_examples, n_examples);
00190
dist_ker->computeGramMatrix(distances);
00191
neighbors =
00192 computeKNNeighbourMatrixFromDistanceMatrix(distances,
knn,
true, report_progress);
00193 distances =
Mat();
00194
00195
is_neighbor_of.
resize(n_examples);
00196
TVec<int> row(2);
00197
for (
int i = 0; i < n_examples; i++)
00198
is_neighbor_of[i].
resize(0, 2);
00199
for (
int i = 0; i < n_examples; i++) {
00200 row[0] = i;
00201
for (
int j = ignore_nearest; j <
knn; j++) {
00202 row[1] = j;
00203
is_neighbor_of[
neighbors(i,j)].appendRow(row);
00204 }
00205 }
00206
for (
int i = 0; i < n_examples; i++)
00207
sortRows(
is_neighbor_of[i]);
00208
00209
TVec<int> neighbors_of_i;
00210
ProgressBar* pb = 0;
00211
if (report_progress)
00212 pb =
new ProgressBar(
"Computing reconstruction weights", n_examples);
00213
for (
int i = 0; i < n_examples; i++) {
00214
00215 neighbors_of_i =
neighbors(i).subVec(ignore_nearest,
knn - ignore_nearest);
00216 weights_i =
weights(i);
00217 data->
getSubRow(i, 0, point_i);
00218
reconstruct(point_i, neighbors_of_i, weights_i);
00219
if (report_progress)
00220 pb->
update(i+1);
00221 }
00222
if (pb)
00223
delete pb;
00224 }
00225
00227
00229 real ReconstructionWeightsKernel::evaluate(
const Vec& x1,
const Vec& x2)
const {
00230
static int j;
00231
if (isInData(x2, &j)) {
00232
00233
return evaluate_x_i(x1, j);
00234 }
else {
00235
00236
return 0;
00237 }
00238 }
00239
00241
00243 real ReconstructionWeightsKernel::evaluate_i_j(
int i,
int j)
const {
00244
static TVec<int> neighbors_of_i;
00245
if (
ignore_nearest == 0) {
00246
00247
00248
if (i == j)
00249
return 1.0;
00250
else
00251
return 0;
00252 }
else {
00253
#ifdef BOUNDCHECK
00254
if (
ignore_nearest !=
knn -
weights.
width())
00255
PLERROR(
"In ReconstructionWeightsKernel::evaluate_i_j - You must recompute the weights after modifying the 'ignore_nearest' option");
00256
#endif
00257
neighbors_of_i =
neighbors(i);
00258
for (
int k =
ignore_nearest;
k <
knn;
k++) {
00259
if (neighbors_of_i[
k] == j) {
00260
return weights(i,
k - ignore_nearest);
00261 }
00262 }
00263
return 0;
00264 }
00265 }
00266
00268
00270 real ReconstructionWeightsKernel::evaluate_i_x(
int i,
const Vec& x,
real squared_norm_of_x)
const {
00271
static int j;
00272
if (isInData(
x, &j))
00273
return evaluate_i_j(i,j);
00274
else
00275
return 0;
00276 }
00277
00279
00281 real ReconstructionWeightsKernel::evaluate_sum_k_i_k_j(
int i,
int j)
const {
00282
static TMat<int> i_is_neighb_of, j_is_neighb_of;
00283 i_is_neighb_of =
is_neighbor_of[i];
00284 j_is_neighb_of = is_neighbor_of[j];
00285
int test_n;
00286
int k_i = 0;
00287
int k_j = 0;
00288
real sum = 0;
00289
00290
if (
ignore_nearest !=
knn -
weights.
width())
00291
PLERROR(
"In ReconstructionWeightsKernel::evaluate_sum_k_i_k_j - You must recompute the weights after modifying 'ignore_nearest'");
00292
while (k_i < i_is_neighb_of.
length()) {
00293 test_n = i_is_neighb_of(k_i, 0);
00294
while (k_j < j_is_neighb_of.
length() && test_n > j_is_neighb_of(k_j, 0))
00295 k_j++;
00296
if (k_j < j_is_neighb_of.length()) {
00297
if (test_n == j_is_neighb_of(k_j, 0)) {
00298
00299
sum +=
weights(test_n, i_is_neighb_of(k_i, 1) -
ignore_nearest) * weights(test_n, j_is_neighb_of(k_j, 1) -
ignore_nearest);
00300 k_i++;
00301 k_j++;
00302 }
else {
00303
00304 test_n = j_is_neighb_of(k_j, 0);
00305
while (k_i < i_is_neighb_of.length() && test_n > i_is_neighb_of(k_i, 0))
00306 k_i++;
00307 }
00308 }
else {
00309
00310
return sum;
00311 }
00312 }
00313
return sum;
00314 }
00315
00317
00319 real ReconstructionWeightsKernel::evaluate_x_i(
const Vec& x,
int i,
real squared_norm_of_x)
const {
00320
return evaluate_x_i_again(
x, i, squared_norm_of_x,
true);
00321 }
00322
00324
00326 real ReconstructionWeightsKernel::evaluate_x_i_again(
const Vec& x,
int i,
real squared_norm_of_x,
bool first_time)
const {
00327
if (first_time) {
00328
neighbors_of_x.
resize(
knn);
00329
00330
dist_ker->computeNearestNeighbors(
x,
k_xi_x_sorted,
knn);
00331
neighbors_of_x <<
k_xi_x_sorted.
subMat(
ignore_nearest, 1,
knn, 1);
00332
00333
reconstruct(
x,
neighbors_of_x,
weights_x);
00334 }
00335
int n_j =
neighbors_of_x.
find(i);
00336
if (n_j == -1)
00337
00338
return 0;
00339
return weights_x[n_j];
00340 }
00341
00343
00345 void ReconstructionWeightsKernel::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00346 {
00347 inherited::makeDeepCopyFromShallowCopy(copies);
00348
00349
00350
00351
00352
00353
00354
00355
00356
PLERROR(
"ReconstructionWeightsKernel::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00357 }
00358
00360
00362 void ReconstructionWeightsKernel::reconstruct(
const Vec& x,
const TVec<int>& neighbors,
Vec& w)
const {
00363
static bool need_init;
00364 need_init =
new_data;
00365
int k_neighb = neighbors.
length();
00366
if (
ones.
length() != k_neighb) {
00367
00368 need_init =
true;
00369
ones.
resize(k_neighb);
00370
ones.
fill(1);
00371 }
00372 w.
resize(k_neighb);
00373
if (need_init) {
00374
00375
local_gram.
resize(k_neighb, k_neighb);
00376
centered_neighborhood =
new ShiftAndRescaleVMatrix();
00377
centered_neighborhood->no_scale =
true;
00378
centered_neighborhood->negate_shift =
true;
00379
centered_neighborhood->automatic =
false;
00380
centered_neighborhood->vm = (
SelectRowsVMatrix*)
sub_data;
00381 new_data =
false;
00382 }
00383
00384
sub_data->indices = neighbors;
00385
sub_data->build();
00386
centered_neighborhood->shift =
x;
00387
centered_neighborhood->build();
00388
00389
00390
dp_ker->setDataForKernelMatrix((
ShiftAndRescaleVMatrix*)
centered_neighborhood);
00391
dp_ker->computeGramMatrix(
local_gram);
00392
00393
regularizeMatrix(
local_gram,
regularizer);
00394
00395
Vec weights_x =
solveLinearSystem(
local_gram,
ones);
00396
00397 w << weights_x;
00398
00399 w /=
sum(w);
00400 }
00401
00403
00405 void ReconstructionWeightsKernel::setDataForKernelMatrix(
VMat the_data) {
00406
if (
build_in_progress)
00407
return;
00408 inherited::setDataForKernelMatrix(the_data);
00409
dist_ker->setDataForKernelMatrix(the_data);
00410
sub_data->source = the_data;
00411
new_data =
true;
00412
computeWeights();
00413 }
00414
00415 }
00416