00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00043
#include <plearn/ker/DistanceKernel.h>
00044
#include "KNNVMatrix.h"
00045
#include "SelectRowsVMatrix.h"
00046
#include "SubVMatrix.h"
00047
00048
namespace PLearn {
00049
using namespace std;
00050
00052
00054 KNNVMatrix::KNNVMatrix()
00055 : knn(6),
00056 report_progress(1)
00057 {}
00058
00059
PLEARN_IMPLEMENT_OBJECT(
KNNVMatrix,
00060
"A VMatrix that sees the nearest neighbours of each sample in the source VMat.",
00061
"Each sample is followed by its (knn-1) nearest neighbours.\n"
00062
"To each row is appended an additional target, which is:\n"
00063
" - 1 if it is the first of a bag of neighbours,\n"
00064
" - 2 if it is the last of a bag,\n"
00065
" - 0 if it is none of these,\n"
00066
" - 3 if it is both (only for knn == 1).\n"
00067
"In addition, if a kernel_pij kernel is provided,, in the input part of the VMatrix\n"
00068
"is appended p_ij, where\n"
00069
" p_ij = K(x_i,x_j) / \\sum_{k \\in knn(i), k != i} K(x_i,x_k)\n"
00070
"where K = kernel_pij, and j != i (for j == i, p_ij = -1).");
00071
00073
00075 void KNNVMatrix::declareOptions(
OptionList& ol)
00076 {
00077
declareOption(ol,
"k_nn_mat", &KNNVMatrix::k_nn_mat, OptionBase::buildoption,
00078
"TODO comment");
00079
00080
declareOption(ol,
"knn", &KNNVMatrix::knn, OptionBase::buildoption,
00081
"The number of nearest neighbours to consider (including the point itself).");
00082
00083
declareOption(ol,
"kernel_pij", &KNNVMatrix::kernel_pij, OptionBase::buildoption,
00084
"An optional kernel used to compute the pij weights (see help).");
00085
00086
declareOption(ol,
"report_progress", &KNNVMatrix::report_progress, OptionBase::buildoption,
00087
"TODO comment");
00088
00089
00090
00091
00092
00093
00094
00095
00096 inherited::declareOptions(ol);
00097 }
00098
00100
00102 void KNNVMatrix::build()
00103 {
00104 inherited::build();
00105
build_();
00106 }
00107
00109
00111 void KNNVMatrix::build_() {
00112
if (source) {
00113
int n = source->
length();
00114
bool recompute_nn =
true;
00115
if (
k_nn_mat) {
00116
if (
k_nn_mat->
length() > 0) {
00117
00118
if (
k_nn_mat->
length() == source->
length()) {
00119
if (
k_nn_mat->
width() <
knn) {
00120
PLWARNING(
"In KNNVMatrix::build_ - Not enough neighbours in the given k_nn_mat, will recompute nearest neighbours");
00121 }
else {
00122
00123 recompute_nn =
false;
00124
nn.
resize(n,
knn);
00125
for (
int i = 0; i < n; i++) {
00126
k_nn_mat->
getSubRow(i, 0,
nn(i));
00127 }
00128 }
00129 }
else {
00130
00131
00132
00133
PP<SelectRowsVMatrix> smat = dynamic_cast<SelectRowsVMatrix*>((
VMatrix*) source);
00134
if (!smat.
isNull() && smat->source->length() ==
k_nn_mat->
length()) {
00135
00136
00137
PLWARNING(
"In KNNVMatrix::build_ - Will consider the given k_nn_mat has been computed on source's distr VMat");
00138 recompute_nn =
false;
00139
00140
nn.
resize(n,
knn);
00141
Vec store_nn(
k_nn_mat->
width());
00142
for (
int i = 0; i < n; i++) {
00143
nn(i,0) = i;
00144
k_nn_mat->getRow(smat->indices[i], store_nn);
00145
int k = 1;
00146
for (
int j = 1; j <
knn; j++) {
00147
bool ok =
false;
00148
while (!ok &&
k < store_nn.
length()) {
00149
int q = smat->indices.find(
int(store_nn[
k]));
00150
if (q >= 0) {
00151
00152 ok =
true;
00153
nn(i,j) = q;
00154 }
00155
k++;
00156 }
00157
if (
k == store_nn.
length()) {
00158
00159
PLERROR(
"In KNNVMatrix::build_ - Not enough neighbours in the SelectRowsVMatrix");
00160 }
00161 }
00162 }
00163 }
else {
00164
00165
PP<SubVMatrix> smat = dynamic_cast<SubVMatrix*>((
VMatrix*) source);
00166
if ( !smat.
isNull()
00167 && smat->parent->length() ==
k_nn_mat->
length()
00168 && smat->width() == smat->parent->width()) {
00169
00170
00171
PLWARNING(
"In KNNVMatrix::build_ - Will consider the given k_nn_mat has been computed on source's parent VMat");
00172 recompute_nn =
false;
00173
nn.
resize(n,
knn);
00174
Vec store_nn(
k_nn_mat->
width());
00175
for (
int i = 0; i < n; i++) {
00176
nn(i,0) = i;
00177
k_nn_mat->getRow(i + smat->istart, store_nn);
00178
int k = 1;
00179
for (
int j = 1; j <
knn; j++) {
00180
bool ok =
false;
00181
while (!ok &&
k < store_nn.
length()) {
00182
int q =
int(store_nn[
k]) - smat->istart;
00183
if (q >= 0 && q < smat->length()) {
00184
00185 ok =
true;
00186
nn(i,j) = q - smat->istart;
00187 }
00188
k++;
00189 }
00190
if (
k == store_nn.
length()) {
00191
00192
PLERROR(
"In KNNVMatrix::build_ - Not enough neighbours in the SubVMatrix");
00193 }
00194
00195 }
00196 }
00197 }
else {
00198
00199
PLWARNING(
"In KNNVMatrix::build_ - Don't know what to do with k_nn_mat, will recompute the nearest neighbours");
00200 }
00201 }
00202 }
00203 }
00204 }
00205
00206
if (recompute_nn) {
00207
00208
if (
k_nn_mat) {
00209
if (
k_nn_mat->
length() > 0) {
00210
PLERROR(
"In KNNVMatrix::build_ - The given k_nn_mat already has data, free it first");
00211 }
00212 }
00213
00214
DistanceKernel dk(2);
00215
if (
report_progress) {
00216 dk.
report_progress =
true;
00217 dk.
build();
00218 }
00219 dk.
setDataForKernelMatrix(source);
00220
Mat distances(n,n);
00221 dk.
computeGramMatrix(distances);
00222
00223
nn = dk.
computeNeighbourMatrixFromDistanceMatrix(distances);
00224
00225
00226
00227
nn.
resize(n,
knn);
00228
00229
if (
k_nn_mat) {
00230
for (
int i = 0; i < n; i++) {
00231
k_nn_mat->appendRow(
nn(i));
00232 }
00233 }
00234 }
00235
00236
00237 targetsize_ = source->targetsize() + 1;
00238 length_ = n *
knn;
00239 width_ = source->
width() + 1;
00240
setMetaInfoFromSource();
00241
00242
00243
if (
kernel_pij) {
00244
00245 inputsize_++;
00246 width_++;
00247
kernel_pij->setDataForKernelMatrix(source);
00248
int n = source->
length();
00249
pij.
resize(n, knn-1);
00250
for (
int i = 0; i < n; i++) {
00251
real sum = 0;
00252
real k_ij;
00253
for (
int j = 1; j < knn; j++) {
00254
00255 k_ij =
kernel_pij->evaluate_i_j(i,
int(
nn(i,j)));
00256
pij(i,j-1) = k_ij;
00257
sum += k_ij;
00258 }
00259
pij.
row(i) /=
sum;
00260 }
00261 }
00262 }
00263 }
00264
00266
00268 void KNNVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00269 {
00270 inherited::makeDeepCopyFromShallowCopy(copies);
00271
00272
00273
00274
00275
00276
00277
00278
deepCopyField(
source_row, copies);
00279
deepCopyField(
nn, copies);
00280
deepCopyField(
pij, copies);
00281
00282
00283
00284
00285
deepCopyField(
kernel_pij, copies);
00286
00287
PLWARNING(
"In KNNVMatrix::makeDeepCopyFromShallowCopy - k_nn_mat will not be deep copied");
00288
00289 }
00290
00292
00294 int KNNVMatrix::getSourceIndexOf(
int i,
int& i_ref,
int& i_n)
const {
00295 i_ref = i /
knn;
00296 i_n = i %
knn;
00297
int i_neighbour_source =
int(
nn(i_ref, i_n));
00298
return i_neighbour_source;
00299 }
00300
00302
00304 void KNNVMatrix::getNewRow(
int i,
const Vec& v)
const {
00305
source_row.
resize(source->
width());
00306
int i_n;
00307
int i_ref;
00308
int real_i =
getSourceIndexOf(i, i_ref, i_n);
00309 source->getRow(real_i,
source_row);
00310
if (
kernel_pij) {
00311 v.
subVec(0, source->inputsize()) <<
source_row.
subVec(0, source->inputsize());
00312
if (i_n > 0) {
00313 v[source->inputsize()] =
pij(i_ref, i_n - 1);
00314 }
else {
00315 v[source->inputsize()] = -1;
00316 }
00317 }
else {
00318 v.
subVec(0, source->inputsize() + source->targetsize())
00319 <<
source_row.
subVec(0, source->inputsize() + source->targetsize());
00320 }
00321 v.
subVec(
inputsize(), source->targetsize())
00322 <<
source_row.
subVec(source->inputsize(), source->targetsize());
00323 v[
inputsize() + source->targetsize()] =
getTag(i_n);
00324
if (
weightsize() > 0) {
00325 v.
subVec(
inputsize() +
targetsize(),
weightsize())
00326 <<
source_row.
subVec(source->inputsize() + source->targetsize(), source->weightsize());
00327 }
00328 }
00329
00331
00333 int KNNVMatrix::getTag(
int p)
const {
00334
00335
if (
knn == 1)
return 3;
00336
if (p == 0)
return 1;
00337
if (p ==
knn - 1)
return 2;
00338
return 0;
00339 }
00340
00341 }