00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "DistanceKernel.h"
00045
#include <plearn/vmat/FileVMatrix.h>
00046
#include "GeodesicDistanceKernel.h"
00047
00048
namespace PLearn {
00049
using namespace std;
00050
00052
00054 GeodesicDistanceKernel::GeodesicDistanceKernel()
00055 : geodesic_file(""),
00056 knn(10),
00057 pow_distance(false),
00058 shortest_algo("djikstra")
00059 {
00060
distance_kernel =
new DistanceKernel(2);
00061 }
00062
00063 GeodesicDistanceKernel::GeodesicDistanceKernel(
Ker the_distance_kernel,
int the_knn,
00064
string the_geodesic_file,
bool the_pow_distance)
00065 : geodesic_file(the_geodesic_file),
00066 knn(the_knn),
00067 pow_distance(the_pow_distance),
00068 shortest_algo("djikstra")
00069 {
00070
distance_kernel = the_distance_kernel;
00071
build();
00072 }
00073
00074
PLEARN_IMPLEMENT_OBJECT(
GeodesicDistanceKernel,
00075
"Computes the geodesic distance based on k nearest neighbors.",
00076
""
00077 );
00078
00080
00082 void GeodesicDistanceKernel::declareOptions(
OptionList& ol)
00083 {
00084
00085
00086
declareOption(ol,
"knn", &GeodesicDistanceKernel::knn, OptionBase::buildoption,
00087
"The number of nearest neighbors considered.");
00088
00089
declareOption(ol,
"distance_kernel", &GeodesicDistanceKernel::distance_kernel, OptionBase::buildoption,
00090
"The kernel giving the distance between two points.");
00091
00092
declareOption(ol,
"pow_distance", &GeodesicDistanceKernel::pow_distance, OptionBase::buildoption,
00093
"If set to 1, then it will compute the squared geodesic distance.");
00094
00095
declareOption(ol,
"geodesic_file", &GeodesicDistanceKernel::geodesic_file, OptionBase::buildoption,
00096
"If provided, the geodesic distances will be saved in this file in binary format.");
00097
00098
declareOption(ol,
"shortest_algo", &GeodesicDistanceKernel::shortest_algo, OptionBase::buildoption,
00099
"The algorithm used to compute the geodesic distances:\n"
00100
" - floyd : Floyd's algorithm\n"
00101
" - djikstra : Djikstra's algorithm");
00102
00103
00104
00105
declareOption(ol,
"geo_distances", &GeodesicDistanceKernel::geo_distances, OptionBase::learntoption,
00106
"The geodesic distances between training points.");
00107
00108
00109 inherited::declareOptions(ol);
00110 }
00111
00113
00115 void GeodesicDistanceKernel::build()
00116 {
00117 inherited::build();
00118
build_();
00119 }
00120
00122
00124 void GeodesicDistanceKernel::build_()
00125 {
00126 }
00127
00129
00131 int GeodesicDistanceKernel::computeNearestGeodesicNeighbour(
int i,
const Mat& distances_xi_x_sorted,
real* dist_i)
const {
00132
real min = distances_xi_x_sorted(0,0) +
geo_distances->get(i,
int(distances_xi_x_sorted(0,1)));
00133
real dist;
00134
int indice = 0;
00135
for (
int j = 1; j <
knn; j++) {
00136
dist = distances_xi_x_sorted(j,0) +
geo_distances->get(i,
int(distances_xi_x_sorted(j,1)));
00137
if (
dist <
min) {
00138
min =
dist;
00139 indice = j;
00140 }
00141 }
00142
if (dist_i)
00143 *dist_i =
min;
00144
return int(distances_xi_x_sorted(indice,1));
00145 }
00146
00148
00150 real GeodesicDistanceKernel::computeShortestDistance(
int i,
const Mat& distances_xi_x_sorted)
const {
00151
static real result;
00152
computeNearestGeodesicNeighbour(i, distances_xi_x_sorted, &result);
00153
return result;
00154 }
00155
00157
00159 real GeodesicDistanceKernel::evaluate(
const Vec& x1,
const Vec& x2)
const {
00160
distance_kernel->computeNearestNeighbors(x1,
dist_xi_x_sorted1,
knn);
00161
distance_kernel->computeNearestNeighbors(x2,
dist_xi_x_sorted2,
knn);
00162
real min =
REAL_MAX;
00163
real dist;
00164
for (
int j = 0; j <
knn; j++) {
00165
for (
int k = 0;
k < knn;
k++) {
00166
dist =
dist_xi_x_sorted1(j,0) +
dist_xi_x_sorted2(
k,0)
00167 +
geo_distances->get(
int(dist_xi_x_sorted1(j,1)),
int(dist_xi_x_sorted2(
k,1)));
00168
if (
dist <
min) {
00169
min =
dist;
00170 }
00171 }
00172 }
00173
if (
pow_distance) {
00174
return square(
min);
00175 }
else {
00176
return min;
00177 }
00178 }
00179
00181
00183 real GeodesicDistanceKernel::evaluate_i_j(
int i,
int j)
const {
00184
if (
pow_distance) {
00185
return square(
geo_distances->get(i,j));
00186 }
else {
00187
return geo_distances->get(i,j);
00188 }
00189 }
00190
00192
00194 real GeodesicDistanceKernel::evaluate_i_x(
int i,
const Vec& x,
real squared_norm_of_x)
const {
00195
return evaluate_i_x_again(i,
x, squared_norm_of_x,
true);
00196 }
00197
00199
00201 real GeodesicDistanceKernel::evaluate_i_x_from_distances(
int i,
const Mat& distances_xi_x_sorted)
const {
00202
if (
pow_distance) {
00203
return square(
computeShortestDistance(i, distances_xi_x_sorted));
00204 }
else {
00205
return computeShortestDistance(i, distances_xi_x_sorted);
00206 }
00207 }
00208
00210
00212 real GeodesicDistanceKernel::evaluate_i_x_again(
int i,
const Vec& x,
real squared_norm_of_x,
bool first_time)
const {
00213
if (first_time) {
00214
distance_kernel->computeNearestNeighbors(
x,
dist_xi_x_sorted,
knn);
00215 }
00216
if (
pow_distance) {
00217
return square(
computeShortestDistance(i,
dist_xi_x_sorted));
00218 }
else {
00219
return computeShortestDistance(i,
dist_xi_x_sorted);
00220 }
00221 }
00222
00224
00226 void GeodesicDistanceKernel::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00227 {
00228 inherited::makeDeepCopyFromShallowCopy(copies);
00229
00230
00231
00232
00233
00234
00235
00236
00237
PLERROR(
"GeodesicDistanceKernel::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00238 }
00239
00241
00243 void GeodesicDistanceKernel::setDataForKernelMatrix(
VMat the_data) {
00244 inherited::setDataForKernelMatrix(the_data);
00245
distance_kernel->setDataForKernelMatrix(the_data);
00246
int n = n_examples;
00247
00248
if (
geo_distances &&
geo_distances->
length() == n &&
geo_distances->
width() == n) {
00249
return;
00250 }
00251
00252
Mat distances(n,n);
00253
distance_kernel->computeGramMatrix(distances);
00254
00255
TMat<int> neighborhoods = Kernel::computeKNNeighbourMatrixFromDistanceMatrix(distances,
knn,
true, report_progress);
00256
00257
Mat geodesic(n,n);
00258
real big_value =
REAL_MAX / 3.0;
00259
ProgressBar* pb = 0;
00260
if (report_progress)
00261 pb =
new ProgressBar(
"Computing geodesic distances", n);
00262
if (
shortest_algo ==
"floyd") {
00263
00264 geodesic.
fill(big_value);
00265
int neighbor;
00266
real d;
00267
for (
int i = 0; i < n; i++) {
00268 geodesic(i,i) = 0;
00269
for (
int j = 1; j <
knn; j++) {
00270 neighbor = neighborhoods(i,j);
00271 d = distances(i, neighbor);
00272 geodesic(i, neighbor) = d;
00273 geodesic(neighbor, i) = d;
00274 }
00275 }
00276
00277
real dist;
00278
for (
int k = 0;
k < n;
k++) {
00279
for (
int i = 0; i < n; i++) {
00280
for (
int j = 0; j < n; j++) {
00281
dist = geodesic(i,
k) + geodesic(
k,j);
00282
if (geodesic(i,j) >
dist) {
00283 geodesic(i,j) =
dist;
00284 }
00285 }
00286 }
00287
if (report_progress)
00288 pb->
update(
k + 1);
00289 }
00290 }
else if (
shortest_algo ==
"djikstra") {
00291
00292
00293
00294
TVec< TVec<int> > sym_neighborhoods(n);
00295
int neighb;
00296
for (
int i = 0; i < n; i++) {
00297
for (
int j = 1; j <
knn; j++) {
00298 neighb = neighborhoods(i, j);
00299 sym_neighborhoods[i].
append(neighb);
00300 sym_neighborhoods[neighb].
append(i);
00301 }
00302 }
00303
Vec d;
00304
TVec<bool> T(n);
00305
int t,
min, i, j, m,
k;
00306
real dist;
00307
for (
k = 0;
k < n;
k++) {
00308 d = geodesic(
k);
00309 d.
fill(big_value);
00310 d[
k] = 0;
00311 T.
fill(
true);
00312
for (i = 0; i < n; i++) {
00313
min = 0;
00314
while (!T[
min])
00315
min++;
00316
for (m =
min + 1; m < n; m++) {
00317
if (T[m] && d[m] < d[
min]) {
00318
min = m;
00319 }
00320 }
00321
for (j = 0; j < sym_neighborhoods[
min].
length(); j++) {
00322 t = sym_neighborhoods[
min][j];
00323
if (T[t]) {
00324
dist = d[
min] + distances(
min, t);
00325
if (d[t] >
dist) {
00326 d[t] =
dist;
00327 }
00328 }
00329 }
00330 T[
min] =
false;
00331 }
00332
if (report_progress)
00333 pb->
update(
k+1);
00334 }
00335 }
else {
00336
PLERROR(
"In GeodesicDistanceKernel::setDataForKernelMatrix - Unknown value for 'shortest_algo'");
00337 }
00338
if (pb)
00339
delete pb;
00340
00341
if (
geodesic_file ==
"") {
00342
geo_distances =
VMat(geodesic);
00343 }
else {
00344
00345
geo_distances =
new FileVMatrix(
geodesic_file, n, n);
00346
geo_distances->putMat(0, 0, geodesic);
00347 }
00348 }
00349
00350 }
00351
00352