00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
#include "Kernel.h"
00044
#include <plearn/base/ProgressBar.h>
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049
using namespace std;
00050
00051
PLEARN_IMPLEMENT_ABSTRACT_OBJECT(Kernel,
"ONE LINE DESCR",
"NO HELP");
// Out-of-line empty destructor (anchors the vtable in this translation unit).
Kernel::~Kernel() {}
00053
00055
// Constructor.
// @param is__symmetric  whether k(x,y) == k(y,x) holds for this kernel.
// The lock_* flags guard the shared evaluate_xi / evaluate_xj / k_xi_x
// scratch buffers used by the evaluate_* methods; they start unlocked.
// data_inputsize and n_examples stay at -1 until setDataForKernelMatrix()
// is called with an actual dataset.
Kernel::Kernel(bool is__symmetric)
    : lock_xi(false),
      lock_xj(false),
      lock_k_xi_x(false),
      data_inputsize(-1),
      n_examples(-1),
      is_symmetric(is__symmetric),
      report_progress(0)
{}
00066
00068
// Declares this class's options for the PLearn serialization / scripting
// system, then chains to the parent class.
void Kernel::declareOptions(OptionList &ol)
{
    // Build options: set by the user before build().
    declareOption(ol, "is_symmetric", &Kernel::is_symmetric, OptionBase::buildoption,
                  "Whether this kernel is symmetric or not.");

    declareOption(ol, "report_progress", &Kernel::report_progress, OptionBase::buildoption,
                  "If set to 1, a progress bar will be displayed when computing the Gram matrix,\n"
                  "or for other possibly costly operations.");

    declareOption(ol, "specify_dataset", &Kernel::specify_dataset, OptionBase::buildoption,
                  "If set, then setDataForKernelMatrix will be called with this dataset at build time");

    // Learnt options: filled in by setDataForKernelMatrix(), saved so that a
    // reloaded kernel knows the geometry of the data it was set up with.
    declareOption(ol, "data_inputsize", &Kernel::data_inputsize, OptionBase::learntoption,
                  "The inputsize of 'data' (if -1, is set to data.width()).");

    declareOption(ol, "n_examples", &Kernel::n_examples, OptionBase::learntoption,
                  "The number of examples in 'data'.");

    inherited::declareOptions(ol);
}
00095
00097
// Standard PLearn build sequence: parent build first, then this class's
// own build_() finalization.
void Kernel::build() {
    inherited::build();
    build_();
}
00103
00105
00107 void Kernel::build_() {
00108
if (
specify_dataset) {
00109 this->
setDataForKernelMatrix(
specify_dataset);
00110 }
00111 }
00112
00114
// Turns a shallow copy into a deep copy: deep-copies every field that
// shares heap storage, so the copy becomes independent of the original.
void Kernel::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(evaluate_xi, copies);      // scratch buffer of evaluate_i_j / evaluate_i_x
    deepCopyField(evaluate_xj, copies);      // scratch buffer of evaluate_i_j
    deepCopyField(k_xi_x, copies);           // scratch buffer of computeNearestNeighbors
    deepCopyField(data, copies);             // dataset set by setDataForKernelMatrix
    deepCopyField(specify_dataset, copies);  // the 'specify_dataset' build option
}
00125
00127
00129 void Kernel::setDataForKernelMatrix(
VMat the_data)
00130 {
00131
data = the_data;
00132
if (
data) {
00133
data_inputsize =
data->inputsize();
00134
if (
data_inputsize == -1) {
00135
00136
data_inputsize =
data->
width();
00137 }
00138
n_examples =
data->
length();
00139 }
else {
00140
data_inputsize = 0;
00141
n_examples = 0;
00142 }
00143 }
00144
00146
00148 void Kernel::addDataForKernelMatrix(
const Vec& newRow)
00149 {
00150
try{
00151
data->appendRow(newRow);
00152 }
00153
catch(
const PLearnError& e){
00154
PLERROR(
"Kernel::addDataForKernelMatrix: if one intends to use this method,\n"
00155
"he must provide a data matrix for which the appendRow method is\n"
00156
"implemented.");
00157 }
00158 }
00159
00161
00163 real Kernel::evaluate_i_j(
int i,
int j)
const {
00164
static real result;
00165
if (
lock_xi ||
lock_xj) {
00166
00167
Vec xi(
data_inputsize);
00168
Vec xj(
data_inputsize);
00169
data->
getSubRow(i, 0, xi);
00170
data->
getSubRow(j, 0, xj);
00171
return evaluate(xi, xj);
00172 }
else {
00173
lock_xi =
true;
00174
lock_xj =
true;
00175
evaluate_xi.
resize(
data_inputsize);
00176
evaluate_xj.
resize(
data_inputsize);
00177
data->
getSubRow(i, 0,
evaluate_xi);
00178
data->
getSubRow(j, 0,
evaluate_xj);
00179 result =
evaluate(
evaluate_xi,
evaluate_xj);
00180
lock_xi =
false;
00181
lock_xj =
false;
00182
return result;
00183 }
00184 }
00185
00186
00188
00190 real Kernel::evaluate_i_x(
int i,
const Vec& x,
real squared_norm_of_x)
const {
00191
static real result;
00192
if (
lock_xi) {
00193
Vec xi(
data_inputsize);
00194
data->
getSubRow(i, 0, xi);
00195
return evaluate(xi,
x);
00196 }
else {
00197
lock_xi =
true;
00198
evaluate_xi.
resize(
data_inputsize);
00199
data->
getSubRow(i, 0,
evaluate_xi);
00200 result =
evaluate(
evaluate_xi,
x);
00201
lock_xi =
false;
00202
return result;
00203 }
00204 }
00205
00207
00209 real Kernel::evaluate_x_i(
const Vec& x,
int i,
real squared_norm_of_x)
const {
00210
static real result;
00211
if(
is_symmetric)
00212
return evaluate_i_x(i,
x,squared_norm_of_x);
00213
else {
00214
if (
lock_xi) {
00215
Vec xi(
data_inputsize);
00216
data->
getSubRow(i, 0, xi);
00217
return evaluate(
x, xi);
00218 }
else {
00219
lock_xi =
true;
00220
evaluate_xi.
resize(
data_inputsize);
00221
data->
getSubRow(i, 0,
evaluate_xi);
00222 result =
evaluate(
x,
evaluate_xi);
00223
lock_xi =
false;
00224
return result;
00225 }
00226 }
00227 }
00228
00230
// Default implementation: forwards to evaluate_i_x, ignoring 'first_time'.
// Subclasses may override to cache x-dependent quantities when
// first_time is true and reuse them on subsequent calls with the same x.
real Kernel::evaluate_i_x_again(int i, const Vec& x, real squared_norm_of_x, bool first_time) const {
    return evaluate_i_x(i, x, squared_norm_of_x);
}
00235
00237
// Default implementation: forwards to evaluate_x_i, ignoring 'first_time'.
// Counterpart of evaluate_i_x_again for the (x, i) argument order.
real Kernel::evaluate_x_i_again(const Vec& x, int i, real squared_norm_of_x, bool first_time) const {
    return evaluate_x_i(x, i, squared_norm_of_x);
}
00242
00244
00246 void Kernel::evaluate_all_i_x(
const Vec& x,
Vec& k_xi_x,
real squared_norm_of_x,
int istart)
const {
00247 k_xi_x[0] =
evaluate_i_x_again(istart,
x, squared_norm_of_x,
true);
00248
int i_max = istart + k_xi_x.
length();
00249
for (
int i = istart + 1; i < i_max; i++) {
00250 k_xi_x[i] = evaluate_i_x_again(i,
x, squared_norm_of_x);
00251 }
00252 }
00253
00255
00257 void Kernel::evaluate_all_x_i(
const Vec& x,
Vec& k_x_xi,
real squared_norm_of_x,
int istart)
const {
00258 k_x_xi[0] =
evaluate_x_i_again(
x, istart, squared_norm_of_x,
true);
00259
int i_max = istart + k_x_xi.
length();
00260
for (
int i = istart + 1; i < i_max; i++) {
00261 k_x_xi[i] = evaluate_x_i_again(
x, i, squared_norm_of_x);
00262 }
00263 }
00264
00266
// Returns true iff 'x' matches one of the rows of 'data' within a
// tolerance of 1e-8 (delegated to VMat::find).  If 'i' is non-null it
// receives the index reported by VMat::find — see that method's
// documentation for its convention when no row matches.
bool Kernel::isInData(const Vec& x, int* i) const {
    return data->find(x, 1e-8, i);
}
00271
00273
// Fills k_xi_x_sorted (n_examples x 2) so that its first 'knn' rows are
// the nearest neighbors of x among the data points:
//   column 0 = kernel value k(x_i, x), column 1 = the example index i,
// partially sorted so the knn smallest kernel values come first.
//
// The member buffer k_xi_x is reused across calls to avoid reallocation.
// lock_k_xi_x detects re-entrant use; in that case a fresh local vector
// is allocated and the lock is left untouched (unlock == false).
// NOTE(review): 'k_val = k_xi_x' relies on PLearn Vec assignment sharing
// storage (shallow copy), so evaluate_all_i_x writes into the member
// buffer — confirm against Vec's semantics if modifying this.
void Kernel::computeNearestNeighbors(const Vec& x, Mat& k_xi_x_sorted, int knn) const {
    Vec k_val;
    bool unlock = true;
    if (lock_k_xi_x) {
        // Re-entrant call: the shared buffer is busy, use a local one.
        k_val.resize(n_examples);
        unlock = false;
    }
    else {
        lock_k_xi_x = true;
        k_xi_x.resize(n_examples);
        k_val = k_xi_x;
    }
    k_xi_x_sorted.resize(n_examples, 2);
    // Compute the kernel value between x and every data point.
    evaluate_all_i_x(x, k_val);
    // Pair each value with its example index, then bring the knn smallest
    // values to the front.
    for (int i = 0; i < n_examples; i++) {
        k_xi_x_sorted(i,0) = k_val[i];
        k_xi_x_sorted(i,1) = real(i);
    }
    partialSortRows(k_xi_x_sorted, knn);
    if (unlock)
        lock_k_xi_x = false;
}
00299
00301
00303 void Kernel::computeGramMatrix(
Mat K)
const
00304
{
00305
if (!
data)
PLERROR(
"Kernel::computeGramMatrix should be called only after setDataForKernelMatrix");
00306
int l=
data->
length();
00307
int m=K.
mod();
00308
ProgressBar* pb = 0;
00309
int count = 0;
00310
if (
report_progress) {
00311 pb =
new ProgressBar(
"Computing Gram matrix for " +
classname(), (l * (l + 1)) / 2);
00312 }
00313
real Kij;
00314
real* Ki;
00315
real* Kji_;
00316
for (
int i=0;i<l;i++)
00317 {
00318 Ki = K[i];
00319 Kji_ = &K[0][i];
00320
for (
int j=0; j<=i; j++,Kji_+=m)
00321 {
00322 Kij =
evaluate_i_j(i,j);
00323 *Ki++ = Kij;
00324
if (j<i)
00325 *Kji_ = Kij;
00326 }
00327
if (pb) {
00328
count += i + 1;
00329 pb->
update(
count);
00330 }
00331 }
00332
if (pb) {
00333
delete pb;
00334 }
00335 }
00336
00338
// Base-class stub: kernels with tunable parameters must override this.
// Calling it on a kernel that does not override it is a fatal error.
void Kernel::setParameters(Vec paramvec)
{ PLERROR("setParameters(Vec paramvec) not implemented for this kernel"); }
00342
00344
// Base-class stub: returns an empty Vec, meaning "no parameters".
// Parametric kernels override this together with setParameters().
Vec Kernel::getParameters() const
{ return Vec(); }
00348
00350
// Returns whether a dataset has been installed via setDataForKernelMatrix
// (relies on VMat's boolean conversion: null means no data).
bool Kernel::hasData() {
    return data;
}
00355
00357
00359 void Kernel::apply(
VMat m1,
VMat m2,
Mat& result)
const
00360
{
00361 result.
resize(m1->
length(), m2->
length());
00362
int m1w = m1->inputsize();
00363
if (m1w == -1) {
00364 m1w = m1->
width();
00365 }
00366
int m2w = m2->inputsize();
00367
if (m2w == -1) {
00368 m2w = m2->
width();
00369 }
00370
Vec m1_i(m1w);
00371
Vec m2_j(m2w);
00372
ProgressBar* pb = 0;
00373
bool easy_case = (
is_symmetric && m1 == m2);
00374
int l1 = m1->length();
00375
int l2 = m2->
length();
00376
if (
report_progress) {
00377
int nb_steps;
00378
if (easy_case) {
00379 nb_steps = (l1 * (l1 + 1)) / 2;
00380 }
else {
00381 nb_steps = l1 * l2;
00382 }
00383 pb =
new ProgressBar(
"Applying " +
classname() +
" to two matrices", nb_steps);
00384 }
00385
int count = 0;
00386
if(easy_case)
00387 {
00388
for(
int i=0; i<m1->length(); i++)
00389 {
00390 m1->getSubRow(i,0,m1_i);
00391
for(
int j=0; j<=i; j++)
00392 {
00393 m2->
getSubRow(j,0,m2_j);
00394
real val =
evaluate(m1_i,m2_j);
00395 result(i,j) =
val;
00396 result(j,i) =
val;
00397 }
00398
if (pb) {
00399
count += i + 1;
00400 pb->
update(
count);
00401 }
00402 }
00403 }
00404
else
00405 {
00406
for(
int i=0; i<m1->length(); i++)
00407 {
00408 m1->getSubRow(i,0,m1_i);
00409
for(
int j=0; j<m2->
length(); j++)
00410 {
00411 m2->
getSubRow(j,0,m2_j);
00412 result(i,j) =
evaluate(m1_i,m2_j);
00413 }
00414
if (pb) {
00415
count += l2;
00416 pb->
update(
count);
00417 }
00418 }
00419 }
00420
if (pb)
00421
delete pb;
00422 }
00423
00424
00425 void Kernel::apply(
VMat m,
const Vec& x,
Vec& result)
const
00426
{
00427 result.
resize(m->
length());
00428
int mw = m->inputsize();
00429
if (mw == -1) {
00430 mw = m->
width();
00431 }
00432
Vec m_i(mw);
00433
for(
int i=0; i<m->
length(); i++)
00434 {
00435 m->
getSubRow(i,0,m_i);
00436 result[i] =
evaluate(m_i,
x);
00437 }
00438 }
00439
00440
00441 void Kernel::apply(
Vec x,
VMat m,
Vec& result)
const
00442
{
00443 result.
resize(m->
length());
00444
int mw = m->inputsize();
00445
if (mw == -1) {
00446 mw = m->
width();
00447 }
00448
Vec m_i(mw);
00449
for(
int i=0; i<m->
length(); i++)
00450 {
00451 m->
getSubRow(i,0,m_i);
00452 result[i] =
evaluate(
x,m_i);
00453 }
00454 }
00455
00456
00457 Mat Kernel::apply(
VMat m1,
VMat m2)
const
00458
{
00459
Mat result;
00460
apply(m1,m2,result);
00461
return result;
00462 }
00463
00465
00467 real Kernel::test(
VMat d,
real threshold,
real sameness_below_threshold,
real sameness_above_threshold)
const
00468
{
00469
int nerrors = 0;
00470
int inputsize = (d->
width()-1)/2;
00471
for(
int i=0; i<d->
length(); i++)
00472 {
00473
Vec inputs = d(i);
00474
Vec input1 = inputs.
subVec(0,inputsize);
00475
Vec input2 = inputs.
subVec(inputsize,inputsize);
00476
real sameness = inputs[inputs.length()-1];
00477
real kernelvalue =
evaluate(input1,input2);
00478 cerr <<
"[" << kernelvalue <<
" " << sameness <<
"]\n";
00479
if(kernelvalue<threshold)
00480 {
00481
if(sameness==sameness_above_threshold)
00482 nerrors++;
00483 }
00484
else
00485 {
00486
if(sameness==sameness_below_threshold)
00487 nerrors++;
00488 }
00489 }
00490
return real(nerrors)/d->
length();
00491 }
00492
00493
00495
00497 TMat<int> Kernel::computeKNNeighbourMatrixFromDistanceMatrix(
const Mat& D,
int knn,
bool insure_self_first_neighbour,
bool report_progress)
00498 {
00499
int npoints = D.
length();
00500
TMat<int> neighbours(npoints, knn);
00501
Mat tmpsort(npoints,2);
00502
00503
ProgressBar* pb = 0;
00504
if (report_progress) {
00505 pb =
new ProgressBar(
"Computing neighbour matrix", npoints);
00506 }
00507
00508
Mat indices;
00509
for(
int i=0; i<npoints; i++)
00510 {
00511
for(
int j=0; j<npoints; j++)
00512 {
00513 tmpsort(j,0) = D(i,j);
00514 tmpsort(j,1) = j;
00515 }
00516
if(insure_self_first_neighbour)
00517 tmpsort(i,0) = -FLT_MAX;
00518
00519
partialSortRows(tmpsort, knn);
00520 indices = tmpsort.
column(1).subMatRows(0,knn);
00521
for (
int j = 0; j < knn; j++) {
00522 neighbours(i,j) =
int(indices(j,0));
00523 }
00524
if (pb)
00525 pb->
update(i);
00526 }
00527
if (pb)
00528
delete pb;
00529
return neighbours;
00530 }
00531
00533
00535
00536 Mat Kernel::computeNeighbourMatrixFromDistanceMatrix(
const Mat& D,
bool insure_self_first_neighbour,
bool report_progress)
00537 {
00538
int npoints = D.
length();
00539
Mat neighbours(npoints, npoints);
00540
Mat tmpsort(npoints,2);
00541
00542
ProgressBar* pb = 0;
00543
if (report_progress) {
00544 pb =
new ProgressBar(
"Computing neighbour matrix", npoints);
00545 }
00546
00547
00548
for(
int i=0; i<npoints; i++)
00549 {
00550
for(
int j=0; j<npoints; j++)
00551 {
00552 tmpsort(j,0) = D(i,j);
00553 tmpsort(j,1) = j;
00554 }
00555
if(insure_self_first_neighbour)
00556 tmpsort(i,0) = -FLT_MAX;
00557
00558
sortRows(tmpsort);
00559 neighbours(i) << tmpsort.
column(1);
00560
if (pb)
00561 pb->
update(i);
00562 }
00563
if (pb)
00564
delete pb;
00565
return neighbours;
00566 }
00567
00569
00571 Mat Kernel::estimateHistograms(
VMat d,
real sameness_threshold,
real minval,
real maxval,
int nbins)
const
00572
{
00573
real binwidth = (maxval-minval)/nbins;
00574
int inputsize = (d->
width()-1)/2;
00575
Mat histo(2,nbins);
00576
Vec histo_below = histo(0);
00577
Vec histo_above = histo(1);
00578
int nbelow=0;
00579
int nabove=0;
00580
for(
int i=0; i<d->
length(); i++)
00581 {
00582
Vec inputs = d(i);
00583
Vec input1 = inputs.
subVec(0,inputsize);
00584
Vec input2 = inputs.
subVec(inputsize,inputsize);
00585
real sameness = inputs[inputs.length()-1];
00586
real kernelvalue =
evaluate(input1,input2);
00587
if(kernelvalue>=minval && kernelvalue<maxval)
00588 {
00589
int binindex =
int((kernelvalue-minval)/binwidth);
00590
if(sameness<sameness_threshold)
00591 {
00592 histo_below[binindex]++;
00593 nbelow++;
00594 }
00595
else
00596 {
00597 histo_above[binindex]++;
00598 nabove++;
00599 }
00600 }
00601 }
00602 histo_below /=
real(nbelow);
00603 histo_above /=
real(nabove);
00604
return histo;
00605 }
00606
00607
// Builds four normalized histograms (4 x nbins) of kernel values, where
// the kernel is used as a distance over 'input_and_class' (last column =
// class label, remaining columns = input):
//   row 0: mean distance to same-class points,
//   row 1: mean distance to other-class points,
//   row 2: min distance to same-class points,
//   row 3: min distance to other-class points.
// Only values falling in [minval, maxval) are counted; each histogram is
// normalized by its own sum at the end.
// NOTE(review): both mean distances are divided by (length()-1) even
// though each class contributes only part of those points — presumably
// intentional, but verify against the original authors' intent.
Mat Kernel::estimateHistograms(Mat input_and_class, real minval, real maxval, int nbins) const
{
    real binwidth = (maxval-minval)/nbins;
    int inputsize = input_and_class.width()-1;
    Mat inputs = input_and_class.subMatColumns(0,inputsize);
    Mat classes = input_and_class.column(inputsize);
    Mat histo(4,nbins);
    Vec histo_mean_same = histo(0);
    Vec histo_mean_other = histo(1);
    Vec histo_min_same = histo(2);
    Vec histo_min_other = histo(3);

    for(int i=0; i<inputs.length(); i++)
    {
        Vec input = inputs(i);
        real input_class = classes(i,0);
        // Accumulators for point i against every other point j.
        real sameclass_meandist = 0.0;
        real otherclass_meandist = 0.0;
        real sameclass_mindist = FLT_MAX;
        real otherclass_mindist = FLT_MAX;
        for(int j=0; j<inputs.length(); j++)
            if(j!=i)
            {
                real dist = evaluate(input, inputs(j));
                if(classes(j,0)==input_class)
                {
                    sameclass_meandist += dist;
                    if(dist<sameclass_mindist)
                        sameclass_mindist = dist;
                }
                else
                {
                    otherclass_meandist += dist;
                    if(dist<otherclass_mindist)
                        otherclass_mindist = dist;
                }
            }
        sameclass_meandist /= (inputs.length()-1);
        otherclass_meandist /= (inputs.length()-1);
        // Bin each statistic if it falls inside the histogram range.
        if(sameclass_meandist>=minval && sameclass_meandist<maxval)
            histo_mean_same[int((sameclass_meandist-minval)/binwidth)]++;
        if(sameclass_mindist>=minval && sameclass_mindist<maxval)
            histo_min_same[int((sameclass_mindist-minval)/binwidth)]++;
        if(otherclass_meandist>=minval && otherclass_meandist<maxval)
            histo_mean_other[int((otherclass_meandist-minval)/binwidth)]++;
        if(otherclass_mindist>=minval && otherclass_mindist<maxval)
            histo_min_other[int((otherclass_mindist-minval)/binwidth)]++;
    }
    // Normalize each histogram to sum to 1.
    histo_mean_same /= sum(histo_mean_same);
    histo_min_same /= sum(histo_min_same);
    histo_mean_other /= sum(histo_mean_other);
    histo_min_other /= sum(histo_min_other);
    return histo;
}
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00683
00685
00686
// Returns a (k x 3) matrix listing the k closest pairs of examples with
// different targets, sorted by increasing distance: each row is
// (i, j, dist(input_i, input_j)), where the last column of each data row
// is the target and the rest is the input.
// Maintains a running "worst element in the k-best list"
// (maxdistinlist / posofmaxdistinlist) and replaces it whenever a closer
// cross-class pair is found.
// NOTE(review): every unordered pair is visited twice, as (i,j) and
// (j,i); and if fewer than k cross-class pairs exist, the trailing rows
// of 'result' are never written — confirm callers tolerate both.
Mat findClosestPairsOfDifferentClass(int k, VMat data, Ker dist)
{
    Mat result(k,3);
    real maxdistinlist = -FLT_MAX;   // largest distance currently in the list
    int posofmaxdistinlist = -1;     // its row in 'result'
    int kk=0;                        // number of pairs stored so far
    Vec rowi(data.width());
    Vec inputi = rowi.subVec(0,rowi.length()-1);
    real& targeti = rowi[rowi.length()-1];  // aliases the target slot of rowi
    Vec rowj(data.width());
    Vec inputj = rowj.subVec(0,rowj.length()-1);
    real& targetj = rowj[rowj.length()-1];
    for(int i=0; i<data.length(); i++)
    {
        data->getRow(i,rowi);
        for(int j=0; j<data.length(); j++)
        {
            data->getRow(j,rowj);
            if(targeti!=targetj)  // only cross-class pairs
            {
                real d = dist(inputi,inputj);
                if(kk<k)
                {
                    // List not full yet: append unconditionally.
                    result(kk,0) = i;
                    result(kk,1) = j;
                    result(kk,2) = d;
                    if(d>maxdistinlist)
                    {
                        maxdistinlist = d;
                        posofmaxdistinlist = kk;
                    }
                    kk++;
                }
                else if(d<maxdistinlist)
                {
                    // Replace the current worst pair, then rescan the
                    // distance column for the new worst.
                    result(posofmaxdistinlist,0) = i;
                    result(posofmaxdistinlist,1) = j;
                    result(posofmaxdistinlist,2) = d;
                    posofmaxdistinlist = argmax(result.column(2));
                    maxdistinlist = result(posofmaxdistinlist,2);
                }
            }
        }
    }
    // Sort the collected pairs by the distance column.
    sortRows(result, 2);
    return result;
}
00734
00735 }
00736