#include "ManifoldParzen2.h"

#include <plearn/math/plapack.h>
#include <plearn/base/general.h>
#include <plearn/math/TMat.h>
#include <plearn/math/TMat_maths.h>
#include <plearn/math/BottomNI.h>

namespace PLearn {
using namespace std;

PLEARN_IMPLEMENT_OBJECT(ManifoldParzen2,
    "Manifold Parzen density estimator: each training point contributes a Gaussian component flattened along the principal directions of its nearest neighbors.",
    ""
);

ManifoldParzen2::ManifoldParzen2()
    : nneighbors(4),
      ncomponents(1),
      use_last_eigenval(true),
      scale_factor(1)
{
    nstages = 1;
}

ManifoldParzen2::ManifoldParzen2(int the_nneighbors, int the_ncomponents,
                                 bool use_last_eigenvalue, real the_scale_factor)
    : nneighbors(the_nneighbors),
      ncomponents(the_ncomponents),
      use_last_eigenval(use_last_eigenvalue),
      scale_factor(the_scale_factor)
{
}

void ManifoldParzen2::build()
{
    inherited::build();
    build_();
}

void ManifoldParzen2::build_()
{}

void ManifoldParzen2::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    PLearner::makeDeepCopyFromShallowCopy(copies);
    PLERROR("ManifoldParzen2::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
}
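
// Finds the neighbors.length() nearest rows of dataset to x (optionally
// skipping row ignore_row), ordered by increasing squared distance, and
// copies them into the rows of neighbors.  Raises an error if every
// retained neighbor is at distance zero from x.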
void computeNearestNeighbors(Mat dataset, Vec x, Mat& neighbors, int ignore_row=-1)
{
    int K = neighbors.length();
    BottomNI<real> neighbs(K);
    for(int i=0; i<dataset.length(); i++)
        if(i!=ignore_row)
            neighbs.update(powdistance(dataset(i), x), i);
    neighbs.sort();
    TVec< pair<real,int> > indices = neighbs.getBottomN();
    int nonzero=0;
    for(int k=0; k<K; k++)
    {
        if(indices[k].first>0)
            nonzero++;
        neighbors(k) << dataset(indices[k].second);
    }
    if(nonzero==0)
        PLERROR("All neighbors had 0 distance. Use more neighbors. (There were %i other patterns with same values)", neighbs.nZeros());
}
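
// Computes the first eig_values.length() principal components of the rows
// of dataset (assumed already centered): the leading eigenvalues and
// eigenvectors of dataset' * dataset, with any negative eigenvalue clamped
// to zero.  eig_vectors is expected to already have the appropriate size.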
void computePrincipalComponents(Mat dataset, Vec& eig_values, Mat& eig_vectors)
{
#ifdef BOUNDCHECK
    if(eig_vectors.width()!=dataset.width())
        PLERROR("In computePrincipalComponents eig_vectors and dataset must have same width");
    if(eig_values.length() != eig_vectors.length())
        PLERROR("In computePrincipalComponents eig_values vec and eig_vectors mat must have same length");
#endif

    static Mat covar;
    int ncomp = eig_values.length();
    covar.resize(dataset.width(), dataset.width());
    transposeProduct(covar, dataset, dataset);
    eigenVecOfSymmMat(covar, ncomp, eig_values, eig_vectors);
    for (int i=0; i<eig_values.length(); i++)
        if (eig_values[i]<0)
            eig_values[i] = 0;
}
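
// Extracts the local principal components around dataset(which_pattern):
// gathers its nearest neighbors into delta_neighbors, centers them on their
// column mean (returned in 'mean'), and runs computePrincipalComponents on
// the centered offsets.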
void computeLocalPrincipalComponents(Mat& dataset, int which_pattern, Mat& delta_neighbors,
                                     Vec& eig_values, Mat& eig_vectors, Vec& mean)
{
    Vec center = dataset(which_pattern);
    if (center.hasMissing())
        PLERROR("dataset row %d has missing values!", which_pattern);
    computeNearestNeighbors(dataset, center, delta_neighbors, which_pattern);
    mean.resize(delta_neighbors.width());
    columnMean(delta_neighbors, mean);
    delta_neighbors -= mean;
    computePrincipalComponents(delta_neighbors, eig_values, eig_vectors);
}
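
// Builds one flattened Gaussian per training point: its mean is the column
// mean of the point's nearest neighbors, its leading eigen-directions come
// from the local principal components (eigenvalues scaled by scale_factor),
// and the variance in the remaining directions is either the last non-zero
// local eigenvalue or the global_lambda0 option.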
void ManifoldParzen2::train()
{
    Mat trainset(train_set);
    int l = train_set.length();
    int w = train_set.width();

    type = "general";
    L = l;
    D = ncomponents;
    GaussMix::build();
    resizeStuffBeforeTraining();

    Mat delta_neighbors(nneighbors, w);
    Vec eigvals(ncomponents+1);
    Mat components_eigenvecs(ncomponents+1, w);
    for(int i=0; i<l; i++)
    {
        if(i%100==0)
            cerr << "[SEQUENTIAL TRAIN: processing pattern #" << i << "/" << l << "]\n";

        mu(i) << trainset(i);

        Vec center;
        computeLocalPrincipalComponents(trainset, i, delta_neighbors, eigvals,
                                        components_eigenvecs, center);

        eigvals *= scale_factor;

        // Average distance of the centered neighbors to their mean.
        // Note: 'd' is not used further below.
        real d = 0;
        for(int k=0; k<delta_neighbors.length(); k++)
            d += dist(delta_neighbors(k), Vec(w, 0.0), 2);
        d /= delta_neighbors.length();

        // Variance used for the directions not spanned by the retained
        // principal components.
        real lambda0;
        if(use_last_eigenval)
        {
            // Use the last (smallest) computed eigenvalue, falling back to
            // earlier ones if it is zero.
            int last = ncomponents;
            lambda0 = eigvals[last];
            while (lambda0==0 && last>0)
                lambda0 = eigvals[--last];
            if (lambda0 == 0)
                PLERROR("All (%i) principal components have zero variance!?", eigvals.length());
        }
        else
            lambda0 = global_lambda0;

        alpha[i] = 1.0 / l; // equal weight for every Gaussian component
        n_eigen = eigvals.length() - 1;
        GaussMix::build();
        resizeStuffBeforeTraining();
        mu(i) << center;
        eigenvalues(i) << eigvals;
        eigenvalues(i, n_eigen_computed - 1) = lambda0;
        eigenvectors[i] << components_eigenvecs;
    }
    stage = 1;

    build();
}

} // end of namespace PLearn