00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00039
#include "VecStatsCollector.h"
00040
#include "TMat_maths.h"
00041
#include <plearn/base/stringutils.h>
00042
00043
namespace PLearn {
00044
using namespace std;
00045
00046 VecStatsCollector::VecStatsCollector()
00047 :maxnvalues(0), compute_covariance(false)
00048 {}
00049
00050
// Registers VecStatsCollector with the PLearn object system, together with
// its one-line summary and its multi-line help text.
PLEARN_IMPLEMENT_OBJECT(
    VecStatsCollector,
    "Collects basic statistics on a vector",
    "VecStatsCollector allows to collect statistics on a series of vectors.\n"
    "Individual vectors x are presented by calling update(x), and this class will\n"
    "collect both individual statistics for each element (as a Vec<StatsCollector>)\n"
    "as well as (optionally) compute the covariance matrix.");
00054
00055 void VecStatsCollector::declareOptions(
OptionList& ol)
00056 {
00057
00058
00059
00060
00061
00062
00063
declareOption(ol,
"maxnvalues", &VecStatsCollector::maxnvalues, OptionBase::buildoption,
00064
"maximum number of different values to keep track of for each element");
00065
declareOption(ol,
"compute_covariance", &VecStatsCollector::compute_covariance, OptionBase::buildoption,
00066
"should we compute and keep X'.X ?");
00067
00068
declareOption(ol,
"stats", &VecStatsCollector::stats, OptionBase::learntoption,
00069
"the stats for each element");
00070
declareOption(ol,
"cov", &VecStatsCollector::cov, OptionBase::learntoption,
00071
"the uncentered covariance matrix (mean not subtracted): X'.X");
00072
00073
declareOption(ol,
"fieldnames", &VecStatsCollector::fieldnames, OptionBase::buildoption,
00074
"Names of the fields of the vector");
00075
00076
00077 inherited::declareOptions(ol);
00078 }
00079
00080 double VecStatsCollector::getStat(
const string& statspec)
00081 {
00082
PIStringStream in(statspec);
00083
string statname;
00084 in.
smartReadUntilNext(
"[", statname);
00085
string fieldname;
00086 in.
smartReadUntilNext(
"]", fieldname);
00087
int fieldnum =
getFieldNum(fieldname);
00088
if(fieldnum<0)
00089
PLERROR(
"In VecStatsCollector::getStat invalid fieldname: %s",fieldname.c_str());
00090
00091
00092
00093
00094
if (
stats.
length() == 0)
00095
return MISSING_VALUE;
00096
00097
return getStats(fieldnum).
getStat(statname);
00098 }
00099
00100 int VecStatsCollector::getFieldNum(
const string& fieldname_or_num)
const
00101
{
00102
int num =
fieldnames.
find(fieldname_or_num);
00103
if(num<0) {
00104
00105
if (
pl_isnumber(fieldname_or_num)) {
00106 num =
toint(fieldname_or_num);
00107 }
else {
00108
00109 num = -1;
00110 }
00111 }
00112
return num;
00113 }
00114
00115
00116 void VecStatsCollector::update(
const Vec& x,
real weight)
00117 {
00118
int n =
x.size();
00119
if(
stats.
size()==0)
00120 {
00121
stats.
resize(n);
00122
for(
int k=0;
k<n;
k++)
00123 {
00124
stats[
k].maxnvalues =
maxnvalues;
00125
stats[
k].forget();
00126 }
00127
if(
compute_covariance)
00128 {
00129
cov.
resize(n,n);
00130
cov.
fill(0);
00131 }
00132 }
00133
00134
if(
stats.
size()!=n)
00135
PLERROR(
"In VecStatsCollector: problem, called update with vector of length %d, while size of stats (and most likeley previously seen vector) is %d", n,
stats.
size());
00136
00137
00138
00139
00140
for(
int k=0;
k<n;
k++)
00141 {
00142
stats[
k].update(
x[
k], weight);
00143
00144
00145 }
00146
00147
if(
compute_covariance)
00148
00149
00150
00151
00152
00153
00154
00155
00156
externalProductScaleAcc(
cov,
x,
x, weight);
00157 }
00158
00160 void VecStatsCollector::update(
const Mat& m)
00161 {
00162
int l = m.
length();
00163
for(
int i=0; i<l; i++)
00164
update(m(i));
00165 }
00166
00168 void VecStatsCollector::update(
const Mat& m,
const Vec& weights)
00169 {
00170
if (m.
length() != weights.
size())
00171
PLERROR(
"VecStatsCollector::update: matrix height (%d) "
00172
"is incompatible with weights length (%d)", m.
length(),
00173 weights.
size());
00174
int l = m.
length();
00175
for(
int i=0; i<l; i++)
00176
update(m(i), weights[i]);
00177 }
00178
00179 void VecStatsCollector::build_()
00180 {}
00181
00182 void VecStatsCollector::build()
00183 {
00184 inherited::build();
00185
build_();
00186 }
00187
00188 void VecStatsCollector::forget()
00189 {
00190
stats.
resize(0);
00191
cov.
resize(0,0);
00192 }
00193
00194 void VecStatsCollector::finalize()
00195 {
00196
int n =
stats.
size();
00197
for(
int i=0; i<n; i++)
00198
stats[i].finalize();
00199 }
00200
00202 Vec VecStatsCollector::getMean()
const
00203
{
00204
int n =
stats.
size();
00205
Vec res(n);
00206
for(
int k=0;
k<n;
k++)
00207 res[
k] =
stats[
k].mean();
00208
return res;
00209 }
00210
00212 Vec VecStatsCollector::getVariance()
const
00213
{
00214
int n =
stats.
size();
00215
Vec res(n);
00216
for(
int k=0;
k<n;
k++)
00217 res[
k] =
stats[
k].variance();
00218
return res;
00219 }
00220
00222 Vec VecStatsCollector::getStdDev()
const
00223
{
00224
int n =
stats.
size();
00225
Vec res(n);
00226
for(
int k=0;
k<n;
k++)
00227 res[
k] =
stats[
k].stddev();
00228
return res;
00229 }
00230
00232 Vec VecStatsCollector::getStdError()
const
00233
{
00234
int n =
stats.
size();
00235
Vec res(n);
00236
for(
int k=0;
k<n;
k++)
00237 res[
k] =
stats[
k].stderror();
00238
return res;
00239 }
00240
00242 Mat VecStatsCollector::getCovariance()
const
00243
{
00244
double invN = 1./
stats[0].n();
00245
Vec meanvec =
getMean();
00246
Mat covariance(
cov.
length(),
cov.
width());
00247
for(
int i=0; i<
cov.
length(); i++)
00248
for(
int j=0; j<
cov.
width(); j++)
00249
covariance(i, j) = invN*
cov(i, j) - meanvec[i]*meanvec[j];
00250
return covariance;
00251 }
00252
00254 Mat VecStatsCollector::getCorrelation()
const
00255
{
00256
Mat norm(
cov.
width(),
cov.
width());
00257
externalProduct(
norm,
getStdDev(),
getStdDev());
00258
return getCovariance()/
norm;
00259 }
00260
00261 void VecStatsCollector::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00262 {
00263 Object::makeDeepCopyFromShallowCopy(copies);
00264
deepCopyField(
stats, copies);
00265 }
00266
00267 }