00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
#ifndef StatsCollector_INC
00041
#define StatsCollector_INC
00042
00043
#include <plearn/base/general.h>
00044
#include <plearn/base/Object.h>
00045
#include "TMat.h"
00046
#include <plearn/base/RealMapping.h>
00047
00048
namespace PLearn {
00049
using namespace std;
00050
//! Plain record of per-value counters; StatsCollector stores one of these
//! per key in its `counts` map.
//! NOTE(review): the exact meaning of each field is defined by the code that
//! fills it (StatsCollector::update, not visible here) -- the per-field notes
//! below are assumptions to confirm against that implementation.
class StatsCollectorCounts
{
public:
    double n;          //!< presumably the (weighted) number of occurrences of the key value
    double nbelow;     //!< presumably the (weighted) number of values falling below the key
    double sum;        //!< presumably the sum of the values counted in nbelow
    double sumsquare;  //!< presumably the sum of squares of the values counted in nbelow
    int id;            //!< integer identifier attached to this entry (see StatsCollector::sortIds)

    //! Starts every counter, and the id, at zero.
    StatsCollectorCounts()
        : n(0),
          nbelow(0),
          sum(0.),
          sumsquare(0.),
          id(0)
    {}
};
00064
00065 typedef pair<real,StatsCollectorCounts*>
PairRealSCCType;
00066
00068
00069 inline PStream&
operator>>(
PStream& in,
StatsCollectorCounts& c)
00070 { in >> c.
n >> c.
nbelow >> c.
sum >> c.
sumsquare >> c.
id;
return in; }
00071
00072 inline PStream&
operator<<(
PStream& out,
const StatsCollectorCounts& c)
00073 { out << c.
n << c.
nbelow << c.
sum << c.
sumsquare << c.
id;
return out; }
00074
00081 class StatsCollector:
public Object
00082 {
00083
public:
00084 typedef Object inherited;
00085
PLEARN_DECLARE_OBJECT(
StatsCollector);
00086
00087
public:
00088
00089 typedef Object inherited;
00090
00091
00092
00095 int maxnvalues;
00096
00097
00098
00099
00100 double nmissing_;
00101 double nnonmissing_;
00102 double sum_;
00103 double sumsquare_;
00104 double sumweights_;
00105 real min_;
00106 real max_;
00107 real first_;
00108 real last_;
00109
00113 map<real,StatsCollectorCounts>
counts;
00114
00115
private:
00117
00118
void build_();
00119
00120
protected:
00122
static void declareOptions(
OptionList& ol);
00123
00124
public:
00125
00126
00127
StatsCollector(
int the_maxnvalues=0);
00128
00129 real n()
const {
return nmissing_ +
nnonmissing_; }
00130 real nmissing()
const {
return nmissing_; }
00131 real nnonmissing()
const {
return nnonmissing_; }
00132 real sum()
const {
return real(
sum_+
nnonmissing_*
first_); }
00133
00134 real sumsquare()
const {
return real(
sumsquare_+2*
first_*
sum()-first_*first_*
nnonmissing_); }
00135 real min()
const {
return min_; }
00136 real max()
const {
return max_; }
00137 real mean()
const {
return real(
sum()/
nnonmissing_); }
00138
00139 real variance()
const {
return real((
sumsquare_ -
square(
sum_)/
nnonmissing_)/(nnonmissing_-1)); }
00140 real stddev()
const {
return sqrt(
variance()); }
00141 real stderror()
const {
return sqrt(
variance()/
nnonmissing()); }
00142 real first_obs()
const {
return first_; }
00143 real last_obs()
const {
return last_; }
00144 real sharperatio()
const {
return mean()/
stddev(); }
00145
00155
real getStat(
const string& statname)
const;
00156
00158
virtual void build();
00159
00161
void forget();
00162
00164
void update(
real val,
real weight = 1.0);
00165
00167 void finalize() {}
00168
00169 map<real,StatsCollectorCounts> *
getCounts(){
return &
counts;}
00170 int getMaxNValues(){
return maxnvalues;}
00171
00174
Mat cdf(
bool normalized=
true)
const;
00175
00178
void sortIds();
00179
00187
RealMapping getBinMapping(
double discrete_mincount,
00188
double continuous_mincount,
00189
real tolerance=.1,
00190
TVec<double>* fcount=0)
const;
00191
00192
RealMapping getAllValuesMapping(
TVec<double>* fcount=0)
const;
00202
RealMapping getAllValuesMapping(
TVec<bool>* to_be_included,
TVec<double>* fcount=0,
bool ignore_other =
false,
real tolerance = 0)
const;
00203
00204
virtual void oldwrite(ostream& out)
const;
00205
virtual void oldread(istream& in);
00206
virtual void print(ostream& out)
const;
00207
00208 };
00209
00210
//! Invokes the project's DECLARE_OBJECT_PTR macro for StatsCollector; the
//! macro's expansion is defined elsewhere and is not visible in this header.
DECLARE_OBJECT_PTR(StatsCollector);
00211
00215
template <>
00216 inline void deepCopyField(
StatsCollector& field, CopiesMap& copies)
00217 {
00218 field.
makeDeepCopyFromShallowCopy(copies);
00219 }
00220
00221 TVec<RealMapping>
computeRanges(TVec<StatsCollector> stats,
int discrete_mincount,
int continuous_mincount);
00222
00223 }
00224
00225
#endif