00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "ConditionalStatsCollector.h"
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049 ConditionalStatsCollector::ConditionalStatsCollector()
00050 :
inherited(),
00051 condvar(0)
00052 {}
00053
00054
// Class registration. The two strings are the one-line and multi-line help
// texts shown for this class; they summarize what update() accumulates.
PLEARN_IMPLEMENT_OBJECT(
    ConditionalStatsCollector,
    "Collects statistics on each variable conditioned on the range of a conditioning variable",
    "Each variable k and the conditioning variable 'condvar' are binned using\n"
    "the mappings given in 'ranges'. For every pair of bins, the collector\n"
    "accumulates (possibly weighted) counts, sums and sums of squares, as\n"
    "well as minima and maxima, both for variable k and for condvar itself.\n");
00055
00056 void ConditionalStatsCollector::declareOptions(
OptionList& ol)
00057 {
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
declareOption(ol,
"condvar", &ConditionalStatsCollector::condvar, OptionBase::buildoption,
00071
"index of conditioning variable \n");
00072
00073
declareOption(ol,
"ranges", &ConditionalStatsCollector::ranges, OptionBase::buildoption,
00074
"ranges[k] must contain bin-mappings for variable k, \n"
00075
"which maps it to an integer ( 0 to mappings[k].size()-1 ) \n");
00076
00077
00078
declareOption(ol,
"counts", &ConditionalStatsCollector::counts, OptionBase::learntoption,
00079
"counts[k](i,j) is the number of times the variable k fell in \n"
00080
"range i while variable condvar was in range j \n"
00081
"counts[k] has one more row and column than there are mapping ranges:\n"
00082
"the last ones counting MISSING_VALUE occurences.\n"
00083
"Actually counts is the 'number of times' only when update is called \n"
00084
"without a weight. Otherwise it's really the sum of the sample weights.");
00085
00086
00087
declareOption(ol,
"sums", &ConditionalStatsCollector::sums, OptionBase::learntoption,
00088
"sums[k](i,j) contains the sum of variable k's values that fell in range i while condvar was in range j \n"
00089
"(unlike counts, these do not have an extra row and column for misisng value");
00090
00091
declareOption(ol,
"sums_condvar", &ConditionalStatsCollector::sums_condvar, OptionBase::learntoption,
00092
"sums_condvar[k](i,j) contains the (possibly weighted) sum of variable condvar's values that fell in range i while variable k was in range j \n"
00093
"(unlike counts, these do not have an extra row and column for misisng value)");
00094
00095
declareOption(ol,
"sumsquares", &ConditionalStatsCollector::sumsquares, OptionBase::learntoption,
00096
"sumsquares[k](i,j) contains the (possibly weighted) sum of squares of variable k's values that fell in range i while condvar was in range j \n"
00097
"(unlike counts, these do not have an extra row and column for misisng value)");
00098
00099
declareOption(ol,
"sumsquares_condvar", &ConditionalStatsCollector::sumsquares_condvar, OptionBase::learntoption,
00100
"sumsquares_condvar[k](i,j) contains the (possibly weighted) sum of squares of condvar's values that fell in range i while variable k was in range j \n"
00101
"(unlike counts, these do not have an extra row and column for misisng value)");
00102
00103
declareOption(ol,
"minima", &ConditionalStatsCollector::minima, OptionBase::learntoption,
00104
"minima[k](i,j) contains the min of variable k's values that fell in range i while condvar was in range j \n"
00105
"(unlike counts, these do not have an extra row and column for misisng value)");
00106
00107
declareOption(ol,
"minima_condvar", &ConditionalStatsCollector::minima_condvar, OptionBase::learntoption,
00108
"minima_condvar[k](i,j) contains the min of variable condvar's values that fell in range i while variable k was in range j \n"
00109
"(unlike counts, these do not have an extra row and column for misisng value)");
00110
00111
declareOption(ol,
"maxima", &ConditionalStatsCollector::maxima, OptionBase::learntoption,
00112
"maxima[k](i,j) contains the max of variable k's values that fell in range i while condvar was in range j \n"
00113
"(unlike counts, these do not have an extra row and column for misisng value)");
00114
00115
declareOption(ol,
"maxima_condvar", &ConditionalStatsCollector::maxima_condvar, OptionBase::learntoption,
00116
"maxima_condvar[k](i,j) contains the max of variable condvar's values that fell in range i while variable k was in range j \n"
00117
"(unlike counts, these do not have an extra row and column for misisng value)");
00118
00119
00120 inherited::declareOptions(ol);
00121 }
00122
00123 void ConditionalStatsCollector::build_()
00124 {
00125
if(
counts.
size()==0)
00126
forget();
00127 }
00128
00129
00130 void ConditionalStatsCollector::build()
00131 {
00132 inherited::build();
00133
build_();
00134 }
00135
00136 void ConditionalStatsCollector::forget()
00137 {
00138
counts.
resize(0);
00139
sums.
resize(0);
00140
sumsquares.
resize(0);
00141
minima.
resize(0);
00142
maxima.
resize(0);
00143
sums_condvar.
resize(0);
00144
sumsquares_condvar.
resize(0);
00145
minima_condvar.
resize(0);
00146
maxima_condvar.
resize(0);
00147 }
00148
00149 void ConditionalStatsCollector::setBinMappingsAndCondvar(
const TVec<RealMapping>& the_ranges,
int the_condvar)
00150 {
00151
ranges = the_ranges;
00152
condvar = the_condvar;
00153
forget();
00154 }
00155
00156 int ConditionalStatsCollector::findrange(
int varindex,
real val)
const
00157
{
00158
RealMapping& r =
ranges[varindex];
00159
int pos = -1;
00160
if(
is_missing(
val))
00161 pos = r.
length();
00162
else
00163 {
00164 pos = (
int) r.
map(
val);
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180 }
00181
return pos;
00182 }
00183
00184 void ConditionalStatsCollector::update(
const Vec& v,
real weight)
00185 {
00186
int nvars =
ranges.
length();
00187
if(v.
length()!=nvars)
00188
PLERROR(
"IN ConditionalStatsCollectos::update length of update vector and nvars differ!");
00189
00190
if(
counts.
length()!=nvars)
00191 {
00192
counts.
resize(nvars);
00193
sums.
resize(nvars);
00194
sums_condvar.
resize(nvars);
00195
sumsquares.
resize(nvars);
00196
sumsquares_condvar.
resize(nvars);
00197
minima.
resize(nvars);
00198
minima_condvar.
resize(nvars);
00199
maxima.
resize(nvars);
00200
maxima_condvar.
resize(nvars);
00201
int nranges_condvar =
ranges[
condvar].
length()+1;
00202
for(
int k=0;
k<nvars;
k++)
00203 {
00204
int nranges_k = ranges[
k].length()+1;
00205
counts[
k].
resize(nranges_k, nranges_condvar);
00206
counts[
k].
fill(0);
00207
sums[
k].
resize(nranges_k, nranges_condvar);
00208
sums[
k].
fill(0);
00209
sums_condvar[
k].
resize(nranges_condvar, nranges_k);
00210
sums_condvar[
k].
fill(0);
00211
sumsquares[
k].
resize(nranges_k, nranges_condvar);
00212
sumsquares[
k].
fill(0);
00213
sumsquares_condvar[
k].
resize(nranges_condvar, nranges_k);
00214
sumsquares_condvar[
k].
fill(0);
00215
minima[
k].
resize(nranges_k, nranges_condvar);
00216
minima[
k].
fill(FLT_MAX);
00217
minima_condvar[
k].
resize(nranges_condvar, nranges_k);
00218
minima_condvar[
k].
fill(FLT_MAX);
00219
maxima[
k].
resize(nranges_k, nranges_condvar);
00220
maxima[
k].
fill(-FLT_MAX);
00221
maxima_condvar[
k].
resize(nranges_condvar, nranges_k);
00222
maxima_condvar[
k].
fill(-FLT_MAX);
00223 }
00224 }
00225
00226
real condvar_val = v[
condvar];
00227
int j =
findrange(
condvar, condvar_val);
00228
if(j==-1)
00229
PLWARNING(
"In ConditionalStatsCollector::update value of conditioning var in none of the ranges");
00230
for(
int k=0;
k<nvars;
k++)
00231 {
00232
real val = v[
k];
00233
int i = findrange(
k,
val);
00234
if(i==-1)
00235 {
00236
PLWARNING(
"In ConditionalStatsCollector::update value of variable #%d in none of the ranges",
k);
00237 }
00238
00239
counts[
k](i,j)+=weight;
00240
if(!
is_missing(
val))
00241 {
00242
sums[
k](i,j) += weight*
val;
00243
sumsquares[
k](i,j) += weight*
square(
val);
00244
if(
val<
minima[
k](i,j))
00245 minima[
k](i,j) =
val;
00246
if(
val>
maxima[
k](i,j))
00247 maxima[
k](i,j) =
val;
00248 }
00249
00250
if(!
is_missing(condvar_val))
00251 {
00252
sums_condvar[
k](j,i) += weight*condvar_val;
00253
sumsquares_condvar[
k](j,i) += weight*
square(condvar_val);
00254
if(condvar_val<
minima_condvar[
k](j,i))
00255 minima_condvar[
k](j,i) = condvar_val;
00256
if(condvar_val>
maxima_condvar[
k](j,i))
00257 maxima_condvar[
k](j,i) = condvar_val;
00258 }
00259 }
00260 }
00261
00262 void ConditionalStatsCollector::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00263 {
00264 inherited::makeDeepCopyFromShallowCopy(copies);
00265
00266
deepCopyField(
ranges, copies);
00267
deepCopyField(
counts, copies);
00268
deepCopyField(
sums, copies);
00269
deepCopyField(
sumsquares, copies);
00270
deepCopyField(
minima, copies);
00271
deepCopyField(
maxima, copies);
00272
deepCopyField(
sums_condvar, copies);
00273
deepCopyField(
sumsquares_condvar, copies);
00274
deepCopyField(
minima_condvar, copies);
00275
deepCopyField(
maxima_condvar, copies);
00276 }
00277
00278 }