Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ConditionalStatsCollector.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // ConditionalStatsCollector.cc 00004 // 00005 // Copyright (C) 2003 Pascal Vincent 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: ConditionalStatsCollector.cc,v 1.6 2004/03/03 01:31:23 plearner Exp $ 00037 ******************************************************* */ 00038 00039 // Authors: Pascal Vincent 00040 00044 #include "ConditionalStatsCollector.h" 00045 00046 namespace PLearn { 00047 using namespace std; 00048 00049 ConditionalStatsCollector::ConditionalStatsCollector() 00050 : inherited(), 00051 condvar(0) 00052 {} 00053 00054 PLEARN_IMPLEMENT_OBJECT(ConditionalStatsCollector, "ONE LINE DESCRIPTION", "MULTI LINE\nHELP"); 00055 00056 void ConditionalStatsCollector::declareOptions(OptionList& ol) 00057 { 00058 // ### Declare all of this object's options here 00059 // ### For the "flags" of each option, you should typically specify 00060 // ### one of OptionBase::buildoption, OptionBase::learntoption or 00061 // ### OptionBase::tuningoption. Another possible flag to be combined with 00062 // ### is OptionBase::nosave 00063 00064 // ### ex: 00065 // declareOption(ol, "myoption", &ConditionalStatsCollector::myoption, OptionBase::buildoption, 00066 // "Help text describing this option"); 00067 // ... 00068 00069 00070 declareOption(ol, "condvar", &ConditionalStatsCollector::condvar, OptionBase::buildoption, 00071 "index of conditioning variable \n"); 00072 00073 declareOption(ol, "ranges", &ConditionalStatsCollector::ranges, OptionBase::buildoption, 00074 "ranges[k] must contain bin-mappings for variable k, \n" 00075 "which maps it to an integer ( 0 to mappings[k].size()-1 ) \n"); 00076 00077 00078 declareOption(ol, "counts", &ConditionalStatsCollector::counts, OptionBase::learntoption, 00079 "counts[k](i,j) is the number of times the variable k fell in \n" 00080 "range i while variable condvar was in range j \n" 00081 "counts[k] has one more row and column than there are mapping ranges:\n" 00082 "the last ones counting MISSING_VALUE occurences.\n" 00083 "Actually counts is the 'number of times' only when update is called \n" 00084 "without a weight. Otherwise it's really the sum of the sample weights."); 00085 00086 00087 declareOption(ol, "sums", &ConditionalStatsCollector::sums, OptionBase::learntoption, 00088 "sums[k](i,j) contains the sum of variable k's values that fell in range i while condvar was in range j \n" 00089 "(unlike counts, these do not have an extra row and column for misisng value"); 00090 00091 declareOption(ol, "sums_condvar", &ConditionalStatsCollector::sums_condvar, OptionBase::learntoption, 00092 "sums_condvar[k](i,j) contains the (possibly weighted) sum of variable condvar's values that fell in range i while variable k was in range j \n" 00093 "(unlike counts, these do not have an extra row and column for misisng value)"); 00094 00095 declareOption(ol, "sumsquares", &ConditionalStatsCollector::sumsquares, OptionBase::learntoption, 00096 "sumsquares[k](i,j) contains the (possibly weighted) sum of squares of variable k's values that fell in range i while condvar was in range j \n" 00097 "(unlike counts, these do not have an extra row and column for misisng value)"); 00098 00099 declareOption(ol, "sumsquares_condvar", &ConditionalStatsCollector::sumsquares_condvar, OptionBase::learntoption, 00100 "sumsquares_condvar[k](i,j) contains the (possibly weighted) sum of squares of condvar's values that fell in range i while variable k was in range j \n" 00101 "(unlike counts, these do not have an extra row and column for misisng value)"); 00102 00103 declareOption(ol, "minima", &ConditionalStatsCollector::minima, OptionBase::learntoption, 00104 "minima[k](i,j) contains the min of variable k's values that fell in range i while condvar was in range j \n" 00105 "(unlike counts, these do not have an extra row and column for misisng value)"); 00106 00107 declareOption(ol, "minima_condvar", &ConditionalStatsCollector::minima_condvar, OptionBase::learntoption, 00108 "minima_condvar[k](i,j) contains the min of variable condvar's values that fell in range i while variable k was in range j \n" 00109 "(unlike counts, these do not have an extra row and column for misisng value)"); 00110 00111 declareOption(ol, "maxima", &ConditionalStatsCollector::maxima, OptionBase::learntoption, 00112 "maxima[k](i,j) contains the max of variable k's values that fell in range i while condvar was in range j \n" 00113 "(unlike counts, these do not have an extra row and column for misisng value)"); 00114 00115 declareOption(ol, "maxima_condvar", &ConditionalStatsCollector::maxima_condvar, OptionBase::learntoption, 00116 "maxima_condvar[k](i,j) contains the max of variable condvar's values that fell in range i while variable k was in range j \n" 00117 "(unlike counts, these do not have an extra row and column for misisng value)"); 00118 00119 // Now call the parent class' declareOptions 00120 inherited::declareOptions(ol); 00121 } 00122 00123 void ConditionalStatsCollector::build_() 00124 { 00125 if(counts.size()==0) 00126 forget(); 00127 } 00128 00129 // ### Nothing to add here, simply calls build_ 00130 void ConditionalStatsCollector::build() 00131 { 00132 inherited::build(); 00133 build_(); 00134 } 00135 00136 void ConditionalStatsCollector::forget() 00137 { 00138 counts.resize(0); 00139 sums.resize(0); 00140 sumsquares.resize(0); 00141 minima.resize(0); 00142 maxima.resize(0); 00143 sums_condvar.resize(0); 00144 sumsquares_condvar.resize(0); 00145 minima_condvar.resize(0); 00146 maxima_condvar.resize(0); 00147 } 00148 00149 void ConditionalStatsCollector::setBinMappingsAndCondvar(const TVec<RealMapping>& the_ranges, int the_condvar) 00150 { 00151 ranges = the_ranges; 00152 condvar = the_condvar; 00153 forget(); 00154 } 00155 00156 int ConditionalStatsCollector::findrange(int varindex, real val) const 00157 { 00158 RealMapping& r = ranges[varindex]; 00159 int pos = -1; 00160 if(is_missing(val)) 00161 pos = r.length(); 00162 else 00163 { 00164 pos = (int) r.map(val); 00165 /* 00166 if(pos==-1) 00167 { 00168 real minimum = r.begin()->first.low; 00169 real maximum = (--r.end())->first.high; 00170 00171 PLWARNING("In ConditionalStatsCollector::findrange(%d, %.18g) value of variable not in mapping (min=%.18g, max=%.18g)",varindex,val,minimum,maximum); 00172 cerr << r << endl; 00173 00174 if(val>maximum && val-maximum<1e-6) 00175 pos = r.length()-1; 00176 else if(val<minimum && minimum-val<1e-6) 00177 pos = 0; 00178 } 00179 */ 00180 } 00181 return pos; 00182 } 00183 00184 void ConditionalStatsCollector::update(const Vec& v, real weight) 00185 { 00186 int nvars = ranges.length(); 00187 if(v.length()!=nvars) 00188 PLERROR("IN ConditionalStatsCollectos::update length of update vector and nvars differ!"); 00189 00190 if(counts.length()!=nvars) 00191 { 00192 counts.resize(nvars); 00193 sums.resize(nvars); 00194 sums_condvar.resize(nvars); 00195 sumsquares.resize(nvars); 00196 sumsquares_condvar.resize(nvars); 00197 minima.resize(nvars); 00198 minima_condvar.resize(nvars); 00199 maxima.resize(nvars); 00200 maxima_condvar.resize(nvars); 00201 int nranges_condvar = ranges[condvar].length()+1; // +1 for missing values 00202 for(int k=0; k<nvars; k++) 00203 { 00204 int nranges_k = ranges[k].length()+1; // +1 for missing values 00205 counts[k].resize(nranges_k, nranges_condvar); 00206 counts[k].fill(0); 00207 sums[k].resize(nranges_k, nranges_condvar); 00208 sums[k].fill(0); 00209 sums_condvar[k].resize(nranges_condvar, nranges_k); 00210 sums_condvar[k].fill(0); 00211 sumsquares[k].resize(nranges_k, nranges_condvar); 00212 sumsquares[k].fill(0); 00213 sumsquares_condvar[k].resize(nranges_condvar, nranges_k); 00214 sumsquares_condvar[k].fill(0); 00215 minima[k].resize(nranges_k, nranges_condvar); 00216 minima[k].fill(FLT_MAX); 00217 minima_condvar[k].resize(nranges_condvar, nranges_k); 00218 minima_condvar[k].fill(FLT_MAX); 00219 maxima[k].resize(nranges_k, nranges_condvar); 00220 maxima[k].fill(-FLT_MAX); 00221 maxima_condvar[k].resize(nranges_condvar, nranges_k); 00222 maxima_condvar[k].fill(-FLT_MAX); 00223 } 00224 } 00225 00226 real condvar_val = v[condvar]; 00227 int j = findrange(condvar, condvar_val); 00228 if(j==-1) 00229 PLWARNING("In ConditionalStatsCollector::update value of conditioning var in none of the ranges"); 00230 for(int k=0; k<nvars; k++) 00231 { 00232 real val = v[k]; 00233 int i = findrange(k, val); 00234 if(i==-1) 00235 { 00236 PLWARNING("In ConditionalStatsCollector::update value of variable #%d in none of the ranges",k); 00237 } 00238 00239 counts[k](i,j)+=weight; 00240 if(!is_missing(val)) 00241 { 00242 sums[k](i,j) += weight*val; 00243 sumsquares[k](i,j) += weight*square(val); 00244 if(val<minima[k](i,j)) 00245 minima[k](i,j) = val; 00246 if(val>maxima[k](i,j)) 00247 maxima[k](i,j) = val; 00248 } 00249 00250 if(!is_missing(condvar_val)) 00251 { 00252 sums_condvar[k](j,i) += weight*condvar_val; 00253 sumsquares_condvar[k](j,i) += weight*square(condvar_val); 00254 if(condvar_val<minima_condvar[k](j,i)) 00255 minima_condvar[k](j,i) = condvar_val; 00256 if(condvar_val>maxima_condvar[k](j,i)) 00257 maxima_condvar[k](j,i) = condvar_val; 00258 } 00259 } 00260 } 00261 00262 void ConditionalStatsCollector::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00263 { 00264 inherited::makeDeepCopyFromShallowCopy(copies); 00265 00266 deepCopyField(ranges, copies); 00267 deepCopyField(counts, copies); 00268 deepCopyField(sums, copies); 00269 deepCopyField(sumsquares, copies); 00270 deepCopyField(minima, copies); 00271 deepCopyField(maxima, copies); 00272 deepCopyField(sums_condvar, copies); 00273 deepCopyField(sumsquares_condvar, copies); 00274 deepCopyField(minima_condvar, copies); 00275 deepCopyField(maxima_condvar, copies); 00276 } 00277 00278 } // end of namespace PLearn

Generated on Tue Aug 17 15:50:22 2004 for PLearn by doxygen 1.3.7