Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

VecStatsCollector.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 // VecStatsCollector.cc 00003 // 00004 // Copyright (C) 2002 Pascal Vincent 00005 // 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are met: 00008 // 00009 // 1. Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // 00012 // 2. Redistributions in binary form must reproduce the above copyright 00013 // notice, this list of conditions and the following disclaimer in the 00014 // documentation and/or other materials provided with the distribution. 00015 // 00016 // 3. The name of the authors may not be used to endorse or promote 00017 // products derived from this software without specific prior written 00018 // permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00021 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00022 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00023 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00025 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00026 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00027 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00028 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00029 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 // 00031 // This file is part of the PLearn library. For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 /* ******************************************************* 00035 * $Id: VecStatsCollector.cc,v 1.21 2004/07/21 16:30:53 chrish42 Exp $ 00036 ******************************************************* */ 00037 00039 #include "VecStatsCollector.h" 00040 #include "TMat_maths.h" 00041 #include <plearn/base/stringutils.h> 00042 00043 namespace PLearn { 00044 using namespace std; 00045 00046 VecStatsCollector::VecStatsCollector() 00047 :maxnvalues(0), compute_covariance(false) 00048 {} 00049 00050 PLEARN_IMPLEMENT_OBJECT(VecStatsCollector, "Collects basic statistics on a vector", "VecStatsCollector allows to collect statistics on a series of vectors.\n" 00051 "Individual vectors x are presented by calling update(x), and this class will\n" 00052 "collect both individual statistics for each element (as a Vec<StatsCollector>)\n" 00053 "as well as (optionally) compute the covariance matrix."); 00054 00055 void VecStatsCollector::declareOptions(OptionList& ol) 00056 { 00057 // ### Declare all of this object's options here 00058 // ### For the "flags" of each option, you should typically specify 00059 // ### one of OptionBase::buildoption, OptionBase::learntoption or 00060 // ### OptionBase::tuningoption. Another possible flag to be combined with 00061 // ### is OptionBase::nosave 00062 00063 declareOption(ol, "maxnvalues", &VecStatsCollector::maxnvalues, OptionBase::buildoption, 00064 "maximum number of different values to keep track of for each element"); 00065 declareOption(ol, "compute_covariance", &VecStatsCollector::compute_covariance, OptionBase::buildoption, 00066 "should we compute and keep X'.X ?"); 00067 00068 declareOption(ol, "stats", &VecStatsCollector::stats, OptionBase::learntoption, 00069 "the stats for each element"); 00070 declareOption(ol, "cov", &VecStatsCollector::cov, OptionBase::learntoption, 00071 "the uncentered covariance matrix (mean not subtracted): X'.X"); 00072 00073 declareOption(ol, "fieldnames", &VecStatsCollector::fieldnames, OptionBase::buildoption, 00074 "Names of the fields of the vector"); 00075 00076 // Now call the parent class' declareOptions 00077 inherited::declareOptions(ol); 00078 } 00079 00080 double VecStatsCollector::getStat(const string& statspec) 00081 { 00082 PIStringStream in(statspec); 00083 string statname; 00084 in.smartReadUntilNext("[", statname); 00085 string fieldname; 00086 in.smartReadUntilNext("]", fieldname); 00087 int fieldnum = getFieldNum(fieldname); 00088 if(fieldnum<0) 00089 PLERROR("In VecStatsCollector::getStat invalid fieldname: %s",fieldname.c_str()); 00090 00091 // It could be that nothing was accumulated into the stats collector, 00092 // which is different from accessing the "wrong" field. In the first 00093 // case, return MISSING_VALUE 00094 if (stats.length() == 0) 00095 return MISSING_VALUE; 00096 00097 return getStats(fieldnum).getStat(statname); 00098 } 00099 00100 int VecStatsCollector::getFieldNum(const string& fieldname_or_num) const 00101 { 00102 int num = fieldnames.find(fieldname_or_num); 00103 if(num<0) { 00104 // not found 00105 if (pl_isnumber(fieldname_or_num)) { 00106 num = toint(fieldname_or_num); 00107 } else { 00108 // Unknown field. 00109 num = -1; 00110 } 00111 } 00112 return num; 00113 } 00114 00115 00116 void VecStatsCollector::update(const Vec& x, real weight) 00117 { 00118 int n = x.size(); 00119 if(stats.size()==0) 00120 { 00121 stats.resize(n); 00122 for(int k=0; k<n; k++) 00123 { 00124 stats[k].maxnvalues = maxnvalues; 00125 stats[k].forget(); 00126 } 00127 if(compute_covariance) 00128 { 00129 cov.resize(n,n); 00130 cov.fill(0); 00131 } 00132 } 00133 00134 if(stats.size()!=n) 00135 PLERROR("In VecStatsCollector: problem, called update with vector of length %d, while size of stats (and most likeley previously seen vector) is %d", n, stats.size()); 00136 00137 // this speeds things up a bit 00138 //bool has_missing=false; 00139 00140 for(int k=0; k<n; k++) 00141 { 00142 stats[k].update(x[k], weight); 00143 /* if(is_missing(x[k])) 00144 x[k]=0;//has_missing=true;*/ 00145 } 00146 00147 if(compute_covariance) 00148 /* if(has_missing) 00149 { 00150 for(int i=0;i<n;i++) 00151 for(int j=0;j<n;j++) 00152 if(!is_missing(x[i]) && !is_missing(x[j])) 00153 cov(i,j)+=x[i]*x[j]; 00154 } 00155 else*/ 00156 externalProductScaleAcc(cov, x, x, weight); 00157 } 00158 00160 void VecStatsCollector::update(const Mat& m) 00161 { 00162 int l = m.length(); 00163 for(int i=0; i<l; i++) 00164 update(m(i)); 00165 } 00166 00168 void VecStatsCollector::update(const Mat& m, const Vec& weights) 00169 { 00170 if (m.length() != weights.size()) 00171 PLERROR("VecStatsCollector::update: matrix height (%d) " 00172 "is incompatible with weights length (%d)", m.length(), 00173 weights.size()); 00174 int l = m.length(); 00175 for(int i=0; i<l; i++) 00176 update(m(i), weights[i]); 00177 } 00178 00179 void VecStatsCollector::build_() 00180 {} 00181 00182 void VecStatsCollector::build() 00183 { 00184 inherited::build(); 00185 build_(); 00186 } 00187 00188 void VecStatsCollector::forget() 00189 { 00190 stats.resize(0); 00191 cov.resize(0,0); 00192 } 00193 00194 void VecStatsCollector::finalize() 00195 { 00196 int n = stats.size(); 00197 for(int i=0; i<n; i++) 00198 stats[i].finalize(); 00199 } 00200 00202 Vec VecStatsCollector::getMean() const 00203 { 00204 int n = stats.size(); 00205 Vec res(n); 00206 for(int k=0; k<n; k++) 00207 res[k] = stats[k].mean(); 00208 return res; 00209 } 00210 00212 Vec VecStatsCollector::getVariance() const 00213 { 00214 int n = stats.size(); 00215 Vec res(n); 00216 for(int k=0; k<n; k++) 00217 res[k] = stats[k].variance(); 00218 return res; 00219 } 00220 00222 Vec VecStatsCollector::getStdDev() const 00223 { 00224 int n = stats.size(); 00225 Vec res(n); 00226 for(int k=0; k<n; k++) 00227 res[k] = stats[k].stddev(); 00228 return res; 00229 } 00230 00232 Vec VecStatsCollector::getStdError() const 00233 { 00234 int n = stats.size(); 00235 Vec res(n); 00236 for(int k=0; k<n; k++) 00237 res[k] = stats[k].stderror(); 00238 return res; 00239 } 00240 00242 Mat VecStatsCollector::getCovariance() const 00243 { 00244 double invN = 1./stats[0].n(); 00245 Vec meanvec = getMean(); 00246 Mat covariance(cov.length(), cov.width()); 00247 for(int i=0; i<cov.length(); i++) 00248 for(int j=0; j<cov.width(); j++) 00249 covariance(i, j) = invN*cov(i, j) - meanvec[i]*meanvec[j]; 00250 return covariance; 00251 } 00252 00254 Mat VecStatsCollector::getCorrelation() const 00255 { 00256 Mat norm(cov.width(),cov.width()); 00257 externalProduct(norm,getStdDev(),getStdDev()); 00258 return getCovariance()/norm; 00259 } 00260 00261 void VecStatsCollector::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00262 { 00263 Object::makeDeepCopyFromShallowCopy(copies); 00264 deepCopyField(stats, copies); 00265 } 00266 00267 } // end of namespace PLearn

Generated on Tue Aug 17 16:10:27 2004 for PLearn by doxygen 1.3.7