Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

StatsCollector.h

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 2001,2002 Pascal Vincent 00005 // 00006 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: StatsCollector.h,v 1.29 2004/07/21 16:30:53 chrish42 Exp $ 00037 * This file is part of the PLearn library. 00038 ******************************************************* */ 00039 00040 #ifndef StatsCollector_INC 00041 #define StatsCollector_INC 00042 00043 #include <plearn/base/general.h> 00044 #include <plearn/base/Object.h> 00045 #include "TMat.h" 00046 #include <plearn/base/RealMapping.h> 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 class StatsCollectorCounts 00052 { 00053 public: 00054 double n; 00055 double nbelow; 00056 double sum; 00057 double sumsquare; 00058 int id; 00059 00060 StatsCollectorCounts(): 00061 n(0), nbelow(0), 00062 sum(0.), sumsquare(0.),id(0) {} 00063 }; 00064 00065 typedef pair<real,StatsCollectorCounts*> PairRealSCCType; 00066 00068 00069 inline PStream& operator>>(PStream& in, StatsCollectorCounts& c) 00070 { in >> c.n >> c.nbelow >> c.sum >> c.sumsquare >> c.id; return in; } 00071 00072 inline PStream& operator<<(PStream& out, const StatsCollectorCounts& c) 00073 { out << c.n << c.nbelow << c.sum << c.sumsquare << c.id; return out; } 00074 00081 class StatsCollector: public Object 00082 { 00083 public: 00084 typedef Object inherited; 00085 PLEARN_DECLARE_OBJECT(StatsCollector); 00086 00087 public: 00088 00089 typedef Object inherited; 00090 00091 // ** Build options ** 00092 00095 int maxnvalues; 00096 00097 00098 // ** Learnt options ** 00099 00100 double nmissing_; 00101 double nnonmissing_; 00102 double sum_; 00103 double sumsquare_; 00104 double sumweights_; 00105 real min_; 00106 real max_; 00107 real first_; 00108 real last_; 00109 00113 map<real,StatsCollectorCounts> counts; 00114 00115 private: 00117 // (Please implement in .cc) 00118 void build_(); 00119 00120 protected: 00122 static void declareOptions(OptionList& ol); 00123 00124 public: 00125 00126 00127 StatsCollector(int the_maxnvalues=0); 00128 00129 real n() const { return nmissing_ + nnonmissing_; } 00130 real nmissing() const { return nmissing_; } 00131 real nnonmissing() const { return nnonmissing_; } 00132 real sum() const { return real(sum_+nnonmissing_*first_); } 00133 //real sumsquare() const { return real(sumsquare_); } 00134 real sumsquare() const { return real(sumsquare_+2*first_*sum()-first_*first_*nnonmissing_); } 00135 real min() const { return min_; } 00136 real max() const { return max_; } 00137 real mean() const { return real(sum()/nnonmissing_); } 00138 //real variance() const { return real((sumsquare_ - square(sum_)/nnonmissing_)/(nnonmissing_-1)); } 00139 real variance() const { return real((sumsquare_ - square(sum_)/nnonmissing_)/(nnonmissing_-1)); } 00140 real stddev() const { return sqrt(variance()); } 00141 real stderror() const { return sqrt(variance()/nnonmissing()); } 00142 real first_obs() const { return first_; } 00143 real last_obs() const { return last_; } 00144 real sharperatio() const { return mean()/stddev(); } 00145 00155 real getStat(const string& statname) const; 00156 00158 virtual void build(); 00159 00161 void forget(); 00162 00164 void update(real val, real weight = 1.0); 00165 00167 void finalize() {} 00168 00169 map<real,StatsCollectorCounts> * getCounts(){return &counts;} 00170 int getMaxNValues(){return maxnvalues;} 00171 00174 Mat cdf(bool normalized=true) const; 00175 00178 void sortIds(); 00179 00187 RealMapping getBinMapping(double discrete_mincount, 00188 double continuous_mincount, 00189 real tolerance=.1, 00190 TVec<double>* fcount=0) const; 00191 00192 RealMapping getAllValuesMapping(TVec<double>* fcount=0) const; 00202 RealMapping getAllValuesMapping(TVec<bool>* to_be_included, TVec<double>* fcount=0, bool ignore_other = false, real tolerance = 0) const; 00203 00204 virtual void oldwrite(ostream& out) const; 00205 virtual void oldread(istream& in); 00206 virtual void print(ostream& out) const; 00207 00208 }; 00209 00210 DECLARE_OBJECT_PTR(StatsCollector); 00211 00215 template <> 00216 inline void deepCopyField(StatsCollector& field, CopiesMap& copies) 00217 { 00218 field.makeDeepCopyFromShallowCopy(copies); 00219 } 00220 00221 TVec<RealMapping> computeRanges(TVec<StatsCollector> stats, int discrete_mincount, int continuous_mincount); 00222 00223 } // end of namespace PLearn 00224 00225 #endif

Generated on Tue Aug 17 16:06:56 2004 for PLearn by doxygen 1.3.7