Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

SDBWithStats.h

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 00039 /* ******************************************************* 00040 * $Id: SDBWithStats.h,v 1.4 2004/03/03 14:11:01 tihocan Exp $ 00041 * AUTHORS: Pascal Vincent 00042 * This file is part of the PLearn library. 00043 ******************************************************* */ 00044 00045 00048 #ifndef SDBWithStats_INC 00049 #define SDBWithStats_INC 00050 00051 //#include "general.h" 00052 #include "SimpleDB.h" 00053 #include <map> 00054 00055 namespace PLearn { 00056 using namespace std; 00057 00058 class FieldStat 00059 { 00060 friend class SDBWithStats; 00061 00062 protected: 00063 00065 int nonmissing_; 00066 int missing_; 00067 00069 double sum_; 00070 double sumsquare_; 00071 00072 double min_; 00073 double max_; 00074 double mean_; 00075 double stddev_; 00076 00077 public: 00078 00080 map<string,int> symbolcount; 00081 mutable map<string,int> symbolid; 00082 int nsymbols() { return (int)symbolcount.size(); } 00083 static int max_nsymbols; 00084 00085 FieldStat() 00086 :nonmissing_(0), missing_(0), 00087 sum_(0.), sumsquare_(0), min_(FLT_MAX), max_(-FLT_MAX) 00088 {} 00089 00090 int ntotal() const { return missing_+nonmissing_; } 00091 int missing() const { return missing_; } 00092 int nonmissing() const { return nonmissing_; } 00093 real mean() const { return real(mean_); } 00094 real stddev() const { return real(stddev_); } 00095 real min() const { return real(min_); } 00096 real max() const { return real(max_); } 00097 00098 void updateString(const string& sym); 00099 void updateNumber(double d); 00100 void updateMissing() { ++missing_; } 00101 00102 void clear(); 00103 void finalize(); 00104 }; 00105 00106 class SDBWithStats: public SDB 00107 { 00108 public: 00109 vector<FieldStat> fieldstat; 00110 int nfields() { return (int)getSchema().size(); } 00111 string fieldname(int i) { return getSchema()[i].name; } 00112 00113 public: 00114 SDBWithStats(string basename, string path=".", AccessType access = readwrite, 00115 bool verbose=true); 00116 00117 void forgetStats(); 00118 void computeStats(unsigned int nrows); 00119 void computeStats() { computeStats(size()); } 00120 00121 bool hasStats(); 00122 void saveStats(); 00123 void loadStats(); 00124 00125 FieldStat& getStat(int i); 00126 const FieldStat& getStat(int i) const; 00127 FieldStat& getStat(const string& fieldname); 00128 const FieldStat& getStat(const string& fieldname) const; 00129 }; 00130 00131 } // end of namespace PLearn 00132 00133 #endif

Generated on Tue Aug 17 16:04:41 2004 for PLearn by doxygen 1.3.7