#include "EmpiricalDistribution.h"
#include <plearn/math/random.h>
#include <plearn/vmat/VMat_maths.h>

namespace PLearn {
using namespace std;


PLEARN_IMPLEMENT_OBJECT(
    EmpiricalDistribution,
    "Distribution defined empirically by a set of stored training samples.",
    "The training set is stored as is; expectation(), variance(), cdf() and\n"
    "survival_fn() are computed directly from the stored samples, and\n"
    "generate() either draws a stored sample at random or cycles through them.");

void EmpiricalDistribution::makeDeepCopyFromShallowCopy(CopiesMap& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    deepCopyField(data, copies);
}


EmpiricalDistribution::EmpiricalDistribution()
    : inherited()
{
    seed();
}
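
// Note: 'random_sample_' selects how generate() behaves: when true, each call
// returns one stored sample drawn uniformly at random; when false, successive
// calls step deterministically through the stored samples (see generate()).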
EmpiricalDistribution::EmpiricalDistribution(int inputsize, bool random_sample_)
    : inherited(), random_sample(random_sample_)
{
    inputsize_ = inputsize;
    current_sample_x = 0;
    current_sample_y = 0;
    flip = false;
    seed();
}


void EmpiricalDistribution::declareOptions(OptionList& ol)
{
    inherited::declareOptions(ol);
}
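
// train() simply stores the first inputsize_ columns of the training set as
// the empirical sample; no parameters are fitted.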
void EmpiricalDistribution::train(VMat training_set)
{
    if(inputsize_ == 0)
        PLERROR("inputsize_ must be specified before training");
    data = training_set.subMatColumns(0, inputsize_);
    // targetsize_ is the number of remaining (non-input) columns; computing it
    // from data.width() would always give 0, since data has inputsize_ columns.
    targetsize_ = training_set.width() - inputsize_;
    length = training_set.length();
}

double EmpiricalDistribution::log_density(const Vec& x) const
{
    PLERROR("Density not implemented for EmpiricalDistribution");
    return 0;
}
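
// Empirical survival function: the fraction of stored samples that are
// strictly greater than x in every coordinate.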
double EmpiricalDistribution::survival_fn(const Vec& x) const
{
    double nbHigher = 0;
    bool addOne;
    for(int i = 0; i < data.length(); i++) {
        addOne = true;
        for(int j = 0; j < data.width(); j++) {
            if(data(i,j) <= x[j])
                addOne = false;
        }
        if(addOne)
            nbHigher++;
    }
    return nbHigher / ((double) data.length());
}
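
// Empirical cdf: the fraction of stored samples that are strictly smaller
// than x in every coordinate.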
double EmpiricalDistribution::cdf(const Vec& x) const
{
    double nbLower = 0;
    bool addOne;
    for(int i = 0; i < data.length(); i++) {
        addOne = true;
        for(int j = 0; j < data.width(); j++) {
            if(data(i,j) >= x[j])
                addOne = false;
        }
        if(addOne)
            nbLower++;
    }
    return nbLower / ((double) data.length());
}


Vec EmpiricalDistribution::expectation() const
{
    Vec mean(inputsize_);
    computeMean(data, mean);
    return mean;
}
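
// Note: despite its name, variance() returns the full empirical covariance
// matrix of the stored samples, not just the per-coordinate variances.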
Mat EmpiricalDistribution::variance() const
{
    Vec mean(inputsize_);
    Mat covar(inputsize_, inputsize_);
    computeMeanAndCovar(data, mean, covar);
    return covar;
}
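
// generate() fills x with one stored sample. In random mode a row index is
// drawn uniformly; otherwise calls alternate between the current "first"
// sample (index current_sample_x) and the current "second" sample (index
// current_sample_y), so that successive pairs of calls enumerate all ordered
// pairs of stored samples.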
void EmpiricalDistribution::generate(Vec& x) const
{
    if(random_sample) {
        x.resize(data.width());
        x << data(uniform_multinomial_sample(length));
    }
    else {
        if(!flip) {
            x.resize(data.width());
            x << data(current_sample_x);
            flip = true;
        }
        else {
            x.resize(data.width());
            x << data(current_sample_y);
            current_sample_y++;
            flip = false;
        }
        if(current_sample_y == length) {
            current_sample_y = 0;
            current_sample_x++;
        }
        if(current_sample_x == length)
            current_sample_x = 0;
    }
}
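
// A minimal usage sketch (hypothetical variable names; assumes 'trainset' is a
// VMat whose columns are all inputs):
//
//     EmpiricalDistribution dist(trainset.width(), true);
//     dist.train(trainset);
//     Vec sample;
//     dist.generate(sample);          // one stored row, drawn at random
//     Vec mu = dist.expectation();    // empirical mean of the stored data
//     Mat cov = dist.variance();      // empirical covariance matrix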

} // namespace PLearn