PLearn: ScaledConditionalCDFSmoother.cc Source File

00001 00002 00003 // -*- C++ -*- 00004 00005 // ScaledConditionalCDFSmoother.cc 00006 // 00007 // Copyright (C) *YEAR* *AUTHOR(S)* 00008 // ... 00009 // Copyright (C) *YEAR* *AUTHOR(S)* 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. The name of the authors may not be used to endorse or promote 00022 // products derived from this software without specific prior written 00023 // permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00026 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00027 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00028 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00029 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00030 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00031 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00032 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00033 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00034 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00035 // 00036 // This file is part of the PLearn library. For more information on the PLearn 00037 // library, go to the PLearn Web site at www.plearn.org 00038 00039 /* ******************************************************* 00040 * $Id: ScaledConditionalCDFSmoother.cc,v 1.8 2004/02/20 21:11:46 chrish42 Exp $ 00041 ******************************************************* */ 00042 00045 #include "ScaledConditionalCDFSmoother.h" 00046 //#include "HistogramDistribution.h" //to get static fns. to calc survival <--> density // already inc. from ConditionalCDFSmoother 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 ScaledConditionalCDFSmoother::ScaledConditionalCDFSmoother() 00052 :ConditionalCDFSmoother(), preserve_relative_density(true) 00053 { 00054 } 00055 00056 PLEARN_IMPLEMENT_OBJECT(ScaledConditionalCDFSmoother, 00057 "This smoothes a low-resolution histogram using as prior a high-resolution one.", 00058 "This class takes as 'prior_cdf' a detailed histogram (usually derived from\n" 00059 "an unconditional distribution) and uses it to smooth a given survival\n" 00060 "function and provide extra detail (high resolution).\n" 00061 "Two smoothing formula are provided, both of which guarantee that the smoothed\n" 00062 "survival function takes the same value as the raw one at or near original bin\n" 00063 "positions. In between the original bin positions, the smoothed survival\n" 00064 "is obtained by applying one of two possible formula, according to the\n" 00065 "preserve_relative_density option.\n"); 00066 00067 void ScaledConditionalCDFSmoother::declareOptions(OptionList& ol) 00068 { 00069 declareOption(ol, "preserve_relative_density", &ScaledConditionalCDFSmoother::preserve_relative_density, 00070 OptionBase::buildoption, 00071 "If true then the following formula is used inside each of the large intervals (t_0,t_1):\n" 00072 " S(y_t) = S(y_{t_0})+(PS(y_t)-PS(y_{t_0}))(RS(y_{t_0})-RS(y_{t_1}))/(PS(y_{t_1})-PS(y_{t_0})\n" 00073 "where S(y_t) is the smoothed survival function at position y_t, PS(y_t) is the prior\n" 00074 "survival function at y_t, and RS(y_t) is the rough survival function (which is to be\n" 00075 "smoothed) at y_t. Note that RS is only known at the extremes of the interval, y_{t_0}\n" 00076 "and y_{t_1}. Note that this formula has the property that within the interval, the\n" 00077 "density is the prior density, scaled by the ratio of the total density in the interval\n" 00078 "for the target rough curve with respect to the prior curve\n" 00079 "If false, then the following formula is used instead, using the same notation:\n" 00080 " S(y_t) = PS(y_t)(RS(y_{t_0})/PS(y_{t_0}) + (y_t - y_{t_0})(RS(y_{t_1})-RS(y_{t_0}))/(PS(y_{t_1}) (t_1 - t_0)))\n" 00081 "What is the justification for this second formula?\n" 00082 ); 00083 00084 00085 // Now call the parent class' declareOptions 00086 inherited::declareOptions(ol); 00087 } 00088 00089 void ScaledConditionalCDFSmoother::build_() 00090 { 00091 } 00092 00093 // ### Nothing to add here, simply calls build_ 00094 void ScaledConditionalCDFSmoother::build() 00095 { 00096 inherited::build(); 00097 build_(); 00098 } 00099 00100 00101 void ScaledConditionalCDFSmoother::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00102 { 00103 Object::makeDeepCopyFromShallowCopy(copies); 00104 } 00105 00106 00107 // To obtain each bin of the smoothed_function, scale multiplicatively each bin 00108 // density of prior_cdf by a fixed factor per source_function bin, that 00109 // makes it match the probability under the corresponding source_function 00110 // bin. 00111 real ScaledConditionalCDFSmoother::smooth(const Vec& source_function, Vec& smoothed_function, 00112 Vec bin_positions, Vec dest_bin_positions) const 00113 { 00114 // put in 'survival_fn' the multiplicatively adjusted unconditional_survival_fn 00115 // such that the estimatedS values at yvalues match. In each segment 00116 // between prev_y and next_y. The adjustment ratio varies linearly from 00117 // estimatedS[prev_y]/unconditionalS[prev_y] to estimatedS[next_y]/unconditionalS[next_y]): 00118 // prev_ratio = estimatedS[prev_y]/unconditionalS[prev_y] 00119 // next_ratio = estimatedS[next_y]/unconditionalS[next_y] 00120 // adjustment = prev_ratio + (y-prev_y)*next_ratio/(next_y-prev_y) 00121 // s(y) = unconditional_s(y)*adjustment 00122 00123 if (!prior_cdf) 00124 PLERROR("in ScaledConditionalCDFSmoother::smooth you need to supply a prior_cdf"); 00125 //assume source_function is a survival fn. 00126 if(bin_positions.size() != source_function.size()+1) 00127 PLERROR("in ScaledConditionalCDFSmoother::smooth you need to supply bin_positions"); 00128 if(dest_bin_positions.size() == 0) 00129 PLERROR("in ScaledConditionalCDFSmoother::smooth you need to supply dest_bin_positions"); 00130 smoothed_function.resize(dest_bin_positions.size()-1); 00131 00132 00133 int j= 0; 00134 for(int i= 0; i < source_function.size(); ++i) 00135 { 00136 Vec v0(1), v1(1);//prev_y, next_y 00137 v0[0]= bin_positions[i]; 00138 v1[0]= bin_positions[i+1]; 00139 00140 real prev_ratio= source_function[i]/prior_cdf->survival_fn(v0); 00141 real next_ratio; 00142 if(i == source_function.size()-1) 00143 next_ratio= 0.0; 00144 else 00145 next_ratio= source_function[i+1]/prior_cdf->survival_fn(v1); 00146 00147 cout << source_function[i] << '\t' << prev_ratio << '\t' << next_ratio << '\t' << v0[0] << '\t' << v1[0] << endl; 00148 real slope = !preserve_relative_density? 0 : 00149 ((source_function[i+1]-source_function[i])/(prior_cdf->survival_fn(v1)-prior_cdf->survival_fn(v0))); 00150 real absisse = !preserve_relative_density? 0 : 00151 (source_function[i] - slope * prior_cdf->survival_fn(v0)); 00152 while(j < smoothed_function.size() && dest_bin_positions[j+1] <= bin_positions[i+1]) 00153 { 00154 Vec v(1); 00155 v[0]= dest_bin_positions[j]; 00156 // the line below seems wrong, so I have fixed it -- YB 00157 // the reason it seems wrong is that smoothed_function[j_final] should be equal 00158 // to source_function[i+1], but it is not, currently. 00159 // smoothed_function[j]= prior_cdf->survival_fn(v) * (prev_ratio + (v[0]-v0[0])*next_ratio/(v1[0]-v0[0])); 00160 if (!preserve_relative_density) 00161 smoothed_function[j]= prior_cdf->survival_fn(v) * 00162 (prev_ratio + (v[0]-v0[0])*(next_ratio-prev_ratio)/(v1[0]-v0[0])); 00163 else // scale with bin number, i.e. warped with density 00164 smoothed_function[j]= absisse + slope * prior_cdf->survival_fn(v); 00165 cout << '\t' << v[0] << '\t' << prior_cdf->survival_fn(v) << '\t' << smoothed_function[j] << endl; 00166 ++j; 00167 } 00168 } 00169 00170 00171 00172 00173 00174 00175 00176 00177 00178 00179 00180 00181 00182 00183 /* 00184 //assume source_function is a survival fn. 00185 if(bin_positions.size() != source_function.size()+1) 00186 PLERROR("in ScaledConditionalCDFSmoother::smooth you need to supply bin_positions"); 00187 if(dest_bin_positions.size() == 0) 00188 PLERROR("in ScaledConditionalCDFSmoother::smooth you need to supply dest_bin_positions"); 00189 smoothed_function.resize(dest_bin_positions.size()-1); 00190 Vec f0(dest_bin_positions.size()-1); //new density 00191 00192 00193 int j= 0; 00194 real factor= 1.0; 00195 for(int i= 0; i < source_function.size(); ++i) 00196 { 00197 Vec v0(1), v1(1); 00198 v0[0]= bin_positions[i]; 00199 v1[0]= bin_positions[i+1]; 00200 real prior_prob= prior_cdf->survival_fn(v0) - prior_cdf->survival_fn(v1); 00201 real prob; 00202 if(i < source_function.size()-1) 00203 prob= (source_function[i]-source_function[i+1]); 00204 else 00205 prob= source_function[i]; 00206 00207 if(0 < prior_prob && prob != 0.0) 00208 factor= prob / prior_prob; 00209 // else: use prev. factor 00210 00211 //dummy-temp 00212 cout << v0[0] << '-' << v1[0] << ":\t" << prob << '/' << prior_prob << '=' << factor << endl; 00213 00214 00215 while(j < smoothed_function.size() && dest_bin_positions[j+1] <= bin_positions[i+1]) 00216 { 00217 Vec v(1); 00218 v[0]= (dest_bin_positions[j]+dest_bin_positions[j+1])/2; 00219 // smoothed_function[j]= factor * prior_cdf->survival_fn(v); 00220 f0[j]= factor * prior_cdf->density(v); 00221 //dummy-temp 00222 cout << '\t' << smoothed_function[j] << "= " << factor << " * " << prior_cdf->survival_fn(v) << endl; 00223 00224 ++j; 00225 } 00226 } 00227 00228 00229 HistogramDistribution::calc_survival_from_density(f0, smoothed_function, dest_bin_positions); 00230 00231 */ 00232 00233 /* 00234 int j= 0; 00235 real factor= 1.0; 00236 for(int i= 0; i < source_function.size(); ++i) 00237 { 00238 Vec v0(1), v1(1); 00239 v0[0]= bin_positions[i]; 00240 v1[0]= bin_positions[i+1]; 00241 real prior_prob= prior_cdf->survival_fn(v0) - prior_cdf->survival_fn(v1); 00242 real prob; 00243 if(i < source_function.size()-1) 00244 prob= (source_function[i]-source_function[i+1]); 00245 else 00246 prob= source_function[i]; 00247 00248 if(0 < prior_prob && prob != 0.0) 00249 factor= prob / prior_prob; 00250 // else: use prev. factor 00251 00252 //dummy-temp 00253 cout << v0[0] << '-' << v1[0] << ":\t" << prob << '/' << prior_prob << '=' << factor << endl; 00254 00255 00256 while(j < smoothed_function.size() && dest_bin_positions[j+1] <= bin_positions[i+1]) 00257 { 00258 Vec v(1); 00259 v[0]= (dest_bin_positions[j]+dest_bin_positions[j+1])/2; 00260 smoothed_function[j]= factor * prior_cdf->survival_fn(v); 00261 //dummy-temp 00262 cout << '\t' << smoothed_function[j] << "= " << factor << " * " << prior_cdf->survival_fn(v) << endl; 00263 00264 ++j; 00265 } 00266 } 00267 */ 00268 00269 return 0.0; //dummy - FIXME - xsm 00270 } 00271 00272 } // end of namespace PLearn