PLearn: SumOverBagsVariable.cc Source File

// -*- C++ -*-

// PLearn (A C++ Machine Learning Library)
// Copyright (C) 1998 Pascal Vincent
// Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal
// Copyright (C) 2001-2002 Nicolas Chapados, Ichiro Takeuchi, Jean-Sebastien Senecal
// Copyright (C) 2002 Xiangdong Wang, Christian Dorion

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library.
For more information on the PLearn 00035 // library, go to the PLearn Web site at www.plearn.org 00036 00037 00038 /* ******************************************************* 00039 * $Id: SumOverBagsVariable.cc,v 1.13 2004/04/27 16:02:26 morinf Exp $ 00040 * This file is part of the PLearn library. 00041 ******************************************************* */ 00042 00043 #include "SumOverBagsVariable.h" 00044 //#include "PLMPI.h" 00045 //#include "DisplayUtils.h" 00046 00047 namespace PLearn { 00048 using namespace std; 00049 00050 00051 00054 PLEARN_IMPLEMENT_OBJECT(SumOverBagsVariable, "Variable that sums the value of a Func each time evaluated on a subsequence of a VMat\n", 00055 "returns\n" 00056 " Sum_{bags in vmat} f(inputs and targets in bag)\n" 00057 "(it can average this sum over the number of bags if the 'average' option is set).\n" 00058 "By convention a bag is a sequence of rows of the vmat in which the last column of the target\n" 00059 "indicates whether the row is the first one (and/or) the last one, with its two least significant bits:\n" 00060 " last_column_of_target == 1 ==> first row\n" 00061 " last_column_of_target == 2 ==> last row\n" 00062 " last_column_of_target == 0 ==> intermediate row\n" 00063 " last_column_of_target == 1+2==3 ==> single-row bag (both first and last).\n" 00064 "The option n_samples controls how many terms in the sum are considered at a time:\n" 00065 " n_samples <= 0: sum over the whole vmat (e.g. for batch gradient computation)\n" 00066 " n_samples = 1: sum over a single bag at a time (e.g. for stochastic gradient)\n" 00067 " where each fprop or fbprop advances to the next bag\n" 00068 " otherwise: sum over n_samples bags at a time (e.g. 
for min-batch training)\n" 00069 "The last column of the target is not given in the call to f, but a bag_size input is provided instead.\n" 00070 "The inputs to f are: (matrix of bag inputs, the bag size, the bag target, [the bag weight])\n" 00071 "(the bag weight is included only if there are weights in the original VMat)." 00072 ); 00073 00075 // SumOverBagsVariable // 00077 SumOverBagsVariable::SumOverBagsVariable() 00078 : vmat(), f(), 00079 average(0), 00080 max_bag_size(-1), n_samples(1), 00081 transpose(0), 00082 curpos() 00083 {} 00084 00085 SumOverBagsVariable::SumOverBagsVariable(VMat the_vmat, Func the_f, int max_bagsize, int nsamples, bool the_average, bool the_transpose) 00086 : inherited(nonInputParentsOfPath(the_f->inputs,the_f->outputs), 00087 the_f->outputs[0]->length(), 00088 the_f->outputs[0]->width()), 00089 vmat(the_vmat), f(the_f), 00090 average(the_average), 00091 max_bag_size(max_bagsize), n_samples(nsamples), 00092 transpose(the_transpose), 00093 curpos(0), bag_size(0) 00094 { 00095 build(); 00096 } 00097 00099 // build // 00101 void SumOverBagsVariable::build() 00102 { 00103 inherited::build(); 00104 build_(); 00105 } 00106 00108 // build_ // 00110 void SumOverBagsVariable::build_() 00111 { 00112 if (vmat) 00113 { 00114 if (f->outputs.size()!=1) 00115 PLERROR("SumOverBagsVariable: expected a func with a single output variable (you may use concat to form a single output Var)"); 00116 if (vmat->weightsize()!=0 && vmat->weightsize()!=1) 00117 PLERROR("SumOverBagsVariable expected vmat->weightsize to be 0 or 1"); 00118 00119 if (transpose) { 00120 input_values.resize(vmat->inputsize(), max_bag_size); 00121 } else { 00122 input_values.resize(max_bag_size,vmat->inputsize()); 00123 } 00124 output_value.resize(f->outputs[0]->nelems()); 00125 output_av = Array<Vec>(output_value); 00126 gradient_av = Array<Vec>(gradient); 00127 f->inputs.setDontBpropHere(true); 00128 00129 bag_size_vec.resize(1); 00130 
bag_target_and_bag_signal.resize(vmat->targetsize()); 00131 bag_target.resize(vmat->targetsize() - 1); 00132 bag_signal = bag_target_and_bag_signal.subVec(vmat->targetsize()-1,1); 00133 int ws = vmat->weightsize(); 00134 bag_weight.resize(ws); 00135 if (ws > 0) { 00136 f_inputs.resize(4); 00137 f_inputs[3] = bag_weight; 00138 } else { 00139 f_inputs.resize(3); 00140 } 00141 f_inputs[0] = input_values.toVec(); 00142 f_inputs[1] = bag_size_vec; 00143 f_inputs[2] = bag_target; 00144 unused_gradients.resize(f_inputs.size()); 00145 for (int i=0;i<f_inputs.size();i++) unused_gradients[i] = f_inputs[i].copy(); 00146 } 00147 } 00148 00150 // declareOptions // 00152 void SumOverBagsVariable::declareOptions(OptionList& ol) 00153 { 00154 declareOption(ol, "f", &SumOverBagsVariable::f, OptionBase::buildoption, 00155 " Func that is applied on each bag, whose input is the following array of Vars:\n" 00156 " (matrix of bag inputs, the bag size, the bag target, [the bag weight]).\n"); 00157 00158 declareOption(ol, "vmat", &SumOverBagsVariable::vmat, OptionBase::buildoption, 00159 " VMatrix that contains the data, with multiple consecutive rows forming one bag.\n" 00160 " The last column of the target indicates the beginning and end of each bag, as follows:\n" 00161 " last_column_of_target == 1 ==> first row\n" 00162 " last_column_of_target == 2 ==> last row\n" 00163 " last_column_of_target == 0 ==> intermediate row\n" 00164 " last_column_of_target == 1+2==3 ==> single-row bag (both first and last).\n"); 00165 00166 declareOption(ol, "average", &SumOverBagsVariable::average, OptionBase::buildoption, 00167 " If set to 1, then will compute the mean of the sum, and not the sum itself."); 00168 00169 declareOption(ol, "max_bag_size", &SumOverBagsVariable::max_bag_size, OptionBase::buildoption, 00170 " maximum number of examples in a bag (more than that in vmat will trigger a run-time error).\n"); 00171 00172 declareOption(ol, "n_samples", &SumOverBagsVariable::n_samples, 
OptionBase::buildoption, 00173 " number of bags to iterate over (1 for online gradient, <=0 for batch)."); 00174 00175 declareOption(ol, "transpose", &SumOverBagsVariable::transpose, OptionBase::buildoption, 00176 " If set to 1, then the bag inputs will be put in columns instead of rows.\n" 00177 " This can be useful if the Func f takes column vars as inputs."); 00178 00179 inherited::declareOptions(ol); 00180 } 00181 00183 // recomputeSize // 00185 void SumOverBagsVariable::recomputeSize(int& l, int& w) const 00186 { 00187 if (f && f->outputs.size()) { 00188 l = f->outputs[0]->length(); 00189 w = f->outputs[0]->width(); 00190 } else 00191 l = w = 0; 00192 } 00193 00194 00196 // makeDeepCopyFromShallowCopy // 00198 void SumOverBagsVariable::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00199 { 00200 NaryVariable::makeDeepCopyFromShallowCopy(copies); 00201 deepCopyField(vmat, copies); 00202 deepCopyField(f, copies); 00203 deepCopyField(output_value, copies); 00204 deepCopyField(input_values, copies); 00205 deepCopyField(bag_size_vec, copies); 00206 deepCopyField(bag_target_and_bag_signal, copies); 00207 deepCopyField(bag_target, copies); 00208 deepCopyField(bag_signal, copies); 00209 deepCopyField(bag_weight, copies); 00210 deepCopyField(f_inputs, copies); 00211 deepCopyField(unused_gradients, copies); 00212 deepCopyField(output_av, copies); 00213 deepCopyField(gradient_av, copies); 00214 } 00215 00216 00218 // fpropOneBag // 00220 void SumOverBagsVariable::fpropOneBag(bool do_bprop) 00221 { 00222 static real dummy_weight=0; 00223 bool reached_end_of_bag=false; 00224 if (transpose) { 00225 input_values.resize(input_values.length(), max_bag_size); 00226 } else { 00227 input_values.resize(max_bag_size,input_values.width()); 00228 } 00229 for (bag_size=0;!reached_end_of_bag;bag_size++) 00230 { 00231 if (bag_size>=max_bag_size) 00232 PLERROR("SumOverBagsVariable: bag size=%d > expected max. 
bag size(%d)", 00233 bag_size,max_bag_size); 00234 Vec input_value; 00235 if (transpose) { 00236 input_value.resize(input_values.length()); 00237 } else { 00238 input_value = input_values(bag_size); 00239 } 00240 if (vmat->weightsize()>0) 00241 { 00242 real& weight = bag_weight[0]; 00243 vmat->getExample(curpos,input_value,bag_target_and_bag_signal,weight); 00244 } 00245 else 00246 vmat->getExample(curpos,input_value,bag_target_and_bag_signal,dummy_weight); 00247 if (bag_size == 0) { 00248 // It's the first element: we copy the good target. 00249 bag_target << bag_target_and_bag_signal.subVec(0, bag_target_and_bag_signal.length() - 1); 00250 } 00251 if (transpose) { 00252 // Need to put input_value into input_values, because it uses a separate 00253 // storage. 00254 input_values.column(bag_size) << input_value; 00255 } 00256 if (bag_size==0 && !(int(bag_signal[0]) & 1)) 00257 PLERROR("SumOverBagsVariable: data synchronization error, first row of bag has wrong bag signal"); 00258 reached_end_of_bag = (int(bag_signal[0]) & 2); 00259 if(++curpos == vmat->length()) 00260 { 00261 curpos = 0; 00262 if (!reached_end_of_bag) 00263 { 00264 PLERROR("SumOverBagsVariable: last bag of VMatrix is not complete"); 00265 return; 00266 } 00267 } 00268 } 00269 bag_size_vec[0]=bag_size; 00270 if (do_bprop) 00271 f->fbprop(f_inputs,output_av,unused_gradients,gradient_av); 00272 else 00273 f->fprop(f_inputs,output_av); 00274 value += output_value; 00275 } 00276 00278 // fprop // 00280 void SumOverBagsVariable::fprop() 00281 { 00282 value.clear(); 00283 f->recomputeParents(); 00284 if (n_samples==1) 00285 fpropOneBag(); 00286 else if (n_samples<=0) // one pass through the whole data set 00287 { 00288 curpos=0; 00289 int count_bags = 0; 00290 do { 00291 fpropOneBag(); 00292 count_bags++; 00293 } 00294 while (curpos>0); 00295 if (average) { 00296 value /= count_bags; 00297 } 00298 } 00299 else { 00300 for (int i=0;i<n_samples;i++) 00301 fpropOneBag(); 00302 if (average) { 00303 value /= 
n_samples; 00304 } 00305 } 00306 } 00307 00308 00310 // fbprop // 00312 void SumOverBagsVariable::fbprop() 00313 { 00314 value.clear(); 00315 f->recomputeParents(); 00316 if (n_samples==1) 00317 fpropOneBag(true); 00318 else if (n_samples<=0) // one pass through the whole data set 00319 { 00320 if (average) { 00321 // We don't know in advance how many bags there are, so the gradient 00322 // can't be propagated correctly. 00323 PLERROR("In SumOverBagsVariable::fbprop - If you want to get the average, you must tell me the number of bags in n_samples > 0, because I'm too dumb to guess it."); 00324 } 00325 curpos = 0; 00326 do { 00327 fpropOneBag(true); 00328 } 00329 while (curpos>0); 00330 } 00331 else { 00332 if (average) { 00333 gradient /= n_samples; 00334 } 00335 for (int i=0;i<n_samples;i++) 00336 fpropOneBag(true); 00337 if (average) { 00338 value /= n_samples; 00339 } 00340 } 00341 } 00342 00344 // bprop // 00346 void SumOverBagsVariable::bprop() 00347 { 00348 fbprop(); 00349 } 00350 00352 // printInfo // 00354 void SumOverBagsVariable::printInfo(bool print_gradient) 00355 { 00356 f->fproppath.printInfo(print_gradient); 00357 cout << info() << " : " << getName() << "(max_bag_size=" << max_bag_size << ", "; 00358 cout << ", n_samples=" << n_samples << ") = " << value; 00359 if (print_gradient) cout << " gradient=" << gradient; 00360 cout << endl; 00361 } 00362 00363 00364 } // end of namespace PLearn 00365 00366