Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ToBagSplitter.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // ToBagSplitter.cc 00004 // 00005 // Copyright (C) 2004 Olivier Delalleau 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: ToBagSplitter.cc,v 1.3 2004/07/21 16:30:55 chrish42 Exp $ 00037 ******************************************************* */ 00038 00039 // Authors: Olivier Delalleau 00040 00044 #include "SelectRowsVMatrix.h" 00045 #include <plearn/var/SumOverBagsVariable.h> 00046 #include "ToBagSplitter.h" 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00052 // ToBagSplitter // 00054 ToBagSplitter::ToBagSplitter() 00055 : Splitter(), 00056 expected_size_of_bag(10) 00057 {} 00058 00059 PLEARN_IMPLEMENT_OBJECT(ToBagSplitter, 00060 "A Splitter that makes any existing splitter operate on bags only.", 00061 "The dataset provided must contain bag information, as described in\n" 00062 "SumOverBagsVariable"); 00063 00065 // declareOptions // 00067 void ToBagSplitter::declareOptions(OptionList& ol) 00068 { 00069 declareOption(ol, "expected_size_of_bag", &ToBagSplitter::expected_size_of_bag, OptionBase::buildoption, 00070 "The expected size of each bag. It is not compulsory to change this option."); 00071 00072 declareOption(ol, "sub_splitter", &ToBagSplitter::sub_splitter, OptionBase::buildoption, 00073 "The underlying splitter we want to make operate on bags."); 00074 00075 // Now call the parent class' declareOptions 00076 inherited::declareOptions(ol); 00077 } 00078 00080 // build // 00082 void ToBagSplitter::build() 00083 { 00084 inherited::build(); 00085 build_(); 00086 } 00087 00089 // build_ // 00091 void ToBagSplitter::build_() 00092 { 00093 if (dataset) { 00094 // Prepare the bags index list. 00095 int max_ninstances = 1; 00096 // The first column in bags_store gives the number of instances in the bag, 00097 // and the following columns give the indices of the corresponding rows in 00098 // the original dataset. 00099 Mat bags_store(dataset->length() / expected_size_of_bag + 1, expected_size_of_bag + 1); 00100 int num_bag = 0; 00101 int num_instance = 0; 00102 int bag_signal_column = dataset->inputsize() + dataset->targetsize() - 1; // Bag signal in the last target column. 00103 for (int i = 0; i < dataset->length(); i++) { 00104 if (num_instance + 1 >= bags_store.width()) { 00105 if (num_instance > 10*(expected_size_of_bag+1)) 00106 PLERROR("ToBagSplitter: found bag size (%d) more than 10 times bigger than expected_size_of_bag (%d)!\n", 00107 num_instance,expected_size_of_bag); 00108 // Need to resize bags_store. 00109 bags_store.resize(bags_store.length(), bags_store.width() * 2); 00110 } 00111 if (num_instance >= max_ninstances) { 00112 max_ninstances = num_instance + 1; 00113 } 00114 bags_store(num_bag, num_instance + 1) = i; 00115 num_instance++; 00116 if (int(dataset->get(i, bag_signal_column)) & SumOverBagsVariable::TARGET_COLUMN_LAST) { 00117 // Last element of a bag. 00118 bags_store(num_bag, 0) = num_instance; // Store the number of instances in this bag. 00119 num_bag++; 00120 num_instance = 0; 00121 if (num_bag >= bags_store.length()) { 00122 // Need to resize bags_store. 00123 bags_store.resize(bags_store.length() * 2, bags_store.width()); 00124 } 00125 } 00126 } 00127 // Resize to the minimum size needed. 00128 bags_store.resize(num_bag, max_ninstances + 1); 00129 bags_index = VMat(bags_store); 00130 // Provide this index to the sub_splitter. 00131 sub_splitter->setDataSet(bags_index); 00132 } 00133 } 00134 00136 // getSplit // 00138 TVec<VMat> ToBagSplitter::getSplit(int k) 00139 { 00140 // ### Build and return the kth split 00141 TVec<VMat> sub_splits = sub_splitter->getSplit(k); 00142 TVec<VMat> result; 00143 for (int i = 0; i < sub_splits.length(); i++) { 00144 // Get the list of corresponding indices in the original dataset. 00145 Mat indices = sub_splits[i].toMat(); 00146 // Turn it into a TVec<int>. 00147 TVec<int> indices_int; 00148 for (int j = 0; j < indices.length(); j++) { 00149 for (int k = 0; k < indices(j, 0); k++) { 00150 int indice = int(indices(j, k + 1)); 00151 indices_int.append(indice); 00152 } 00153 } 00154 result.append(new SelectRowsVMatrix(dataset, indices_int)); 00155 } 00156 return result; 00157 } 00158 00160 // makeDeepCopyFromShallowCopy // 00162 void ToBagSplitter::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00163 { 00164 Splitter::makeDeepCopyFromShallowCopy(copies); 00165 00166 // ### Call deepCopyField on all "pointer-like" fields 00167 // ### that you wish to be deepCopied rather than 00168 // ### shallow-copied. 00169 // ### ex: 00170 // deepCopyField(trainvec, copies); 00171 00172 // ### Remove this line when you have fully implemented this method. 00173 PLERROR("ToBagSplitter::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!"); 00174 } 00175 00177 // nSetsPerSplit // 00179 int ToBagSplitter::nSetsPerSplit() const 00180 { 00181 // ### Return the number of sets per split 00182 return sub_splitter->nSetsPerSplit(); 00183 } 00184 00186 // nsplits // 00188 int ToBagSplitter::nsplits() const 00189 { 00190 return sub_splitter->nsplits(); 00191 } 00192 00194 // setDataSet // 00196 void ToBagSplitter::setDataSet(VMat the_dataset) { 00197 inherited::setDataSet(the_dataset); 00198 // Need to recompute the bags index. 00199 build(); 00200 } 00201 00202 } // end of namespace PLearn

Generated on Tue Aug 17 16:08:48 2004 for PLearn by doxygen 1.3.7