Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

TrainTestBagsSplitter.cc

Go to the documentation of this file.
00001 00002 00003 // -*- C++ -*- 00004 00005 // TrainTestBagsSplitter.cc 00006 // 00007 // Copyright (C) 1998 Pascal Vincent 00008 // Copyright (C) 1999,2000 Pascal Vincent, Yoshua Bengio and University of Montreal 00009 // Copyright (C) 2002 Frederic Morin 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. The name of the authors may not be used to endorse or promote 00022 // products derived from this software without specific prior written 00023 // permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00026 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00027 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00028 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00029 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00030 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00031 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00032 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00033 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00034 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00035 // 00036 // This file is part of the PLearn library. For more information on the PLearn 00037 // library, go to the PLearn Web site at www.plearn.org 00038 00039 /* ******************************************************* 00040 * $Id: TrainTestBagsSplitter.cc,v 1.3 2004/03/12 23:35:47 tihocan Exp $ 00041 ******************************************************* */ 00042 00045 #include "TrainTestBagsSplitter.h" 00046 #include "SumOverBagsVariable.h" 00047 00048 namespace PLearn { 00049 using namespace std; 00050 00051 TrainTestBagsSplitter::TrainTestBagsSplitter(real the_test_fraction) 00052 : append_train(0), test_fraction(the_test_fraction) 00053 {}; 00054 00055 PLEARN_IMPLEMENT_OBJECT(TrainTestBagsSplitter,"Splits a dataset in two parts", 00056 "TrainTestBagsSplitter implements a single split of the dataset into\n" 00057 "a training set and a test set (the test part being the last few samples of the dataset)\n" 00058 "Optionally a third set is provided which is the training set itself (in order to test on it)\n"); 00059 00060 void TrainTestBagsSplitter::declareOptions(OptionList& ol) 00061 { 00062 declareOption(ol, "append_train", &TrainTestBagsSplitter::append_train, OptionBase::buildoption, 00063 "if set to 1, the trainset will be appended after the test set (thus each split" 00064 " will contain three sets)"); 00065 00066 declareOption(ol, "test_fraction", &TrainTestBagsSplitter::test_fraction, OptionBase::buildoption, 00067 "the fraction of the dataset reserved to the test set"); 00068 00069 inherited::declareOptions(ol); 00070 } 00071 00072 void TrainTestBagsSplitter::build_() 00073 { 00074 } 00075 00076 // ### Nothing to add here, simply calls build_ 00077 void TrainTestBagsSplitter::build() 00078 { 00079 inherited::build(); 00080 build_(); 00081 } 00082 00083 int TrainTestBagsSplitter::nsplits() const 00084 { 00085 return 1; // only one split 00086 } 00087 00088 int TrainTestBagsSplitter::nSetsPerSplit() const 00089 { 00090 if (append_train) 00091 return 3; 00092 else 00093 return 2; 00094 } 00095 00096 TVec<VMat> TrainTestBagsSplitter::getSplit(int k) 00097 { 00098 // soooo.. what is k for? 00099 if (k) 00100 PLERROR("TrainTestBagsSplitter::getSplit() - k cannot be greater than 0"); 00101 00102 TVec<VMat> split_(2); 00103 00104 int l = dataset->length(); 00105 int test_length = int(test_fraction*l); 00106 00107 Vec v; 00108 dataset->getRow(test_length, v); 00109 // Goes until it finds the end of the bag 00110 // TODO If == 3, it's different but it's the end of a bag, no? 00111 PLWARNING("In TrainTestBagsSplitter::getSplit - I think it won't work (see TODO in code)"); 00112 while ( v[dataset->width()-1] != SumOverBagsVariable::TARGET_COLUMN_LAST) 00113 { 00114 ++test_length; 00115 dataset->getRow(test_length, v); 00116 } 00117 00118 int train_length = l - test_length; 00119 00120 split_[0] = dataset.subMatRows(0, train_length); 00121 split_[1] = dataset.subMatRows(train_length, test_length); 00122 if (append_train) { 00123 split_.resize(3); 00124 split_[2] = split_[0]; 00125 } 00126 return split_; 00127 } 00128 00129 } // end of namespace PLearn

Generated on Tue Aug 17 16:08:58 2004 for PLearn by doxygen 1.3.7