// -*- C++ -*-

// Splitter.h
//
// Copyright (C) 2002 Pascal Vincent, Frederic Morin
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library.
For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: Splitter.h,v 1.10 2004/07/21 16:30:55 chrish42 Exp $ 00037 ******************************************************* */ 00038 00040 #ifndef Splitter_INC 00041 #define Splitter_INC 00042 00043 #include <plearn/base/Object.h> 00044 #include <plearn/base/Array.h> 00045 #include "VMat.h" 00046 00047 namespace PLearn { 00048 using namespace std; 00049 00063 class Splitter: public Object 00064 { 00065 typedef Object inherited; 00066 00067 protected: 00068 // ********************* 00069 // * protected options * 00070 // ********************* 00071 00072 VMat dataset; 00073 00074 public: 00075 // **************** 00076 // * Constructors * 00077 // **************** 00078 00079 PLEARN_DECLARE_ABSTRACT_OBJECT(Splitter); 00080 00081 Splitter() {}; 00082 00084 virtual void setDataSet(VMat the_dataset); 00085 00087 VMat getDataSet() 00088 { return dataset; } 00089 00091 virtual int nsplits() const = 0; 00092 00094 virtual int nSetsPerSplit() const = 0; 00095 00097 virtual TVec<VMat> getSplit(int i=0) = 0; 00098 00100 virtual void makeDeepCopyFromShallowCopy(map<const void*, void*>& copies); 00101 00102 }; 00103 00104 // Declares a few other classes and functions related to this class 00105 DECLARE_OBJECT_PTR(Splitter); 00106 00107 00108 // A few useful function for splitting the datasets... 
(can be called in Splitter subclasses 00109 00119 void split(VMat d, real test_fraction, VMat& train, VMat& test, int i = 0, bool use_all = false); 00120 00122 void split(VMat d, real validation_fraction, real test_fraction, VMat& train, VMat& valid, VMat& test,bool do_shuffle=false); 00123 00130 Vec randomSplit(VMat d, real test_fraction, VMat& train, VMat& test); 00131 00133 void randomSplit(VMat d, real validation_fraction, real test_fraction, VMat& train, VMat& valid, VMat& test); 00134 00135 00136 } // end of namespace PLearn 00137 00138 #endif