00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00040 
#include "FractionSplitter.h"
00041 
00042 
namespace PLearn {
00043 
using namespace std;
00044 
00045 FractionSplitter::FractionSplitter() 
00046 : 
Splitter(),
00047   round_to_closest(0)
00048 {}
00049 
00050 
00051 
// Registers FractionSplitter through the PLEARN_IMPLEMENT_OBJECT macro.
// The second argument is the one-line description; the remaining adjacent
// string literals are concatenated into the multi-line help text.
PLEARN_IMPLEMENT_OBJECT(
  FractionSplitter,
  "A Splitter that can extract several subparts of a dataset in each split.",
  "Ranges of the dataset are specified explicitly as start:end positions,\n"
  "that can be absolute or relative to the number of samples in the training set.");
00055 
00056 void FractionSplitter::declareOptions(
OptionList& ol)
00057 {
00058 
00059   
declareOption(ol, 
"round_to_closest", &FractionSplitter::round_to_closest, OptionBase::buildoption,
00060                 
"If set to 1, then the integer value found when using fractions will\n"
00061                 
"be the closest integer, instead of the integer part.");
00062   
00063   
declareOption(ol, 
"splits", &FractionSplitter::splits, OptionBase::buildoption,
00064                 
"A matrix of start:end pairs. Each row represents a split. \n"
00065                 
"Each start:end element represents a range of samples in the dataset to be splitted. \n"
00066                 
"start and end, which are positions in the datataset, can be specified as either \n"
00067                 
"a fraction of the dataset length (if <=1), or an absolute number of elements (if >1).\n"
00068                 
"The range includes all samples from start to end, but excluding the end sample \n"
00069                 
"(so that, for ex., the same value can be used as the start of the next range \n"
00070                 
"without having the two ranges ovelap). \n"
00071                 
"The value 1 is a bit special as it always means \"until last element inclusive\".\n"
00072                 
"Ex: 1 2 [ 0:0.80, 0.80:1 ]  yields a single split with the first part being the first 80% \n"
00073                 
"of the data, and the second the next 20% \n");
00074 
00075   
00076   inherited::declareOptions(ol);
00077 }
00078 
00079 void FractionSplitter::build_()
00080 {
00081 }
00082 
00083 
00084 void FractionSplitter::build()
00085 {
00086   inherited::build();
00087   
build_();
00088 }
00089 
00090 void FractionSplitter::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00091 {
00092   Splitter::makeDeepCopyFromShallowCopy(copies);
00093 
00094   
00095   
00096   
00097   
00098   
00099 
00100   
00101   
PLERROR(
"FractionSplitter::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00102 }
00103 
00104 int FractionSplitter::nsplits()
 const
00105 
{
00106   
return splits.
length();
00107 }
00108 
00109 int FractionSplitter::nSetsPerSplit()
 const
00110 
{
00111   
return splits.
width();
00112 }
00113 
00114 
00115 TVec<VMat> FractionSplitter::getSplit(
int k)
00116 {
00117   
TVec< pair<real,real> > frac_k = 
splits(
k);
00118   
int n = frac_k.
length();
00119   
TVec<VMat> vms(n);
00120   
int l = dataset.
length();
00121   
int start = 0;
00122   
int end = 0;
00123   
for(
int i=0; i<n; i++)
00124     {
00125       
real fstart = frac_k[i].
first;
00126       
real fend = frac_k[i].second;
00127 
00128       
if(fstart>1) 
00129         start = 
int(fstart);
00130       
else {
00131         
if (
round_to_closest) {
00132           start = int(fstart*l + 0.5);
00133         } 
else {
00134           start = int(fstart*l);
00135         }
00136       }
00137 
00138       
if(fend>1) 
00139         end = int(fend);
00140       
else if(fend==1) 
00141         end = l;
00142       
else {
00143         
if (
round_to_closest) {
00144           end = int(fend*l + 0.5);
00145         } 
else {
00146           end = int(fend*l);
00147         }
00148       }
00149 
00150       vms[i] = dataset.
subMatRows(start, end-start);
00151     }
00152   
return vms;
00153 }
00154 
00155 }