00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00040
#include "FractionSplitter.h"
00041
00042
namespace PLearn {
00043
using namespace std;
00044
00045 FractionSplitter::FractionSplitter()
00046 :
Splitter(),
00047 round_to_closest(0)
00048 {}
00049
00050
00051
// Registers FractionSplitter through PLEARN_IMPLEMENT_OBJECT, passing a
// one-line summary followed by a longer multi-line help text.
PLEARN_IMPLEMENT_OBJECT(
    FractionSplitter,
    "A Splitter that can extract several subparts of a dataset in each split.",
    "Ranges of the dataset are specified explicitly as start:end positions,\n"
    "that can be absolute or relative to the number of samples in the training set.");
00055
00056 void FractionSplitter::declareOptions(
OptionList& ol)
00057 {
00058
00059
declareOption(ol,
"round_to_closest", &FractionSplitter::round_to_closest, OptionBase::buildoption,
00060
"If set to 1, then the integer value found when using fractions will\n"
00061
"be the closest integer, instead of the integer part.");
00062
00063
declareOption(ol,
"splits", &FractionSplitter::splits, OptionBase::buildoption,
00064
"A matrix of start:end pairs. Each row represents a split. \n"
00065
"Each start:end element represents a range of samples in the dataset to be splitted. \n"
00066
"start and end, which are positions in the datataset, can be specified as either \n"
00067
"a fraction of the dataset length (if <=1), or an absolute number of elements (if >1).\n"
00068
"The range includes all samples from start to end, but excluding the end sample \n"
00069
"(so that, for ex., the same value can be used as the start of the next range \n"
00070
"without having the two ranges ovelap). \n"
00071
"The value 1 is a bit special as it always means \"until last element inclusive\".\n"
00072
"Ex: 1 2 [ 0:0.80, 0.80:1 ] yields a single split with the first part being the first 80% \n"
00073
"of the data, and the second the next 20% \n");
00074
00075
00076 inherited::declareOptions(ol);
00077 }
00078
00079 void FractionSplitter::build_()
00080 {
00081 }
00082
00083
00084 void FractionSplitter::build()
00085 {
00086 inherited::build();
00087
build_();
00088 }
00089
00090 void FractionSplitter::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00091 {
00092 Splitter::makeDeepCopyFromShallowCopy(copies);
00093
00094
00095
00096
00097
00098
00099
00100
00101
PLERROR(
"FractionSplitter::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00102 }
00103
00104 int FractionSplitter::nsplits()
const
00105
{
00106
return splits.
length();
00107 }
00108
00109 int FractionSplitter::nSetsPerSplit()
const
00110
{
00111
return splits.
width();
00112 }
00113
00114
00115 TVec<VMat> FractionSplitter::getSplit(
int k)
00116 {
00117
TVec< pair<real,real> > frac_k =
splits(
k);
00118
int n = frac_k.
length();
00119
TVec<VMat> vms(n);
00120
int l = dataset.
length();
00121
int start = 0;
00122
int end = 0;
00123
for(
int i=0; i<n; i++)
00124 {
00125
real fstart = frac_k[i].
first;
00126
real fend = frac_k[i].second;
00127
00128
if(fstart>1)
00129 start =
int(fstart);
00130
else {
00131
if (
round_to_closest) {
00132 start = int(fstart*l + 0.5);
00133 }
else {
00134 start = int(fstart*l);
00135 }
00136 }
00137
00138
if(fend>1)
00139 end = int(fend);
00140
else if(fend==1)
00141 end = l;
00142
else {
00143
if (
round_to_closest) {
00144 end = int(fend*l + 0.5);
00145 }
else {
00146 end = int(fend*l);
00147 }
00148 }
00149
00150 vms[i] = dataset.
subMatRows(start, end-start);
00151 }
00152
return vms;
00153 }
00154
00155 }