00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00044 
#include "SelectRowsVMatrix.h"
00045 
#include <plearn/var/SumOverBagsVariable.h>  
00046 
#include "ToBagSplitter.h"
00047 
00048 
namespace PLearn {
00049 
using namespace std;
00050 
00052 
00054 ToBagSplitter::ToBagSplitter() 
00055   : 
Splitter(),
00056     expected_size_of_bag(10)
00057 {}
00058 
00059 
// Registers ToBagSplitter with the PLearn object system, together with
// its one-line description and its longer help text.
PLEARN_IMPLEMENT_OBJECT(ToBagSplitter,
                        "A Splitter that makes any existing splitter operate on bags only.",
                        "The dataset provided must contain bag information, as described in\n"
                        "SumOverBagsVariable");
00063 
00065 
00067 void ToBagSplitter::declareOptions(
OptionList& ol)
00068 {
00069    
declareOption(ol, 
"expected_size_of_bag", &ToBagSplitter::expected_size_of_bag, OptionBase::buildoption,
00070        
"The expected size of each bag. It is not compulsory to change this option.");
00071 
00072    
declareOption(ol, 
"sub_splitter", &ToBagSplitter::sub_splitter, OptionBase::buildoption,
00073        
"The underlying splitter we want to make operate on bags.");
00074 
00075   
00076   inherited::declareOptions(ol);
00077 }
00078 
00080 
00082 void ToBagSplitter::build()
00083 {
00084   inherited::build();
00085   
build_();
00086 }
00087 
00089 
00091 void ToBagSplitter::build_()
00092 {
00093   
if (dataset) {
00094     
00095     
int max_ninstances = 1;
00096     
00097     
00098     
00099     
Mat bags_store(dataset->
length() / 
expected_size_of_bag + 1, 
expected_size_of_bag + 1);
00100     
int num_bag = 0;
00101     
int num_instance = 0;
00102     
int bag_signal_column = dataset->inputsize() + dataset->targetsize() - 1; 
00103     
for (
int i = 0; i < dataset->
length(); i++) {
00104       
if (num_instance + 1 >= bags_store.
width()) {
00105         
if (num_instance > 10*(
expected_size_of_bag+1))
00106           
PLERROR(
"ToBagSplitter: found bag size (%d) more than 10 times bigger than expected_size_of_bag (%d)!\n",
00107                   num_instance,
expected_size_of_bag);
00108         
00109         bags_store.
resize(bags_store.
length(), bags_store.
width() * 2);
00110       }
00111       
if (num_instance >= max_ninstances) {
00112         max_ninstances = num_instance + 1;
00113       }
00114       bags_store(num_bag, num_instance + 1) = i;
00115       num_instance++;
00116       
if (
int(dataset->get(i, bag_signal_column)) & SumOverBagsVariable::TARGET_COLUMN_LAST) {
00117         
00118         bags_store(num_bag, 0) = num_instance; 
00119         num_bag++;
00120         num_instance = 0;
00121         
if (num_bag >= bags_store.
length()) {
00122           
00123           bags_store.
resize(bags_store.
length() * 2, bags_store.
width());
00124         }
00125       }
00126     }
00127     
00128     bags_store.
resize(num_bag, max_ninstances + 1);
00129     
bags_index = 
VMat(bags_store);
00130     
00131     
sub_splitter->setDataSet(
bags_index);
00132   }
00133 }
00134 
00136 
00138 TVec<VMat> ToBagSplitter::getSplit(
int k)
00139 {
00140   
00141   
TVec<VMat> sub_splits = 
sub_splitter->getSplit(
k);
00142   
TVec<VMat> result;
00143   
for (
int i = 0; i < sub_splits.
length(); i++) {
00144     
00145     
Mat indices = sub_splits[i].
toMat();
00146     
00147     
TVec<int> indices_int;
00148     
for (
int j = 0; j < indices.
length(); j++) {
00149       
for (
int k = 0; 
k < indices(j, 0); 
k++) {
00150         
int indice = 
int(indices(j, 
k + 1));
00151         indices_int.
append(indice);
00152       }
00153     }
00154     result.
append(
new SelectRowsVMatrix(dataset, indices_int));
00155   }
00156   
return result;
00157 }
00158 
00160 
00162 void ToBagSplitter::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00163 {
00164   Splitter::makeDeepCopyFromShallowCopy(copies);
00165 
00166   
00167   
00168   
00169   
00170   
00171 
00172   
00173   
PLERROR(
"ToBagSplitter::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00174 }
00175 
00177 
00179 int ToBagSplitter::nSetsPerSplit()
 const
00180 
{
00181   
00182   
return sub_splitter->nSetsPerSplit();
00183 }
00184 
00186 
00188 int ToBagSplitter::nsplits()
 const
00189 
{
00190   
return sub_splitter->nsplits();
00191 }
00192 
00194 
00196 void ToBagSplitter::setDataSet(
VMat the_dataset) {
00197   inherited::setDataSet(the_dataset);
00198   
00199   
build();
00200 }
00201 
00202 }