00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
#include "SumOverBagsVariable.h"
00044
00045
00046
00047
namespace PLearn {
00048
using namespace std;
00049
00050
00051
00054
// Object-declaration macro for SumOverBagsVariable. The arguments are, in
// order: the class name, a one-line summary, and the multi-line help text
// (built from adjacent string literals).
PLEARN_IMPLEMENT_OBJECT(
    SumOverBagsVariable,
    // One-line summary.
    "Variable that sums the value of a Func each time evaluated on a subsequence of a VMat\n",
    // Detailed help: what is computed.
    "returns\n"
    " Sum_{bags in vmat} f(inputs and targets in bag)\n"
    "(it can average this sum over the number of bags if the 'average' option is set).\n"
    // Detailed help: how bags are delimited in the VMat.
    "By convention a bag is a sequence of rows of the vmat in which the last column of the target\n"
    "indicates whether the row is the first one (and/or) the last one, with its two least significant bits:\n"
    " last_column_of_target == 1 ==> first row\n"
    " last_column_of_target == 2 ==> last row\n"
    " last_column_of_target == 0 ==> intermediate row\n"
    " last_column_of_target == 1+2==3 ==> single-row bag (both first and last).\n"
    // Detailed help: meaning of n_samples.
    "The option n_samples controls how many terms in the sum are considered at a time:\n"
    " n_samples <= 0: sum over the whole vmat (e.g. for batch gradient computation)\n"
    " n_samples = 1: sum over a single bag at a time (e.g. for stochastic gradient)\n"
    " where each fprop or fbprop advances to the next bag\n"
    " otherwise: sum over n_samples bags at a time (e.g. for min-batch training)\n"
    // Detailed help: what f receives.
    "The last column of the target is not given in the call to f, but a bag_size input is provided instead.\n"
    "The inputs to f are: (matrix of bag inputs, the bag size, the bag target, [the bag weight])\n"
    "(the bag weight is included only if there are weights in the original VMat)."
);
00073
00075
00077 SumOverBagsVariable::SumOverBagsVariable()
00078 : vmat(), f(),
00079 average(0),
00080 max_bag_size(-1), n_samples(1),
00081
transpose(0),
00082 curpos()
00083 {}
00084
00085 SumOverBagsVariable::SumOverBagsVariable(
VMat the_vmat,
Func the_f,
int max_bagsize,
int nsamples,
bool the_average,
bool the_transpose)
00086 :
inherited(
nonInputParentsOfPath(the_f->inputs,the_f->outputs),
00087 the_f->outputs[0]->length(),
00088 the_f->outputs[0]->width()),
00089 vmat(the_vmat), f(the_f),
00090 average(the_average),
00091 max_bag_size(max_bagsize), n_samples(nsamples),
00092
transpose(the_transpose),
00093 curpos(0), bag_size(0)
00094 {
00095
build();
00096 }
00097
00099
00101 void SumOverBagsVariable::build()
00102 {
00103 inherited::build();
00104
build_();
00105 }
00106
00108
00110 void SumOverBagsVariable::build_()
00111 {
00112
if (
vmat)
00113 {
00114
if (
f->outputs.size()!=1)
00115
PLERROR(
"SumOverBagsVariable: expected a func with a single output variable (you may use concat to form a single output Var)");
00116
if (
vmat->weightsize()!=0 &&
vmat->weightsize()!=1)
00117
PLERROR(
"SumOverBagsVariable expected vmat->weightsize to be 0 or 1");
00118
00119
if (
transpose) {
00120
input_values.
resize(
vmat->inputsize(),
max_bag_size);
00121 }
else {
00122
input_values.
resize(
max_bag_size,
vmat->inputsize());
00123 }
00124
output_value.
resize(
f->outputs[0]->nelems());
00125
output_av =
Array<Vec>(
output_value);
00126
gradient_av =
Array<Vec>(gradient);
00127
f->inputs.setDontBpropHere(
true);
00128
00129
bag_size_vec.
resize(1);
00130
bag_target_and_bag_signal.
resize(
vmat->targetsize());
00131
bag_target.
resize(
vmat->targetsize() - 1);
00132
bag_signal =
bag_target_and_bag_signal.
subVec(
vmat->targetsize()-1,1);
00133
int ws =
vmat->weightsize();
00134
bag_weight.
resize(
ws);
00135
if (
ws > 0) {
00136
f_inputs.
resize(4);
00137
f_inputs[3] =
bag_weight;
00138 }
else {
00139
f_inputs.
resize(3);
00140 }
00141
f_inputs[0] =
input_values.
toVec();
00142
f_inputs[1] =
bag_size_vec;
00143
f_inputs[2] =
bag_target;
00144
unused_gradients.
resize(
f_inputs.
size());
00145
for (
int i=0;i<
f_inputs.
size();i++)
unused_gradients[i] =
f_inputs[i].
copy();
00146 }
00147 }
00148
00150
00152 void SumOverBagsVariable::declareOptions(
OptionList& ol)
00153 {
00154
declareOption(ol,
"f", &SumOverBagsVariable::f, OptionBase::buildoption,
00155
" Func that is applied on each bag, whose input is the following array of Vars:\n"
00156
" (matrix of bag inputs, the bag size, the bag target, [the bag weight]).\n");
00157
00158
declareOption(ol,
"vmat", &SumOverBagsVariable::vmat, OptionBase::buildoption,
00159
" VMatrix that contains the data, with multiple consecutive rows forming one bag.\n"
00160
" The last column of the target indicates the beginning and end of each bag, as follows:\n"
00161
" last_column_of_target == 1 ==> first row\n"
00162
" last_column_of_target == 2 ==> last row\n"
00163
" last_column_of_target == 0 ==> intermediate row\n"
00164
" last_column_of_target == 1+2==3 ==> single-row bag (both first and last).\n");
00165
00166
declareOption(ol,
"average", &SumOverBagsVariable::average, OptionBase::buildoption,
00167
" If set to 1, then will compute the mean of the sum, and not the sum itself.");
00168
00169
declareOption(ol,
"max_bag_size", &SumOverBagsVariable::max_bag_size, OptionBase::buildoption,
00170
" maximum number of examples in a bag (more than that in vmat will trigger a run-time error).\n");
00171
00172
declareOption(ol,
"n_samples", &SumOverBagsVariable::n_samples, OptionBase::buildoption,
00173
" number of bags to iterate over (1 for online gradient, <=0 for batch).");
00174
00175
declareOption(ol,
"transpose", &SumOverBagsVariable::transpose, OptionBase::buildoption,
00176
" If set to 1, then the bag inputs will be put in columns instead of rows.\n"
00177
" This can be useful if the Func f takes column vars as inputs.");
00178
00179 inherited::declareOptions(ol);
00180 }
00181
00183
00185 void SumOverBagsVariable::recomputeSize(
int& l,
int& w)
const
00186
{
00187
if (
f &&
f->outputs.size()) {
00188 l =
f->outputs[0]->length();
00189 w =
f->outputs[0]->width();
00190 }
else
00191 l = w = 0;
00192 }
00193
00194
00196
00198 void SumOverBagsVariable::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00199 {
00200 NaryVariable::makeDeepCopyFromShallowCopy(copies);
00201
deepCopyField(
vmat, copies);
00202
deepCopyField(
f, copies);
00203
deepCopyField(
output_value, copies);
00204
deepCopyField(
input_values, copies);
00205
deepCopyField(
bag_size_vec, copies);
00206
deepCopyField(
bag_target_and_bag_signal, copies);
00207
deepCopyField(
bag_target, copies);
00208
deepCopyField(
bag_signal, copies);
00209
deepCopyField(
bag_weight, copies);
00210
deepCopyField(
f_inputs, copies);
00211
deepCopyField(
unused_gradients, copies);
00212
deepCopyField(
output_av, copies);
00213
deepCopyField(
gradient_av, copies);
00214 }
00215
00216
00218
00220 void SumOverBagsVariable::fpropOneBag(
bool do_bprop)
00221 {
00222
static real dummy_weight=0;
00223
bool reached_end_of_bag=
false;
00224
if (
transpose) {
00225
input_values.
resize(
input_values.
length(),
max_bag_size);
00226 }
else {
00227
input_values.
resize(
max_bag_size,
input_values.
width());
00228 }
00229
for (
bag_size=0;!reached_end_of_bag;
bag_size++)
00230 {
00231
if (
bag_size>=
max_bag_size)
00232
PLERROR(
"SumOverBagsVariable: bag size=%d > expected max. bag size(%d)",
00233
bag_size,
max_bag_size);
00234
Vec input_value;
00235
if (
transpose) {
00236 input_value.
resize(
input_values.
length());
00237 }
else {
00238 input_value =
input_values(
bag_size);
00239 }
00240
if (
vmat->weightsize()>0)
00241 {
00242
real& weight =
bag_weight[0];
00243
vmat->
getExample(
curpos,input_value,
bag_target_and_bag_signal,weight);
00244 }
00245
else
00246
vmat->
getExample(
curpos,input_value,
bag_target_and_bag_signal,dummy_weight);
00247
if (
bag_size == 0) {
00248
00249
bag_target <<
bag_target_and_bag_signal.
subVec(0,
bag_target_and_bag_signal.
length() - 1);
00250 }
00251
if (
transpose) {
00252
00253
00254
input_values.
column(
bag_size) << input_value;
00255 }
00256
if (
bag_size==0 && !(
int(
bag_signal[0]) & 1))
00257
PLERROR(
"SumOverBagsVariable: data synchronization error, first row of bag has wrong bag signal");
00258 reached_end_of_bag = (
int(
bag_signal[0]) & 2);
00259
if(++
curpos ==
vmat->
length())
00260 {
00261
curpos = 0;
00262
if (!reached_end_of_bag)
00263 {
00264
PLERROR(
"SumOverBagsVariable: last bag of VMatrix is not complete");
00265
return;
00266 }
00267 }
00268 }
00269
bag_size_vec[0]=
bag_size;
00270
if (do_bprop)
00271
f->fbprop(
f_inputs,
output_av,
unused_gradients,
gradient_av);
00272
else
00273
f->fprop(
f_inputs,
output_av);
00274 value +=
output_value;
00275 }
00276
00278
00280 void SumOverBagsVariable::fprop()
00281 {
00282 value.
clear();
00283
f->recomputeParents();
00284
if (
n_samples==1)
00285
fpropOneBag();
00286
else if (
n_samples<=0)
00287 {
00288
curpos=0;
00289
int count_bags = 0;
00290
do {
00291
fpropOneBag();
00292 count_bags++;
00293 }
00294
while (
curpos>0);
00295
if (
average) {
00296 value /= count_bags;
00297 }
00298 }
00299
else {
00300
for (
int i=0;i<
n_samples;i++)
00301
fpropOneBag();
00302
if (
average) {
00303 value /= n_samples;
00304 }
00305 }
00306 }
00307
00308
00310
00312 void SumOverBagsVariable::fbprop()
00313 {
00314 value.
clear();
00315
f->recomputeParents();
00316
if (
n_samples==1)
00317
fpropOneBag(
true);
00318
else if (
n_samples<=0)
00319 {
00320
if (
average) {
00321
00322
00323
PLERROR(
"In SumOverBagsVariable::fbprop - If you want to get the average, you must tell me the number of bags in n_samples > 0, because I'm too dumb to guess it.");
00324 }
00325
curpos = 0;
00326
do {
00327
fpropOneBag(
true);
00328 }
00329
while (
curpos>0);
00330 }
00331
else {
00332
if (
average) {
00333 gradient /=
n_samples;
00334 }
00335
for (
int i=0;i<
n_samples;i++)
00336
fpropOneBag(
true);
00337
if (
average) {
00338 value /= n_samples;
00339 }
00340 }
00341 }
00342
00344
00346 void SumOverBagsVariable::bprop()
00347 {
00348
fbprop();
00349 }
00350
00352
00354 void SumOverBagsVariable::printInfo(
bool print_gradient)
00355 {
00356
f->fproppath.printInfo(print_gradient);
00357 cout <<
info() <<
" : " <<
getName() <<
"(max_bag_size=" <<
max_bag_size <<
", ";
00358 cout <<
", n_samples=" <<
n_samples <<
") = " << value;
00359
if (print_gradient) cout <<
" gradient=" << gradient;
00360 cout <<
endl;
00361 }
00362
00363
00364 }
00365
00366