00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "CumVMatrix.h"
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049
00050 CumVMatrix::CumVMatrix()
00051 :
inherited(), average(false)
00052
00053 {
00054
00055
00056
00057
00058 }
00059
00060
// Registers CumVMatrix with the PLearn object system together with its
// one-line summary and its multi-line help text.
// NOTE(review): "that a cumulated" in the summary looks like a typo for
// "that are cumulated"; the literal is left untouched here because it is
// text emitted at runtime.
PLEARN_IMPLEMENT_OBJECT(
    CumVMatrix,
    "Add columns that a cumulated values of given columns",
    "The user specifies one or more columns and for each such <column-name>\n"
    "a cum-<column-name> column is created which will contain the sum from row 0\n"
    "to the current row of <column-name>.\n");
00064
00065 void CumVMatrix::getNewRow(
int i,
const Vec& v)
const
00066
{
00067 source->getRow(i,sourcerow);
00068
if (
is_missing(
accumulated_columns(i,0)))
00069 {
00070
int k=i-1;
00071
while (
k>=0 &&
is_missing(
accumulated_columns(
k,0)))
k--;
00072
if (
k<0)
00073 {
00074
k=0;
00075 source->getRow(
k,
previous_sourcerow);
00076
for (
int j=0;j<
columns.
length();j++)
00077
accumulated_columns(
k,j) =
previous_sourcerow[
columns[j]];
00078 }
00079
for (;
k<i;
k++)
00080 {
00081 source->getRow(
k+1,
previous_sourcerow);
00082
for (
int j=0;j<
columns.
length();j++)
00083
accumulated_columns(
k+1,j) =
accumulated_columns(
k,j) +
previous_sourcerow[
columns[j]];
00084 }
00085 }
00086
if (
average)
00087 {
00088
real normalization = 1.0 / (i+1);
00089
for (
int j=0;j<
columns_to_accumulate.
length();j++)
00090
row[sourcerow.
length()+j]=normalization*
accumulated_columns(i,j);
00091 }
00092
else
00093 {
00094
for (
int j=0;j<
columns_to_accumulate.
length();j++)
00095
row[sourcerow.
length()+j]=
accumulated_columns(i,j);
00096 }
00097 v <<
row;
00098 }
00099
00100 void CumVMatrix::declareOptions(
OptionList& ol)
00101 {
00102
00103
00104
00105
00106
00107
00108
declareOption(ol,
"columns_to_accumulate", &CumVMatrix::columns_to_accumulate, OptionBase::buildoption,
00109
"Names of the columns to accumulate.");
00110
00111
declareOption(ol,
"average", &CumVMatrix::average, OptionBase::buildoption,
00112
"whether to report the sum (default, when average=false) or the average");
00113
00114
00115 inherited::declareOptions(ol);
00116 }
00117
00118 void CumVMatrix::build_()
00119 {
00120
int nc=
columns_to_accumulate.
length();
00121
if (source)
00122 {
00123
row.
resize(source->
width()+nc);
00124 sourcerow =
row.
subVec(0,source->
width());
00125
previous_sourcerow.
resize(source->
width());
00126
columns.
resize(nc);
00127
for (
int j=0;j<nc;j++)
00128
if ((
columns[j] = source->fieldIndex(
columns_to_accumulate[j])) == -1)
00129
PLERROR(
"CumVMatrix: provided field name %s not found in source VMatrix",
columns_to_accumulate[j].
c_str());
00130
00131 setMtime(
max(
getMtime(),source->getMtime()));
00132
00133
00134
if(length_<0)
00135 length_ = source->
length();
00136
if(width_<0)
00137 width_ = source->
width() + nc;
00138
00139
accumulated_columns.
resize(length_,nc);
00140
accumulated_columns.
fill(
MISSING_VALUE);
00141
00142
00143
if(!
hasFieldInfos() && source->hasFieldInfos() )
00144 {
00145
Array<VMField>& sinfo = source->getFieldInfos();
00146
int w=sinfo.
size();
00147 sinfo.
resize(w+nc);
00148
for (
int j=0;j<nc;j++)
00149 {
00150 sinfo[w+j]=sinfo[
columns[j]];
00151 sinfo[w+j].name =
"cum-"+sinfo[w+j].name;
00152 }
00153 setFieldInfos(sinfo);
00154 }
00155 }
00156 }
00157
00158
00159 void CumVMatrix::build()
00160 {
00161 inherited::build();
00162
build_();
00163 }
00164
00165 void CumVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00166 {
00167 inherited::makeDeepCopyFromShallowCopy(copies);
00168
00169
deepCopyField(
columns_to_accumulate, copies);
00170
deepCopyField(
accumulated_columns, copies);
00171
deepCopyField(
columns, copies);
00172
00173 }
00174
00175 }
00176