00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "MovingAverageVMatrix.h"
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049
00050 MovingAverageVMatrix::MovingAverageVMatrix()
00051 :
inherited(), centered_windows(true)
00052
00053 {
00054
00055
00056
00057
00058 }
00059
00060
// PLearn class-declaration macro for MovingAverageVMatrix: class name,
// one-line description, then the multi-line help text.
PLEARN_IMPLEMENT_OBJECT(
    MovingAverageVMatrix,
    "Perform moving average of given columns",
    "The user specifies one or more columns and for each such <column-name>\n"
    "a moving average window size: a ma<windowsize>-<column-name> column is\n"
    "created which will contain at row t the moving average from row t-<windowsize>+1\n"
    "to t inclusively of <column-name>.\n");
00065
00066 void MovingAverageVMatrix::getNewRow(
int i,
Vec& v)
const
00067
{
00068 source->getRow(i,sourcerow);
00069
int max_target =
centered_windows?
min(i+
max_window_size/2,
length()-1):i;
00070
if (
is_missing(
sums(max_target,0)))
00071 {
00072
int k=max_target-1;
00073
while (
k>=0 &&
is_missing(
sums(
k,0)))
k--;
00074
if (
k<0)
00075 {
00076
k=0;
00077 source->getRow(
k,
previous_sourcerow);
00078
for (
int j=0;j<
columns.
length();j++)
00079 {
00080
real new_value =
previous_sourcerow[
columns[j]];
00081
if (
is_missing(new_value))
00082 {
00083
sums(
k,j) = 0;
00084
ma(
k,j) =
MISSING_VALUE;
00085 }
00086
else
00087 {
00088
sums(
k,j) = new_value;
00089
nnonmissing(
k,j) = 1;
00090
ma(
k,j) = new_value;
00091 }
00092 }
00093 }
00094
for (;
k<max_target;
k++)
00095 {
00096 source->getRow(
k+1,
previous_sourcerow);
00097
for (
int j=0;j<
columns.
length();j++)
00098 {
00099
real new_value =
previous_sourcerow[
columns[j]];
00100
if (!
is_missing(new_value))
00101 {
00102
sums(
k+1,j) =
sums(
k,j) + new_value;
00103
nnonmissing(
k+1,j) =
nnonmissing(
k,j) + 1;
00104 }
00105
else sums(
k+1,j) =
sums(
k,j);
00106
int delta =
k+1-
window_sizes[j];
00107
int n_at_window_start = (delta>=0)?
nnonmissing(delta,j):0;
00108
int n =
nnonmissing(
k+1,j) - n_at_window_start;
00109
if (n>0)
00110 {
00111
if (delta>=0)
00112
ma(
k+1,j) = (
sums(
k+1,j) -
sums(delta,j))/n;
00113
else
00114
ma(
k+1,j) =
sums(
k+1,j)/n;
00115 }
00116
else
00117
ma(
k+1,j) =
MISSING_VALUE;
00118 }
00119 }
00120 }
00121
for (
int j=0;j<
columns_to_average.
length();j++)
00122 {
00123
int target =
centered_windows?
min(i+
window_sizes[j]/2,
length()-1):i;
00124
row[sourcerow.
length()+j]=
ma(target,j);
00125 }
00126 v <<
row;
00127 }
00128
00129 void MovingAverageVMatrix::declareOptions(
OptionList& ol)
00130 {
00131
00132
00133
00134
00135
00136
00137
declareOption(ol,
"columns_to_average", &MovingAverageVMatrix::columns_to_average, OptionBase::buildoption,
00138
"Names of the columns to average.");
00139
00140
declareOption(ol,
"window_sizes", &MovingAverageVMatrix::window_sizes, OptionBase::buildoption,
00141
"Sizes (in number of rows) of the moving average windows for each column to average.");
00142
00143
declareOption(ol,
"centered_windows", &MovingAverageVMatrix::centered_windows, OptionBase::buildoption,
00144
"Wether or not to center the window around the current example or to average only over previous examples");
00145
00146
00147 inherited::declareOptions(ol);
00148 }
00149
00150 void MovingAverageVMatrix::build_()
00151 {
00152
int nc=
columns_to_average.
length();
00153
if (source)
00154 {
00155
row.
resize(source->
width()+nc);
00156 sourcerow =
row.
subVec(0,source->
width());
00157
previous_sourcerow.
resize(source->
width());
00158
columns.
resize(nc);
00159
max_window_size=0;
00160
if (
window_sizes.
length()!=nc)
00161
PLERROR(
"MovingAverageVMatrix: the window_sizes option should have the same length as the columns_to_average option (got %d and %d)",
00162
window_sizes.
length(),nc);
00163
for (
int j=0;j<nc;j++)
00164 {
00165
if ((
columns[j] = source->fieldIndex(
columns_to_average[j])) == -1)
00166
PLERROR(
"MovingAverageVMatrix: provided field name %s not found in source VMatrix",
columns_to_average[j].
c_str());
00167
if (
window_sizes[j]>
max_window_size)
00168
max_window_size=
window_sizes[j];
00169 }
00170
00171 setMtime(
max(
getMtime(),source->getMtime()));
00172
00173
00174
if(length_<0)
00175 length_ = source->
length();
00176
if(width_<0)
00177 width_ = source->
width() + nc;
00178
00179
sums.
resize(length_,nc);
00180
sums.
fill(
MISSING_VALUE);
00181
nnonmissing.
resize(length_,nc);
00182
nnonmissing.
clear();
00183
ma.
resize(length_,nc);
00184
ma.
fill(
MISSING_VALUE);
00185
00186
00187
if(!
hasFieldInfos() && source->hasFieldInfos() )
00188 {
00189
Array<VMField>& sinfo = source->getFieldInfos();
00190
int w=sinfo.
size();
00191 sinfo.
resize(w+nc);
00192
for (
int j=0;j<nc;j++)
00193 {
00194 sinfo[w+j]=sinfo[
columns[j]];
00195 sinfo[w+j].name =
"ma"+
tostring(
window_sizes[j])+
"-"+sinfo[w+j].name;
00196 }
00197 setFieldInfos(sinfo);
00198 }
00199 }
00200 }
00201
00202
00203 void MovingAverageVMatrix::build()
00204 {
00205 inherited::build();
00206
build_();
00207 }
00208
00209 void MovingAverageVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00210 {
00211 inherited::makeDeepCopyFromShallowCopy(copies);
00212
00213
deepCopyField(
columns, copies);
00214
deepCopyField(
columns_to_average, copies);
00215
deepCopyField(
sums, copies);
00216
deepCopyField(
window_sizes, copies);
00217
00218 }
00219
00220 }
00221