00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
#include "JoinVMatrix.h"
00041
00042
namespace PLearn {
00043
using namespace std;
00044
00045
00046
// NOTE(review): the two string arguments below are still template placeholders
// ("ONE LINE DESCR" / "NO HELP"). Presumably they are the one-line description
// and the help text associated with this class -- TODO confirm against the
// macro definition and replace them with real text.
PLEARN_IMPLEMENT_OBJECT(JoinVMatrix,
"ONE LINE DESCR",
"NO HELP");
00047
00048 JoinVMatrix::JoinVMatrix(
VMat mas,
VMat sla,
TVec<int> mi,
TVec<int> si)
00049 :
inherited(mas.length(),mas.width()),master(mas),slave(sla),master_idx(mi),slave_idx(si)
00050 {
00051
build();
00052 }
00053
00054
void
00055 JoinVMatrix::build()
00056 {
00057 inherited::build();
00058
build_();
00059 }
00060
00061
void
00062 JoinVMatrix::build_()
00063 {
00064
if (
master &&
slave) {
00065
if(
master_idx.
size()!=
slave_idx.
size())
00066
PLERROR(
"JoinVMatrix : master and slave field correspondance don't have same dimensions ");
00067
00068
for(
int j=0;j<
width();j++)
00069 declareField(j,
master->
fieldName(j), VMField::UnknownType);
00070
00071
temp.
resize(
slave.
width());
00072
tempkey.
resize(
master_idx.
size());
00073
00074
for(
int i=0;i<
slave.
length();i++) {
00075
slave->getRow(i,
temp);
00076
for(
int j=0;j<
slave_idx.
size();j++)
00077
tempkey[j]=
temp[
slave_idx[j]];
00078
mp.insert(make_pair(
tempkey,i));
00079 }
00080 }
00081 }
00082
00083
void
00084 JoinVMatrix::declareOptions(
OptionList &ol)
00085 {
00086
declareOption(ol,
"master", &JoinVMatrix::master, OptionBase::buildoption,
"");
00087
declareOption(ol,
"slave", &JoinVMatrix::slave, OptionBase::buildoption,
"");
00088
declareOption(ol,
"master_idx", &JoinVMatrix::master_idx, OptionBase::buildoption,
"");
00089
declareOption(ol,
"slave_idx", &JoinVMatrix::slave_idx, OptionBase::buildoption,
"");
00090 inherited::declareOptions(ol);
00091 }
00092
00093 void JoinVMatrix::addStatField(
const string & statis,
const string & namefrom,
const string & nameto)
00094 {
00095 width_++;
00096
int from=
slave->fieldIndex(namefrom),to=
width()-1;
00097
if(from==-1)
00098
PLERROR(
"Unknown field in JOIN operation : %s",namefrom.c_str());
00099 declareField(to, nameto, VMField::UnknownType);
00100
00101
if(statis==
"COUNT")
00102
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::COUNT));
00103
else if(statis==
"NMISSING")
00104
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::NMISSING));
00105
else if(statis==
"NNONMISSING")
00106
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::NNONMISSING));
00107
else if(statis==
"SUM")
00108
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::SUM));
00109
else if(statis==
"SUMSQUARE")
00110
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::SUMSQUARE));
00111
else if(statis==
"MEAN")
00112
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::MEAN));
00113
else if(statis==
"VARIANCE")
00114
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::VARIANCE));
00115
else if(statis==
"MIN")
00116
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::MIN));
00117
else if(statis==
"MAX")
00118
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::MAX));
00119
else if(statis==
"STDDEV")
00120
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::STDDEV));
00121
else if(statis==
"STDERR")
00122
fld.push_back(
JoinFieldStat(from,to,JoinFieldStat::STDERR));
00123
else PLERROR(
"Unknown statistic in JOIN operation : %s",statis.c_str());
00124 }
00125
00126 void JoinVMatrix::getNewRow(
int idx,
const Vec& v)
const
00127
{
00128
real nonmiss;
00129
master->getRow(idx,v.
subVec(0,
master.
width()));
00130
00131
00132
for(
int j=0;j<
master_idx.
size();j++)
00133
tempkey[j]=v[
master_idx[j]];
00134 Maptype::const_iterator it,low,upp;
00135 pair<Maptype::const_iterator,Maptype::const_iterator> tit=
mp.equal_range(
tempkey);
00136 low=tit.first;
00137 upp=tit.second;
00138
00139
Vec popo(v.subVec(
master.
width(),
width()-
master.
width()));
00140
00141
int sz=(
int)
fld.size();
00142
Vec count(sz,0.0),nmissing(sz,0.0),
sum(sz,0.0),
sumsquare(sz,0.0),
min(sz,FLT_MAX),
max(sz,-FLT_MAX);
00143
real val;
00144
if(low!=
mp.end())
00145 {
00146
for(it=low;it!=upp;++it)
00147 {
00148
slave->getRow(it->second,
temp);
00149
for(
int i=0;i<sz;i++)
00150 {
00151
val=
temp[
fld[i].from];
00152
count[i]++;
00153
if(
is_missing(
val))nmissing[i]++;
00154
else
00155 {
00156
sum[i]+=
val;
00157
sumsquare[i]+=
val*
val;
00158
if(
min[i]>
val)
min[i]=
val;
00159
if(
max[i]<
val)
max[i]=
val;
00160 }
00161 }
00162 }
00163 }
00164
for(
int i=0;i<sz;i++)
00165 {
00166 nonmiss=
count[i]-nmissing[i];
00167
switch(
fld[i].stat)
00168 {
00169
case JoinFieldStat::COUNT:
00170 popo[i]=
count[i];
00171
break;
00172
case JoinFieldStat::NMISSING:
00173 popo[i]=nmissing[i];
00174
break;
00175
case JoinFieldStat::NNONMISSING:
00176 popo[i]=nonmiss;
00177
break;
00178
case JoinFieldStat::SUM:
00179 popo[i]=
sum[i];
00180
break;
00181
case JoinFieldStat::SUMSQUARE:
00182 popo[i]=
sumsquare[i];
00183
break;
00184
case JoinFieldStat::MEAN:
00185 popo[i]=
sum[i]/
count[i];
00186
break;
00187
case JoinFieldStat::VARIANCE:
00188 popo[i]=(
sumsquare[i] -
sum[i]*
sum[i]/nonmiss)/(nonmiss-1);
00189
break;
00190
case JoinFieldStat::STDDEV:
00191 popo[i]=
sqrt((
sumsquare[i] -
sum[i]*
sum[i]/nonmiss)/(nonmiss-1));
00192
break;
00193
case JoinFieldStat::STDERR:
00194 popo[i]=
sqrt((
sumsquare[i] -
sum[i]*
sum[i]/nonmiss)/(nonmiss-1)/nonmiss);
00195
break;
00196
case JoinFieldStat::MIN:
00197 popo[i]=
min[i];
00198
break;
00199
case JoinFieldStat::MAX:
00200 popo[i]=
max[i];
00201
break;
00202
default:
PLERROR(
"Unknown statistic in JoinVMatrix!");
00203 }
00204 }
00205 }
00206
00207 string JoinVMatrix::getValString(
int col,
real val)
const
00208
{
00209
if(col<
master.
width())
00210
return master->getValString(col,
val);
00211
else
00212
return slave->getValString(col,
val);
00213 }
00214
00215 real JoinVMatrix::getStringVal(
int col,
const string & str)
const
00216
{
00217
if(col<
master.
width())
00218
return master->getStringVal(col,str);
00219
else
00220
return slave->getStringVal(col,str);
00221 }
00222
00223 const map<string,real>& JoinVMatrix::getStringToRealMapping(
int col)
const
00224
{
00225
if(col<
master.
width())
00226
return master->getStringToRealMapping(col);
00227
else
00228
return slave->getStringToRealMapping(col);
00229
00230 }
00231
00232 const map<real,string>& JoinVMatrix::getRealToStringMapping(
int col)
const
00233
{
00234
if(col<
master.
width())
00235
return master->getRealToStringMapping(col);
00236
else
00237
return slave->getRealToStringMapping(col);
00238 }
00239
00240
00241 string JoinVMatrix::getString(
int row,
int col)
const
00242
{
00243
if(col<
master.
width())
00244
return master->getString(row,col);
00245
else
00246
return slave->getString(row,col);
00247 }
00248
00249 }