00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
#include "DiskVMatrix.h"
00043
#include "FileVMatrix.h"
00044
#include "SubVMatrix.h"
00045
#include "VMat_maths.h"
00046
#include "VMatrix.h"
00047
00048
#include <plearn/math/BottomNI.h>
00049
#include <plearn/ker/Kernel.h>
00050
#include <plearn/var/Func.h>
00051
#include <plearn/base/stringutils.h>
00052
#include <plearn/math/TopNI.h>
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
namespace PLearn {
00070
using namespace std;
00071
00074
// Object-system boilerplate for VMatrix (presumably registers it as an
// abstract PLearn object -- confirm against the macro definition).
// NOTE(review): the one-line description and the help text are placeholders.
PLEARN_IMPLEMENT_ABSTRACT_OBJECT(VMatrix,
"ONE LINE DESCR",
"NO HELP");
00075
00076 VMatrix::VMatrix()
00077 : lockf_(0), length_(-1), width_(-1), mtime_(0),
00078 inputsize_(-1), targetsize_(-1), weightsize_(-1),
00079 writable(false)
00080 {}
00081
00082 VMatrix::VMatrix(
int the_length,
int the_width)
00083 : lockf_(0), length_(the_length), width_(the_width), mtime_(0),
00084 inputsize_(-1), targetsize_(-1), weightsize_(-1),
00085 writable(false),
00086
map_sr(
TVec<map<
string,
real> >(the_width)),
00087
map_rs(
TVec<map<
real,
string> >(the_width)),
00088 fieldstats(0)
00089 {}
00090
00091 void VMatrix::declareOptions(
OptionList & ol)
00092 {
00093
declareOption(ol,
"writable", &VMatrix::writable, OptionBase::buildoption,
"Are write operations permitted?");
00094
declareOption(ol,
"length", &VMatrix::length_, OptionBase::buildoption,
00095
"length of the matrix (number of rows)");
00096
declareOption(ol,
"width", &VMatrix::width_, OptionBase::buildoption,
00097
"width of the matrix (number of columns; -1 indicates this varies from sample to sample...)");
00098
declareOption(ol,
"inputsize", &VMatrix::inputsize_, OptionBase::buildoption,
00099
"size of input part (-1 if variable or unspecified, 0 if no input)");
00100
declareOption(ol,
"targetsize", &VMatrix::targetsize_, OptionBase::buildoption,
00101
"size of target part (-1 if variable or unspecified, 0 if no target)");
00102
declareOption(ol,
"weightsize", &VMatrix::weightsize_, OptionBase::buildoption,
00103
"size of weights (-1 if unspecified, 0 if no weight, 1 for sample weight, >1 currently not supported (include it is recommended to include additional info in target. weight is really reserved for a per sample weight)).");
00104
declareOption(ol,
"metadatadir", &VMatrix::metadatadir, OptionBase::buildoption,
00105
"A directory in which to store meta-information for this matrix \n"
00106
"You don't always have to give this explicitly. For ex. if your \n"
00107
"vmat is the outer VMatrix in a .vmat file, the metadatadir will \n"
00108
"automatically be set to name_of_vmat_file.metadata/ \n"
00109
"And if it is the source inside another VMatrix that sets its \n"
00110
"metadatadir, it will often be set from that surrounding vmat's metadata \n");
00111 inherited::declareOptions(ol);
00112 }
00113
00114 void VMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00115 {
00116 inherited::makeDeepCopyFromShallowCopy(copies);
00117
deepCopyField(
get_row, copies);
00118
deepCopyField(
dotrow_1, copies);
00119
deepCopyField(
dotrow_2, copies);
00120
deepCopyField(
field_stats, copies);
00121
deepCopyField(
map_sr, copies);
00122
deepCopyField(
map_rs, copies);
00123
deepCopyField(
fieldinfos, copies);
00124
deepCopyField(
fieldstats, copies);
00125 }
00126
00127 Array<VMField>&
VMatrix::getFieldInfos()
const
00128
{
00129
if(
fieldinfos.
size()==0 &&
hasMetaDataDir())
00130 {
00131
string fname =
append_slash(
getMetaDataDir()) +
"fieldnames";
00132
if(
isfile(
fname))
00133
loadFieldInfos();
00134 }
00135
00136
int ninfos =
fieldinfos.
size();
00137
int w =
width();
00138
if(ninfos!=w && w > 0)
00139 {
00140
fieldinfos.
resize(w);
00141
for(
int j=ninfos; j<w; j++)
00142
fieldinfos[j] =
VMField(
tostring(j));
00143 }
00144
00145
return fieldinfos;
00146 }
00147
00148 void VMatrix::setFieldInfos(
const Array<VMField>& finfo)
00149 {
00150
fieldinfos=finfo;
00151 }
00152
00153 bool VMatrix::hasFieldInfos()
const
00154
{
00155
return fieldinfos.
size()>0;
00156 }
00157
00158 void VMatrix::unduplicateFieldNames()
00159 {
00160 map<string,vector<int> > mp;
00161
for(
int i=0;i<
width();i++)
00162 mp[
getFieldInfos(i).name].push_back(i);
00163 map<string,vector<int> >::iterator it;
00164
for(it=mp.begin();it!=mp.end();++it)
00165
if(it->second.size()!=1)
00166 {
00167
vector<int> v=it->second;
00168
for(
unsigned int j=0;j<v.size();j++)
00169
fieldinfos[v[j]].name+=
"."+
tostring(j);
00170 }
00171 }
00172
00173 TVec<string> VMatrix::fieldNames()
const
00174
{
00175
int d =
width();
00176
TVec<string> names(d);
00177
for(
int i=0; i<d; i++)
00178 names[i] =
fieldName(i);
00179
return names;
00180 }
00181
00182 int VMatrix::fieldIndex(
const string& fieldname)
const
00183
{
00184
Array<VMField>& infos =
getFieldInfos();
00185
for(
int i=0; i<
width(); i++)
00186
if(infos[i].name==fieldname)
00187
return i;
00188
return -1;
00189 }
00190
00191 int VMatrix::getFieldIndex(
const string& fieldname_or_num)
const
00192
{
00193
int i =
fieldIndex(fieldname_or_num);
00194
if(i==-1)
00195 i =
toint(fieldname_or_num);
00196
if (i < 0 || i >=
width())
00197
PLERROR(
"In VMatrix::getFieldIndex - Asked for an unvalid column number");
00198
return i;
00199 }
00200
00201 void VMatrix::build_()
00202 {
00203
if(
metadatadir!=
"")
00204
setMetaDataDir(
metadatadir);
00205 }
00206
00207 void VMatrix::build()
00208 {
00209 inherited::build();
00210
build_();
00211 }
00212
00213 void VMatrix::printFieldInfo(ostream& out,
int fieldnum)
const
00214
{
00215
VMField fi =
getFieldInfos(fieldnum);
00216
StatsCollector& s =
getStats(fieldnum);
00217
00218 out <<
"Field #" << fieldnum <<
": ";
00219 out << fi.
name <<
"\t type: ";
00220
switch(fi.
fieldtype)
00221 {
00222
case VMField::UnknownType:
00223 out <<
"UnknownType\n";
00224
break;
00225
case VMField::Continuous:
00226 out <<
"Continuous\n";
00227
break;
00228
case VMField::DiscrGeneral:
00229 out <<
"DiscrGeneral\n";
00230
break;
00231
case VMField::DiscrMonotonic:
00232 out <<
"DiscrMonotonic\n";
00233
break;
00234
case VMField::DiscrFloat:
00235 out <<
"DiscrFloat\n";
00236
break;
00237
case VMField::Date:
00238 out <<
"Date\n";
00239
break;
00240
default:
00241
PLERROR(
"Can't write name of type");
00242 }
00243
00244 out.precision(12);
00245 out <<
"nmissing: " << s.
nmissing() <<
'\n';
00246 out <<
"nnonmissing: " << s.
nnonmissing() <<
'\n';
00247 out <<
"sum: " << s.
sum() <<
'\n';
00248 out <<
"mean: " << s.
mean() <<
'\n';
00249 out <<
"stddev: " << s.
stddev() <<
'\n';
00250 out <<
"min: " << s.
min() <<
'\n';
00251 out <<
"max: " << s.
max() <<
'\n';
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272 out <<
endl <<
endl;
00273 }
00274
00275 void VMatrix::printFieldInfo(ostream& out,
const string& fieldname_or_num)
const
00276
{
00277
printFieldInfo(out,
getFieldIndex(fieldname_or_num));
00278 }
00279
00280 void VMatrix::printFields(ostream& out)
const
00281
{
00282
for(
int j=0; j<
width(); j++)
00283 {
00284
printFieldInfo(out,j);
00285 out <<
"-----------------------------------------------------" <<
endl;
00286 }
00287 }
00288
00289 void VMatrix::getExample(
int i,
Vec& input,
Vec& target,
real& weight)
00290 {
00291
if(
inputsize_<0)
00292
PLERROR(
"In VMatrix::getExample, inputsize_ not defined for this vmat");
00293 input.
resize(
inputsize_);
00294
getSubRow(i,0,input);
00295
if(
targetsize_<0)
00296
PLERROR(
"In VMatrix::getExample, targetsize_ not defined for this vmat");
00297 target.
resize(
targetsize_);
00298
if (
targetsize_ > 0) {
00299
getSubRow(i,
inputsize_,target);
00300 }
00301
00302
if(
weightsize_==0)
00303 weight = 1;
00304
else if(
weightsize_<0)
00305
PLERROR(
"In VMatrix::getExample, weightsize_ not defined for this vmat");
00306
else if(
weightsize_>1)
00307
PLERROR(
"In VMatrix::getExample, weightsize_ >1 not supported by this call");
00308
else
00309 weight =
get(i,
inputsize_+
targetsize_);
00310 }
00311
00312
00313 void VMatrix::computeStats()
00314 {
00315
fieldstats =
Array<VMFieldStat>(
width());
00316
Vec row(
width());
00317
for(
int i=0; i<
length(); i++)
00318 {
00319
getRow(i,row);
00320
for(
int j=0; j<
width(); j++)
00321
fieldstats[j].update(row[j]);
00322 }
00323 }
00324
00325 void VMatrix::loadStats(
const string& filename)
00326 {
00327 ifstream in(filename.c_str());
00328
if(!in)
00329
PLERROR(
"In VMatrix::loadStats Couldn't open file %s for reading",filename.c_str());
00330
int nfields;
00331 in >> nfields;
00332
if(nfields!=
width())
00333
PLWARNING(
"In VMatrix::loadStats nfields differes from VMat width");
00334
00335
fieldstats.
resize(nfields);
00336
for(
int j=0; j<
fieldstats.
size(); j++)
00337
fieldstats[j].
read(in);
00338 }
00339
00340 void VMatrix::saveStats(
const string& filename)
const
00341
{
00342 ofstream out(filename.c_str());
00343
if(!out)
00344
PLERROR(
"In VMatrix::saveStats Couldn't open file %s for writing",filename.c_str());
00345 out <<
fieldstats.
size() <<
endl;
00346
for(
int j=0; j<
fieldstats.
size(); j++)
00347 {
00348
fieldstats[j].
write(out);
00349 out <<
endl;
00350 }
00351 }
00352
00353
00354 string VMatrix::fieldheader(
int elementcharwidth)
00355 {
00356
00357
00358
return "VMatrix::fieldheader NOT YET IMPLEMENTED";
00359 }
00360
00361
00362 void VMatrix::declareField(
int fieldindex,
const string& fieldname, VMField::FieldType fieldtype)
00363 {
getFieldInfos(fieldindex) =
VMField(fieldname,fieldtype); }
00364
00365 void VMatrix::declareFieldNames(
TVec<string> fnames)
00366 {
00367
if(fnames.
length()!=
width())
00368
PLERROR(
"In VMatrix::declareFieldNames length of fnames differs from width() of VMatrix");
00369
for(
int i=0; i<fnames.
length(); i++)
00370
declareField(i,fnames[i]);
00371 }
00372
00373 void VMatrix::saveFieldInfos()
const
00374
{
00375
if(
fieldinfos.
size()==0)
00376
return;
00377
string filename =
append_slash(
getMetaDataDir()) +
"fieldnames";
00378 ofstream out(filename.c_str());
00379
if(!out)
00380
PLERROR(
"In VMatrix::saveFieldInfos Couldn't open file %s for writing",filename.c_str());
00381
for(
int i= 0; i <
fieldinfos.
length(); ++i)
00382 out <<
fieldinfos[i].name <<
'\t' << fieldinfos[i].fieldtype <<
endl;
00383 }
00384
00385 void VMatrix::loadFieldInfos()
const
00386
{
00387
string filename =
append_slash(
getMetaDataDir()) +
"fieldnames";
00388 ifstream in(filename.c_str());
00389
if(!in)
00390
PLERROR(
"In VMatrix::loadFieldInfos Couldn't open file %s for reading",filename.c_str());
00391
00392
int w =
width();
00393
fieldinfos.
resize(w);
00394
for(
int i= 0; i < w; ++i)
00395 {
00396
vector<string> v(
split(
pgetline(in)));
00397
switch(v.size())
00398 {
00399
case 1:
fieldinfos[i] =
VMField(v[0]);
break;
00400
case 2:
fieldinfos[i] = VMField(v[0], VMField::FieldType(
toint(v[1])));
break;
00401
default:
PLERROR(
"In VMatrix::loadFieldInfos Format not recognized. Each line should be '<name> {<type>}'.");
00402 }
00403 }
00404 }
00405
00406
00407 string VMatrix::resolveFieldInfoLink(
string target,
string source)
00408 {
00409
string contents =
removeblanks(
loadFileAsString(source));
00410
if(contents==source)
00411
return "ERROR";
00412
if(
isdir(contents))
00413 {
00414
00415 contents+=
slash;
00416
if(
isfile(contents+target+
".lnk"))
00417
return resolveFieldInfoLink(target,contents+target+
".lnk");
00418
else if(
isfile(contents+target))
00419
return contents+target;
00420
else if(
isfile(contents+
slash+
"__default.lnk"))
00421
return resolveFieldInfoLink(target,contents+
slash+
"__default.lnk");
00422
00423
else return contents+target;
00424 }
00425
else if(contents.substr(contents.size()-4,4)==
".lnk")
00426
return resolveFieldInfoLink(target,contents);
00427
else return contents;
00428 }
00429
00430 void VMatrix::setSFIFFilename(
int col,
string ext,
string filepath)
00431 {
00432
setSFIFFilename(
fieldName(col),ext,filepath);
00433 }
00434
00435
00436 void VMatrix::setSFIFFilename(
string fieldname,
string ext,
string filepath)
00437 {
00438
string target =
makeFileNameValid(fieldname+ext);
00439
string normalfname =
getMetaDataDir()+
"FieldInfo"+
slash+target;
00440
rm(normalfname+
".lnk");
00441
if(filepath==normalfname || filepath==
"")
00442 {
00443
rm(normalfname+
".lnk");
00444
return;
00445 }
00446
00447
force_mkdir_for_file(normalfname);
00448 ofstream o((normalfname+
".lnk").
c_str());
00449 o<<filepath<<
endl;
00450 }
00451
00452 string VMatrix::getSFIFFilename(
int col,
string ext)
00453 {
00454
return getSFIFFilename(
fieldName(col),ext);
00455 }
00456
00457 string VMatrix::getSFIFFilename(
string fieldname,
string ext)
00458 {
00459
string target =
makeFileNameValid(fieldname+ext);
00460
string normalfname =
getMetaDataDir()+
"FieldInfo"+
slash+target;
00461
string defaultlinkfname =
getMetaDataDir()+
"FieldInfo"+
slash+
"__default.lnk";
00462
if(
isfile(normalfname))
00463
return normalfname;
00464
else if(
isfile(normalfname+
".lnk"))
00465
return resolveFieldInfoLink(target, normalfname+
".lnk");
00466
else if(
isfile(defaultlinkfname))
00467
return resolveFieldInfoLink(target, defaultlinkfname);
00468
00469
else return normalfname;
00470 }
00471
00472 bool VMatrix::isSFIFDirect(
int col,
string ext)
00473 {
00474
return isSFIFDirect(
fieldName(col), ext);
00475 }
00476
00477 bool VMatrix::isSFIFDirect(
string fieldname,
string ext)
00478 {
00479
string target =
makeFileNameValid(fieldname+ext);
00480
string normalfname =
getMetaDataDir()+
"FieldInfo"+
slash+target;
00481
return getSFIFFilename(fieldname,ext) == normalfname;
00482 }
00483
00485 void VMatrix::addStringMapping(
int col,
string str,
real val)
00486 {
00487
init_map_sr();
00488
map_sr[col][str]=
val;
00489
map_rs[col][
val]=str;
00490 }
00491
00492 real VMatrix::addStringMapping(
int col,
string str)
00493 {
00494
init_map_sr();
00495 map<string,real>& m =
map_sr[col];
00496 map<string,real>::iterator it = m.
find(str);
00497
00498
real val = 0;
00499
if(it != m.end())
00500
val = it->second;
00501
else
00502 {
00503
val = -100 - m.size();
00504
addStringMapping(col, str,
val);
00505 }
00506
return val;
00507 }
00508
00509 void VMatrix::removeAllStringMappings()
00510 {
00511
init_map_sr();
00512
for(
int i=0;i<
width();i++)
00513 {
00514
map_sr[i].
clear();
00515
map_rs[i].
clear();
00516 }
00517 }
00518
00519 void VMatrix::removeColumnStringMappings(
int c)
00520 {
00521
init_map_sr();
00522
map_sr[c].
clear();
00523
map_rs[c].
clear();
00524 }
00525
00527
00529 void VMatrix::saveAllStringMappings()
00530 {
00531
string fname;
00532
for(
int i=0;i<
width();i++)
00533 {
00534
fname =
getSFIFFilename(i,
".smap");
00535
saveStringMappings(i,
fname);
00536 }
00537 }
00538
00539 void VMatrix::saveStringMappings(
int col,
string fname)
00540 {
00541 map<string, real> the_map =
getStringToRealMapping(col);
00542
if(the_map.size()==0)
00543 {
00544
rm(
fname);
00545
return;
00546 }
00547
force_mkdir_for_file(
fname);
00548
POFStream o(
fname.c_str());
00549
00550
if(!o)
00551
PLERROR(
"File %s can't be opened",
fname.c_str());
00552
for(map<string,real>::iterator it = the_map.begin();it!=the_map.end();++it)
00553 o << it->first << it->second <<
endl;
00554 }
00555
00557 void VMatrix::removeStringMapping(
int col,
string str)
00558 {
00559
init_map_sr();
00560 map<string,real>::iterator sriterator;
00561
00562
if((sriterator =
map_sr[col].
find(str)) ==
map_sr[col].end())
00563
return;
00564
real val =
map_sr[col][str];
00565
map_sr[col].erase(sriterator);
00566
map_rs[col].erase(
map_rs[col].
find(
val));
00567 }
00568
00570 void VMatrix::setStringMapping(
int col,
const map<string,real> & zemap)
00571 {
00572
init_map_sr();
00573
map_sr[col]=zemap;
00574
map_rs[col].
clear();
00575
for(map<string,real>::iterator it =
map_sr[col].
begin();it!=
map_sr[col].end();++it)
00576
map_rs[col][it->second]=it->
first;
00577 }
00578
00580 void VMatrix::deleteStringMapping(
int col)
00581 {
00582
init_map_sr();
00583
if(col>=
map_sr.
size() ||
00584 col>=
map_rs.
size())
00585
PLERROR(
"deleteStringMapping : out of bounds for col=%i in string mapping array (size=%i).\n Current VMatrix\nclass"\
00586
"is '%s' (or maybe derivated class?). be sure to set\n map_sr(rs) to appropriate sizes as soon as you know the width of the matrix\n"\
00587
"(in constructor or elsewhere)",col,
map_sr.
size(),
classname().c_str());
00588
map_sr[col].
clear();
00589
map_rs[col].
clear();
00590 }
00591
00593
00595 string VMatrix::getValString(
int col,
real val)
const
00596
{
00597
if(
is_missing(
val))
00598
return "";
00599
init_map_sr();
00600
if(
map_rs[col].
find(
val)==
map_rs[col].
end())
00601
return "";
00602
else return map_rs[col][
val];
00603 }
00604
00606
00608 real VMatrix::getStringVal(
int col,
const string & str)
const
00609
{
00610
if(
map_sr.
length()==0 ||
map_sr[col].
find(str)==
map_sr[col].
end())
00611
return MISSING_VALUE;
00612
else return map_sr[col][str];
00613 }
00614
00616
00618 string VMatrix::getString(
int row,
int col)
const
00619
{
00620
static string str;
00621
real val =
get(row,col);
00622 str =
getValString(col,
val);
00623
if (str ==
"")
00624
00625
return tostring(
val);
00626
else
00627
return str;
00628 }
00629
00631
00633 const map<string,real>& VMatrix::getStringToRealMapping(
int col)
const {
00634
init_map_sr();
00635
return map_sr[col];
00636 }
00637
00639
00641 const map<real,string>& VMatrix::getRealToStringMapping(
int col)
const {
00642
init_map_sr();
00643
return map_rs[col];
00644 }
00645
00646 void VMatrix::setMetaDataDir(
const string& the_metadatadir)
00647 {
00648
if(the_metadatadir==
"")
00649
PLERROR(
"Called setMetaDataDir with an empty string");
00650
metadatadir = the_metadatadir;
00651
if(!
force_mkdir(
metadatadir))
00652
PLERROR(
"In VMatrix::setMetadataDir could not create directory %s",
metadatadir.c_str());
00653
metadatadir =
abspath(
metadatadir);
00654 }
00655
00657
00659 void VMatrix::copySizesFrom(
VMat m) {
00660
defineSizes(m->inputsize(), m->targetsize(), m->weightsize());
00661 }
00662
00664
00666 bool VMatrix::looksTheSameAs(
VMat m) {
00667
return !(
00668 this->
width() != m->
width()
00669 || this->
length() != m->
length()
00670 || this->
inputsize() != m->inputsize()
00671 || this->
weightsize() != m->weightsize()
00672 || this->
targetsize() != m->targetsize() );
00673 }
00674
// Placeholder: no host name lookup is done, the fixed string "TODO" is
// returned.
string getHost()
{
    const string placeholder("TODO");
    return placeholder;
}
00679
// Placeholder: no process id lookup is done, the fixed value -999 is
// returned.
int getPid()
{
    const int placeholder_pid = -999;
    return placeholder_pid;
}
00684
// Placeholder: no user name lookup is done, the fixed string "TODO" is
// returned.
string getUser()
{
    const string placeholder("TODO");
    return placeholder;
}
00689
00690 void VMatrix::lockMetaDataDir()
const
00691
{
00692
if(!
hasMetaDataDir())
00693
PLERROR(
"In VMatrix::lockMetaDataDir(): metadatadir was not set");
00694
if(
lockf_!=0)
00695
PLERROR(
"VMatrix::lockMetaDataDir() called while already locked by this object.");
00696
if(!
pathexists(
metadatadir))
00697
force_mkdir(
metadatadir);
00698
string lockfile =
append_slash(
metadatadir)+
".lock";
00699
lockf_ = fopen(lockfile.c_str(),
"w");
00700
if(
lockf_==0)
00701 {
00702
string bywho;
00703
try{ bywho =
loadFileAsString(lockfile); }
00704
catch(...) { bywho =
"UNKNOWN (could not read .lock file)"; }
00705
00706 cerr <<
"! Waiting for .lock on directory " <<
metadatadir
00707 <<
" created by " << bywho <<
endl;
00708 }
00709
do
00710 {
00711
00712 sleep(1);
00713
lockf_ = fopen(lockfile.c_str(),
"w");
00714 }
while(
lockf_==0);
00715
00716 fprintf(
lockf_,
"host %s, pid %d, user %s",
getHost().
c_str(),
getPid(),
getUser().
c_str());
00717 fflush(
lockf_);
00718
00719 }
00720
00721 void VMatrix::unlockMetaDataDir()
const
00722
{
00723
if(
lockf_==0)
00724
PLERROR(
"In VMatrix::unlockMetaDataDir() was called while no lock is held by this object");
00725 fclose(
lockf_);
00726
string lockfile =
append_slash(
metadatadir)+
".lock";
00727
rm(lockfile);
00728
lockf_ = 0;
00729 }
00730
00731 string VMatrix::getMetaDataDir()
const
00732
{
00733
00734
00735
return metadatadir;
00736 }
00737
00739
00741 void VMatrix::loadAllStringMappings()
00742 {
00743
00744
if(
classname()==
"StrTableVMatrix")
00745
return;
00746
for(
int i=0;i<
width();i++)
00747
loadStringMapping(i);
00748 }
00749
00751
00753 void VMatrix::loadStringMapping(
int col)
00754
00755 {
00756
if(!
hasMetaDataDir())
00757
return;
00758
string fname =
getSFIFFilename(col,
".smap");
00759
init_map_sr();
00760
force_mkdir(
getMetaDataDir()+
"FieldInfo"+
slash);
00761
00762
if(!
isfile(
fname))
00763 {
00764
00765
00766
00767
00768
return;
00769 }
00770
00771
deleteStringMapping(col);
00772
00773
00774
PIFStream f(
fname);
00775
if(!f)
00776
PLERROR(
string(
"File "+
fname+
" cannot be opened.").
c_str());
00777
00778
00779
00780
00781
00782
00783
while(f)
00784 {
00785
string s;
00786
real val;
00787 f >> s >>
val;
00788
if(f)
00789 {
00790
map_sr[col][s]=
val;
00791
map_rs[col][
val]=s;
00792 }
00793 }
00794 }
00795
00797
00799 void VMatrix::copyStringMappingsFrom(
VMat source) {
00800
if (
width_ != source->
width()) {
00801
PLERROR(
"In VMatrix::copyStringMappingsFrom - The source VMatrix doesn't have the same width");
00802 }
00803
map_rs.
resize(
width_);
00804
map_sr.
resize(
width_);
00805
for (
int i = 0; i <
width_; i++) {
00806
setStringMapping(i, source->getStringToRealMapping(i));
00807 }
00808 }
00809
00811 TVec<StatsCollector> VMatrix::getStats()
const
00812
{
00813
if(!
field_stats)
00814 {
00815
string statsfile =
getMetaDataDir() +
slash+
"stats.psave";
00816
if (
isfile(statsfile) &&
getMtime()<
mtime(statsfile))
00817 {
00818
if(
getMtime()==0)
00819
PLWARNING(
"Warning: using a saved stat file (%s) but mtime is 0.\n(cannot be sure file is up to date)",statsfile.c_str());
00820
PLearn::load(statsfile,
field_stats);
00821 }
00822
else
00823 {
00824
VMat vm = const_cast<VMatrix*>(
this);
00825
field_stats =
PLearn::computeStats(vm, 2000);
00826
PLearn::save(statsfile,
field_stats);
00827 }
00828 }
00829
return field_stats;
00830 }
00831
00832 TVec<RealMapping> VMatrix::getRanges()
00833 {
00834
TVec<RealMapping> ranges;
00835
string rangefile =
getMetaDataDir() +
slash+
"ranges.psave";
00836
if(
isfile(rangefile))
00837
PLearn::load(rangefile, ranges);
00838
else
00839 {
00840 ranges =
computeRanges(
getStats(),std::max(10,
length()/200),std::max(10,
length()/100) );
00841
PLearn::save(rangefile, ranges);
00842 }
00843
return ranges;
00844 }
00845
00846
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869 real VMatrix::get(
int i,
int j)
const
00870
{
00871
PLERROR(
"get(i,j) method not implemented for this VMat (name=%s), please implement.",
classname().
c_str());
00872
return 0.0;
00873 }
00874
00875 void VMatrix::put(
int i,
int j,
real value)
00876 {
00877
PLERROR(
"put(i,j,value) method not implemented for this VMat, please implement.");
00878 }
00879
00880 void VMatrix::getColumn(
int j,
Vec v)
const
00881
{
00882
#ifdef BOUNDCHECK
00883
if(v.
length() !=
length())
00884
PLERROR(
"In VMatrix::getColumn v must have the same length as the VMatrix");
00885
#endif
00886
for(
int i=0; i<v.
length(); i++)
00887 v[i] =
get(i,j);
00888 }
00889
00891
00893 void VMatrix::getSubRow(
int i,
int j,
Vec v)
const
00894
{
00895
for(
int k=0;
k<v.
length();
k++)
00896 v[
k] =
get(i,j+
k);
00897 }
00898
00899 void VMatrix::putSubRow(
int i,
int j,
Vec v)
00900 {
00901
for(
int k=0;
k<v.
length();
k++)
00902
put(i, j+
k, v[
k]);
00903 }
00904
00906
00908 void VMatrix::getRow(
int i,
Vec v)
const
00909
{
00910
#ifdef BOUNDCHECK
00911
if(v.
length() !=
width())
00912
PLERROR(
"In VMatrix::getRow(i,v) length of v and width of VMatrix differ");
00913
#endif
00914
getSubRow(i,0,v);
00915 }
00916
00917 void VMatrix::putRow(
int i,
Vec v)
00918 {
00919
#ifdef BOUNDCHECK
00920
if(v.
length() !=
width())
00921
PLERROR(
"In VMatrix::putRow(i,v) length of v and width of VMatrix differ");
00922
#endif
00923
putSubRow(i,0,v);
00924 }
00925
00926 void VMatrix::fill(
real value)
00927 {
00928
Vec v(
width(), value);
00929
for (
int i=0; i<
length(); i++)
putRow(i,v);
00930 }
00931
00932 void VMatrix::appendRow(
Vec v)
00933 {
00934
PLERROR(
"This method (appendRow) not implemented by VMatrix subclass!");
00935 }
00936
00937 void VMatrix::flush()
00938 {}
00939
00940 void VMatrix::putOrAppendRow(
int i,
Vec v)
00941 {
00942
if(i==
length())
00943
appendRow(v);
00944
else if(i<
length())
00945
putRow(i,v);
00946
else
00947
PLERROR(
"In putOrAppendRow, index %d out of range",i);
00948 }
00949
00950 void VMatrix::forcePutRow(
int i,
Vec v)
00951 {
00952
if(i<
length())
00953
putRow(i,v);
00954
else
00955 {
00956
Vec emptyrow(
width());
00957 emptyrow.
clear();
00958
while(
length()<i)
00959
appendRow(emptyrow);
00960
appendRow(v);
00961 }
00962 }
00963
00964 void VMatrix::getMat(
int i,
int j,
Mat m)
const
00965
{
00966
#ifdef BOUNDCHECK
00967
if(i<0 || j<0 || i+m.
length()>
length() || j+m.
width()>
width())
00968
PLERROR(
"In VMatrix::getMat(i,j,m) OUT OF BOUNDS");
00969
#endif
00970
for(
int ii=0; ii<m.
length(); ii++)
00971 {
00972
Vec v = m(ii);
00973
getSubRow(i+ii, j, v);
00974 }
00975 }
00976
00977 void VMatrix::putMat(
int i,
int j,
Mat m)
00978 {
00979
#ifdef BOUNDCHECK
00980
if(i<0 || j<0 || i+m.
length()>
length() || j+m.
width()>
width())
00981
PLERROR(
"In VMatrix::putMat(i,j,m) OUT OF BOUNDS");
00982
#endif
00983
for(
int ii=0; ii<m.
length(); ii++)
00984 {
00985
Vec v = m(ii);
00986
putSubRow(i+ii, j, v);
00987 }
00988 }
00989
00990 void VMatrix::compacify() {}
00991
00992
00993 Mat VMatrix::toMat()
const
00994
{
00995
Mat m(
length(),
width());
00996
getMat(0,0,m);
00997
return m;
00998 }
00999
01000 VMat VMatrix::subMat(
int i,
int j,
int l,
int w)
01001 {
return new SubVMatrix(
this,i,j,l,w); }
01002
01004
01006 real VMatrix::dot(
int i1,
int i2,
int inputsize)
const
01007
{
01008
dotrow_1.
resize(inputsize);
01009
dotrow_2.
resize(inputsize);
01010
getSubRow(i1, 0,
dotrow_1);
01011
getSubRow(i2, 0,
dotrow_2);
01012
return PLearn::dot(
dotrow_1,
dotrow_2);
01013 }
01014
01015 real VMatrix::dot(
int i,
const Vec& v)
const
01016
{
01017
dotrow_1.
resize(v.
length());
01018
getSubRow(i, 0,
dotrow_1);
01019
return PLearn::dot(
dotrow_1, v);
01020 }
01021
01023
01025 void VMatrix::getRow(
int i,
VarArray& inputs)
const
01026
{
01027
Vec v(
width());
01028
getRow(i,v);
01029 inputs << v;
01030 }
01031
01032
01034
01036 bool VMatrix::find(
const Vec& input,
real tolerance,
int* i)
const {
01037
get_row.
resize(
inputsize());
01038
#ifdef BOUNDCHECK
01039
if (input.
length() !=
inputsize())
01040
PLERROR(
"In VMatrix::find - The given vector must be the same size as inputsize");
01041
#endif
01042
for (
int j = 0; j <
length(); j++) {
01043
getSubRow(j, 0,
get_row);
01044
if (
powdistance(input,
get_row, 2.0) < tolerance) {
01045
if (i)
01046 *i = j;
01047
return true;
01048 }
01049 }
01050
if (i)
01051 *i = -1;
01052
return false;
01053 }
01054
01055 void VMatrix::print(ostream& out)
const
01056
{
01057
Vec v(
width());
01058
for(
int i=0; i<
length(); i++)
01059 {
01060
getRow(i,v);
01061 out << v <<
endl;
01062 }
01063 }
01064
01065 void VMatrix::oldwrite(ostream& out)
const
01066
{
01067
writeHeader(out,
"VMatrix");
01068
writeField(out,
"length_",
length_);
01069
writeField(out,
"width_",
width_);
01070
01071
01072
writeFooter(out,
"VMatrix");
01073 }
01074
01075 void VMatrix::oldread(istream& in)
01076 {
01077
readHeader(in,
"VMatrix");
01078
readField(in,
"length_",
length_);
01079
readField(in,
"width_",
width_);
01080
01081
01082
readFooter(in,
"VMatrix");
01083 }
01084
01085 VMatrix::~VMatrix()
01086 {}
01087
01088 void VMatrix::save(
const string& filename)
const
01089
{
savePMAT(filename); }
01090
01091 void VMatrix::savePMAT(
const string& pmatfile)
const
01092
{
01093
if (
width() == -1)
01094
PLERROR(
"In VMat::save Saving in a pmat file is only possible for constant width Distributions (where width()!=-1)");
01095
01096
int nsamples =
length();
01097
01098
FileVMatrix m(pmatfile,nsamples,
width());
01099 m.
setFieldInfos(
getFieldInfos());
01100
Vec tmpvec(
width());
01101
01102
ProgressBar pb(cout,
"Saving to pmat", nsamples);
01103
01104
for(
int i=0; i<nsamples; i++)
01105 {
01106
getRow(i,tmpvec);
01107 m.
putRow(i,tmpvec);
01108 pb(i);
01109 }
01110
01111
01112
if (
fieldinfos.
size() > 0) m.
saveFieldInfos();
01113 }
01114
01115 void VMatrix::saveDMAT(
const string& dmatdir)
const
01116
{
01117
force_rmdir(dmatdir);
01118
DiskVMatrix vm(dmatdir,
width());
01119 vm.
setFieldInfos(
getFieldInfos());
01120
Vec v(
width());
01121
01122
ProgressBar pb(cout,
"Saving to dmat",
length());
01123
01124
for(
int i=0;i<
length();i++)
01125 {
01126
getRow(i,v);
01127 vm.
appendRow(v);
01128 pb(i);
01129
01130 }
01131 }
01132
01134
01136 void VMatrix::saveAMAT(
const string& amatfile,
bool verbose,
bool no_header)
const
01137
{
01138
int l =
length();
01139
int w =
width();
01140
01141 ofstream out(amatfile.c_str());
01142
if (!out)
01143
PLERROR(
"In saveAscii could not open file %s for writing",amatfile.c_str());
01144
01145
if (!no_header) {
01146 out <<
"#size: "<< l <<
' ' << w <<
endl;
01147 }
01148 out.precision(15);
01149
if(w>0 && !no_header)
01150 {
01151 out <<
"#: ";
01152
for(
int k=0;
k<w;
k++)
01153
01154 out <<
space_to_underscore(
fieldName(
k)) <<
' ';
01155 out <<
"\n";
01156 }
01157
01158
Vec v(w);
01159
01160
ProgressBar* pb = 0;
01161
if (verbose)
01162 pb =
new ProgressBar(cout,
"Saving to amat",
length());
01163
01164
for(
int i=0;i<l;i++)
01165 {
01166
getRow(i,v);
01167
for(
int j=0; j<w; j++)
01168 out << v[j] <<
' ';
01169 out <<
"\n";
01170
if (verbose)
01171 pb->
update(i);
01172 }
01173
if (verbose)
01174
delete pb;
01175 }
01176
01177
01178
01179
01180 void VMatrix::evaluateKernel(
Ker ker,
int v1_startcol,
int v1_ncols,
01181
const Vec& v2,
const Vec& result,
int startrow,
int nrows)
const
01182
{
01183
int endrow = (nrows>0) ?startrow+nrows :
length_;
01184
if(result.
length() != endrow-startrow)
01185
PLERROR(
"In VMatrix::evaluateKernel length of result vector does not match the row range");
01186
01187
Vec v1(v1_ncols);
01188
for(
int i=startrow; i<endrow; i++)
01189 {
01190
getSubRow(i,v1_startcol,v1);
01191 result[i] = ker(v1,v2);
01192 }
01193 }
01194
01195
01196 real VMatrix::evaluateKernelSum(
Ker ker,
int v1_startcol,
int v1_ncols,
01197
const Vec& v2,
int startrow,
int nrows,
int ignore_this_row)
const
01198
{
01199
int endrow = (nrows>0) ?startrow+nrows :
length_;
01200
double result = 0.;
01201
Vec v1(v1_ncols);
01202
for(
int i=startrow; i<endrow; i++)
01203
if(i!=ignore_this_row)
01204 {
01205
getSubRow(i,v1_startcol,v1);
01206 result += ker(v1,v2);
01207 }
01208
return (
real)result;
01209 }
01210
01211
01212
01213 real VMatrix::evaluateKernelWeightedTargetSum(
Ker ker,
int v1_startcol,
int v1_ncols,
const Vec& v2,
01214
int t_startcol,
int t_ncols,
Vec& targetsum,
int startrow,
int nrows,
int ignore_this_row)
const
01215
{
01216
int endrow = (nrows>0) ?startrow+nrows :
length_;
01217 targetsum.
clear();
01218
double result = 0.;
01219
Vec v1(v1_ncols);
01220
Vec target(t_ncols);
01221
for(
int i=startrow; i<endrow; i++)
01222
if(i!=ignore_this_row)
01223 {
01224
getSubRow(i,v1_startcol,v1);
01225
getSubRow(i,t_startcol,target);
01226
real kerval = ker(v1,v2);
01227 result += kerval;
01228
multiplyAcc(targetsum, target,kerval);
01229 }
01230
return (
real)result;
01231 }
01232
01233 TVec< pair<real,int> > VMatrix::evaluateKernelTopN(
int N,
Ker ker,
int v1_startcol,
int v1_ncols,
01234
const Vec& v2,
int startrow,
int nrows,
int ignore_this_row)
const
01235
{
01236
int endrow = (nrows>0) ?startrow+nrows :
length_;
01237
TopNI<real> extrema(N);
01238
Vec v1(v1_ncols);
01239
for(
int i=startrow; i<endrow; i++)
01240
if(i!=ignore_this_row)
01241 {
01242
getSubRow(i,v1_startcol,v1);
01243
real kerval = ker(v1,v2);
01244 extrema.
update(kerval,i);
01245 }
01246 extrema.
sort();
01247
return extrema.
getTopN();
01248 }
01249
01250 TVec< pair<real,int> > VMatrix::evaluateKernelBottomN(
int N,
Ker ker,
int v1_startcol,
int v1_ncols,
01251
const Vec& v2,
int startrow,
int nrows,
int ignore_this_row)
const
01252
{
01253
int endrow = (nrows>0) ?startrow+nrows :
length_;
01254
BottomNI<real> extrema(N);
01255
Vec v1(v1_ncols);
01256
for(
int i=startrow; i<endrow; i++)
01257
if(i!=ignore_this_row)
01258 {
01259
getSubRow(i,v1_startcol,v1);
01260
real kerval = ker(v1,v2);
01261 extrema.
update(kerval,i);
01262 }
01263 extrema.
sort();
01264
return extrema.
getBottomN();
01265 }
01266
01267
01268
01269
01270 void VMatrix::accumulateXtY(
int X_startcol,
int X_ncols,
int Y_startcol,
int Y_ncols,
01271
Mat& result,
int startrow,
int nrows,
int ignore_this_row)
const
01272
{
01273
int endrow = (nrows>0) ?startrow+nrows :
length_;
01274
Vec x(X_ncols);
01275
Vec y(Y_ncols);
01276
for(
int i=startrow; i<endrow; i++)
01277
if(i!=ignore_this_row)
01278 {
01279
getSubRow(i,X_startcol,
x);
01280
getSubRow(i,Y_startcol,y);
01281
externalProductAcc(result,
x,y);
01282 }
01283 }
01284
01285
01286
01287 void VMatrix::accumulateXtX(
int X_startcol,
int X_ncols,
01288
Mat& result,
int startrow,
int nrows,
int ignore_this_row)
const
01289
{
01290
Vec x(X_ncols);
01291
int endrow = (nrows>0) ?startrow+nrows :
length_;
01292
for(
int i=startrow; i<endrow; i++)
01293
if(i!=ignore_this_row)
01294 {
01295
getSubRow(i,X_startcol,
x);
01296
externalProductAcc(result,
x,
x);
01297 }
01298 }
01299
01300 void VMatrix::evaluateSumOfFprop(
Func f,
Vec& output_result,
int nsamples)
01301 {
01302
01303
01304
01305
static int curpos = 0;
01306
if (nsamples == -1) nsamples =
length();
01307
Vec input_value(
width());
01308
Vec output_value(output_result.
length());
01309
01310 f->recomputeParents();
01311 output_result.
clear();
01312
01313
for(
int i=0; i<nsamples; i++)
01314 {
01315
getRow(curpos++, input_value);
01316 f->fprop(input_value, output_value);
01317 output_result += output_value;
01318
if(curpos ==
length()) curpos = 0;
01319 }
01320 }
01321
01322 void VMatrix::evaluateSumOfFbprop(
Func f,
Vec& output_result,
Vec& output_gradient,
int nsamples)
01323 {
01324
01325
01326
01327
static int curpos = 0;
01328
if (nsamples == -1) nsamples =
length();
01329
Vec input_value(
width());
01330
Vec input_gradient(
width());
01331
Vec output_value(output_result.
length());
01332
01333 f->recomputeParents();
01334 output_result.
clear();
01335
01336
for(
int i=0; i<nsamples; i++)
01337 {
01338
getRow(curpos++, input_value);
01339 f->fbprop(input_value, output_value, input_gradient, output_gradient);
01340
01341 output_result += output_value;
01342
if(curpos ==
length()) curpos = 0;
01343 }
01344 }
01345
01346
01347 }