00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00043
#ifndef VMatrix_INC
00044
#define VMatrix_INC
00045
00046
#include <cstdlib>
00047
#include <map>
00048
#include <plearn/base/PP.h>
00049
#include <plearn/math/TMat.h>
00050
#include <plearn/var/VarArray.h>
00051
#include <plearn/io/IntVecFile.h>
00052
#include <plearn/math/StatsCollector.h>
00053
#include <plearn/math/TMat_maths_impl.h>
00054
#include "VMField.h"
00055
00056
namespace PLearn {
00057
using namespace std;
00058
00059
class Ker;
00060
class VMat;
00061
class Func;
00062
00065 class VMatrix:
public Object
00066 {
00067
00068
private:
00069
00070 typedef Object inherited;
00071
friend class VMat;
00072
00073 mutable FILE*
lockf_;
00074
00076 mutable Vec get_row;
00077
00079 mutable Vec dotrow_1;
00080 mutable Vec dotrow_2;
00081
00082
protected:
00083
00084 int length_;
00085 int width_;
00086 time_t
mtime_;
00087
00088
00089
00090
00091
00092
00093 int inputsize_;
00094 int targetsize_;
00095 int weightsize_;
00096
00097
00098 bool writable;
00099
00102 string metadatadir;
00103
00104
00105 string alias_;
00106
00107
00108 mutable TVec<StatsCollector> field_stats;
00109
00110
00111 mutable TVec<map<string,real> >
map_sr;
00112 mutable TVec<map<real,string> >
map_rs;
00113
00114
private:
00115
00117
void build_();
00118
00119
public:
00120
00121 mutable Array<VMField> fieldinfos;
00122 Array<VMFieldStat> fieldstats;
00123
00124
VMatrix();
00125
00126
VMatrix(
int the_length,
int the_width);
00127
00128
00129
virtual void build();
00130
00131
static void declareOptions(
OptionList & ol);
00132
00133 void init_map_sr()
const {
if (
map_sr.
length()==0) {
map_sr.
resize(
width());
map_rs.
resize(
width()); } }
00134
00135
00136
00137
00138
00139 inline void defineSizes(
int inputsize,
int targetsize,
int weightsize=0)
00140 {
inputsize_ = inputsize,
targetsize_ = targetsize,
weightsize_ = weightsize; }
00141
00144
void copySizesFrom(
VMat m);
00145
00147
bool looksTheSameAs(
VMat m);
00148
00149 inline int inputsize()
const {
return inputsize_; }
00150 inline int targetsize()
const {
return targetsize_; }
00151 inline int weightsize()
const {
return weightsize_; }
00152 inline bool hasWeights()
const {
return weightsize_>0; }
00153
00157
virtual void getExample(
int i,
Vec& input,
Vec& target,
real& weight);
00158
00170 #define SPECIAL_FORMAT ((real)3.1e36)
00171
00172
00173
void setFieldInfos(
const Array<VMField>& finfo);
00175
bool hasFieldInfos() const;
00177 Array<VMField>& getFieldInfos() const;
00178 VMField& getFieldInfos(
int fieldindex)
const {
return getFieldInfos()[fieldindex]; }
00179
void declareField(
int fieldindex,
const string& fieldname, VMField::FieldType fieldtype=VMField::UnknownType);
00180
void declareFieldNames(
TVec<string> fnames);
00181
00184
int fieldIndex(
const string& fieldname)
const;
00185
00187
00189
int getFieldIndex(
const string& fieldname_or_num)
const;
00190
00191 string fieldName(
int fieldindex)
const {
return getFieldInfos(fieldindex).name; }
00192
TVec<string> fieldNames() const;
00193
void unduplicateFieldNames();
00194
00195 VMField::FieldType fieldType(
int fieldindex)
const {
return getFieldInfos(fieldindex).fieldtype; }
00196 VMField::FieldType
fieldType(
const string& fieldname)
const {
return fieldType(fieldIndex(fieldname)); }
00197 const VMFieldStat&
fieldStat(
int j)
const {
return fieldstats[j]; }
00198 const VMFieldStat& fieldStat(
const string& fieldname)
const {
return fieldStat(fieldIndex(fieldname)); }
00199
00200
void printFields(ostream& out)
const;
00201
void printFieldInfo(ostream& out,
int fieldnum)
const;
00202
void printFieldInfo(ostream& out,
const string& fieldname_or_num)
const;
00203
00204
string fieldheader(
int elementcharwidth=8);
00205
00206
00207
void saveFieldInfos() const;
00208
void loadFieldInfos() const;
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
void setSFIFFilename(
int col,
string ext,
string filepath="");
00221
void setSFIFFilename(
string fieldname,
string ext,
string filepath="");
00222
00223
00224
00225
00226
00227
00228
string getSFIFFilename(
int col,
string ext);
00229
string getSFIFFilename(
string fieldname,
string ext);
00230
00231
00232
bool isSFIFDirect(
int col,
string ext);
00233
bool isSFIFDirect(
string fieldname,
string ext);
00234
00235
00237
00238
00239
void saveAllStringMappings();
00240
00241
00242
void saveStringMappings(
int col,
string fname);
00243
00245
void addStringMapping(
int col,
string str,
real val);
00246
00249
real addStringMapping(
int col,
string str);
00250
00252
void removeAllStringMappings();
00253
00255
void removeColumnStringMappings(
int c);
00256
00258
void removeStringMapping(
int col,
string str);
00259
00261
void setStringMapping(
int col, const map<
string,
real>& zemap);
00262
00264
void deleteStringMapping(
int col);
00265
00267
void loadStringMapping(
int col);
00268
00270
void loadAllStringMappings();
00271
00273
void copyStringMappingsFrom(
VMat source);
00274
00277 virtual string getValString(
int col,
real val) const;
00278
00280 virtual const map<string,
real>& getStringToRealMapping(
int col) const;
00281
00283 virtual const map<
real,string>& getRealToStringMapping(
int col) const;
00284
00286 virtual real getStringVal(
int col, const string & str) const;
00287
00289 virtual string getString(
int row,
int col) const;
00290
00292
00293 virtual
void computeStats();
00294 bool hasStats()
const {
return fieldstats.
size()>0; }
00295
void saveStats(
const string& filename)
const;
00296
void loadStats(
const string& filename);
00297
00301
virtual void setMetaDataDir(
const string& the_metadatadir);
00302
00304 bool hasMetaDataDir()
const {
return metadatadir!=
""; }
00305
00307
string getMetaDataDir() const;
00308
00315
void lockMetaDataDir() const;
00316
00319
void unlockMetaDataDir() const;
00320
00323 string getAlias()
const {
return alias_; }
00324 void setAlias(
const string& the_alias) {
alias_ = the_alias; }
00325
00328
TVec<StatsCollector> getStats() const;
00329
00330 StatsCollector& getStats(
int fieldnum)
const
00331
{
return getStats()[fieldnum]; }
00332
00335
TVec<RealMapping> getRanges();
00336
00340
00341
00342
00343
virtual void save(
const string& filename)
const;
00344
00345
virtual void savePMAT(
const string& pmatfile)
const;
00346
virtual void saveDMAT(
const string& dmatdir)
const;
00350
virtual void saveAMAT(
const string& amatfile,
bool verbose=
true,
bool no_header =
false)
const;
00351
00352 inline int width()
const
00353
{
00354
#ifdef BOUNDCHECK
00355
if (!
this)
00356
PLERROR(
"VMATRIX::width() This object has pointer this=NULL");
00357
#endif
00358
return width_;
00359 }
00360 inline int length()
const
00361
{
00362
#ifdef BOUNDCHECK
00363
if (!
this)
00364
PLERROR(
"VMATRIX::length() This object has pointer this=NULL");
00365
#endif
00366
return length_;
00367 }
00368
00369 inline bool isWritable()
const {
return writable; }
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
string resolveFieldInfoLink(
string target,
string source);
00383
00389 inline time_t
getMtime()
const {
return mtime_; }
00390
00394 inline void setMtime(time_t t) {
mtime_ = t; }
00395
00397
virtual real get(
int i,
int j)
const;
00398
00400
virtual void put(
int i,
int j,
real value);
00401
00404
virtual void getSubRow(
int i,
int j,
Vec v)
const;
00405
00410
virtual void putSubRow(
int i,
int j,
Vec v);
00411
00413
virtual void appendRow(
Vec v);
00414
00416
virtual void flush();
00417
00419
void putOrAppendRow(
int i,
Vec v);
00420
00424
void forcePutRow(
int i,
Vec v);
00425
00428
virtual void getRow(
int i,
Vec v)
const;
00429
00430
virtual void putRow(
int i,
Vec v);
00431
virtual void fill(
real value);
00432
virtual void getMat(
int i,
int j,
Mat m)
const;
00433
virtual void putMat(
int i,
int j,
Mat m);
00434
00436
virtual void getColumn(
int i,
Vec v)
const;
00437
00443
virtual bool find(
const Vec& input,
real tolerance,
int* i = 0)
const;
00444
00453
virtual Mat toMat() const;
00454
00457 virtual
void compacify();
00458
00460 virtual
void reset_dimensions() {}
00461
00466
virtual VMat subMat(
int i,
int j,
int l,
int w);
00467
00474
virtual real dot(
int i1,
int i2,
int inputsize)
const;
00475
00476 inline real dot(
int i1,
int i2)
const {
return dot(i1,i2,
width()); }
00477
00479
virtual real dot(
int i,
const Vec& v)
const;
00480
00481 operator Mat()
const {
return toMat(); }
00482
00485
virtual void getRow(
int i,
VarArray& inputs)
const;
00486
00487
void print(ostream& out)
const;
00488
virtual void oldwrite(ostream& out)
const;
00489
virtual void oldread(istream& in);
00490
00491
PLEARN_DECLARE_ABSTRACT_OBJECT(
VMatrix);
00492
void makeDeepCopyFromShallowCopy(map<const void*, void*>& copies);
00493
00494
00505
virtual void evaluateKernel(
Ker ker,
int v1_startcol,
int v1_ncols,
00506
const Vec& v2,
const Vec& result,
int startrow=0,
int nrows=-1)
const;
00507
00509
virtual real evaluateKernelSum(
Ker ker,
int v1_startcol,
int v1_ncols,
00510
const Vec& v2,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00511
00514
virtual real evaluateKernelWeightedTargetSum(
Ker ker,
int v1_startcol,
int v1_ncols,
const Vec& v2,
00515
int t_startcol,
int t_ncols,
Vec& targetsum,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00516
00517
00522
virtual TVec< pair<real,int> > evaluateKernelTopN(
int N,
Ker ker,
int v1_startcol,
int v1_ncols,
00523
const Vec& v2,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00524
00527
virtual TVec< pair<real,int> > evaluateKernelBottomN(
int N,
Ker ker,
int v1_startcol,
int v1_ncols,
00528
const Vec& v2,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00529
00530
00535
virtual void accumulateXtY(
int X_startcol,
int X_ncols,
int Y_startcol,
int Y_ncols,
00536
Mat& result,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00537
00538
00543
virtual void accumulateXtX(
int X_startcol,
int X_ncols,
00544
Mat& result,
int startrow=0,
int nrows=-1,
int ignore_this_row=-1)
const;
00545
00547
virtual void evaluateSumOfFprop(
Func f,
Vec& output_result,
int nsamples=-1);
00548
virtual void evaluateSumOfFbprop(
Func f,
Vec& output_result,
Vec& output_gradient,
int nsamples=-1);
00549
00550
virtual ~VMatrix();
00551 };
00552
00553
DECLARE_OBJECT_PTR(VMatrix);
00554
00555 }
00556
00557
#endif
00558