00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
#include "DiskVMatrix.h"
00042
#include "VecCompressor.h"
00043
00044
namespace PLearn <%
00045 using namespace std;
00046
00047
00048
00051 DiskVMatrix::DiskVMatrix(
const string& the_dirname,
bool readwrite)
00052 : readwritemode(readwrite),freshnewfile(false),
00053 dirname(
remove_trailing_slash(the_dirname))
00054 {
00055 build_();
00056 }
00057
00058 DiskVMatrix::DiskVMatrix(
const string& the_dirname,
int the_width,
bool write_double_as_float)
00059 : RowBufferedVMatrix(0,the_width),readwritemode(true),
00060 freshnewfile(true),dirname(
remove_trailing_slash(the_dirname))
00061 {
00062 build_();
00063 }
00064
00065
//! Writes the vector `v` to `out`, delegating the encoding to
//! VecCompressor::writeCompressedVec.
void DiskVMatrix::writeRow(ostream& out, const Vec& v)
{
    VecCompressor::writeCompressedVec(out, v);
}
00067
00068
//! Reads a vector from `in` into `v`, delegating the decoding to
//! VecCompressor::readCompressedVec.
void DiskVMatrix::readRow(istream& in, const Vec& v)
{
    VecCompressor::readCompressedVec(in, v);
}
00070
00071
void DiskVMatrix::build()
00072 {
00073 inherited::build();
00074 build_();
00075 }
00076
00077
void DiskVMatrix::build_()
00078 {
00079
if(!freshnewfile)
00080 {
00081
if(!
isdir(dirname))
00082
PLERROR(
"In DiskVMatrix constructor, directory %s could not be found",dirname.c_str());
00083 setMetaDataDir(dirname +
".metadata");
00084 setMtime(
mtime(
append_slash(dirname)+
"indexfile"));
00085 ios::openmode omode;
00086
if(readwritemode)
00087 omode = ios::in | ios::out | ios::binary;
00088
else
00089 omode = ios::in | ios::binary;
00090
00091
string indexfname = dirname+
"/indexfile";
00092 indexf =
new fstream();
00093 indexf->open(indexfname.c_str(), omode);
00094
if(!*indexf)
00095 PLERROR(
"In DiskVMatrix constructor, could not open file %s in specified mode", indexfname.c_str());
00096
00097 int header;
00098 indexf->read((
char*)&header,
sizeof(
int));
00099 indexf->read((
char*)&length_,
sizeof(
int));
00100 indexf->read((
char*)&width_,
sizeof(
int));
00101
00102 int k=0;
00103 string fname = dirname+
"/"+
tostring(k)+
".data";
00104 while(
isfile(fname))
00105 {
00106 fstream* f =
new fstream();
00107 f->open(
fname.c_str(), omode);
00108
if(!(*f))
00109
PLERROR(
"In DiskVMatrix constructor, could not open file %s in specified mode",
fname.c_str());
00110 dataf.append(f);
00111
fname = dirname+
"/"+
tostring(++
k)+
".data";
00112 }
00113
00114 current_row_index = -1;
00115 current_row.resize(width_);
00116 other_row_index = -1;
00117 other_row.resize(width_);
00118
00119
00120 map_sr = TVec<map<string,real> >(width_);
00121 map_rs = TVec<map<real,string> >(width_);
00122
00123
getFieldInfos();
00124 }
00125
else
00126 {
00127
if(
isdir(dirname))
00128
PLERROR(
"In DiskVMatrix constructor (with specified width), directory %s already exists",dirname.c_str());
00129 setMetaDataDir(dirname +
".metadata");
00130 setMtime(
mtime(
append_slash(dirname)+
"indexfile"));
00131
00132
00133
if(
isfile(dirname))
00134 unlink(dirname.c_str());
00135
if(!
force_mkdir(dirname))
00136
PLERROR(
"In DiskVMatrix constructor (with specified width), could not create directory %s Error was: %s",dirname.c_str(), strerror(errno));
00137
00138
string indexfname = dirname +
"/indexfile";
00139 indexf =
new fstream();
00140 indexf->open(indexfname.c_str(),ios::in | ios::out | ios::trunc | ios::binary);
00141
00142
int header = 123408;
00143 indexf->write((
char*)&header,
sizeof(
int));
00144 indexf->write((
char*)&length_,
sizeof(
int));
00145 indexf->write((
char*)&width_,
sizeof(
int));
00146
00147
string fname = dirname +
"/0.data";
00148
00149 fstream* f =
new fstream();
00150 f->open(
fname.c_str(), ios::in | ios::out | ios::trunc | ios::binary);
00151 dataf.append(f);
00152 }
00153 freshnewfile=
false;
00154 }
00155
00156
//! Registers this class's options in `ol` (only `dirname`, as a build
//! option), then lets the parent class append its own options.
void DiskVMatrix::declareOptions(OptionList &ol)
{
    declareOption(ol,
                  "dirname",
                  &DiskVMatrix::dirname,
                  OptionBase::buildoption,
                  "Directory name of the.dmat");

    inherited::declareOptions(ol);
}
00161
00162
void DiskVMatrix::getRow(
int i, Vec v)
const
00163
{
00164
#ifdef BOUNDCHECK
00165
if(i<0 || i>length())
00166
PLERROR(
"In DiskVMatrix::getRow, bad row number %d",i);
00167
if(v.length() != width())
00168
PLERROR(
"In DiskVMatrix::getRow, length of v (%d) does not match matrix width (%d)",v.length(),width());
00169
#endif
00170
00171
unsigned char filenum;
00172
unsigned int position;
00173 indexf->seekg(3*
sizeof(
int) + i*(
sizeof(
unsigned char)+
sizeof(
unsigned int)));
00174 indexf->get((
char&)filenum);
00175 indexf->read((
char*)&position,
sizeof(
unsigned int));
00176 fstream* f = dataf[int(filenum)];
00177 f->seekg(position);
00178
binread_compressed(*f,v.data(),v.length());
00179 }
00180
00181
void DiskVMatrix::putRow(
int i, Vec v)
00182 {
00183
#ifdef BOUNDCHECK
00184
if(i<0 || i>length())
00185
PLERROR(
"In DiskVMatrix::putRow, bad row number %d",i);
00186
if(v.length() != width())
00187
PLERROR(
"In DiskVMatrix::putRow, length of v (%d) does not match matrix width (%d)",v.length(),width());
00188
#endif
00189
00190
unsigned char filenum;
00191
unsigned int position;
00192 indexf->seekg(3*
sizeof(
int) + i*(
sizeof(
unsigned char)+
sizeof(
unsigned int)));
00193 indexf->get((
char&)filenum);
00194 indexf->read((
char*)&position,
sizeof(
unsigned int));
00195 fstream* f = dataf[int(filenum)];
00196 f->seekp(position);
00197
binwrite_compressed(*f,v.data(), v.length());
00198 }
00199
00200
//! Appends `v` as a new last row of the matrix.
//!
//! The row is encoded with binwrite_compressed at the end of the last data
//! file. When that file already exceeds 500000000 bytes, a new
//! `<dirname>/<n>.data` file is created and the row goes at its offset 0.
//! An index record (one byte file number + unsigned int offset) is then
//! appended to the index file, `length_` is incremented, and the length
//! stored in the index header is rewritten.
//!
//! @param v  row to append; its length must equal width().
//!
//! Raises a PLERROR when: the matrix is read-only; `v` has the wrong length;
//! no data file is open; a new data file cannot be created; or the new data
//! file number no longer fits in the single byte the index format reserves
//! for it.
void DiskVMatrix::appendRow(Vec v)
{
    if (!readwritemode)
        PLERROR("In DiskVMatrix::appendRow cannot append row in read only mode, set readwrite parameter to true when calling the constructor");
    if (v.length() != width())
        PLERROR("In DiskVMatrix::appendRow, length of v (%d) does not match matrix width (%d)",v.length(),width());

    // Size threshold above which a new data file is started.
    const long max_data_file_size = 500000000L;
    // The index stores the file number with put((unsigned char)...), i.e.
    // on a single byte.
    const int max_filenum = 255;

    int filenum = dataf.size()-1;
    if (filenum < 0)
        PLERROR("In DiskVMatrix::appendRow, no data file is open in directory %s", dirname.c_str());

    fstream* f = dataf[filenum];
    f->seekp(0, ios::end);
    unsigned int position = f->tellp();

    if (position > max_data_file_size)
    {
        filenum++;
        // Previously the number was silently truncated by the cast below,
        // making the index point at the wrong data file.
        if (filenum > max_filenum)
            PLERROR("In DiskVMatrix::appendRow, cannot create data file number %d: the index format stores file numbers on one byte", filenum);

        string filename = dirname + "/" + tostring(filenum) + ".data";
        f = new fstream();
        f->open(filename.c_str(), ios::in | ios::out | ios::trunc | ios::binary);
        if (!*f)
        {
            // Release the stream before raising so it is not leaked.
            delete f;
            PLERROR("In DiskVMatrix::appendRow, could not create file %s", filename.c_str());
        }
        dataf.append(f);
        position = 0;
    }

    binwrite_compressed(*f, v.data(), v.length());

    // Append the (file number, offset) record for the new row.
    indexf->seekp(0, ios::end);
    indexf->put((unsigned char)filenum);
    indexf->write((char*)&position, sizeof(unsigned int));

    // Update the row count, both in memory and in the index header (the
    // length is the second int of the header).
    length_++;
    indexf->seekp(sizeof(int), ios::beg);
    indexf->write((char*)&length_, sizeof(int));
}
00229
00230
void DiskVMatrix::flush()
00231 {
00232
int filenum = dataf.size()-1;
00233 fstream* f = dataf[filenum];
00234 f->flush();
00235 indexf->flush();
00236 }
00237
00238 DiskVMatrix::~DiskVMatrix()
00239 {
00240
for(
int i=0; i<dataf.size(); i++)
00241
delete dataf[i];
00242
delete indexf;
00243 saveFieldInfos();
00244 }
00245
00246
// PLearn object boilerplate for DiskVMatrix. The two strings passed to the
// macro are still placeholders.
PLEARN_IMPLEMENT_OBJECT(DiskVMatrix, "ONE LINE DESCR", "NO HELP");
00247
00248
00249 %>