00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
#include <errno.h>
00042
#include "DiskVMatrix.h"
00043
#include <plearn/io/pl_io.h>
00044
#include <errno.h>
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049
#ifdef WIN32
00050
#include <io.h>
00051
#define unlink _unlink
00052
#endif
00053
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 DiskVMatrix::DiskVMatrix()
00088 : indexf(0),freshnewfile(false),
00089 old_format(false),swap_endians(false),
00090 tolerance(1e-6)
00091 {
00092 writable =
false;
00093 }
00094
00095 DiskVMatrix::DiskVMatrix(
const string& the_dirname,
bool readwrite)
00096 : indexf(0),freshnewfile(false),
00097 old_format(false),swap_endians(false),
00098 dirname(
remove_trailing_slash(the_dirname)),
00099 tolerance(1e-6)
00100 {
00101 writable = readwrite;
00102
build_();
00103 }
00104
00105 DiskVMatrix::DiskVMatrix(
const string& the_dirname,
int the_width,
bool write_double_as_float)
00106 :
RowBufferedVMatrix(0,the_width),
00107 indexf(0),
00108 freshnewfile(true),
00109 old_format(false),swap_endians(false),
00110 dirname(
remove_trailing_slash(the_dirname)),
00111 tolerance(1e-6)
00112 {
00113 writable =
true;
00114
build_();
00115 }
00116
00117 void DiskVMatrix::build()
00118 {
00119 inherited::build();
00120
build_();
00121 }
00122
00123 void DiskVMatrix::build_()
00124 {
00125
if(!
freshnewfile)
00126 {
00127
if(!
isdir(
dirname))
00128
PLERROR(
"In DiskVMatrix constructor, directory %s could not be found",
dirname.c_str());
00129 setMetaDataDir(
dirname +
".metadata");
00130 setMtime(
mtime(
append_slash(
dirname)+
"indexfile"));
00131
string omode;
00132
if(writable)
00133 omode =
"r+b";
00134
else
00135 omode =
"rb";
00136
00137
string indexfname =
dirname+
slash+
"indexfile";
00138
indexf = fopen(indexfname.c_str(), omode.c_str());
00139
if(!
indexf)
00140
PLERROR(
"In DiskVMatrix constructor, could not open file %s in specified mode", indexfname.c_str());
00141
00142
unsigned char header[4];
00143 fread(
header,1,4,
indexf);
00144
if(
header[0]==
'L' ||
header[0]==
'B')
00145 {
00146
old_format =
false;
00147
swap_endians = (
header[0]!=
byte_order());
00148 }
00149
else if(
header[0]==16)
00150 {
00151
old_format =
true;
00152
if(
byte_order()!=
'L')
00153
PLERROR(
"Old format DiskVMatrix can only be read from a little-endian machine.\n"
00154
"Convert it to a new format on a little-endian machine prior to attempt\n"
00155
"using it from a big endian machine.\n");
00156
swap_endians =
false;
00157 }
00158
else
00159 {
00160
PLERROR(
"Wrong header byte in index file %s: ascii code %d\n"
00161
"(should be 'L' or 'B' or '...')\n", indexfname.c_str(),
header[0]);
00162 }
00163
00164 fread(&length_,
sizeof(
int),1,
indexf);
00165 fread(&width_,
sizeof(
int),1,
indexf);
00166
if(
swap_endians)
00167 {
00168
endianswap(&length_);
00169
endianswap(&width_);
00170 }
00171
int k=0;
00172
string fname =
dirname+
slash+
tostring(
k)+
".data";
00173
while(
isfile(
fname))
00174 {
00175 FILE* f = fopen(
fname.c_str(), omode.c_str());
00176
if(!f)
00177
PLERROR(
"In DiskVMatrix constructor, could not open file %s in specified mode",
fname.c_str());
00178
dataf.
append(f);
00179
fname =
dirname+
slash+
tostring(++
k)+
".data";
00180 }
00181
00182 current_row_index = -1;
00183 current_row.
resize(width_);
00184 other_row_index = -1;
00185 other_row.
resize(width_);
00186
00187
00188 map_sr =
TVec<map<string,real> >(width_);
00189 map_rs =
TVec<map<real,string> >(width_);
00190
00191
getFieldInfos();
00192
if (writable)
00193 fseek(
indexf, 0, SEEK_END);
00194 }
00195
else
00196 {
00197
if(
isdir(
dirname))
00198
PLERROR(
"In DiskVMatrix constructor (with specified width), directory %s already exists",
dirname.c_str());
00199 setMetaDataDir(
dirname +
".metadata");
00200 setMtime(
mtime(
append_slash(
dirname)+
"indexfile"));
00201
00202
if(
isfile(
dirname))
00203 unlink(
dirname.c_str());
00204
if(!
force_mkdir(
dirname))
00205
PLERROR(
"In DiskVMatrix constructor (with specified width), could not create directory %s Error was: %s",
dirname.c_str(), strerror(errno));
00206
00207
string indexfname =
dirname +
slash +
"indexfile";
00208
indexf = fopen(indexfname.c_str(),
"w+b");
00209
00210
char header[4];
00211
header[0] =
byte_order();
00212
header[1] =
' ';
00213
header[2] =
' ';
00214
header[3] =
' ';
00215 fwrite(
header,1,4,
indexf);
00216 fwrite((
char*)&length_,
sizeof(
int),1,
indexf);
00217 fwrite((
char*)&width_,
sizeof(
int),1,
indexf);
00218
00219
string fname =
dirname +
slash +
"0.data";
00220 FILE* f = fopen(
fname.c_str(),
"w+b");
00221
dataf.
append(f);
00222 }
00223
freshnewfile=
false;
00224 }
00225
00226 void DiskVMatrix::declareOptions(
OptionList &ol)
00227 {
00228
declareOption(ol,
"dirname", &DiskVMatrix::dirname, OptionBase::buildoption,
"Directory name of the.dmat");
00229
declareOption(ol,
"tolerance", &DiskVMatrix::tolerance, OptionBase::buildoption,
"The absolute error tolerance for storing doubles as floats");
00230 inherited::declareOptions(ol);
00231 }
00232
00233 void DiskVMatrix::getNewRow(
int i,
const Vec& v)
const
00234
{
00235
#ifdef BOUNDCHECK
00236
if(i<0 || i>
length())
00237
PLERROR(
"In DiskVMatrix::getNewRow, bad row number %d",i);
00238
if(v.
length() !=
width())
00239
PLERROR(
"In DiskVMatrix::getNewRow, length of v (%d) does not match matrix width (%d)",v.
length(),
width());
00240
#endif
00241
00242
unsigned char filenum;
00243
unsigned int position;
00244 fseek(
indexf,3*
sizeof(
int) + i*(
sizeof(
unsigned char)+
sizeof(
unsigned int)), SEEK_SET);
00245 fread(&filenum,
sizeof(
unsigned char),1,
indexf);
00246 fread(&position,
sizeof(
unsigned int),1,
indexf);
00247
if(
swap_endians)
00248
endianswap(&position);
00249 FILE* f =
dataf[int(filenum)];
00250 fseek(f,position,SEEK_SET);
00251
if(
old_format)
00252
binread_compressed(f,v.
data(),v.
length());
00253
else
00254
new_read_compressed(f, v.
data(), v.
length(),
swap_endians);
00255 }
00256
00257 void DiskVMatrix::putRow(
int i,
Vec v)
00258 {
00259
PLERROR(
"putRow cannot in general be correctly and efficiently implemented for a DiskVMatrix.\n"
00260
"Use appendRow if you wish to write more rows.");
00261 }
00262
00263 void DiskVMatrix::appendRow(
Vec v)
00264 {
00265
if(!writable)
00266
PLERROR(
"In DiskVMatrix::appendRow cannot append row in read only mode, set readwrite parameter to true when calling the constructor");
00267
if(v.
length() !=
width())
00268
PLERROR(
"In DiskVMatrix::appendRow, length of v (%d) does not match matrix width (%d)",v.
length(),
width());
00269
00270
int filenum =
dataf.
size()-1;
00271 FILE* f =
dataf[filenum];
00272 fseek(f,0,SEEK_END);
00273
unsigned int position = (
unsigned int)ftell(f);
00274
if(position>500000000L)
00275 {
00276 fflush(f);
00277 filenum++;
00278
string filename =
dirname +
slash +
tostring(filenum) +
".data";
00279 f = fopen(filename.c_str(),
"w+b");
00280 dataf.append(f);
00281 position = 0;
00282 }
00283
if(
old_format)
00284
binwrite_compressed(f,v.
data(),v.
length());
00285
else
00286
new_write_compressed(f, v.
data(),v.
length(),
tolerance,
swap_endians);
00287
00288 fseek(
indexf,0,SEEK_END);
00289 fputc(filenum,
indexf);
00290 fwrite((
char*)&position,
sizeof(
unsigned int),1,
indexf);
00291 length_++;
00292 fseek(
indexf,
sizeof(
int),SEEK_SET);
00293
int le = length_;
00294
if(
swap_endians)
00295
endianswap(&le);
00296 fwrite(&le,
sizeof(
int),1,
indexf);
00297 }
00298
00299 void DiskVMatrix::flush()
00300 {
00301
int filenum =
dataf.
size()-1;
00302 FILE* f =
dataf[filenum];
00303 fflush(f);
00304 fflush(
indexf);
00305 }
00306
00307 DiskVMatrix::~DiskVMatrix()
00308 {
00309
for(
int i=0; i<
dataf.
size(); i++)
00310 {
00311
if(
dataf[i])
00312 fclose(
dataf[i]);
00313 }
00314
00315
if(
indexf)
00316 fclose(
indexf);
00317
00318
saveFieldInfos();
00319 }
00320
00321
// PLearn object registration for DiskVMatrix. The two strings are the
// one-line description and the help text; both are still placeholders.
PLEARN_IMPLEMENT_OBJECT(DiskVMatrix, "ONE LINE DESCR", "NO HELP");
00322
00323
#ifdef WIN32
00324
#undef unlink
00325
#endif
00326
00327 }