00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
#include "FileVMatrix.h"
00042 
#include <plearn/io/fileutils.h>
00043 
00044 
namespace PLearn {
00045 
using namespace std;
00046 
00047 
00050 
// Invokes the PLEARN_IMPLEMENT_OBJECT macro for FileVMatrix (presumably the
// PLearn object-registration boilerplate -- confirm against the macro).
// NOTE(review): the description ("ONE LINE DESCR") and help ("NO HELP")
// strings look like unfilled template placeholders.
PLEARN_IMPLEMENT_OBJECT(FileVMatrix, 
"ONE LINE DESCR", 
"NO HELP");
00051 
00053 
00055 FileVMatrix::FileVMatrix()
00056   :filename_(""), f(0), build_new_file(false)
00057 {
00058   writable=
true;
00059 }
00060 
00061 FileVMatrix::FileVMatrix(
const string& filename, 
bool writable_)
00062   :filename_(
abspath(filename)), f(0), build_new_file(!
isfile(filename))
00063 {
00064   writable = writable_;
00065   
build_();
00066 }
00067 
//! File-local string length helper.
//! Returns the number of characters before the terminating NUL of 's'.
//! 's' must point to a NUL-terminated buffer.
static int strlen(char* s) {
  const char* cursor = s;
  while (*cursor != 0)
    ++cursor;
  return static_cast<int>(cursor - s);
}
00074 
00075 FileVMatrix::FileVMatrix(
const string& filename, 
int the_length, 
int the_width)
00076 : 
inherited(the_length, the_width), filename_(
abspath(filename)), f(0),
00077   build_new_file(true)
00078 {
00079   writable = 
true;
00080   
build_();
00081 }
00082 
00083 FileVMatrix::FileVMatrix(
const string& filename, 
int the_length, 
const TVec<string>& fieldnames)
00084 : 
inherited(the_length, fieldnames.length()), filename_(
abspath(filename)), f(0),
00085   build_new_file(true)
00086 {
00087   writable = 
true;
00088   
build_();
00089   declareFieldNames(fieldnames);
00090   
saveFieldInfos();
00091 }
00092 
00094 
00096 void FileVMatrix::build()
00097 {
00098   inherited::build();
00099   
build_();
00100 }
00101 
00103 
00105 void FileVMatrix::build_()
00106 {
00107   
00108   
00109   
00110   
00111 
00112   
00113   
if (
f) {
00114     fclose(
f);
00115   }
00116 
00117   
char header[
DATAFILE_HEADERLENGTH];
00118   
char matorvec[20];
00119   
char datatype[20];
00120   
char endiantype[20];
00121 
00122   
if (
build_new_file || !
isfile(
filename_))
00123     
force_mkdir_for_file(
filename_);
00124 
00125   setMetaDataDir(
filename_ + 
".metadata"); 
00126   setMtime(
mtime(
filename_));
00127 
00128   
if (
build_new_file || !
isfile(
filename_))
00129   {
00130     
if (!writable) {
00131       
PLERROR(
"In FileVMatrix::build_ - You asked to create a new file, but 'writable' is set to 0 !");
00132     }
00133     
f = fopen(
filename_.c_str(),
"w+b");
00134     
if (!
f)
00135       
PLERROR(
"In FileVMatrix constructor, could not open file %s",
filename_.c_str());
00136 
00137 
#ifdef USEFLOAT
00138 
    file_is_float = 
true;
00139 
#endif
00140 
#ifdef USEDOUBLE
00141 
    file_is_float = 
false;
00142 
#endif 
00143 
#ifdef LITTLEENDIAN
00144 
    file_is_bigendian = 
false; 
00145 
#endif
00146 
#ifdef BIGENDIAN
00147 
    file_is_bigendian = 
true; 
00148 
#endif
00149 
00150     
updateHeader();
00151     
00152     
if(length_ > 0 && width_ > 0) 
00153     {             
00154       
if( fseek(
f, 
DATAFILE_HEADERLENGTH+length_*width_*
sizeof(
real)-1, SEEK_SET) <0 )
00155       {
00156         perror(
"");
00157         
PLERROR(
"In FileVMatrix::build_ - Could not fseek to last byte");
00158       }
00159       fputc(
'\0',
f);
00160     }
00161   }
00162   
else
00163   {
00164     
if (writable)
00165       
f = fopen(
filename_.c_str(), 
"r+b");
00166     
else
00167       
f = fopen(
filename_.c_str(), 
"rb");
00168 
00169     
if (! 
f)
00170       
PLERROR(
"FileVMatrix::build: could not open file %s", 
filename_.c_str());
00171     
00172     fread(
header,
DATAFILE_HEADERLENGTH,1,
f);
00173     
if(
header[
DATAFILE_HEADERLENGTH-1]!=
'\n')
00174       
PLERROR(
"In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(0)");
00175     
int file_length, file_width;
00176     
bool need_update_header = 
false;
00177     sscanf(
header, 
"%s%d%d%s%s", matorvec, &file_length, &file_width, datatype, endiantype);
00178     
if (file_length == -1 && this->length_ >= 0 && writable) {
00179       
00180       
00181       
00182       need_update_header = 
true;
00183     } 
else if (file_length >= 0 && this->length_ >= 0 && file_length != this->length_) {
00184       
PLERROR(
"In FileVMatrix::build_ - Lengths of the VMatrix and of the file loaded differ");
00185     } 
else {
00186       this->length_ = file_length;
00187     }
00188 
00189     
if (file_width == -1 && this->width_ >= 0 && writable) {
00190       
00191       need_update_header = 
true;
00192     } 
else if (file_width >= 0 && this->width_ >= 0 && file_width != this->width_) {
00193       
PLERROR(
"In FileVMatrix::build_ - Widths of the VMatrix and of the file loaded differ");
00194     } 
else {
00195       this->width_ = file_width;
00196     }
00197 
00198     
if (need_update_header) {
00199       
updateHeader();
00200     }
00201 
00202     
if (strcmp(matorvec,
"MATRIX")!=0)
00203       
PLERROR(
"In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(1)");
00204 
00205     
if (strcmp(endiantype,
"LITTLE_ENDIAN")==0)
00206       
file_is_bigendian = 
false;
00207     
else if (strcmp(endiantype,
"BIG_ENDIAN")==0)
00208       
file_is_bigendian = 
true;
00209     
else
00210       
PLERROR(
"In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(2)");
00211 
00212     
if (strcmp(datatype,
"FLOAT")==0)
00213       
file_is_float = 
true;
00214     
else if (strcmp(datatype,
"DOUBLE")==0)
00215       
file_is_float = 
false;
00216     
else
00217       
PLERROR(
"In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(3)");
00218 
00219     
00220     
if (width_ >= 0) {
00221       map_sr = 
TVec<map<string,real> >(width_);
00222       map_rs = 
TVec<map<real,string> >(width_);
00223     }
00224   }
00225 
00226   
if (width_ >= 0) {
00227     
getFieldInfos();
00228   }
00229 }
00230 
00232 
00234 void FileVMatrix::declareOptions(
OptionList & ol)
00235 {
00236   
declareOption(ol, 
"filename", &FileVMatrix::filename_, OptionBase::buildoption, 
"Filename of the matrix");
00237 
00238   inherited::declareOptions(ol);
00239 }
00240 
00242 
00244 void FileVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00245 {
00246   inherited::makeDeepCopyFromShallowCopy(copies);
00247 
00248   
00249   
00250   
00251   
00252   
00253 
00254   
00255 
00256 
00257   
00258   
PLWARNING(
"FileVMatrix::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!");
00259 
00260   
f = 0;   
00261   
build(); 
00262 }
00263 
00265 
00267 FileVMatrix::~FileVMatrix()
00268 { 
00269   
saveFieldInfos();
00270   
if(
f) {
00271     fclose(
f); 
00272     
00273   }
00274 }
00275 
00277 
00279 void FileVMatrix::getNewRow(
int i, 
const Vec& v)
 const
00280 
{
00281   
if(
file_is_float)
00282   {
00283     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_)*
sizeof(
float), SEEK_SET);
00284     
fread_float(
f, v.
data(), v.
length(), 
file_is_bigendian);
00285   }
00286   
else
00287   {
00288     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_)*
sizeof(
double), SEEK_SET);
00289     
fread_double(
f, v.
data(), v.
length(), 
file_is_bigendian);
00290   }  
00291 }
00292 
00294 
00296 void FileVMatrix::putSubRow(
int i, 
int j, 
Vec v)
00297 {
00298   
if(
file_is_float)
00299   {
00300     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_+j)*
sizeof(
float), SEEK_SET);
00301     
fwrite_float(
f, v.
data(), v.
length(), 
file_is_bigendian);
00302   }
00303   
else
00304   {
00305     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_+j)*
sizeof(
double), SEEK_SET);
00306     
fwrite_double(
f, v.
data(), v.
length(), 
file_is_bigendian);
00307   }  
00308 }
00309 
00311 
00313 void FileVMatrix::put(
int i, 
int j, 
real value)
00314 {
00315   
if(
file_is_float)
00316   {
00317     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_+j)*
sizeof(
float), SEEK_SET);
00318     
fwrite_float(
f,
float(value),
file_is_bigendian);
00319   }
00320   
else
00321   {
00322     fseek(
f, 
DATAFILE_HEADERLENGTH+(i*width_+j)*
sizeof(
double), SEEK_SET);
00323     
fwrite_double(
f,
double(value),
file_is_bigendian);
00324   }
00325 }
00326 
00328 
00330 void FileVMatrix::appendRow(
Vec v)
00331 {
00332   
if(
file_is_float)
00333   {
00334     fseek(
f,
DATAFILE_HEADERLENGTH+length_*width_*
sizeof(
float), SEEK_SET);
00335     
fwrite_float(
f, v.
data(), v.
length(), 
file_is_bigendian);
00336   }
00337   
else
00338   {
00339     fseek(
f,
DATAFILE_HEADERLENGTH+length_*width_*
sizeof(
double), SEEK_SET);
00340     
fwrite_double(
f, v.
data(), v.
length(), 
file_is_bigendian);
00341   }
00342   length_++;
00343 
00344   
updateHeader();
00345 }
00346 
00348 
00350 void FileVMatrix::flush()
00351 {
00352   fflush(
f);
00353 }
00354 
00356 
00358 void FileVMatrix::updateHeader() {
00359   
char header[
DATAFILE_HEADERLENGTH]; 
00360 
#ifdef USEFLOAT
00361 
#ifdef LITTLEENDIAN
00362 
  sprintf(
header,
"MATRIX %d %d FLOAT LITTLE_ENDIAN", length_, width_);
00363 
#endif
00364 
#ifdef BIGENDIAN
00365 
  sprintf(
header,
"MATRIX %d %d FLOAT BIG_ENDIAN", length_, width_);
00366 
#endif
00367 
#endif
00368 
#ifdef USEDOUBLE
00369 
#ifdef LITTLEENDIAN
00370 
  sprintf(
header,
"MATRIX %d %d DOUBLE LITTLE_ENDIAN", length_, width_);
00371 
#endif
00372 
#ifdef BIGENDIAN
00373 
  sprintf(
header,
"MATRIX %d %d DOUBLE BIG_ENDIAN", length_, width_);
00374 
#endif
00375 
#endif
00376 
  int pos = 
strlen(
header);
00377   
for(; pos<
DATAFILE_HEADERLENGTH; pos++)
00378     {
00379       
header[pos] = 
' ';
00380     }
00381   
header[
DATAFILE_HEADERLENGTH-1] = 
'\n';
00382   fseek(
f,0,SEEK_SET);
00383   fwrite(
header,1,
DATAFILE_HEADERLENGTH,
f);
00384 }
00385 
00386 }