Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

FileVMatrix.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2001 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal 00006 // Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 /* ******************************************************* 00038 * $Id: FileVMatrix.cc,v 1.24 2004/08/16 15:45:44 dorionc Exp $ 00039 ******************************************************* */ 00040 00041 #include "FileVMatrix.h" 00042 #include <plearn/io/fileutils.h> 00043 00044 namespace PLearn { 00045 using namespace std; 00046 00047 00050 PLEARN_IMPLEMENT_OBJECT(FileVMatrix, "ONE LINE DESCR", "NO HELP"); 00051 00053 // FileVMatrix // 00055 FileVMatrix::FileVMatrix() 00056 :filename_(""), f(0), build_new_file(false) 00057 { 00058 writable=true; 00059 } 00060 00061 FileVMatrix::FileVMatrix(const string& filename, bool writable_) 00062 :filename_(abspath(filename)), f(0), build_new_file(!isfile(filename)) 00063 { 00064 writable = writable_; 00065 build_(); 00066 } 00067 00068 static int strlen(char* s) { 00069 int n=0; 00070 while (s[n]!=0) 00071 n++; 00072 return n; 00073 } 00074 00075 FileVMatrix::FileVMatrix(const string& filename, int the_length, int the_width) 00076 : inherited(the_length, the_width), filename_(abspath(filename)), f(0), 00077 build_new_file(true) 00078 { 00079 writable = true; 00080 build_(); 00081 } 00082 00083 FileVMatrix::FileVMatrix(const string& filename, int the_length, const TVec<string>& fieldnames) 00084 : inherited(the_length, fieldnames.length()), filename_(abspath(filename)), f(0), 00085 build_new_file(true) 00086 { 00087 writable = true; 00088 build_(); 00089 declareFieldNames(fieldnames); 00090 saveFieldInfos(); 00091 } 00092 00094 // build // 00096 void FileVMatrix::build() 00097 { 00098 inherited::build(); 00099 build_(); 00100 } 00101 00103 // build_ // 00105 void FileVMatrix::build_() 00106 { 00107 00108 // Code below is commented because the filename may have been changed, 00109 // in which case f should be modified. 00110 // TODO note that since it's a FILE*, there will probably be some memory leak. 00111 // if (f) return; // file already built 00112 // Since we are going to re-create it, we can close the current f. 00113 if (f) { 00114 fclose(f); 00115 } 00116 00117 char header[DATAFILE_HEADERLENGTH]; 00118 char matorvec[20]; 00119 char datatype[20]; 00120 char endiantype[20]; 00121 00122 if (build_new_file || !isfile(filename_)) 00123 force_mkdir_for_file(filename_); 00124 00125 setMetaDataDir(filename_ + ".metadata"); 00126 setMtime(mtime(filename_)); 00127 00128 if (build_new_file || !isfile(filename_)) 00129 { 00130 if (!writable) { 00131 PLERROR("In FileVMatrix::build_ - You asked to create a new file, but 'writable' is set to 0 !"); 00132 } 00133 f = fopen(filename_.c_str(),"w+b"); 00134 if (!f) 00135 PLERROR("In FileVMatrix constructor, could not open file %s",filename_.c_str()); 00136 00137 #ifdef USEFLOAT 00138 file_is_float = true; 00139 #endif 00140 #ifdef USEDOUBLE 00141 file_is_float = false; 00142 #endif 00143 #ifdef LITTLEENDIAN 00144 file_is_bigendian = false; 00145 #endif 00146 #ifdef BIGENDIAN 00147 file_is_bigendian = true; 00148 #endif 00149 00150 updateHeader(); 00151 00152 if(length_ > 0 && width_ > 0) //ensure we can allocate enough space... if len>0, to ensure 00153 { // that the header ends with a '\n'. 00154 if( fseek(f, DATAFILE_HEADERLENGTH+length_*width_*sizeof(real)-1, SEEK_SET) <0 ) 00155 { 00156 perror(""); 00157 PLERROR("In FileVMatrix::build_ - Could not fseek to last byte"); 00158 } 00159 fputc('\0',f); 00160 } 00161 } 00162 else 00163 { 00164 if (writable) 00165 f = fopen(filename_.c_str(), "r+b"); 00166 else 00167 f = fopen(filename_.c_str(), "rb"); 00168 00169 if (! f) 00170 PLERROR("FileVMatrix::build: could not open file %s", filename_.c_str()); 00171 00172 fread(header,DATAFILE_HEADERLENGTH,1,f); 00173 if(header[DATAFILE_HEADERLENGTH-1]!='\n') 00174 PLERROR("In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(0)"); 00175 int file_length, file_width; 00176 bool need_update_header = false; 00177 sscanf(header, "%s%d%d%s%s", matorvec, &file_length, &file_width, datatype, endiantype); 00178 if (file_length == -1 && this->length_ >= 0 && writable) { 00179 // The length set in the file is not valid, but we have specified a length. 00180 // This can happen if build() has been called once before the sizes have 00181 // been specified. In this case we must modify the file's length. 00182 need_update_header = true; 00183 } else if (file_length >= 0 && this->length_ >= 0 && file_length != this->length_) { 00184 PLERROR("In FileVMatrix::build_ - Lengths of the VMatrix and of the file loaded differ"); 00185 } else { 00186 this->length_ = file_length; 00187 } 00188 00189 if (file_width == -1 && this->width_ >= 0 && writable) { 00190 // Same as above, but for the width. 00191 need_update_header = true; 00192 } else if (file_width >= 0 && this->width_ >= 0 && file_width != this->width_) { 00193 PLERROR("In FileVMatrix::build_ - Widths of the VMatrix and of the file loaded differ"); 00194 } else { 00195 this->width_ = file_width; 00196 } 00197 00198 if (need_update_header) { 00199 updateHeader(); 00200 } 00201 00202 if (strcmp(matorvec,"MATRIX")!=0) 00203 PLERROR("In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(1)"); 00204 00205 if (strcmp(endiantype,"LITTLE_ENDIAN")==0) 00206 file_is_bigendian = false; 00207 else if (strcmp(endiantype,"BIG_ENDIAN")==0) 00208 file_is_bigendian = true; 00209 else 00210 PLERROR("In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(2)"); 00211 00212 if (strcmp(datatype,"FLOAT")==0) 00213 file_is_float = true; 00214 else if (strcmp(datatype,"DOUBLE")==0) 00215 file_is_float = false; 00216 else 00217 PLERROR("In FileVMatrix constructor, wrong header for PLearn binary matrix format. Please use checkheader (in PLearn/Scripts) to check the file.(3)"); 00218 00219 //resize the string mappings 00220 if (width_ >= 0) { 00221 map_sr = TVec<map<string,real> >(width_); 00222 map_rs = TVec<map<real,string> >(width_); 00223 } 00224 } 00225 00226 if (width_ >= 0) { 00227 getFieldInfos(); 00228 } 00229 } 00230 00232 // declareOptions // 00234 void FileVMatrix::declareOptions(OptionList & ol) 00235 { 00236 declareOption(ol, "filename", &FileVMatrix::filename_, OptionBase::buildoption, "Filename of the matrix"); 00237 00238 inherited::declareOptions(ol); 00239 } 00240 00242 // makeDeepCopyFromShallowCopy // 00244 void FileVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00245 { 00246 inherited::makeDeepCopyFromShallowCopy(copies); 00247 00248 // ### Call deepCopyField on all "pointer-like" fields 00249 // ### that you wish to be deepCopied rather than 00250 // ### shallow-copied. 00251 // ### ex: 00252 // deepCopyField(trainvec, copies); 00253 00254 // TODO Copy correctly the field FILE* f. 00255 // deepCopyField(f, copies); 00256 00257 // Not an error because we may want to do some deep-copying sometimes. 00258 PLWARNING("FileVMatrix::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!"); 00259 00260 f = 0; // Because we will open again the file (f should not be shared). 00261 build(); // To open the file. 00262 } 00263 00265 // ~FileVMatrix // 00267 FileVMatrix::~FileVMatrix() 00268 { 00269 saveFieldInfos(); 00270 if(f) { 00271 fclose(f); 00272 // TODO Shouldn't we also delete f ? 00273 } 00274 } 00275 00277 // getNewRow // 00279 void FileVMatrix::getNewRow(int i, const Vec& v) const 00280 { 00281 if(file_is_float) 00282 { 00283 fseek(f, DATAFILE_HEADERLENGTH+(i*width_)*sizeof(float), SEEK_SET); 00284 fread_float(f, v.data(), v.length(), file_is_bigendian); 00285 } 00286 else 00287 { 00288 fseek(f, DATAFILE_HEADERLENGTH+(i*width_)*sizeof(double), SEEK_SET); 00289 fread_double(f, v.data(), v.length(), file_is_bigendian); 00290 } 00291 } 00292 00294 // putSubRow // 00296 void FileVMatrix::putSubRow(int i, int j, Vec v) 00297 { 00298 if(file_is_float) 00299 { 00300 fseek(f, DATAFILE_HEADERLENGTH+(i*width_+j)*sizeof(float), SEEK_SET); 00301 fwrite_float(f, v.data(), v.length(), file_is_bigendian); 00302 } 00303 else 00304 { 00305 fseek(f, DATAFILE_HEADERLENGTH+(i*width_+j)*sizeof(double), SEEK_SET); 00306 fwrite_double(f, v.data(), v.length(), file_is_bigendian); 00307 } 00308 } 00309 00311 // put // 00313 void FileVMatrix::put(int i, int j, real value) 00314 { 00315 if(file_is_float) 00316 { 00317 fseek(f, DATAFILE_HEADERLENGTH+(i*width_+j)*sizeof(float), SEEK_SET); 00318 fwrite_float(f,float(value),file_is_bigendian); 00319 } 00320 else 00321 { 00322 fseek(f, DATAFILE_HEADERLENGTH+(i*width_+j)*sizeof(double), SEEK_SET); 00323 fwrite_double(f,double(value),file_is_bigendian); 00324 } 00325 } 00326 00328 // appendRow // 00330 void FileVMatrix::appendRow(Vec v) 00331 { 00332 if(file_is_float) 00333 { 00334 fseek(f,DATAFILE_HEADERLENGTH+length_*width_*sizeof(float), SEEK_SET); 00335 fwrite_float(f, v.data(), v.length(), file_is_bigendian); 00336 } 00337 else 00338 { 00339 fseek(f,DATAFILE_HEADERLENGTH+length_*width_*sizeof(double), SEEK_SET); 00340 fwrite_double(f, v.data(), v.length(), file_is_bigendian); 00341 } 00342 length_++; 00343 00344 updateHeader(); 00345 } 00346 00348 // flush // 00350 void FileVMatrix::flush() 00351 { 00352 fflush(f); 00353 } 00354 00356 // updateHeader // 00358 void FileVMatrix::updateHeader() { 00359 char header[DATAFILE_HEADERLENGTH]; 00360 #ifdef USEFLOAT 00361 #ifdef LITTLEENDIAN 00362 sprintf(header,"MATRIX %d %d FLOAT LITTLE_ENDIAN", length_, width_); 00363 #endif 00364 #ifdef BIGENDIAN 00365 sprintf(header,"MATRIX %d %d FLOAT BIG_ENDIAN", length_, width_); 00366 #endif 00367 #endif 00368 #ifdef USEDOUBLE 00369 #ifdef LITTLEENDIAN 00370 sprintf(header,"MATRIX %d %d DOUBLE LITTLE_ENDIAN", length_, width_); 00371 #endif 00372 #ifdef BIGENDIAN 00373 sprintf(header,"MATRIX %d %d DOUBLE BIG_ENDIAN", length_, width_); 00374 #endif 00375 #endif 00376 int pos = strlen(header); 00377 for(; pos<DATAFILE_HEADERLENGTH; pos++) 00378 { 00379 header[pos] = ' '; 00380 } 00381 header[DATAFILE_HEADERLENGTH-1] = '\n'; 00382 fseek(f,0,SEEK_SET); 00383 fwrite(header,1,DATAFILE_HEADERLENGTH,f); 00384 } 00385 00386 } // end of namespcae PLearn

Generated on Tue Aug 17 15:52:59 2004 for PLearn by doxygen 1.3.7