Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

OldDiskVMatrix.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2001 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal 00006 // Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 /* ******************************************************* 00038 * $Id: DiskVMatrix.cc,v 1.6 2003/08/13 08:13:46 plearner Exp $ 00039 ******************************************************* */ 00040 00041 #include "DiskVMatrix.h" 00042 #include "VecCompressor.h" 00043 00044 namespace PLearn <% 00045 using namespace std; 00046 00047 00048 00051 DiskVMatrix::DiskVMatrix(const string& the_dirname, bool readwrite) 00052 : readwritemode(readwrite),freshnewfile(false), 00053 dirname(remove_trailing_slash(the_dirname)) 00054 { 00055 build_(); 00056 } 00057 00058 DiskVMatrix::DiskVMatrix(const string& the_dirname, int the_width, bool write_double_as_float) 00059 : RowBufferedVMatrix(0,the_width),readwritemode(true), 00060 freshnewfile(true),dirname(remove_trailing_slash(the_dirname)) 00061 { 00062 build_(); 00063 } 00064 00065 void DiskVMatrix::writeRow(ostream& out, const Vec& v) 00066 { VecCompressor::writeCompressedVec(out,v); } 00067 00068 void DiskVMatrix::readRow(istream& in, const Vec& v) 00069 { VecCompressor::readCompressedVec(in, v); } 00070 00071 void DiskVMatrix::build() 00072 { 00073 inherited::build(); 00074 build_(); 00075 } 00076 00077 void DiskVMatrix::build_() 00078 { 00079 if(!freshnewfile) 00080 { 00081 if(!isdir(dirname)) 00082 PLERROR("In DiskVMatrix constructor, directory %s could not be found",dirname.c_str()); 00083 setMetaDataDir(dirname + ".metadata"); 00084 setMtime(mtime(append_slash(dirname)+"indexfile")); 00085 ios::openmode omode; 00086 if(readwritemode) 00087 omode = ios::in | ios::out | ios::binary; 00088 else // read-only 00089 omode = ios::in | ios::binary; 00090 00091 string indexfname = dirname+"/indexfile"; 00092 indexf = new fstream(); 00093 indexf->open(indexfname.c_str(), omode); 00094 if(!*indexf) 00095 PLERROR("In DiskVMatrix constructor, could not open file %s in specified mode", indexfname.c_str()); 00096 00097 int header; 00098 indexf->read((char*)&header,sizeof(int)); 00099 indexf->read((char*)&length_,sizeof(int)); 00100 indexf->read((char*)&width_,sizeof(int)); 00101 00102 int k=0; 00103 string fname = dirname+"/"+tostring(k)+".data"; 00104 while(isfile(fname)) 00105 { 00106 fstream* f = new fstream(); 00107 f->open(fname.c_str(), omode); 00108 if(!(*f)) 00109 PLERROR("In DiskVMatrix constructor, could not open file %s in specified mode", fname.c_str()); 00110 dataf.append(f); 00111 fname = dirname+"/"+tostring(++k)+".data"; 00112 } 00113 // Stuff related to RowBufferedVMatrix, for consistency 00114 current_row_index = -1; 00115 current_row.resize(width_); 00116 other_row_index = -1; 00117 other_row.resize(width_); 00118 00119 //resize the string mappings 00120 map_sr = TVec<map<string,real> >(width_); 00121 map_rs = TVec<map<real,string> >(width_); 00122 00123 getFieldInfos(); 00124 } 00125 else 00126 { 00127 if(isdir(dirname)) 00128 PLERROR("In DiskVMatrix constructor (with specified width), directory %s already exists",dirname.c_str()); 00129 setMetaDataDir(dirname + ".metadata"); 00130 setMtime(mtime(append_slash(dirname)+"indexfile")); 00131 00132 //ios::openmode omode; 00133 if(isfile(dirname)) // patch for running mkstemp (TmpFilenames) 00134 unlink(dirname.c_str()); 00135 if(!force_mkdir(dirname)) // force directory creation 00136 PLERROR("In DiskVMatrix constructor (with specified width), could not create directory %s Error was: %s",dirname.c_str(), strerror(errno)); 00137 00138 string indexfname = dirname + "/indexfile"; 00139 indexf = new fstream(); 00140 indexf->open(indexfname.c_str(),ios::in | ios::out | ios::trunc | ios::binary); 00141 00142 int header = 123408; 00143 indexf->write((char*)&header,sizeof(int)); 00144 indexf->write((char*)&length_,sizeof(int)); 00145 indexf->write((char*)&width_,sizeof(int)); 00146 00147 string fname = dirname + "/0.data"; 00148 // These two line don't work (core dump!) with our actual libraries (sigh!) 00149 fstream* f = new fstream(); 00150 f->open(fname.c_str(), ios::in | ios::out | ios::trunc | ios::binary); 00151 dataf.append(f); 00152 } 00153 freshnewfile=false; 00154 } 00155 00156 void DiskVMatrix::declareOptions(OptionList &ol) 00157 { 00158 declareOption(ol, "dirname", &DiskVMatrix::dirname, OptionBase::buildoption, "Directory name of the.dmat"); 00159 inherited::declareOptions(ol); 00160 } 00161 00162 void DiskVMatrix::getRow(int i, Vec v) const 00163 { 00164 #ifdef BOUNDCHECK 00165 if(i<0 || i>length()) 00166 PLERROR("In DiskVMatrix::getRow, bad row number %d",i); 00167 if(v.length() != width()) 00168 PLERROR("In DiskVMatrix::getRow, length of v (%d) does not match matrix width (%d)",v.length(),width()); 00169 #endif 00170 00171 unsigned char filenum; 00172 unsigned int position; 00173 indexf->seekg(3*sizeof(int) + i*(sizeof(unsigned char)+sizeof(unsigned int))); 00174 indexf->get((char&)filenum); 00175 indexf->read((char*)&position,sizeof(unsigned int)); 00176 fstream* f = dataf[int(filenum)]; 00177 f->seekg(position); 00178 binread_compressed(*f,v.data(),v.length()); 00179 } 00180 00181 void DiskVMatrix::putRow(int i, Vec v) 00182 { 00183 #ifdef BOUNDCHECK 00184 if(i<0 || i>length()) 00185 PLERROR("In DiskVMatrix::putRow, bad row number %d",i); 00186 if(v.length() != width()) 00187 PLERROR("In DiskVMatrix::putRow, length of v (%d) does not match matrix width (%d)",v.length(),width()); 00188 #endif 00189 00190 unsigned char filenum; 00191 unsigned int position; 00192 indexf->seekg(3*sizeof(int) + i*(sizeof(unsigned char)+sizeof(unsigned int))); 00193 indexf->get((char&)filenum); 00194 indexf->read((char*)&position,sizeof(unsigned int)); 00195 fstream* f = dataf[int(filenum)]; 00196 f->seekp(position); 00197 binwrite_compressed(*f,v.data(), v.length()); 00198 } 00199 00200 void DiskVMatrix::appendRow(Vec v) 00201 { 00202 if(!readwritemode) 00203 PLERROR("In DiskVMatrix::appendRow cannot append row in read only mode, set readwrite parameter to true when calling the constructor"); 00204 if(v.length() != width()) 00205 PLERROR("In DiskVMatrix::appendRow, length of v (%d) does not match matrix width (%d)",v.length(),width()); 00206 00207 int filenum = dataf.size()-1; 00208 fstream* f = dataf[filenum]; 00209 f->seekp(0,ios::end); 00210 unsigned int position = f->tellp(); 00211 if(position>500000000L) 00212 { 00213 filenum++; 00214 string filename = dirname + "/" + tostring(filenum) + ".data"; 00215 f = new fstream(); 00216 f->open(filename.c_str(), ios::in | ios::out | ios::trunc | ios::binary); 00217 dataf.append(f); 00218 position = 0; 00219 } 00220 binwrite_compressed(*f,v.data(),v.length()); 00221 indexf->seekp(0,ios::end); 00222 indexf->put((unsigned char)filenum); 00223 indexf->write((char*)&position,sizeof(unsigned int)); 00224 length_++; 00225 indexf->seekp(sizeof(int),ios::beg); 00226 indexf->write((char*)&length_,sizeof(int)); 00227 // indexf.flush(); 00228 } 00229 00230 void DiskVMatrix::flush() 00231 { 00232 int filenum = dataf.size()-1; 00233 fstream* f = dataf[filenum]; 00234 f->flush(); 00235 indexf->flush(); 00236 } 00237 00238 DiskVMatrix::~DiskVMatrix() 00239 { 00240 for(int i=0; i<dataf.size(); i++) 00241 delete dataf[i]; 00242 delete indexf; 00243 saveFieldInfos(); 00244 } 00245 00246 PLEARN_IMPLEMENT_OBJECT(DiskVMatrix, "ONE LINE DESCR", "NO HELP"); 00247 00248 00249 %> // end of namespcae PLearn

Generated on Tue Aug 17 16:00:06 2004 for PLearn by doxygen 1.3.7