Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

VVMatrix.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // VVMatrix.cc 00004 // Copyright (C) 2002 Pascal Vincent and Julien Keable 00005 // Copyright (C) 2003 Olivier Delalleau 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: VVMatrix.cc,v 1.26 2004/08/09 16:16:39 tihocan Exp $ 00037 * This file is part of the PLearn library. 00038 ******************************************************* */ 00039 00040 #include <plearn/db/getDataSet.h> 00041 #include <plearn/io/IntVecFile.h> 00042 #include <plearn/math/random.h> 00043 #include "ConcatColumnsVMatrix.h" 00044 #include "ConcatRowsVMatrix.h" 00045 #include "DiskVMatrix.h" 00046 #include "FileVMatrix.h" 00047 #include "JoinVMatrix.h" 00048 #include "SelectRowsFileIndexVMatrix.h" 00049 #include "VMatLanguage.h" 00050 #include "VVMatrix.h" 00051 00052 #ifdef WIN32 00053 #include <time.h> 00054 #else 00055 #include <unistd.h> 00056 #endif 00057 00058 #define NEW_SYNTAX_CHAR '@' 00059 00060 namespace PLearn { 00061 using namespace std; 00062 00063 PLEARN_IMPLEMENT_OBJECT(VVMatrix, 00064 "A VMat that reads a '.vmat' file.", 00065 "" 00066 ); 00067 00069 // declareOptions // 00071 void VVMatrix::declareOptions(OptionList &ol) 00072 { 00073 declareOption(ol, "filename", &VVMatrix::the_filename, OptionBase::buildoption, "Path to the .vmat file"); 00074 inherited::declareOptions(ol); 00075 } 00076 00077 00078 // vmat genfilterindex source.pmat toto.pvm toto.indexes 00079 00080 void VVMatrix::generateFilterIndexFile(VMat source, const string & code, const string& ivfname) 00081 { 00082 rm(ivfname); 00083 IntVecFile ivf(ivfname,true); 00084 VMatLanguage filt(source); 00085 vector<string> fn; 00086 for(int i=0;i<source.width();i++) 00087 fn.push_back(source->fieldName(i)); 00088 filt.compileString(code,fn); 00089 Vec bla(1); 00090 Vec src(source.width()); 00091 ProgressBar pb(cerr,"Filtering",source.length()); 00092 for(int i=0;i<source.length();i++) 00093 { 00094 filt.run(i,bla); 00095 if(bla[0]) 00096 ivf.append(i); 00097 pb(i); 00098 } 00099 ivf.close(); 00100 } 00101 00102 VMat VVMatrix::buildFilteredVMatFromVPL(VMat source, const string & code, const string& ivfname, time_t date_of_code) 00103 { 00104 if(getNonBlankLines(code).empty()) 00105 return source; 00106 if(!isfile(ivfname) || mtime(ivfname) < date_of_code) 00107 generateFilterIndexFile(source, code, ivfname); 00108 IntVecFile ivf(ivfname,true); 00109 return new SelectRowsFileIndexVMatrix(source, ivfname); 00110 } 00111 00112 00113 vector<vector<string> > VVMatrix::extractSourceMatrix(const string & str,const string& filename) 00114 { 00115 vector<vector<string> > mstr; 00116 if (str[0] == NEW_SYNTAX_CHAR) { 00117 // We are using the new syntax : we don't care about this for now, 00118 // too bad for the getDate method ! 00119 return mstr; 00120 } 00121 vector<string> lines=getNonBlankLines(str); 00122 for(unsigned int i=0;i<lines.size();i++) 00123 mstr.push_back(split(lines[i],"|")); 00124 00125 for(unsigned int i=0;i<mstr.size();i++) 00126 for(unsigned int j=0;j<mstr[i].size();j++) 00127 { 00128 string srcstr = removeblanks(mstr[i][j]); 00129 if(srcstr[0]!=slash_char) 00130 { 00131 string potential_dir = extract_directory(filename); 00132 size_t p = srcstr.find(":"); 00133 string potential_path = potential_dir + srcstr.substr(0,p); 00134 if(file_exists(potential_path)) 00135 srcstr=potential_dir+srcstr; 00136 } 00137 mstr[i][j] = srcstr; 00138 } 00139 return mstr; 00140 } 00141 00142 time_t VVMatrix::getDateOfVMat(const string& filename) 00143 { 00144 string in=readFileAndMacroProcess(filename); 00145 size_t idx_source = in.find("<SOURCES>"); 00146 size_t cidx_source; 00147 00148 time_t latest = getDateOfCode(filename),tmp; 00149 00150 if(idx_source!=string::npos) 00151 { 00152 idx_source += strlen("<SOURCES>"); // skip beyond 00153 cidx_source=in.find("</SOURCES>"); 00154 if(cidx_source==string::npos) 00155 PLERROR("Cannot find closing tag </SOURCES>. File is %s",filename.c_str()); 00156 string sec=in.substr(idx_source,cidx_source-idx_source); 00157 vector<vector<string> > mstr = extractSourceMatrix(sec,filename); 00158 // TODO Make it work with the new syntax 00159 for(unsigned int i=0;i<mstr.size();i++) 00160 for(unsigned int j=0;j<mstr[i].size();j++) 00161 { 00162 vector<string> vecstr; 00163 vecstr=split(mstr[i][j],":"); 00164 if(vecstr.size()==2 || vecstr.size()==4) 00165 { 00166 // also check for date of possible IntVecFile 00167 if(vecstr[1][0]!=slash_char) 00168 vecstr[1]=extract_directory(filename)+vecstr[1]; 00169 if((tmp=mtime(vecstr[1])) > latest) 00170 latest=tmp; 00171 } 00172 if((tmp=getDataSetDate(vecstr[0])) > latest) 00173 latest=tmp; 00174 } 00175 } 00176 return latest; 00177 } 00178 00179 // returns the result from the join operation 00180 void VVMatrix::processJoinSection(const vector<string> & code, VMat & tmpsource) 00181 { 00182 unsigned int i=0; 00183 while(i<code.size()) 00184 { 00185 while(isBlank(code[i]))i++; 00186 VMat slave=getDataSet(code[i++]); 00187 vector<string> mess=split(code[i++],"[]"); 00188 if(mess.size()!=3) 00189 PLERROR("In JOIN section, field correspondance syntax is : [master1,master2] -> [slave1,slave2]"); 00190 vector<string> mfields=split(mess[0],","); 00191 vector<string> sfields=split(mess[2],","); 00192 TVec<int> midx(mfields.size()),sidx(sfields.size()); 00193 for(unsigned int j=0;j<mfields.size();j++) 00194 midx[j]=tmpsource->fieldIndex(mfields[j]); 00195 for(unsigned int j=0;j<sfields.size();j++) 00196 sidx[j]=slave->fieldIndex(sfields[j]); 00197 00198 JoinVMatrix * jvm= new JoinVMatrix(tmpsource,slave,midx,sidx); 00199 00200 // browse field declarations of the <JOIN> section to declare new fields in the resulting JoinVMatrix 00201 00202 while(i<code.size() && code[i].find(":")!=string::npos) 00203 { 00204 vector<string> crunch=split(code[i],":"); 00205 if(crunch.size()!=2)PLERROR("In JOIN section : field declaration section syntax incorrect. EG: MEAN(master1) :newfieldname"); 00206 vector<string> st=split(removeblanks(crunch[0]),"()"); 00207 if(st.size()!=2)PLERROR("In JOIN section : field declaration section syntax incorrect. EG: MEAN(master1) :newfieldname"); 00208 jvm->addStatField(removeblanks(st[0]),removeblanks(st[1]),removeblanks(crunch[1])); 00209 i++; 00210 } 00211 while(i<code.size() && isBlank(code[i]))i++; 00212 tmpsource=jvm; 00213 } 00214 } 00215 00216 // generate a file (ivfname) containing indexes of rows of 'source' that remain after filtering with 00217 // the *every* possible step that changes the index of rows (i.e : prefilter, shuffle.. postfiltering) 00218 // -- Not optimal, since it will first *precompute* if any postfilter is required 00219 void VVMatrix::generateVMatIndex(VMat source, const string& meta_data_dir, 00220 const string & filename, time_t date_of_code,const string & in, 00221 size_t idx_prefilter, size_t cidx_prefilter, 00222 size_t idx_postfilter, size_t cidx_postfilter, 00223 size_t idx_process, size_t cidx_process, 00224 size_t idx_shuffle, size_t cidx_shuffle, 00225 size_t idx_join, size_t cidx_join) 00226 { 00227 VMat tmpsource = source; 00228 rm(meta_data_dir+slash+"source.indexes"); 00229 00230 if(idx_prefilter!=string::npos) 00231 { 00232 idx_prefilter+=11; 00233 cidx_prefilter=in.find("</PREFILTER>"); 00234 if(cidx_prefilter==string::npos) 00235 PLERROR("Cannot find closing tag </PREFILTER>. File is %s",filename.c_str()); 00236 string code=in.substr(idx_prefilter,cidx_prefilter-idx_prefilter); 00237 cout<<"Prefiltering.. "<<endl; 00238 tmpsource = buildFilteredVMatFromVPL(tmpsource,code,meta_data_dir+slash+"incomplete.source.prefilter.indexes",date_of_code); 00239 } 00240 00241 if(idx_join!=string::npos) 00242 { 00243 cidx_join=in.find("</JOIN>"); 00244 if(cidx_join==string::npos) 00245 PLERROR("Cannot find closing tag </JOIN>. File is %s",filename.c_str()); 00246 vector<string> code=split(in.substr(idx_join+6,cidx_join-idx_join-6),"\n"); 00247 processJoinSection(code,tmpsource); 00248 } 00249 00250 if(idx_process!=string::npos) 00251 { 00252 cidx_process=in.find("</PROCESSING>"); 00253 if(cidx_process==string::npos) 00254 PLERROR("Cannot find closing tag </PROCESSING>. File is %s",filename.c_str()); 00255 string code=in.substr(idx_process+12,cidx_process-idx_process-12); 00256 tmpsource = new PreprocessingVMatrix(tmpsource,code); 00257 } 00258 00259 if(idx_postfilter!=string::npos) 00260 { 00261 idx_postfilter+=12; 00262 cidx_postfilter=in.find("</POSTFILTER>"); 00263 if(cidx_postfilter==string::npos) 00264 PLERROR("Cannot find closing tag </POSTFILTER>. File is %s",filename.c_str()); 00265 string code=in.substr(idx_postfilter,cidx_postfilter-idx_postfilter); 00266 cout<<"Postfiltering.. "<<endl; 00267 tmpsource = buildFilteredVMatFromVPL(tmpsource,code,meta_data_dir+slash+"incomplete.source.postfilter.indexes",date_of_code); 00268 } 00269 00270 // combines pre and postfilter index file in a single one 00271 if(isfile(meta_data_dir+slash+"incomplete.source.prefilter.indexes")) 00272 if(isfile(meta_data_dir+slash+"incomplete.source.postfilter.indexes")) 00273 { 00274 IntVecFile ivf(meta_data_dir+slash+"source.indexes",true); 00275 IntVecFile preivf(meta_data_dir+slash+"incomplete.source.prefilter.indexes"); 00276 IntVecFile postivf(meta_data_dir+slash+"incomplete.source.postfilter.indexes"); 00277 for(int i=0;i<postivf.length();i++) 00278 ivf.append(preivf[postivf[i]]); 00279 } 00280 else 00281 mv(meta_data_dir+slash+"incomplete.source.prefilter.indexes " 00282 +meta_data_dir+slash+"source.indexes"); 00283 00284 else 00285 if(isfile(meta_data_dir+slash+"incomplete.source.postfilter.indexes")) 00286 mv(meta_data_dir+slash+"incomplete.source.postfilter.indexes " 00287 +meta_data_dir+slash+"source.indexes"); 00288 00289 if(idx_shuffle!=string::npos) 00290 { 00291 idx_shuffle+=9; 00292 cidx_shuffle=in.find("</SHUFFLE>"); 00293 if(cidx_shuffle==string::npos) 00294 PLERROR("Cannot find closing tag </SHUFFLE>. File is %s",filename.c_str()); 00295 string seedstr=removeblanks(in.substr(idx_shuffle,cidx_shuffle-idx_shuffle)); 00296 if(seedstr=="") 00297 manual_seed(clock()); 00298 else 00299 { 00300 int seed = toint(seedstr); 00301 manual_seed(seed); 00302 } 00303 // if a source.indexes file exists, shuffle it, otherwise, create it 00304 TVec<int> idx; 00305 if(isfile(meta_data_dir+slash+"source.indexes")) 00306 { 00307 IntVecFile ivf(meta_data_dir+slash+"source.indexes"); 00308 idx=ivf.getVec(); 00309 } 00310 else idx=TVec<int>(0,tmpsource->length()-1,1); 00311 shuffleElements(idx); 00312 rm(meta_data_dir+slash+"source.indexes"); 00313 IntVecFile newivf(meta_data_dir+slash+"source.indexes",true); 00314 newivf.append(idx); 00315 } 00316 // remove intermediate files 00317 rm(meta_data_dir+slash+"incomplete.source.prefilter.indexes"); 00318 rm(meta_data_dir+slash+"incomplete.source.postfilter.indexes"); 00319 } 00320 00321 bool VVMatrix::isPrecomputedAndUpToDate() 00322 { 00323 string meta_data_dir=remove_trailing_slash(the_filename)+".metadata"; 00324 if(isfile(meta_data_dir+slash+"precomputed.pmat") && 00325 mtime(meta_data_dir+slash+"precomputed.pmat") > getMtime()) 00326 return true; 00327 if(pathexists(meta_data_dir+slash+"precomputed.dmat"+slash) && 00328 mtime(meta_data_dir+slash+"precomputed.dmat"+slash+"indexfile") > getMtime()) 00329 return true; 00330 return false; 00331 } 00332 00333 string VVMatrix::getPrecomputedDataName() 00334 { 00335 string meta_data_dir=remove_trailing_slash(the_filename)+".metadata"; 00336 if(isfile(meta_data_dir+slash+"precomputed.pmat")) 00337 return meta_data_dir+slash+"precomputed.pmat"; 00338 if(pathexists(meta_data_dir+slash+"precomputed.dmat"+slash)) 00339 return meta_data_dir+slash+"precomputed.dmat"; 00340 return "** no precomputed data found **"; 00341 } 00342 00343 00344 VMat VVMatrix::createPreproVMat(const string & filename) 00345 { 00346 string in=readFileAndMacroProcess(filename); 00347 size_t idx_source = in.find("<SOURCES>"); 00348 size_t idx_prefilter = in.find("<PREFILTER>"); 00349 size_t idx_postfilter = in.find("<POSTFILTER>"); 00350 size_t idx_process = in.find("<PROCESSING>"); 00351 size_t idx_shuffle = in.find("<SHUFFLE>"); 00352 size_t idx_join = in.find("<JOIN>"); 00353 size_t idx_precompute = in.find("<PRECOMPUTE>"); 00354 size_t idx_sizes = in.find("<SIZES>"); 00355 size_t cidx_source = 0, cidx_prefilter = 0, cidx_postfilter = 0, 00356 cidx_process = 0, cidx_shuffle = 0, cidx_precompute = 0, 00357 cidx_join = 0, cidx_sizes = 0; 00358 string precomp; 00359 VMat source; 00360 00361 bool olddebugval=VMatLanguage::output_preproc; 00362 VMatLanguage::output_preproc = in.find("<DEBUG>")!=string::npos; 00363 00364 if( VMatLanguage::output_preproc ) 00365 cerr<<"DEBUG is on (remove <DEBUG> in "+filename+" to turn it off)"<<endl; 00366 00367 string meta_data_dir=remove_trailing_slash(filename)+".metadata"; 00368 force_mkdir(meta_data_dir); 00369 time_t date_of_code = getDateOfVMat(filename); 00370 00371 // remove pollution : all stuff that has possibly been interrupted during computation 00372 rm (meta_data_dir+slash+"incomplete.*"); 00373 00374 bool sizes_spec = false; 00375 int inputsize = -1; 00376 int targetsize = -1; 00377 int weightsize = -1; 00378 // Check if sizes are specified. 00379 if(idx_sizes != string::npos) 00380 { 00381 sizes_spec = true; 00382 idx_sizes += 7; 00383 cidx_sizes = in.find("</SIZES>"); 00384 if(cidx_sizes == string::npos) 00385 PLERROR("Cannot find closing tag </SIZES>. File is %s",filename.c_str()); 00386 vector<string> sizes = split(in.substr(idx_sizes, cidx_sizes - idx_sizes),"\n"); 00387 if (sizes.size() != 3) { 00388 PLERROR("You should specify exactly 3 sizes between <SIZES> and </SIZES> (file is %s)", filename.c_str()); 00389 } 00390 inputsize = toint(sizes[0]); 00391 targetsize = toint(sizes[1]); 00392 weightsize = toint(sizes[2]); 00393 } 00394 00395 if(isfile(meta_data_dir+slash+"precomputed.pmat")) 00396 { 00397 // Precomputed version exist in pmat format. 00398 // If it seems old, we display a warning (one may still want to use it 00399 // if he knows the changes made to the vmat code do not alter the data). 00400 if(mtime(meta_data_dir+slash+"precomputed.pmat") < date_of_code) { 00401 PLWARNING("In VVMatrix::createPreproVMat - The precomputed data (in %s) is older than current code, you may want to recompute again", meta_data_dir.c_str()); 00402 } 00403 source = new FileVMatrix(meta_data_dir+slash+"precomputed.pmat"); 00404 source->setMetaDataDir(meta_data_dir); 00405 source->setMtime(date_of_code); 00406 source->defineSizes(inputsize, targetsize, weightsize); 00407 return source; 00408 } 00409 00410 if(pathexists(meta_data_dir+slash+"precomputed.dmat"+slash)) 00411 { 00412 // precomputed version exist in DiskVMatrix format, 00413 // so we use it *if it is up to date* 00414 if(mtime(meta_data_dir+slash+"precomputed.dmat"+slash+"indexfile") > date_of_code) 00415 { 00416 source = new DiskVMatrix(meta_data_dir+slash+"precomputed.dmat"+slash); 00417 source->setMetaDataDir(meta_data_dir); 00418 source->setMtime(date_of_code); 00419 source->defineSizes(inputsize, targetsize, weightsize); 00420 return source; 00421 } 00422 } 00423 00424 // if index_section is true, then this dataset needs a file containing the index of the rows to keep 00425 bool index_section = idx_prefilter!=string::npos || idx_postfilter!=string::npos || idx_shuffle!=string::npos ; 00426 00427 // if true, index file lacks or is out of date 00428 bool must_regen_index = index_section && 00429 (!isfile(meta_data_dir+slash+"source.indexes") || date_of_code > mtime(meta_data_dir+slash+"source.indexes")); 00430 00431 // erase obsolete source.index if necessary 00432 if(isfile(meta_data_dir+slash+"source.indexes") && !index_section) 00433 rm (meta_data_dir+slash+"source.indexes"); 00434 00435 if(idx_source!=string::npos) 00436 { 00437 // go over tag 00438 idx_source+=9; 00439 cidx_source=in.find("</SOURCES>"); 00440 if(cidx_source==string::npos) 00441 PLERROR("Cannot find closing tag </SOURCES>. File is %s",filename.c_str()); 00442 // 'sec' is the text content of the <SOURCES> section 00443 string sec=in.substr(idx_source,cidx_source-idx_source); 00444 00445 if (sec[1] == NEW_SYNTAX_CHAR) { 00446 // We are using the new syntax 00447 sec[1] = ' '; // remove the special character indicating the new syntax 00448 source = dynamic_cast<VMatrix*>(newObject(sec)); 00449 if(source.isNull()) { 00450 PLERROR("In VVMatrix::createPreproVMat '%s' is not a valid VMatrix subclass",sec.c_str()); 00451 } 00452 } else { 00453 00454 vector<vector<string> > mstr = extractSourceMatrix(sec,filename); 00455 Array<VMat> vmrows(mstr.size()); 00456 // we need to build a VMat that is the concatenation of the datasets contained in 'mstr' 00457 00458 for(unsigned int i=0;i<mstr.size();i++) 00459 { 00460 Array<VMat> ar(mstr[i].size()); 00461 for(unsigned int j=0;j<mstr[i].size();j++) 00462 { 00463 vector<string> vecstr; 00464 vecstr=split(mstr[i][j],":"); 00465 ar[j]=getDataSet(vecstr[0]); 00466 00467 // handle different cases of dataset specification 00468 // legal formats are: 00469 // 1- simple dataset filename 00470 // 2- intVecFile specification : filename:intVecFile_Filename 00471 // 3- range specifiaction : filename:start:length 00472 // 4- range specifiaction + intVecFile: filename:intVecFile_Filename:start:length 00473 switch(vecstr.size()) 00474 { 00475 case 1: // only dataset name so we do nothing 00476 break; 00477 case 2: // we have an intVecFile specification 00478 // prefix with the path to the current vmat 00479 if(vecstr[1][0]!=slash_char) 00480 vecstr[1]=extract_directory(filename)+vecstr[1]; 00481 ar[j]=new SelectRowsFileIndexVMatrix(ar[j],vecstr[1]); 00482 break; 00483 case 3: // submatRows range specification 00484 ar[j]=ar[j].subMatRows(toint(vecstr[1]),toint(vecstr[2])); 00485 break; 00486 case 4: // intVecFile + submatRows 00487 if(vecstr[1][0]!=slash_char) 00488 vecstr[1]=extract_directory(filename)+vecstr[1]; 00489 ar[j]=new SelectRowsFileIndexVMatrix(ar[j],vecstr[1]); 00490 ar[j]=ar[j].subMatRows(toint(vecstr[2]),toint(vecstr[3])); 00491 break; 00492 default: 00493 PLERROR("Strange number of semicolumns... Format of source must be something.vmat[:indexfile.index][:start_row:length]. File is %s",filename.c_str()); 00494 break; 00495 } 00496 } 00497 // if we have more than one filename in this row, we use hconcat to consolidate 'ar'. 00498 vmrows[i] = ar.size()==1?ar[0]:hconcat(ar); 00499 if(vmrows[i].length()==-1) 00500 PLERROR("Trying to hconcat matrix with different lengths! File is %s",filename.c_str()); 00501 } 00502 00503 source = vconcat(vmrows); 00504 if(mstr.size()==0) 00505 PLERROR("No source matrix found in <SOURCES> section! File is %s",filename.c_str()); 00506 } 00507 } 00508 else PLERROR("Need at least a <SOURCES> section ! File is %s",filename.c_str()); 00509 00510 if(must_regen_index) 00511 generateVMatIndex(source, meta_data_dir, filename, date_of_code, in, idx_prefilter, cidx_prefilter, 00512 idx_postfilter, cidx_postfilter, idx_process, cidx_process, 00513 idx_shuffle, cidx_shuffle, idx_join, cidx_join); 00514 00515 if(idx_join!=string::npos) 00516 { 00517 cidx_join=in.find("</JOIN>"); 00518 if(cidx_join==string::npos) 00519 PLERROR("Cannot find closing tag </JOIN>. File is %s",filename.c_str()); 00520 vector<string> code=split(in.substr(idx_join+6,cidx_join-idx_join-6),"\n"); 00521 processJoinSection(code,source); 00522 } 00523 00524 if(idx_process!=string::npos) 00525 { 00526 cidx_process=in.find("</PROCESSING>"); 00527 if(cidx_process==string::npos) 00528 PLERROR("Cannot find closing tag </PROCESSING>. File is %s",filename.c_str()); 00529 string code=in.substr(idx_process+12,cidx_process-idx_process-12); 00530 source = new PreprocessingVMatrix(source,code); 00531 } 00532 00533 // if source.index exists at this point, we need to apply it 00534 if(isfile(meta_data_dir+slash+"source.indexes")) 00535 source = new SelectRowsFileIndexVMatrix(source,meta_data_dir+slash+"source.indexes"); 00536 00537 // next lines handle precomputation of matrix 00538 if(idx_precompute!=string::npos) 00539 { 00540 idx_precompute+=12; 00541 cidx_precompute=in.find("</PRECOMPUTE>"); 00542 if(cidx_precompute==string::npos) 00543 PLERROR("Cannot find closing tag </PRECOMPUTE>. File is %s",filename.c_str()); 00544 precomp=removeblanks(in.substr(idx_precompute,cidx_precompute-idx_precompute)); 00545 if(precomp == "dmat") 00546 { 00547 cout<<"Rendering DMAT : "<<meta_data_dir+slash+"precomputed.dmat"+slash<<endl; 00548 source->saveDMAT(meta_data_dir+slash+"incomplete.precomputed.dmat"+slash); 00549 int cnt=0; 00550 if (isdir(meta_data_dir+slash+"precomputed.dmat"+slash)) { 00551 while(cnt++ < 5 && 00552 !force_rmdir(meta_data_dir+slash+"precomputed.dmat"+slash)) 00553 { 00554 cerr<<"Could not rm -rf '"+meta_data_dir+ 00555 slash+"precomputed.dmat/'. Maybe 'Stale NFS file handle' curse again. Retrying in 2 sec." 00556 <<endl; 00557 sleep(2); 00558 } 00559 } 00560 00561 mvforce(meta_data_dir+slash+"incomplete.precomputed.dmat"+slash+" "+meta_data_dir+slash+"precomputed.dmat"+slash); 00562 if(pathexists(meta_data_dir+slash+"incomplete.precomputed.dmat.metadata"+slash)) 00563 { 00564 rm(meta_data_dir+slash+"precomputed.dmat.metadata"+slash); 00565 mvforce(meta_data_dir+slash+"incomplete.precomputed.dmat.metadata"+slash+" "+meta_data_dir+slash+"precomputed.dmat.metadata"+slash); 00566 } 00567 source=new DiskVMatrix(meta_data_dir+slash+"precomputed.dmat"+slash); 00568 } 00569 else if(precomp == "pmat") 00570 { 00571 cout<<"Rendering PMAT : "<<meta_data_dir+slash+"precomputed.pmat"<<endl; 00572 source->savePMAT(meta_data_dir+slash+"incomplete.precomputed.pmat"); 00573 mvforce(meta_data_dir+slash+"incomplete.precomputed.pmat "+meta_data_dir+slash+"precomputed.pmat"); 00574 // Seems to be useless now (TODO: remove ?) 00575 // mvforce(meta_data_dir+slash+"incomplete.precomputed.pmat.fieldnames "+meta_data_dir+slash+"precomputed.pmat.fieldnames"); 00576 if(pathexists(meta_data_dir+slash+"incomplete.precomputed.pmat.metadata"+slash)) 00577 mvforce(meta_data_dir+slash+"incomplete.precomputed.pmat.metadata"+slash+" "+meta_data_dir+slash+"precomputed.pmat.metadata"+slash); 00578 // Save the string mappings. 00579 source->setMetaDataDir(meta_data_dir); 00580 source->saveAllStringMappings(); 00581 source = new FileVMatrix(meta_data_dir+slash+"precomputed.pmat"); 00582 source->setMetaDataDir(meta_data_dir); 00583 source->setMtime(date_of_code); 00584 source->defineSizes(inputsize, targetsize, weightsize); 00585 // TODO The 3 lines above are duplicated throughout the code, this is ugly. 00586 } 00587 else if(precomp!="") 00588 PLERROR("Unsupported precomputing format : %s. File is %s", precomp.c_str(),filename.c_str()); 00589 } 00590 00591 VMatLanguage::output_preproc=olddebugval; 00592 source->setMetaDataDir(meta_data_dir); 00593 source->setMtime(date_of_code); 00594 if (sizes_spec) { 00595 source->defineSizes(inputsize, targetsize, weightsize); 00596 } 00597 return source; 00598 } 00599 00600 void VVMatrix::build() 00601 { 00602 inherited::build(); 00603 build_(); 00604 } 00605 00606 void VVMatrix::build_() 00607 { 00608 if (the_filename != "") { 00609 setMetaDataDir(makeExplicitPath(the_filename+".metadata")); 00610 force_mkdir(getMetaDataDir()); 00611 00612 code = readFileAndMacroProcess(the_filename); 00613 if(removeblanks(code)[0]=='<') // old xml-like format 00614 the_mat = createPreproVMat(the_filename); 00615 else // New standard PLearn object description format 00616 { 00617 the_mat = dynamic_cast<VMatrix*>(newObject(code)); 00618 if(the_mat.isNull()) 00619 PLERROR("Object described in %s is not a VMatrix subclass",the_filename.c_str()); 00620 the_mat->setMetaDataDir(getMetaDataDir()); 00621 } 00622 00623 setMtime(the_mat->getMtime()); 00624 length_ = the_mat.length(); 00625 width_ = the_mat.width(); 00626 00627 // Copy the sizes. 00628 copySizesFrom(the_mat); 00629 00630 // Copy the string mappings. 00631 copyStringMappingsFrom(the_mat); 00632 00633 // Copy the parent field names 00634 fieldinfos.resize(width_); 00635 if (the_mat->getFieldInfos().size() > 0) 00636 for(int j=0; j<width_; j++) 00637 fieldinfos[j] = the_mat->getFieldInfos()[j]; 00638 } 00639 } 00640 00641 // string maps are those loaded from the .vmat metadatadir, not those of the source vmatrix anymore 00642 // could be changed.. 00643 00644 // real VVMatrix::getStringVal(int col, const string & str) const 00645 // { 00646 // return the_mat->getStringVal(col, str); 00647 // } 00648 // string VVMatrix::getValString(int col, real val) const 00649 // { 00650 // return the_mat->getValString(col, val); 00651 // } 00652 // string VVMatrix::getString(int row,int col) const 00653 // { 00654 // return the_mat->getString(row, col); 00655 // } 00656 00657 // const map<string,real>& VVMatrix::getStringToRealMapping(int col) const 00658 // { 00659 // return the_mat->getStringToRealMapping(col); 00660 // } 00661 00662 00664 // makeDeepCopyFromShallowCopy // 00666 void VVMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) { 00667 inherited::makeDeepCopyFromShallowCopy(copies); 00668 deepCopyField(the_mat, copies); 00669 } 00670 00671 } // end of namespace PLearn

Generated on Tue Aug 17 16:11:01 2004 for PLearn by doxygen 1.3.7