Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

VMatrix.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. 
For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 /* 00039 * $Id: VMatrix.cc,v 1.68 2004/08/09 16:23:13 tihocan Exp $ 00040 ******************************************************* */ 00041 00042 #include "DiskVMatrix.h" 00043 #include "FileVMatrix.h" 00044 #include "SubVMatrix.h" 00045 #include "VMat_maths.h" 00046 #include "VMatrix.h" 00047 00048 #include <plearn/math/BottomNI.h> 00049 #include <plearn/ker/Kernel.h> 00050 #include <plearn/var/Func.h> 00051 #include <plearn/base/stringutils.h> 00052 #include <plearn/math/TopNI.h> 00053 00054 //#include "VMat.h" 00055 //#include "TMat_maths.h" 00056 //#include "Array.h" 00057 //#include "random.h" 00058 //#include "Kernel.h" 00059 //#include "Func.h" 00060 //#include "TmpFilenames.h" 00061 //#include "fileutils.h" 00062 //#include <vector> 00063 //#include "TopNI.h" 00064 //#include "BottomNI.h" 00065 //#include "VVMatrix.h" 00066 // #include "DisplayUtils.h" 00067 00068 00069 namespace PLearn { 00070 using namespace std; 00071 00074 PLEARN_IMPLEMENT_ABSTRACT_OBJECT(VMatrix, "ONE LINE DESCR", "NO HELP"); 00075 00076 VMatrix::VMatrix() 00077 : lockf_(0), length_(-1), width_(-1), mtime_(0), 00078 inputsize_(-1), targetsize_(-1), weightsize_(-1), 00079 writable(false) 00080 {} 00081 00082 VMatrix::VMatrix(int the_length, int the_width) 00083 : lockf_(0), length_(the_length), width_(the_width), mtime_(0), 00084 inputsize_(-1), targetsize_(-1), weightsize_(-1), 00085 writable(false), 00086 map_sr(TVec<map<string,real> >(the_width)), 00087 map_rs(TVec<map<real,string> >(the_width)), 00088 fieldstats(0) 00089 {} 00090 00091 void VMatrix::declareOptions(OptionList & ol) 00092 { 00093 declareOption(ol, "writable", &VMatrix::writable, OptionBase::buildoption, "Are write operations permitted?"); 00094 declareOption(ol, "length", &VMatrix::length_, OptionBase::buildoption, 00095 "length of the matrix (number of rows)"); 00096 declareOption(ol, "width", 
&VMatrix::width_, OptionBase::buildoption, 00097 "width of the matrix (number of columns; -1 indicates this varies from sample to sample...)"); 00098 declareOption(ol, "inputsize", &VMatrix::inputsize_, OptionBase::buildoption, 00099 "size of input part (-1 if variable or unspecified, 0 if no input)"); 00100 declareOption(ol, "targetsize", &VMatrix::targetsize_, OptionBase::buildoption, 00101 "size of target part (-1 if variable or unspecified, 0 if no target)"); 00102 declareOption(ol, "weightsize", &VMatrix::weightsize_, OptionBase::buildoption, 00103 "size of weights (-1 if unspecified, 0 if no weight, 1 for sample weight, >1 currently not supported (include it is recommended to include additional info in target. weight is really reserved for a per sample weight))."); 00104 declareOption(ol, "metadatadir", &VMatrix::metadatadir, OptionBase::buildoption, 00105 "A directory in which to store meta-information for this matrix \n" 00106 "You don't always have to give this explicitly. For ex. 
if your \n" 00107 "vmat is the outer VMatrix in a .vmat file, the metadatadir will \n" 00108 "automatically be set to name_of_vmat_file.metadata/ \n" 00109 "And if it is the source inside another VMatrix that sets its \n" 00110 "metadatadir, it will often be set from that surrounding vmat's metadata \n"); 00111 inherited::declareOptions(ol); 00112 } 00113 00114 void VMatrix::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00115 { 00116 inherited::makeDeepCopyFromShallowCopy(copies); 00117 deepCopyField(get_row, copies); 00118 deepCopyField(dotrow_1, copies); 00119 deepCopyField(dotrow_2, copies); 00120 deepCopyField(field_stats, copies); 00121 deepCopyField(map_sr, copies); 00122 deepCopyField(map_rs, copies); 00123 deepCopyField(fieldinfos, copies); 00124 deepCopyField(fieldstats, copies); 00125 } 00126 00127 Array<VMField>& VMatrix::getFieldInfos() const 00128 { 00129 if(fieldinfos.size()==0 && hasMetaDataDir()) 00130 { 00131 string fname = append_slash(getMetaDataDir()) + "fieldnames"; 00132 if(isfile(fname)) // file exists 00133 loadFieldInfos(); 00134 } 00135 00136 int ninfos = fieldinfos.size(); 00137 int w = width(); 00138 if(ninfos!=w && w > 0) 00139 { 00140 fieldinfos.resize(w); 00141 for(int j=ninfos; j<w; j++) 00142 fieldinfos[j] = VMField(tostring(j)); 00143 } 00144 00145 return fieldinfos; 00146 } 00147 00148 void VMatrix::setFieldInfos(const Array<VMField>& finfo) 00149 { 00150 fieldinfos=finfo; 00151 } 00152 00153 bool VMatrix::hasFieldInfos() const 00154 { 00155 return fieldinfos.size()>0; 00156 } 00157 00158 void VMatrix::unduplicateFieldNames() 00159 { 00160 map<string,vector<int> > mp; 00161 for(int i=0;i<width();i++) 00162 mp[getFieldInfos(i).name].push_back(i); 00163 map<string,vector<int> >::iterator it; 00164 for(it=mp.begin();it!=mp.end();++it) 00165 if(it->second.size()!=1) 00166 { 00167 vector<int> v=it->second; 00168 for(unsigned int j=0;j<v.size();j++) 00169 fieldinfos[v[j]].name+="."+tostring(j); 00170 } 00171 } 00172 00173 
TVec<string> VMatrix::fieldNames() const 00174 { 00175 int d = width(); 00176 TVec<string> names(d); 00177 for(int i=0; i<d; i++) 00178 names[i] = fieldName(i); 00179 return names; 00180 } 00181 00182 int VMatrix::fieldIndex(const string& fieldname) const 00183 { 00184 Array<VMField>& infos = getFieldInfos(); 00185 for(int i=0; i<width(); i++) 00186 if(infos[i].name==fieldname) 00187 return i; 00188 return -1; 00189 } 00190 00191 int VMatrix::getFieldIndex(const string& fieldname_or_num) const 00192 { 00193 int i = fieldIndex(fieldname_or_num); 00194 if(i==-1) 00195 i = toint(fieldname_or_num); 00196 if (i < 0 || i >= width()) 00197 PLERROR("In VMatrix::getFieldIndex - Asked for an unvalid column number"); 00198 return i; 00199 } 00200 00201 void VMatrix::build_() 00202 { 00203 if(metadatadir!="") 00204 setMetaDataDir(metadatadir); // make sure we perform all necessary operations 00205 } 00206 00207 void VMatrix::build() 00208 { 00209 inherited::build(); 00210 build_(); 00211 } 00212 00213 void VMatrix::printFieldInfo(ostream& out, int fieldnum) const 00214 { 00215 VMField fi = getFieldInfos(fieldnum); 00216 StatsCollector& s = getStats(fieldnum); 00217 00218 out << "Field #" << fieldnum << ": "; 00219 out << fi.name << "\t type: "; 00220 switch(fi.fieldtype) 00221 { 00222 case VMField::UnknownType: 00223 out << "UnknownType\n"; 00224 break; 00225 case VMField::Continuous: 00226 out << "Continuous\n"; 00227 break; 00228 case VMField::DiscrGeneral: 00229 out << "DiscrGeneral\n"; 00230 break; 00231 case VMField::DiscrMonotonic: 00232 out << "DiscrMonotonic\n"; 00233 break; 00234 case VMField::DiscrFloat: 00235 out << "DiscrFloat\n"; 00236 break; 00237 case VMField::Date: 00238 out << "Date\n"; 00239 break; 00240 default: 00241 PLERROR("Can't write name of type"); 00242 } 00243 00244 out.precision(12); 00245 out << "nmissing: " << s.nmissing() << '\n'; 00246 out << "nnonmissing: " << s.nnonmissing() << '\n'; 00247 out << "sum: " << s.sum() << '\n'; 00248 out << "mean: 
" << s.mean() << '\n'; 00249 out << "stddev: " << s.stddev() << '\n'; 00250 out << "min: " << s.min() << '\n'; 00251 out << "max: " << s.max() << '\n'; 00252 00253 /* 00254 if(!s.counts.empty()) 00255 { 00256 out << "\nCOUNTS: \n"; 00257 map<real,StatsCollectorCounts>::const_iterator it = s.counts.begin(); 00258 map<real,StatsCollectorCounts>::const_iterator countsend = s.counts.end(); 00259 while(it!=countsend) 00260 { 00261 real val = it->first; 00262 const StatsCollectorCounts& co = it->second; 00263 out << " " << val; 00264 string s = getValString(fieldnum, val); 00265 if(s!="") 00266 out << " " << s; 00267 out << " \t: n=" << co.n << "\t nbelow=" << co.nbelow << "\t sumbelow=" << co.sum << endl; 00268 ++it; 00269 } 00270 } 00271 */ 00272 out << endl << endl; 00273 } 00274 00275 void VMatrix::printFieldInfo(ostream& out, const string& fieldname_or_num) const 00276 { 00277 printFieldInfo(out, getFieldIndex(fieldname_or_num)); 00278 } 00279 00280 void VMatrix::printFields(ostream& out) const 00281 { 00282 for(int j=0; j<width(); j++) 00283 { 00284 printFieldInfo(out,j); 00285 out << "-----------------------------------------------------" << endl; 00286 } 00287 } 00288 00289 void VMatrix::getExample(int i, Vec& input, Vec& target, real& weight) 00290 { 00291 if(inputsize_<0) 00292 PLERROR("In VMatrix::getExample, inputsize_ not defined for this vmat"); 00293 input.resize(inputsize_); 00294 getSubRow(i,0,input); 00295 if(targetsize_<0) 00296 PLERROR("In VMatrix::getExample, targetsize_ not defined for this vmat"); 00297 target.resize(targetsize_); 00298 if (targetsize_ > 0) { 00299 getSubRow(i,inputsize_,target); 00300 } 00301 00302 if(weightsize_==0) 00303 weight = 1; 00304 else if(weightsize_<0) 00305 PLERROR("In VMatrix::getExample, weightsize_ not defined for this vmat"); 00306 else if(weightsize_>1) 00307 PLERROR("In VMatrix::getExample, weightsize_ >1 not supported by this call"); 00308 else 00309 weight = get(i,inputsize_+targetsize_); 00310 } 00311 00312 
00313 void VMatrix::computeStats() 00314 { 00315 fieldstats = Array<VMFieldStat>(width()); 00316 Vec row(width()); 00317 for(int i=0; i<length(); i++) 00318 { 00319 getRow(i,row); 00320 for(int j=0; j<width(); j++) 00321 fieldstats[j].update(row[j]); 00322 } 00323 } 00324 00325 void VMatrix::loadStats(const string& filename) 00326 { 00327 ifstream in(filename.c_str()); 00328 if(!in) 00329 PLERROR("In VMatrix::loadStats Couldn't open file %s for reading",filename.c_str()); 00330 int nfields; 00331 in >> nfields; 00332 if(nfields!=width()) 00333 PLWARNING("In VMatrix::loadStats nfields differes from VMat width"); 00334 00335 fieldstats.resize(nfields); 00336 for(int j=0; j<fieldstats.size(); j++) 00337 fieldstats[j].read(in); 00338 } 00339 00340 void VMatrix::saveStats(const string& filename) const 00341 { 00342 ofstream out(filename.c_str()); 00343 if(!out) 00344 PLERROR("In VMatrix::saveStats Couldn't open file %s for writing",filename.c_str()); 00345 out << fieldstats.size() << endl; 00346 for(int j=0; j<fieldstats.size(); j++) 00347 { 00348 fieldstats[j].write(out); 00349 out << endl; 00350 } 00351 } 00352 00353 00354 string VMatrix::fieldheader(int elementcharwidth) 00355 { 00356 // Implementation not done yet 00357 00358 return "VMatrix::fieldheader NOT YET IMPLEMENTED"; 00359 } 00360 00361 00362 void VMatrix::declareField(int fieldindex, const string& fieldname, VMField::FieldType fieldtype) 00363 { getFieldInfos(fieldindex) = VMField(fieldname,fieldtype); } 00364 00365 void VMatrix::declareFieldNames(TVec<string> fnames) 00366 { 00367 if(fnames.length()!=width()) 00368 PLERROR("In VMatrix::declareFieldNames length of fnames differs from width() of VMatrix"); 00369 for(int i=0; i<fnames.length(); i++) 00370 declareField(i,fnames[i]); 00371 } 00372 00373 void VMatrix::saveFieldInfos() const 00374 { 00375 if(fieldinfos.size()==0) 00376 return; 00377 string filename = append_slash(getMetaDataDir()) + "fieldnames"; 00378 ofstream out(filename.c_str()); 00379 
if(!out) 00380 PLERROR("In VMatrix::saveFieldInfos Couldn't open file %s for writing",filename.c_str()); 00381 for(int i= 0; i < fieldinfos.length(); ++i) 00382 out << fieldinfos[i].name << '\t' << fieldinfos[i].fieldtype << endl; 00383 } 00384 00385 void VMatrix::loadFieldInfos() const 00386 { 00387 string filename = append_slash(getMetaDataDir()) + "fieldnames"; 00388 ifstream in(filename.c_str()); 00389 if(!in) 00390 PLERROR("In VMatrix::loadFieldInfos Couldn't open file %s for reading",filename.c_str()); 00391 00392 int w = width(); 00393 fieldinfos.resize(w); 00394 for(int i= 0; i < w; ++i) 00395 { 00396 vector<string> v(split(pgetline(in))); 00397 switch(v.size()) 00398 { 00399 case 1: fieldinfos[i] = VMField(v[0]); break; 00400 case 2: fieldinfos[i] = VMField(v[0], VMField::FieldType(toint(v[1]))); break; 00401 default: PLERROR("In VMatrix::loadFieldInfos Format not recognized. Each line should be '<name> {<type>}'."); 00402 } 00403 } 00404 } 00405 00406 // comments: see .h 00407 string VMatrix::resolveFieldInfoLink(string target, string source) 00408 { 00409 string contents = removeblanks(loadFileAsString(source)); 00410 if(contents==source) 00411 return "ERROR"; 00412 if(isdir(contents)) 00413 { 00414 //just in case it lacks a slash.. 
00415 contents+=slash; 00416 if(isfile(contents+target+".lnk")) 00417 return resolveFieldInfoLink(target,contents+target+".lnk"); 00418 else if(isfile(contents+target)) 00419 return contents+target; 00420 else if(isfile(contents+slash+"__default.lnk")) 00421 return resolveFieldInfoLink(target,contents+slash+"__default.lnk"); 00422 // assume target is there, but file is empty thus inexistant 00423 else return contents+target; 00424 } 00425 else if(contents.substr(contents.size()-4,4)==".lnk") 00426 return resolveFieldInfoLink(target,contents); 00427 else return contents; 00428 } 00429 00430 void VMatrix::setSFIFFilename(int col, string ext, string filepath) 00431 { 00432 setSFIFFilename(fieldName(col),ext,filepath); 00433 } 00434 00435 00436 void VMatrix::setSFIFFilename(string fieldname, string ext, string filepath) 00437 { 00438 string target = makeFileNameValid(fieldname+ext); 00439 string normalfname = getMetaDataDir()+"FieldInfo"+slash+target; 00440 rm(normalfname+".lnk"); 00441 if(filepath==normalfname || filepath=="") 00442 { 00443 rm(normalfname+".lnk"); 00444 return; 00445 } 00446 00447 force_mkdir_for_file(normalfname); 00448 ofstream o((normalfname+".lnk").c_str()); 00449 o<<filepath<<endl; 00450 } 00451 00452 string VMatrix::getSFIFFilename(int col, string ext) 00453 { 00454 return getSFIFFilename(fieldName(col),ext); 00455 } 00456 00457 string VMatrix::getSFIFFilename(string fieldname, string ext) 00458 { 00459 string target = makeFileNameValid(fieldname+ext); 00460 string normalfname = getMetaDataDir()+"FieldInfo"+slash+target; 00461 string defaultlinkfname = getMetaDataDir()+"FieldInfo"+slash+"__default.lnk"; 00462 if(isfile(normalfname)) 00463 return normalfname; 00464 else if(isfile(normalfname+".lnk")) 00465 return resolveFieldInfoLink(target, normalfname+".lnk"); 00466 else if(isfile(defaultlinkfname)) 00467 return resolveFieldInfoLink(target, defaultlinkfname); 00468 // assume target is here, but file is empty thus inexistant 00469 else return 
normalfname; 00470 } 00471 00472 bool VMatrix::isSFIFDirect(int col, string ext) 00473 { 00474 return isSFIFDirect(fieldName(col), ext); 00475 } 00476 00477 bool VMatrix::isSFIFDirect(string fieldname, string ext) 00478 { 00479 string target = makeFileNameValid(fieldname+ext); 00480 string normalfname = getMetaDataDir()+"FieldInfo"+slash+target; 00481 return getSFIFFilename(fieldname,ext) == normalfname; 00482 } 00483 00485 void VMatrix::addStringMapping(int col, string str, real val) 00486 { 00487 init_map_sr(); 00488 map_sr[col][str]=val; 00489 map_rs[col][val]=str; 00490 } 00491 00492 real VMatrix::addStringMapping(int col, string str) 00493 { 00494 init_map_sr(); 00495 map<string,real>& m = map_sr[col]; 00496 map<string,real>::iterator it = m.find(str); 00497 00498 real val = 0; 00499 if(it != m.end()) // str was found in map 00500 val = it->second; 00501 else // str not found in map: add a new mapping 00502 { 00503 val = -100 - m.size(); 00504 addStringMapping(col, str, val); 00505 } 00506 return val; 00507 } 00508 00509 void VMatrix::removeAllStringMappings() 00510 { 00511 init_map_sr(); 00512 for(int i=0;i<width();i++) 00513 { 00514 map_sr[i].clear(); 00515 map_rs[i].clear(); 00516 } 00517 } 00518 00519 void VMatrix::removeColumnStringMappings(int c) 00520 { 00521 init_map_sr(); 00522 map_sr[c].clear(); 00523 map_rs[c].clear(); 00524 } 00525 00527 // saveAllStringMappings // 00529 void VMatrix::saveAllStringMappings() 00530 { 00531 string fname; 00532 for(int i=0;i<width();i++) 00533 { 00534 fname = getSFIFFilename(i,".smap"); 00535 saveStringMappings(i,fname); 00536 } 00537 } 00538 00539 void VMatrix::saveStringMappings(int col,string fname) 00540 { 00541 map<string, real> the_map = getStringToRealMapping(col); 00542 if(the_map.size()==0) 00543 { 00544 rm(fname); 00545 return; 00546 } 00547 force_mkdir_for_file(fname); 00548 POFStream o(fname.c_str()); 00549 // ofstream o(fname.c_str()); 00550 if(!o) 00551 PLERROR( "File %s can't be opened",fname.c_str()); 
00552 for(map<string,real>::iterator it = the_map.begin();it!=the_map.end();++it) 00553 o << it->first << it->second << endl; 00554 } 00555 00557 void VMatrix::removeStringMapping(int col, string str) 00558 { 00559 init_map_sr(); 00560 map<string,real>::iterator sriterator; 00561 // check if the mapping ractually exists 00562 if((sriterator = map_sr[col].find(str)) == map_sr[col].end()) 00563 return; 00564 real val = map_sr[col][str]; 00565 map_sr[col].erase(sriterator); 00566 map_rs[col].erase(map_rs[col].find(val)); 00567 } 00568 00570 void VMatrix::setStringMapping(int col, const map<string,real> & zemap) 00571 { 00572 init_map_sr(); 00573 map_sr[col]=zemap; 00574 map_rs[col].clear(); 00575 for(map<string,real>::iterator it = map_sr[col].begin();it!=map_sr[col].end();++it) 00576 map_rs[col][it->second]=it->first; 00577 } 00578 00580 void VMatrix::deleteStringMapping(int col) 00581 { 00582 init_map_sr(); 00583 if(col>=map_sr.size() || 00584 col>=map_rs.size()) 00585 PLERROR("deleteStringMapping : out of bounds for col=%i in string mapping array (size=%i).\n Current VMatrix\nclass"\ 00586 "is '%s' (or maybe derivated class?). 
be sure to set\n map_sr(rs) to appropriate sizes as soon as you know the width of the matrix\n"\ 00587 "(in constructor or elsewhere)",col,map_sr.size(),classname().c_str()); 00588 map_sr[col].clear(); 00589 map_rs[col].clear(); 00590 } 00591 00593 // getValString // 00595 string VMatrix::getValString(int col, real val) const 00596 { 00597 if(is_missing(val)) 00598 return ""; 00599 init_map_sr(); 00600 if(map_rs[col].find(val)==map_rs[col].end()) 00601 return ""; 00602 else return map_rs[col][val]; 00603 } 00604 00606 // getStringVal // 00608 real VMatrix::getStringVal(int col,const string & str) const 00609 { 00610 if(map_sr.length()==0 || map_sr[col].find(str)==map_sr[col].end()) 00611 return MISSING_VALUE; 00612 else return map_sr[col][str]; 00613 } 00614 00616 // getString // 00618 string VMatrix::getString(int row,int col) const 00619 { 00620 static string str; 00621 real val = get(row,col); 00622 str = getValString(col, val); 00623 if (str == "") 00624 // There is no string mapping associated to this value. 
00625 return tostring(val); 00626 else 00627 return str; 00628 } 00629 00631 // getStringToRealMapping // 00633 const map<string,real>& VMatrix::getStringToRealMapping(int col) const { 00634 init_map_sr(); 00635 return map_sr[col]; 00636 } 00637 00639 // getRealToStringMapping // 00641 const map<real,string>& VMatrix::getRealToStringMapping(int col) const { 00642 init_map_sr(); 00643 return map_rs[col]; 00644 } 00645 00646 void VMatrix::setMetaDataDir(const string& the_metadatadir) 00647 { 00648 if(the_metadatadir=="") 00649 PLERROR("Called setMetaDataDir with an empty string"); 00650 metadatadir = the_metadatadir; 00651 if(!force_mkdir(metadatadir)) 00652 PLERROR("In VMatrix::setMetadataDir could not create directory %s",metadatadir.c_str()); 00653 metadatadir = abspath(metadatadir); 00654 } 00655 00657 // copySizesFrom // 00659 void VMatrix::copySizesFrom(VMat m) { 00660 defineSizes(m->inputsize(), m->targetsize(), m->weightsize()); 00661 } 00662 00664 // looksTheSameAs // 00666 bool VMatrix::looksTheSameAs(VMat m) { 00667 return !( 00668 this->width() != m->width() 00669 || this->length() != m->length() 00670 || this->inputsize() != m->inputsize() 00671 || this->weightsize() != m->weightsize() 00672 || this->targetsize() != m->targetsize() ); 00673 } 00674 00675 string getHost() 00676 { 00677 return "TODO"; 00678 } 00679 00680 int getPid() 00681 { 00682 return -999; 00683 } 00684 00685 string getUser() 00686 { 00687 return "TODO"; 00688 } 00689 00690 void VMatrix::lockMetaDataDir() const 00691 { 00692 if(!hasMetaDataDir()) 00693 PLERROR("In VMatrix::lockMetaDataDir(): metadatadir was not set"); 00694 if(lockf_!=0) // already locked by this object! 
00695 PLERROR("VMatrix::lockMetaDataDir() called while already locked by this object."); 00696 if(!pathexists(metadatadir)) 00697 force_mkdir(metadatadir); 00698 string lockfile = append_slash(metadatadir)+".lock"; 00699 lockf_ = fopen(lockfile.c_str(),"w"); 00700 if(lockf_==0) 00701 { 00702 string bywho; 00703 try{ bywho = loadFileAsString(lockfile); } 00704 catch(...) { bywho = "UNKNOWN (could not read .lock file)"; } 00705 00706 cerr << "! Waiting for .lock on directory " << metadatadir 00707 << " created by " << bywho << endl; 00708 } 00709 do 00710 { 00711 // try again after a second 00712 sleep(1); 00713 lockf_ = fopen(lockfile.c_str(),"w"); 00714 } while(lockf_==0); 00715 00716 fprintf(lockf_, "host %s, pid %d, user %s", getHost().c_str(), getPid(), getUser().c_str()); 00717 fflush(lockf_); // Don't close it: to keep the lock! 00718 00719 } 00720 00721 void VMatrix::unlockMetaDataDir() const 00722 { 00723 if(lockf_==0) 00724 PLERROR("In VMatrix::unlockMetaDataDir() was called while no lock is held by this object"); 00725 fclose(lockf_); 00726 string lockfile = append_slash(metadatadir)+".lock"; 00727 rm(lockfile); 00728 lockf_ = 0; 00729 } 00730 00731 string VMatrix::getMetaDataDir() const 00732 { 00733 // if(!hasMetaDataDir()) 00734 // PLERROR("In VMatrix::getMetaDataDir(): metadatadir was not set"); 00735 return metadatadir; 00736 } 00737 00739 // loadAllStringMappings // 00741 void VMatrix::loadAllStringMappings() 00742 { 00743 // if this is a StrTableVMatrix, smap are already created 00744 if(classname()=="StrTableVMatrix") 00745 return; 00746 for(int i=0;i<width();i++) 00747 loadStringMapping(i); 00748 } 00749 00751 // loadStringMapping // 00753 void VMatrix::loadStringMapping(int col) 00754 // loads the appropriate string map file for column 'col' 00755 { 00756 if(!hasMetaDataDir()) 00757 return; 00758 string fname = getSFIFFilename(col,".smap"); 00759 init_map_sr(); 00760 force_mkdir(getMetaDataDir()+"FieldInfo"+slash); 00761 00762 if(!isfile(fname)) 
00763 { 00764 // ofstream o(fname.c_str()); 00765 // if(o.bad()) 00766 // PLERROR( string("\nEmpty new file "+fname+" could not be created.\n (This is ony done to check consistency of path. File is deleted afterward.)").c_str()); 00767 // rm(fname); 00768 return; 00769 } 00770 00771 deleteStringMapping(col); 00772 00773 // smap file exists, open it 00774 PIFStream f(fname); 00775 if(!f) 00776 PLERROR( string("File "+fname+" cannot be opened.").c_str()); 00777 00778 /* string pref; 00779 f>>pref; 00780 if(string(pref)!="#SMAP") 00781 PLERROR( string("File "+fname+" is not a valid String mapping file.\nShould start with #SMAP on first line (this is to prevent inopportunely overwritting another type of file)").c_str()); 00782 */ 00783 while(f) 00784 { 00785 string s; 00786 real val; 00787 f >> s >> val; 00788 if(f) 00789 { 00790 map_sr[col][s]= val; 00791 map_rs[col][val]=s; 00792 } 00793 } 00794 } 00795 00797 // copyStringMappingsFrom // 00799 void VMatrix::copyStringMappingsFrom(VMat source) { 00800 if (width_ != source->width()) { 00801 PLERROR("In VMatrix::copyStringMappingsFrom - The source VMatrix doesn't have the same width"); 00802 } 00803 map_rs.resize(width_); 00804 map_sr.resize(width_); 00805 for (int i = 0; i < width_; i++) { 00806 setStringMapping(i, source->getStringToRealMapping(i)); 00807 } 00808 } 00809 00811 TVec<StatsCollector> VMatrix::getStats() const 00812 { 00813 if(!field_stats) 00814 { 00815 string statsfile = getMetaDataDir() + slash+"stats.psave"; 00816 if (isfile(statsfile) && getMtime()<mtime(statsfile)) 00817 { 00818 if(getMtime()==0) 00819 PLWARNING("Warning: using a saved stat file (%s) but mtime is 0.\n(cannot be sure file is up to date)",statsfile.c_str()); 00820 PLearn::load(statsfile, field_stats); 00821 } 00822 else 00823 { 00824 VMat vm = const_cast<VMatrix*>(this); 00825 field_stats = PLearn::computeStats(vm, 2000); 00826 PLearn::save(statsfile, field_stats); 00827 } 00828 } 00829 return field_stats; 00830 } 00831 00832 
TVec<RealMapping> VMatrix::getRanges() 00833 { 00834 TVec<RealMapping> ranges; 00835 string rangefile = getMetaDataDir() + slash+"ranges.psave"; 00836 if(isfile(rangefile)) 00837 PLearn::load(rangefile, ranges); 00838 else 00839 { 00840 ranges = computeRanges(getStats(),std::max(10,length()/200),std::max(10,length()/100) ); 00841 PLearn::save(rangefile, ranges); 00842 } 00843 return ranges; 00844 } 00845 00846 /* 00848 PP<ConditionalStatsCollector> VMatrix::getConditionalStats(int condfield) 00849 { 00850 PP<ConditionalStatsCollector> condst; 00851 TVec<RealMapping> ranges = getRanges(); 00852 string condstatfile = getMetaDataDir() + slash+"stats" + tostring(condfield) + ".psave"; 00853 string rangefile = getMetaDataDir() + slash+"ranges.psave"; 00854 cerr << "rangefile: " << mtime(rangefile) << " condstatfile: " << mtime(condstatfile) << endl; 00855 if(mtime(rangefile)>mtime(condstatfile)) 00856 { 00857 cerr << ">> Computing conditional stats conditioned on field " << condfield << endl; 00858 cerr << " (because file " << rangefile << " was more recent than cache file " << condstatfile << ")" << endl; 00859 condst = computeConditionalStats(this, condfield, ranges); 00860 PLearn::save(condstatfile, *condst); 00861 } 00862 else 00863 PLearn::load(condstatfile, *condst); 00864 return condst; 00865 } 00866 */ 00867 // Eventually to be changed to pure virtual, once get has been implemented in all subclasses 00868 // calls to sample can then be replaced by getRow everywhere 00869 real VMatrix::get(int i, int j) const 00870 { 00871 PLERROR("get(i,j) method not implemented for this VMat (name=%s), please implement.",classname().c_str()); 00872 return 0.0; 00873 } 00874 00875 void VMatrix::put(int i, int j, real value) 00876 { 00877 PLERROR("put(i,j,value) method not implemented for this VMat, please implement."); 00878 } 00879 00880 void VMatrix::getColumn(int j, Vec v) const 00881 { 00882 #ifdef BOUNDCHECK 00883 if(v.length() != length()) 00884 PLERROR("In 
VMatrix::getColumn v must have the same length as the VMatrix"); 00885 #endif 00886 for(int i=0; i<v.length(); i++) 00887 v[i] = get(i,j); 00888 } 00889 00891 // getSubRow // 00893 void VMatrix::getSubRow(int i, int j, Vec v) const 00894 { 00895 for(int k=0; k<v.length(); k++) 00896 v[k] = get(i,j+k); 00897 } 00898 00899 void VMatrix::putSubRow(int i, int j, Vec v) 00900 { 00901 for(int k=0; k<v.length(); k++) 00902 put(i, j+k, v[k]); 00903 } 00904 00906 // getRow // 00908 void VMatrix::getRow(int i, Vec v) const 00909 { 00910 #ifdef BOUNDCHECK 00911 if(v.length() != width()) 00912 PLERROR("In VMatrix::getRow(i,v) length of v and width of VMatrix differ"); 00913 #endif 00914 getSubRow(i,0,v); 00915 } 00916 00917 void VMatrix::putRow(int i, Vec v) 00918 { 00919 #ifdef BOUNDCHECK 00920 if(v.length() != width()) 00921 PLERROR("In VMatrix::putRow(i,v) length of v and width of VMatrix differ"); 00922 #endif 00923 putSubRow(i,0,v); 00924 } 00925 00926 void VMatrix::fill(real value) 00927 { 00928 Vec v(width(), value); 00929 for (int i=0; i<length(); i++) putRow(i,v); 00930 } 00931 00932 void VMatrix::appendRow(Vec v) 00933 { 00934 PLERROR("This method (appendRow) not implemented by VMatrix subclass!"); 00935 } 00936 00937 void VMatrix::flush() 00938 {} 00939 00940 void VMatrix::putOrAppendRow(int i, Vec v) 00941 { 00942 if(i==length()) 00943 appendRow(v); 00944 else if(i<length()) 00945 putRow(i,v); 00946 else 00947 PLERROR("In putOrAppendRow, index %d out of range",i); 00948 } 00949 00950 void VMatrix::forcePutRow(int i, Vec v) 00951 { 00952 if(i<length()) 00953 putRow(i,v); 00954 else 00955 { 00956 Vec emptyrow(width()); 00957 emptyrow.clear(); 00958 while(length()<i) 00959 appendRow(emptyrow); 00960 appendRow(v); 00961 } 00962 } 00963 00964 void VMatrix::getMat(int i, int j, Mat m) const 00965 { 00966 #ifdef BOUNDCHECK 00967 if(i<0 || j<0 || i+m.length()>length() || j+m.width()>width()) 00968 PLERROR("In VMatrix::getMat(i,j,m) OUT OF BOUNDS"); 00969 #endif 00970 
for(int ii=0; ii<m.length(); ii++) 00971 { 00972 Vec v = m(ii); 00973 getSubRow(i+ii, j, v); 00974 } 00975 } 00976 00977 void VMatrix::putMat(int i, int j, Mat m) 00978 { 00979 #ifdef BOUNDCHECK 00980 if(i<0 || j<0 || i+m.length()>length() || j+m.width()>width()) 00981 PLERROR("In VMatrix::putMat(i,j,m) OUT OF BOUNDS"); 00982 #endif 00983 for(int ii=0; ii<m.length(); ii++) 00984 { 00985 Vec v = m(ii); 00986 putSubRow(i+ii, j, v); 00987 } 00988 } 00989 00990 void VMatrix::compacify() {} 00991 00992 00993 Mat VMatrix::toMat() const 00994 { 00995 Mat m(length(),width()); 00996 getMat(0,0,m); 00997 return m; 00998 } 00999 01000 VMat VMatrix::subMat(int i, int j, int l, int w) 01001 { return new SubVMatrix(this,i,j,l,w); } 01002 01004 // dot // 01006 real VMatrix::dot(int i1, int i2, int inputsize) const 01007 { 01008 dotrow_1.resize(inputsize); 01009 dotrow_2.resize(inputsize); 01010 getSubRow(i1, 0, dotrow_1); 01011 getSubRow(i2, 0, dotrow_2); 01012 return PLearn::dot(dotrow_1, dotrow_2); 01013 } 01014 01015 real VMatrix::dot(int i, const Vec& v) const 01016 { 01017 dotrow_1.resize(v.length()); 01018 getSubRow(i, 0, dotrow_1); 01019 return PLearn::dot(dotrow_1, v); 01020 } 01021 01023 // getRow // 01025 void VMatrix::getRow(int i, VarArray& inputs) const 01026 { 01027 Vec v(width()); 01028 getRow(i,v); 01029 inputs << v; 01030 } 01031 01032 01034 // find // 01036 bool VMatrix::find(const Vec& input, real tolerance, int* i) const { 01037 get_row.resize(inputsize()); 01038 #ifdef BOUNDCHECK 01039 if (input.length() != inputsize()) 01040 PLERROR("In VMatrix::find - The given vector must be the same size as inputsize"); 01041 #endif 01042 for (int j = 0; j < length(); j++) { 01043 getSubRow(j, 0, get_row); 01044 if (powdistance(input, get_row, 2.0) < tolerance) { 01045 if (i) 01046 *i = j; 01047 return true; 01048 } 01049 } 01050 if (i) 01051 *i = -1; 01052 return false; 01053 } 01054 01055 void VMatrix::print(ostream& out) const 01056 { 01057 Vec v(width()); 01058 
for(int i=0; i<length(); i++) 01059 { 01060 getRow(i,v); 01061 out << v << endl; 01062 } 01063 } 01064 01065 void VMatrix::oldwrite(ostream& out) const 01066 { 01067 writeHeader(out,"VMatrix"); 01068 writeField(out,"length_", length_); 01069 writeField(out,"width_", width_); 01070 //writeField(out,"fieldinfos", fieldinfos); 01071 //writeField(out,"fieldstats", fieldstats); 01072 writeFooter(out,"VMatrix"); 01073 } 01074 01075 void VMatrix::oldread(istream& in) 01076 { 01077 readHeader(in,"VMatrix"); 01078 readField(in,"length_", length_); 01079 readField(in,"width_", width_); 01080 //readField(in,"fieldinfos", fieldinfos); 01081 //readField(in,"fieldstats", fieldstats); 01082 readFooter(in,"VMatrix"); 01083 } 01084 01085 VMatrix::~VMatrix() 01086 {} 01087 01088 void VMatrix::save(const string& filename) const 01089 { savePMAT(filename); } 01090 01091 void VMatrix::savePMAT(const string& pmatfile) const 01092 { 01093 if (width() == -1) 01094 PLERROR("In VMat::save Saving in a pmat file is only possible for constant width Distributions (where width()!=-1)"); 01095 01096 int nsamples = length(); 01097 01098 FileVMatrix m(pmatfile,nsamples,width()); 01099 m.setFieldInfos(getFieldInfos()); 01100 Vec tmpvec(width()); 01101 01102 ProgressBar pb(cout, "Saving to pmat", nsamples); 01103 01104 for(int i=0; i<nsamples; i++) 01105 { 01106 getRow(i,tmpvec); 01107 m.putRow(i,tmpvec); 01108 pb(i); 01109 } 01110 01111 //save field names if necessary 01112 if (fieldinfos.size() > 0) m.saveFieldInfos(); 01113 } 01114 01115 void VMatrix::saveDMAT(const string& dmatdir) const 01116 { 01117 force_rmdir(dmatdir); 01118 DiskVMatrix vm(dmatdir,width()); 01119 vm.setFieldInfos(getFieldInfos()); 01120 Vec v(width()); 01121 01122 ProgressBar pb(cout, "Saving to dmat", length()); 01123 01124 for(int i=0;i<length();i++) 01125 { 01126 getRow(i,v); 01127 vm.appendRow(v); 01128 pb(i); 01129 //cerr<<i<<" "<<flush; 01130 } 01131 } 01132 01134 // saveAMAT // 01136 void VMatrix::saveAMAT(const 
string& amatfile, bool verbose, bool no_header) const
{
    // Unless no_header is set, the file starts with a "#size: <length> <width>"
    // line and, when width > 0, a "#: " line listing the field names.
    // When verbose is set, a progress bar is displayed on cout.
    int l = length();
    int w = width();

    ofstream out(amatfile.c_str());
    if (!out)
    {
        PLERROR("In saveAscii could not open file %s for writing",amatfile.c_str());
    }

    // Precision only influences the floating point values written below.
    out.precision(15);

    if (!no_header)
    {
        out << "#size: " << l << ' ' << w << endl;
        if (w > 0)
        {
            out << "#: ";
            for (int col = 0; col < w; ++col)
            {
                // there must not be any space in a field name...
                out << space_to_underscore(fieldName(col)) << ' ';
            }
            out << "\n";
        }
    }

    Vec row(w);

    // The progress bar only exists in verbose mode.
    ProgressBar* pb = verbose ? new ProgressBar(cout, "Saving to amat", length()) : 0;

    for (int r = 0; r < l; ++r)
    {
        getRow(r, row);
        for (int col = 0; col < w; ++col)
            out << row[col] << ' ';
        out << "\n";
        if (pb)
            pb->update(r);
    }

    // Deleting a null pointer is a no-op, so no test on verbose is needed.
    delete pb;
}

// This will compute for this vmat m a result vector (whose length must be the same as m's)
// s.t. result[i] = ker( m(i).subVec(v1_startcol,v1_ncols) , v2)
// i.e.
the kernel value betweeen each (sub)row of m and v2 01180 void VMatrix::evaluateKernel(Ker ker, int v1_startcol, int v1_ncols, 01181 const Vec& v2, const Vec& result, int startrow, int nrows) const 01182 { 01183 int endrow = (nrows>0) ?startrow+nrows :length_; 01184 if(result.length() != endrow-startrow) 01185 PLERROR("In VMatrix::evaluateKernel length of result vector does not match the row range"); 01186 01187 Vec v1(v1_ncols); 01188 for(int i=startrow; i<endrow; i++) 01189 { 01190 getSubRow(i,v1_startcol,v1); 01191 result[i] = ker(v1,v2); 01192 } 01193 } 01194 01195 // returns sum_i [ ker( m(i).subVec(v1_startcol,v1_ncols) , v2) ] 01196 real VMatrix::evaluateKernelSum(Ker ker, int v1_startcol, int v1_ncols, 01197 const Vec& v2, int startrow, int nrows, int ignore_this_row) const 01198 { 01199 int endrow = (nrows>0) ?startrow+nrows :length_; 01200 double result = 0.; 01201 Vec v1(v1_ncols); 01202 for(int i=startrow; i<endrow; i++) 01203 if(i!=ignore_this_row) 01204 { 01205 getSubRow(i,v1_startcol,v1); 01206 result += ker(v1,v2); 01207 } 01208 return (real)result; 01209 } 01210 01211 // targetsum := sum_i [ m(i).subVec(t_startcol,t_ncols) * ker( m(i).subVec(v1_startcol,v1_ncols) , v2) ] 01212 // and returns sum_i [ ker( m(i).subVec(v1_startcol,v1_ncols) , v2) ] 01213 real VMatrix::evaluateKernelWeightedTargetSum(Ker ker, int v1_startcol, int v1_ncols, const Vec& v2, 01214 int t_startcol, int t_ncols, Vec& targetsum, int startrow, int nrows, int ignore_this_row) const 01215 { 01216 int endrow = (nrows>0) ?startrow+nrows :length_; 01217 targetsum.clear(); 01218 double result = 0.; 01219 Vec v1(v1_ncols); 01220 Vec target(t_ncols); 01221 for(int i=startrow; i<endrow; i++) 01222 if(i!=ignore_this_row) 01223 { 01224 getSubRow(i,v1_startcol,v1); 01225 getSubRow(i,t_startcol,target); 01226 real kerval = ker(v1,v2); 01227 result += kerval; 01228 multiplyAcc(targetsum, target,kerval); 01229 } 01230 return (real)result; 01231 } 01232 01233 TVec< pair<real,int> > 
VMatrix::evaluateKernelTopN(int N, Ker ker, int v1_startcol, int v1_ncols, 01234 const Vec& v2, int startrow, int nrows, int ignore_this_row) const 01235 { 01236 int endrow = (nrows>0) ?startrow+nrows :length_; 01237 TopNI<real> extrema(N); 01238 Vec v1(v1_ncols); 01239 for(int i=startrow; i<endrow; i++) 01240 if(i!=ignore_this_row) 01241 { 01242 getSubRow(i,v1_startcol,v1); 01243 real kerval = ker(v1,v2); 01244 extrema.update(kerval,i); 01245 } 01246 extrema.sort(); 01247 return extrema.getTopN(); 01248 } 01249 01250 TVec< pair<real,int> > VMatrix::evaluateKernelBottomN(int N, Ker ker, int v1_startcol, int v1_ncols, 01251 const Vec& v2, int startrow, int nrows, int ignore_this_row) const 01252 { 01253 int endrow = (nrows>0) ?startrow+nrows :length_; 01254 BottomNI<real> extrema(N); 01255 Vec v1(v1_ncols); 01256 for(int i=startrow; i<endrow; i++) 01257 if(i!=ignore_this_row) 01258 { 01259 getSubRow(i,v1_startcol,v1); 01260 real kerval = ker(v1,v2); 01261 extrema.update(kerval,i); 01262 } 01263 extrema.sort(); 01264 return extrema.getBottomN(); 01265 } 01266 01267 // result += transpose(X).Y 01268 // Where X = this->subMatColumns(X_startcol,X_ncols) 01269 // and Y = this->subMatColumns(Y_startcol,Y_ncols); 01270 void VMatrix::accumulateXtY(int X_startcol, int X_ncols, int Y_startcol, int Y_ncols, 01271 Mat& result, int startrow, int nrows, int ignore_this_row) const 01272 { 01273 int endrow = (nrows>0) ?startrow+nrows :length_; 01274 Vec x(X_ncols); 01275 Vec y(Y_ncols); 01276 for(int i=startrow; i<endrow; i++) 01277 if(i!=ignore_this_row) 01278 { 01279 getSubRow(i,X_startcol,x); 01280 getSubRow(i,Y_startcol,y); 01281 externalProductAcc(result, x,y); 01282 } 01283 } 01284 01285 // result += transpose(X).Y 01286 // Where X = this->subMatColumns(X_startcol,X_ncols) 01287 void VMatrix::accumulateXtX(int X_startcol, int X_ncols, 01288 Mat& result, int startrow, int nrows, int ignore_this_row) const 01289 { 01290 Vec x(X_ncols); 01291 int endrow = (nrows>0) 
?startrow+nrows :length_; 01292 for(int i=startrow; i<endrow; i++) 01293 if(i!=ignore_this_row) 01294 { 01295 getSubRow(i,X_startcol,x); 01296 externalProductAcc(result, x,x); 01297 } 01298 } 01299 01300 void VMatrix::evaluateSumOfFprop(Func f, Vec& output_result, int nsamples) 01301 { 01302 //if (f->outputs.size()!=1) 01303 // PLERROR("In evaluateSumOfFprop: function must have a single variable output (maybe you can concat the vars into a single one, if this is really what you want)"); 01304 01305 static int curpos = 0; 01306 if (nsamples == -1) nsamples = length(); 01307 Vec input_value(width()); 01308 Vec output_value(output_result.length()); 01309 01310 f->recomputeParents(); 01311 output_result.clear(); 01312 01313 for(int i=0; i<nsamples; i++) 01314 { 01315 getRow(curpos++, input_value); 01316 f->fprop(input_value, output_value); 01317 output_result += output_value; 01318 if(curpos == length()) curpos = 0; 01319 } 01320 } 01321 01322 void VMatrix::evaluateSumOfFbprop(Func f, Vec& output_result, Vec& output_gradient, int nsamples) 01323 { 01324 // if(f->outputs.size()!=1) 01325 // PLERROR("In evaluateSumOfFprop: function must have a single variable output (maybe you can concat the vars into a single one, if this is really what you want)"); 01326 01327 static int curpos = 0; 01328 if (nsamples == -1) nsamples = length(); 01329 Vec input_value(width()); 01330 Vec input_gradient(width()); 01331 Vec output_value(output_result.length()); 01332 01333 f->recomputeParents(); 01334 output_result.clear(); 01335 01336 for(int i=0; i<nsamples; i++) 01337 { 01338 getRow(curpos++, input_value); 01339 f->fbprop(input_value, output_value, input_gradient, output_gradient); 01340 //displayFunction(f, true); 01341 output_result += output_value; 01342 if(curpos == length()) curpos = 0; 01343 } 01344 } 01345 01346 01347 } // end of namespace PLearn

Generated on Tue Aug 17 16:10:41 2004 for PLearn by doxygen 1.3.7