Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

VMatLanguage.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 2002 Pascal Vincent and Julien Keable 00005 // 00006 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 00036 00037 00038 /* ******************************************************* 00039 * $Id: VMatLanguage.cc,v 1.28 2004/07/29 13:04:22 tihocan Exp $ 00040 * This file is part of the PLearn library. 00041 ******************************************************* */ 00042 00043 #include "VMatLanguage.h" 00044 #include <plearn/base/stringutils.h> 00045 #include <plearn/io/fileutils.h> 00046 #include <plearn/db/getDataSet.h> 00047 #include <plearn/base/PDate.h> 00048 00049 namespace PLearn { 00050 using namespace std; 00051 00052 bool VMatLanguage::output_preproc=false; 00053 00054 // returns oldest modification date of a file containing VPL code, searching recursively every 00055 // file placed after a INCLUDE token 00056 time_t getDateOfCode(const string& codefile) 00057 { 00058 time_t latest = mtime(codefile); 00059 string token; 00060 ifstream in(codefile.c_str()); 00061 if(in.bad()) 00062 PLERROR("Cannot open file : %s",codefile.c_str()); 00063 00064 in >> token; 00065 while(!in.eof()) 00066 { 00067 if(token=="INCLUDE") 00068 { 00069 in >> token; 00070 time_t t=getDateOfCode(token); 00071 if(t>latest) 00072 latest=t; 00073 } 00074 in >> token; 00075 } 00076 return latest; 00077 } 00078 00079 map<string, int> VMatLanguage::opcodes; 00080 00081 PLEARN_IMPLEMENT_OBJECT(VMatLanguage, "ONE LINE DESCR", "NO HELP"); 00082 00083 void 00084 VMatLanguage::build() 00085 { 00086 inherited::build(); 00087 build_(); 00088 } 00089 00090 void 00091 VMatLanguage::build_() 00092 { 00093 build_opcodes_map(); 00094 } 00095 00096 void 00097 VMatLanguage::declareOptions(OptionList &ol) 00098 { 00099 declareOption(ol, "vmsource", &VMatLanguage::vmsource, OptionBase::buildoption, ""); 00100 inherited::declareOptions(ol); 00101 } 00102 00103 // this function (that really should be sliced to to smaller pieces someday) takes raw VPL code and 00104 // returns the preprocessed sourcecode along with the defines and the fieldnames it generated 00105 void VMatLanguage::preprocess(istream& in, map<string, string>& defines, string& processed_sourcecode, vector<string>& fieldnames) 00106 { 00107 // PStream pout(&cout); 00108 // pout << defines << endl; 00109 char buf[500]; 00110 string token; 00111 size_t spos; 00112 map<string,string>::iterator pos; 00113 while(in >> token) 00114 { 00115 pos=defines.find(token); 00116 00117 // are we sitting on a mapping declaration? 00118 if(token[0]=='{') 00119 { 00120 //skip mapping to avoid brackets conflicts with fieldcopy macro syntax 00121 char car; 00122 processed_sourcecode+=token; 00123 // if the token is only a part of the mapping... 00124 if(token.find("}")==string::npos) 00125 { 00126 // just eat till the end of the mapping 00127 while((car=in.get())!='}' && !in.eof()) 00128 processed_sourcecode+=car; 00129 processed_sourcecode+="}"; 00130 } 00131 } 00132 // did we find a fieldName declaration? 00133 // format is either :myField or :myField:a:b 00134 else if(token[0]==':') 00135 { 00136 if(isBlank(token.substr(1))) 00137 PLERROR("Found a ':' with no fieldname. Do not put a whitespace after the ':'"); 00138 vector<string> parts=split(token,":"); 00139 if(parts.size()==3) 00140 { 00141 int a=toint(parts[1]); 00142 int b=0; 00143 // let the chance for the second interval boundary to be a "DEFINE" 00144 // this is used with onehot and @myfield.ranges10.nbins 00145 // ie: @myfield.onehot10 :myfieldonehot:0:@myfield.ranges10.nbins 00146 if(pl_isnumber(parts[2])) 00147 b=toint(parts[2]); 00148 else 00149 { 00150 if(defines.find(parts[2])!=defines.end()) 00151 b=toint(defines[parts[2]]); 00152 else 00153 PLERROR("found a undefined non-numeric boundary in multifield declaration : '%s'",parts[2].c_str()); 00154 } 00155 00156 for(int i=a;i<=b;i++) 00157 fieldnames.push_back(parts[0]+tostring(i)); 00158 } 00159 else if (parts.size()==1) 00160 fieldnames.push_back(token.substr(1)); 00161 else PLERROR("Strange fieldname format (multiple declaration format is :label:0:10"); 00162 } 00163 // did we find a fieldcopy macro? 00164 else if(token[0]=='[') 00165 { 00166 vector<string> parts=split(token.substr(1),":]"); 00167 00168 // fieldcopy macro type is [start,end] 00169 // fields can be refered to as %number or @name 00170 if(parts.size()==2) 00171 { 00172 string astr=parts[0].substr(1); 00173 string bstr=parts[1].substr(1); 00174 int a=-1,b=-1; 00175 00176 if(parts[0][0]=='@') 00177 { 00178 for(int i=0;i<vmsource.width();i++) 00179 if(vmsource->fieldName(i)==astr){a=i;break;} 00180 } 00181 else if(parts[0][0]=='%') 00182 a=toint(parts[0].substr(1)); 00183 else PLERROR("fieldcopy macro syntax is : [start:end] EG: [@year:%6]. 'end' must be after 'start'.. OR [field] to copy a single field"); 00184 00185 if(parts[1][0]=='@') 00186 { 00187 for(int i=0;i<vmsource.width();i++) 00188 if(vmsource->fieldName(i)==bstr){b=i;break;} 00189 } 00190 else if(parts[1][0]=='%') 00191 b=toint(parts[1].substr(1)); 00192 else PLERROR("fieldcopy macro syntax is : [start:end] EG: [@year:%6]. 'end' must be after 'start'.. OR [field] to copy a single field"); 00193 00194 if(a>b) 00195 PLERROR("In copyfield macro, you have specified a start field that is after the end field. Eg : [%10:%5]"); 00196 if(a==-1) 00197 PLERROR("In copyfield macro, unknown field :%s",astr.c_str()); 00198 if(b==-1) 00199 PLERROR("In copyfield macro, unknown field :%s",astr.c_str()); 00200 00201 for(int i=a;i<=b;i++) 00202 { 00203 processed_sourcecode+=string("%")+tostring(i)+ " "; 00204 fieldnames.push_back(vmsource->fieldName(i)); 00205 } 00206 } 00207 // fieldcopy macro type is [field] 00208 else if(parts.size()==1) 00209 { 00210 string astr=parts[0].substr(1); 00211 int a=-1; 00212 if(parts[0][0]=='@') 00213 { 00214 for(int i=0;i<vmsource.width();i++) 00215 if(vmsource->fieldName(i)==astr){a=i;break;} 00216 } 00217 else if(parts[0][0]=='%') 00218 a=toint(parts[0].substr(1)); 00219 else PLERROR("fieldcopy macro syntax is : [start:end] EG: [@year:%6]. 'end' must be after 'start'.. OR [field] to copy a single field"); 00220 if(a==-1) 00221 PLERROR("In copyfield macro, unknown field :%s",astr.c_str()); 00222 processed_sourcecode+=string("%")+tostring(a)+ " "; 00223 fieldnames.push_back(vmsource->fieldName(a)); 00224 } 00225 else PLERROR("Strange fieldcopy format. e.g : [%0:%5]. Found parts %s",join(parts," ").c_str()); 00226 } 00227 00228 // did we find a comment? 00229 else if(token[0]=='#') 00230 skipRestOfLine(in); 00231 00232 // include declaration 00233 else if(token=="INCLUDE") 00234 { 00235 in >> token; 00236 // Try to be intelligent and find out if the file belongs directly to another .?mat (the case of a 00237 // stats file for example) and warn if the file is out of date 00238 00239 // Mhhh.. is this still pertinent? This "stats" and "bins" thing is semi-standard I think 00240 size_t idx_meta = token.find(".metadata"); 00241 size_t idx_stats = token.find("stats."); 00242 size_t idx_bins = token.find("bins."); 00243 if(idx_meta!=string::npos && (idx_stats!=string::npos || idx_bins!=string::npos)) 00244 { 00245 string file=token.substr(0,idx_meta); 00246 if(getDataSetDate(file) > mtime(token)) 00247 PLWARNING("File %s seems out of date with parent matrix %s",token.c_str(),file.c_str()); 00248 } 00249 00250 ifstream incfile(token.c_str()); 00251 if(incfile.bad()) 00252 PLERROR("Cannot open file %s\n",token.c_str()); 00253 // process recursively this included file 00254 // **POSSIBLE DRAWBACK : defines done in this file will be used in the next recursion level 00255 preprocess(incfile,defines, processed_sourcecode,fieldnames); 00256 00257 } 00258 // define declaration 00259 else if(token=="DEFINE") 00260 { 00261 in >> token; 00262 in.getline(buf,500); 00263 defines[token.c_str()]=string(buf); 00264 } 00265 else if(pos!=defines.end()) 00266 { 00267 // the token is a macro (define) so we process it recursively until it's stable 00268 // (necessary since the define macro can use defines recursively) 00269 string oldstr=pos->second,newstr; 00270 bool unstable=true; 00271 while(unstable) 00272 { 00273 istrstream strm(oldstr.c_str()); 00274 newstr=""; 00275 preprocess(strm,defines,newstr,fieldnames); 00276 if(removeblanks(oldstr)==removeblanks(newstr)) 00277 unstable=false; 00278 oldstr=newstr; 00279 } 00280 processed_sourcecode+=newstr + " "; 00281 } 00282 // did we find a reference to a string value of a VMatrix that has overloaded getStringVal(..) e.g.:StrTableVMatrix 00283 // In VPL, you can push on the stack the value of a string according to the string map of a particular column 00284 // e.g. : to push value of string "WBush" from field MostSuspectAmericanPresidents, write @MostSuspectsAmericanPresidents."WBush" 00285 else if ((token[0]=='@' || token[0]=='%') && token[token.length()-1]=='"' && (spos=token.find(".\""))!=string::npos) 00286 00287 { 00288 string colname=token.substr(1,spos-1); 00289 string str=token.substr(spos+2,token.length()-spos-3); 00290 // do we have a named field reference? 00291 if(token[0]=='@') 00292 { 00293 pos=defines.find(string("@")+colname); 00294 if(pos==defines.end()) 00295 PLERROR("unknown field : %s",colname.c_str()); 00296 colname=pos->second.substr(1); 00297 } 00298 int colnum=toint(colname); 00299 real r=vmsource->getStringVal(colnum,str); 00300 if(is_missing(r)) 00301 PLERROR("%s : %s is not a known string for this field",token.c_str(),str.c_str()); 00302 processed_sourcecode+=tostring(r)+" "; 00303 } 00304 else processed_sourcecode+=token + " "; 00305 } 00306 } 00307 00308 // generate bytecode from preprocessed sourcecode 00309 void VMatLanguage::generateCode(istream& processed_sourcecode) 00310 { 00311 char car; 00312 string token; 00313 map<string,int>::iterator pos; 00314 car=peekAfterSkipBlanks(processed_sourcecode); 00315 while(!processed_sourcecode.eof()) 00316 { 00317 if (car=='{') 00318 { 00319 int mapnum = mappings.size(); 00320 mappings.resize(mapnum+1); 00321 mappings[mapnum].read(processed_sourcecode); 00322 program.append(opcodes["__applymapping"]); 00323 program.append(mapnum); 00324 } 00325 else 00326 { 00327 processed_sourcecode>>token; 00328 if( pl_isnumber(token)) 00329 // assume we have a float 00330 { 00331 float zefloat=tofloat(token); 00332 program.append(opcodes["__insertconstant"]); 00333 program.append(*(int*)&zefloat); 00334 } 00335 else if (token[0]=='%') 00336 { 00337 vector<string> parts=split(token,":"); 00338 if (parts.size()==1) // expecting e.g. %10 for column 10 00339 { 00340 program.append(opcodes["__getfieldval"]); 00341 int val=toint(token.substr(1)); 00342 program.append(val); 00343 } 00344 else if (parts.size()==2) // expecting e.g. %10-%20 for columns 10 to 20 inclusive 00345 { 00346 program.append(opcodes["__getfieldsrange"]); 00347 int a=toint(parts[0].substr(1)); 00348 int b=toint(parts[1].substr(1)); 00349 program.append(a); 00350 program.append(b); 00351 } 00352 } 00353 else 00354 { 00355 pos=opcodes.find(token); 00356 if(pos!=opcodes.end()) 00357 program.append(pos->second); 00358 else PLERROR("Undefined keyword : %s",token.c_str()); 00359 } 00360 } 00361 car=peekAfterSkipBlanks(processed_sourcecode); 00362 } 00363 } 00364 00365 void VMatLanguage::generateCode(const string& processed_sourcecode) 00366 { 00367 istrstream in(processed_sourcecode.c_str()); 00368 generateCode(in); 00369 } 00370 00371 //void declareField(int fieldindex, const string& fieldname, VMField::FieldType fieldtype=VMField::UnknownType) 00372 00373 void VMatLanguage::compileString(const string & code, vector<string>& fieldnames) 00374 { 00375 istrstream in(code.c_str()); 00376 compileStream(in,fieldnames); 00377 } 00378 00379 void VMatLanguage::compileFile(const string & filename, vector<string>& fieldnames) 00380 { 00381 ifstream in(filename.c_str()); 00382 if(in.bad()) 00383 PLERROR("Cannot open file %s",filename.c_str()); 00384 compileStream(in,fieldnames); 00385 } 00386 00387 void VMatLanguage::compileStream(istream & in, vector<string>& fieldnames) 00388 { 00389 map<string,string> defines; 00390 string processed_sourcecode; 00391 00392 program.resize(0); 00393 mappings.resize(0); 00394 00395 // first, warn user if a fieldname appears twice or more in the source matrix 00396 for(int i=0;i<vmsource.width();i++) 00397 { 00398 if(defines.find(string("@")+vmsource->fieldName(i)) != defines.end()) 00399 PLERROR("fieldname %s is duplicate in processed matrix",(string("@")+vmsource->fieldName(i)).c_str()); 00400 defines[string("@")+vmsource->fieldName(i)]=string("%")+tostring(i); 00401 } 00402 00403 // the filednames parameter is an output vector in which we put the fieldnames of the final VMat 00404 fieldnames.clear(); 00405 preprocess(in, defines, processed_sourcecode, fieldnames); 00406 00407 if(output_preproc) 00408 { 00409 cerr<<"Preprocessed code:"<<endl<<processed_sourcecode<<endl; 00410 cerr<<"FieldNames : "<<endl<<fieldnames<<endl; 00411 } 00412 generateCode(processed_sourcecode); 00413 } 00414 00416 void VMatLanguage::build_opcodes_map() 00417 { 00418 if(opcodes.empty()) 00419 { 00420 opcodes["__insertconstant"] = 0; // followed by a floating point number (4 bytes, just like int) 00421 opcodes["__getfieldval"] = 1; // followed by field# 00422 opcodes["__applymapping"] = 2; // followed by mapping# 00423 opcodes["pop"] = 3; 00424 opcodes["dup"] = 4; 00425 opcodes["exch"] = 5; 00426 opcodes["onehot"] = 6; 00427 opcodes["+"] = 7; 00428 opcodes["-"] = 8; 00429 opcodes["*"] = 9; 00430 opcodes["/"] = 10; 00431 opcodes["=="] = 11; 00432 opcodes["!="] = 12; 00433 opcodes[">"] = 13; 00434 opcodes[">="] = 14; 00435 opcodes["<"] = 15; 00436 opcodes["<="] = 16; 00437 opcodes["and"] = 17; 00438 opcodes["or"] = 18; 00439 opcodes["not"] = 19; 00440 opcodes["ifelse"] = 20; 00441 opcodes["fabs"] = 21; 00442 opcodes["rint"] = 22; 00443 opcodes["floor"] = 23; 00444 opcodes["ceil"] = 24; 00445 opcodes["log"] = 25; 00446 opcodes["exp"] = 26; 00447 opcodes["rowindex"] = 27; // pushes the rownum on the stack 00448 opcodes["isnan"] = 28; 00449 opcodes["year"] = 29; // from format CYYMMDD -> YYYY 00450 opcodes["month"] = 30; // CYYMMDD -> MM 00451 opcodes["day"] = 31; // CYYMMDD -> DD 00452 opcodes["daydiff"] = 32; // nb. days 00453 opcodes["monthdiff"] = 33; // continuous: nb. days / (365.25/12) 00454 opcodes["yeardiff"] = 34; // continuous: nb. days / 365.25 00455 opcodes["year_month_day"] = 35; // CYYMMDD -> YYYY MM DD 00456 opcodes["todate"] = 36; // YYYY MM DD -> CYYMMDD 00457 opcodes["dayofweek"] = 37; // from CYYMMDD -> [0..6] (0=monday; 6=sunday) 00458 opcodes["today"] = 38; // today's date CYYMMDD 00459 opcodes["date2julian"] = 39; // CYYMMDD -> nb. days 00460 opcodes["julian2date"] = 40; // nb. days -> CYYMMDD 00461 opcodes["min"] = 41; // a b -> (a<b? a : b) 00462 opcodes["max"] = 42; // a b -> (a<b? b : a) 00463 opcodes["sqrt"] = 43; 00464 opcodes["^"] = 44; 00465 opcodes["mod"] = 45; 00466 opcodes["vecscalmul"] = 46; // x1 ... xn n alpha --> (x1*alpha) ... (xn*alpha) 00467 opcodes["__getfieldsrange"] = 47; // %N:%M pushes field %N up to %M. M must be >= N. 00468 opcodes["select"] = 48; // v0 v1 v2 v3 ... vn-1 n i --> vi 00469 opcodes["length"] = 49; // the length of the currently processed column. 00470 opcodes["sign"] = 50; // a --> sign(a) (0 -1 or +1) 00471 opcodes["get"] = 51; // pos --> value_of_stack_at_pos (if pos is negative then it's relative to stacke end ex: -1 get will get the previous element of stack) 00472 opcodes["memput"] = 52; // a mempos --> ( a is saved in memory position mempos) 00473 opcodes["memget"] = 53; // mempos --> val ( gets val from memory in position mempos) 00474 opcodes["neg"] = 54; // a --> -a 00475 opcodes["missing"] = 55; // a missing value 00476 } 00477 } 00478 00479 void VMatLanguage::run(const Vec& srcvec, const Vec& result, int rowindex) const 00480 { 00481 real a,b,c; 00482 if(srcvec.length()!=vmsource.width()) 00483 PLERROR("In VMatLanguage::run, srcvec should have length %d, not %d.",vmsource.width(),srcvec.length()); 00484 pstack.resize(0); 00485 TVec<int>::iterator pptr = program.begin(); 00486 TVec<int>::iterator pptrend = program.end(); 00487 real* pfieldvalues = srcvec.data(); 00488 while(pptr!=pptrend) 00489 { 00490 int op = *pptr++; 00491 switch(op) 00492 { 00493 case 0: // insertconstant 00494 pstack.push(*((float*)pptr++)); 00495 break; 00496 case 1: // getfieldval 00497 //if(*pptr > fieldvalues.width()) PLERROR("Tried to acces an out of bound field in VPL code"); 00498 pstack.push(pfieldvalues[*pptr++]); 00499 break; 00500 case 2: // applymapping 00501 pstack.push(mappings[*pptr++].map(pstack.pop())); 00502 break; 00503 case 3: // pop 00504 pstack.pop(); 00505 break; 00506 case 4: // dup 00507 pstack.push(pstack.top()); 00508 break; 00509 case 5: // exch 00510 b = pstack.pop(); 00511 a = pstack.pop(); 00512 pstack.push(b); 00513 pstack.push(a); 00514 break; 00515 case 6: // onehot 00516 { 00517 int nclasses = int(pstack.pop()); 00518 int index = int(pstack.pop()); 00519 for(int i=0; i<nclasses; i++) 00520 pstack.push(i==index ?1 :0); 00521 } 00522 break; 00523 case 7: // + 00524 b = pstack.pop(); 00525 a = pstack.pop(); 00526 pstack.push(a+b); 00527 break; 00528 case 8: // - 00529 b = pstack.pop(); 00530 a = pstack.pop(); 00531 pstack.push(a-b); 00532 break; 00533 case 9: // * 00534 b = pstack.pop(); 00535 a = pstack.pop(); 00536 pstack.push(a*b); 00537 break; 00538 case 10: // / 00539 b = pstack.pop(); 00540 a = pstack.pop(); 00541 pstack.push(a/b); 00542 break; 00543 case 11: // == 00544 b = pstack.pop(); 00545 a = pstack.pop(); 00546 pstack.push( ((float)a==(float)b) ?1 :0); 00547 break; 00548 case 12: // != 00549 b = pstack.pop(); 00550 a = pstack.pop(); 00551 pstack.push(((float)a!=(float)b) ?1 :0); 00552 break; 00553 case 13: // > 00554 b = pstack.pop(); 00555 a = pstack.pop(); 00556 pstack.push(((float)a>(float)b) ?1 :0); 00557 break; 00558 case 14: // >= 00559 b = pstack.pop(); 00560 a = pstack.pop(); 00561 pstack.push(((float)a>=(float)b) ?1 :0); 00562 break; 00563 case 15: // < 00564 b = pstack.pop(); 00565 a = pstack.pop(); 00566 pstack.push(((float)a<(float)b) ?1 :0); 00567 break; 00568 case 16: // <= 00569 b = pstack.pop(); 00570 a = pstack.pop(); 00571 pstack.push(((float)a<=(float)b) ?1 :0); 00572 break; 00573 case 17: // and 00574 b = pstack.pop(); 00575 a = pstack.pop(); 00576 pstack.push((a&&b) ?1 :0); 00577 break; 00578 case 18: // or 00579 b = pstack.pop(); 00580 a = pstack.pop(); 00581 pstack.push((a||b) ?1 :0); 00582 break; 00583 case 19: // not 00584 pstack.push((pstack.pop()==0) ?1 :0); 00585 break; 00586 case 20: // ifelse 00587 c = pstack.pop(); 00588 b = pstack.pop(); 00589 a = pstack.pop(); 00590 pstack.push((a!=0)?b:c); 00591 break; 00592 case 21: // fabs 00593 pstack.push(fabs(pstack.pop())); 00594 break; 00595 case 22: // rint 00596 pstack.push(rint(pstack.pop())); 00597 break; 00598 case 23: // floor 00599 pstack.push(floor(pstack.pop())); 00600 break; 00601 case 24: // ceil 00602 pstack.push(ceil(pstack.pop())); 00603 break; 00604 case 25: // log 00605 pstack.push(log(pstack.pop())); 00606 break; 00607 case 26: // exp 00608 pstack.push(exp(pstack.pop())); 00609 break; 00610 case 27: // rowindex 00611 pstack.push(real(rowindex)); 00612 break; 00613 case 28: // isnan 00614 pstack.push(isnan(pstack.pop())?1:0); 00615 break; 00616 case 29: //year 00617 pstack.push(float_to_date(pstack.pop()).year); 00618 break; 00619 case 30: //month 00620 pstack.push(float_to_date(pstack.pop()).month); 00621 break; 00622 case 31: //day 00623 pstack.push(float_to_date(pstack.pop()).day); 00624 break; 00625 case 32: //daydiff 00626 b= pstack.pop(); 00627 a= pstack.pop(); 00628 if (!isnan(a) && !isnan(b)) { 00629 pstack.push(float_to_date(a)-float_to_date(b)); 00630 } 00631 else { 00632 pstack.push(MISSING_VALUE); 00633 } 00634 break; 00635 case 33: //monthdiff 00636 b= pstack.pop(); 00637 a= pstack.pop(); 00638 pstack.push((float_to_date(a)-float_to_date(b))*(12.0/365.25)); 00639 break; 00640 case 34: //yeardiff 00641 b= pstack.pop(); 00642 a= pstack.pop(); 00643 if (is_missing(a) || is_missing(b)) 00644 pstack.push(MISSING_VALUE); 00645 else 00646 pstack.push((float_to_date(a)-float_to_date(b))/365.25); 00647 break; 00648 case 35: //year_month_day 00649 { 00650 PDate d(float_to_date(pstack.pop())); 00651 pstack.push(d.year); 00652 pstack.push(d.month); 00653 pstack.push(d.day); 00654 } 00655 break; 00656 case 36: //todate 00657 c = pstack.pop(); 00658 b = pstack.pop(); 00659 a = pstack.pop(); 00660 pstack.push(date_to_float(PDate((int)a, (int)b, (int)c))); 00661 break; 00662 case 37: //dayofweek 00663 pstack.push(float_to_date(pstack.pop()).dayOfWeek()); 00664 break; 00665 case 38: //today 00666 pstack.push(date_to_float(PDate::today())); 00667 break; 00668 case 39: //date2julian 00669 pstack.push(float_to_date(pstack.pop()).toJulianDay()); 00670 break; 00671 case 40: //julian2date 00672 pstack.push(date_to_float(PDate((int)pstack.pop()))); 00673 break; 00674 case 41: //min 00675 a= pstack.pop(); 00676 b= pstack.pop(); 00677 pstack.push(a<b? a : b); 00678 break; 00679 case 42: //max 00680 a= pstack.pop(); 00681 b= pstack.pop(); 00682 pstack.push(a<b? b : a); 00683 break; 00684 case 43: // sqrt 00685 pstack.push(sqrt(pstack.pop())); 00686 break; 00687 case 44: // ^ 00688 a= pstack.pop(); 00689 b= pstack.pop(); 00690 pstack.push(pow(a,b)); 00691 break; 00692 case 45: // mod 00693 a= pstack.pop(); 00694 b= pstack.pop(); 00695 pstack.push((int)b % (int)a); 00696 break; 00697 case 46: // vecscalmul 00698 { 00699 a = pstack.pop(); // n 00700 b = pstack.pop(); // alpha 00701 int start = int(pstack.length()-a); 00702 for (int i=0;i<a;i++) 00703 pstack[start+i] *= b; 00704 break; 00705 } 00706 case 47: // __getfieldsrange %M:%N pushes fields %N to %M inclusively on the stack 00707 { 00708 int M = *pptr++; 00709 int N = *pptr++; 00710 for (int i=M;i<=N;i++) 00711 pstack.push(pfieldvalues[i]); 00712 break; 00713 } 00714 case 48: // select: v0 v1 v2 v3 ... vn-1 n i --> vi 00715 { 00716 int i = (int)pstack.pop(); 00717 int n = (int)pstack.pop(); 00718 a = MISSING_VALUE; 00719 while(--n>=0) 00720 { 00721 if(n==i) 00722 a = pstack.pop(); 00723 else 00724 pstack.pop(); 00725 } 00726 pstack.push(a); 00727 break; 00728 } 00729 case 49: // length 00730 { 00731 pstack.push(srcvec.length()); 00732 break; 00733 } 00734 case 50: // sign 00735 { 00736 a = pstack.pop(); 00737 if(a>0) 00738 pstack.push(1.); 00739 else if(a<0) 00740 pstack.push(-1.); 00741 else 00742 pstack.push(0.); 00743 break; 00744 } 00745 case 51: // get 00746 { 00747 { 00748 int i = int(pstack.pop()); 00749 if(i>=0) 00750 pstack.push(pstack[i]); 00751 else 00752 pstack.push(pstack.length()+i); 00753 } 00754 break; 00755 } 00756 case 52: // memput 00757 { 00758 { 00759 int i = int(pstack.pop()); 00760 a = pstack.pop(); 00761 if(mem.size()<i+1) 00762 mem.resize(i+1); 00763 mem[i] = a; 00764 } 00765 break; 00766 } 00767 case 53: // memget 00768 { 00769 { 00770 int i = int(pstack.pop()); 00771 pstack.push(mem[i]); 00772 } 00773 break; 00774 } 00775 case 54: // neg 00776 pstack.push(-pstack.pop()); 00777 break; 00778 case 55: // missing 00779 pstack.push(MISSING_VALUE); 00780 break; 00781 default: 00782 PLERROR("BUG IN PreproInterpretor::run while running program: invalid opcode: %d", op); 00783 } 00784 } 00785 // copy to result vec. 00786 //for(int i=0;i<pstack.size();i++) 00787 // cout<<pstack[i]<<" "; 00788 //cout<<endl; 00789 if (pstack.length() > result.length()) 00790 PLERROR("Parsing VMatLanguage: left with %d too many items on the stack!", 00791 pstack.length()-result.length()); 00792 if (pstack.length() < result.length()) 00793 PLERROR("Parsing VMatLanguage: left with %d missing items on the stack!", 00794 result.length()-pstack.length()); 00795 pstack >> result; 00796 } 00797 00798 void VMatLanguage::run(int rowindex, const Vec& result) const 00799 { 00800 myvec.resize(vmsource.width()); 00801 vmsource->getRow(rowindex,myvec); 00802 run(myvec, result, rowindex); 00803 } 00804 00805 void PreprocessingVMatrix::getNewRow(int i, const Vec& v) const 00806 { 00807 program.run(i,v); 00808 } 00809 00810 PLEARN_IMPLEMENT_OBJECT(PreprocessingVMatrix, "ONE LINE DESCR", "NO HELP"); 00811 00812 PreprocessingVMatrix::PreprocessingVMatrix(VMat the_source, const string& program_string) 00813 : source(the_source), program(the_source) 00814 { 00815 program.compileString(program_string,fieldnames); 00816 build(); 00817 } 00818 00819 void 00820 PreprocessingVMatrix::build() 00821 { 00822 inherited::build(); 00823 build_(); 00824 } 00825 00826 void 00827 PreprocessingVMatrix::build_() 00828 { 00829 if (source) { 00830 fieldinfos.resize((int)fieldnames.size()); 00831 for(unsigned int j=0; j<fieldnames.size(); j++) 00832 fieldinfos[j] = VMField(fieldnames[j]); 00833 00834 sourcevec.resize(source->width()); 00835 width_ = (int)fieldnames.size(); 00836 length_ = source.length(); 00837 } 00838 } 00839 00840 void 00841 PreprocessingVMatrix::declareOptions(OptionList &ol) 00842 { 00843 declareOption(ol, "source", &PreprocessingVMatrix::source, OptionBase::buildoption, ""); 00844 declareOption(ol, "program", &PreprocessingVMatrix::program, OptionBase::buildoption, ""); 00845 declareOption(ol, "fieldnames", &PreprocessingVMatrix::fieldnames, OptionBase::buildoption, ""); 00846 inherited::declareOptions(ol); 00847 } 00848 00849 } // end of namespace PLearn

Generated on Tue Aug 17 16:10:35 2004 for PLearn by doxygen 1.3.7