Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

vmatmain.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // vmatmain.cc 00004 // Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux, Rejean Ducharme 00005 // 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are met: 00008 // 00009 // 1. Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // 00012 // 2. Redistributions in binary form must reproduce the above copyright 00013 // notice, this list of conditions and the following disclaimer in the 00014 // documentation and/or other materials provided with the distribution. 00015 // 00016 // 3. The name of the authors may not be used to endorse or promote 00017 // products derived from this software without specific prior written 00018 // permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00021 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00022 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00023 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00025 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00026 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00027 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00028 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00029 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 // 00031 // This file is part of the PLearn library. For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 /* ******************************************************* 00035 * $Id: vmatmain.cc,v 1.35 2004/08/16 15:45:34 dorionc Exp $ 00036 ******************************************************* */ 00037 00038 #include "vmatmain.h" 00039 #include <plearn/base/general.h> 00040 #include <plearn/math/StatsCollector.h> 00041 #include <plearn/vmat/VMatLanguage.h> 00042 #include <plearn/vmat/VVMatrix.h> 00043 #include <plearn/vmat/VMat.h> 00044 #include <plearn/math/TMat_maths.h> 00045 #include <plearn/base/stringutils.h> 00046 #include <plearn/db/getDataSet.h> 00047 #include <plearn/display/Gnuplot.h> 00048 00049 // norman: added check 00050 #ifdef WIN32 00051 #include "curses.h" 00052 #undef min 00053 #undef max 00054 #undef clear // I want to use a clear and it is already defined in curses.h 00055 00056 #else 00057 #include <curses.h> 00058 #endif 00059 00060 namespace PLearn { 00061 using namespace std; 00062 00065 int print_diff(ostream& out, VMat m1, VMat m2, double tolerance) 00066 { 00067 int ndiff = 0; 00068 if(m1.length()!=m2.length() || m1.width()!=m2.width()) 00069 { 00070 out << "Size of the two matrices differ: " 00071 << m1.length() << " x " << m1.width() << " vs. " 00072 << m2.length() << " x " << m2.width() << endl; 00073 return -1; 00074 } 00075 int l = m1.length(); 00076 int w = m1.width(); 00077 Vec v1(w); 00078 Vec v2(w); 00079 for(int i=0; i<l; i++) 00080 { 00081 m1->getRow(i,v1); 00082 m2->getRow(i,v2); 00083 for(int j=0; j<w; j++) 00084 { 00085 double d = v1[j]-v2[j]; 00086 if(fabs(d)>tolerance || (is_missing(v1[j]) && !is_missing(v2[j])) || (is_missing(v2[j]) && !is_missing(v1[j]))) 00087 { 00088 out << "Elements at " << i << ',' << j << " differ by " << d << endl; 00089 ++ndiff; 00090 } 00091 } 00092 } 00093 return ndiff; 00094 } 00095 00096 void interactiveDisplayCDF(const Array<VMat>& vmats) 00097 { 00098 int k = vmats.size(); 00099 int w = vmats[0]->width(); 00100 00101 Array<string> name(k); 00102 cout << ">>>> Dimensions of vmats: \n"; 00103 for(int i=0; i<k; i++) 00104 { 00105 name[i] = vmats[i]->getMetaDataDir(); 00106 cout << name[i] << ": \t " << vmats[i]->length() << " x " << vmats[i]->width() << endl; 00107 } 00108 00109 vmats[0]->printFields(cout); 00110 00111 Gnuplot gp; 00112 00113 for(;;) 00114 { 00115 // TVec<RealMapping> ranges = vm->getRanges(); 00116 00117 cout << "Field (0.." << w-1 << ") [low high] ? "; 00118 vector<string> command; 00119 int varnum = -1; 00120 real low = -FLT_MAX; // means autorange 00121 real high = FLT_MAX; // means autorange 00122 do 00123 { 00124 command = split(pgetline(cin)); 00125 if(command.size()==0) 00126 vmats[0]->printFields(cout); 00127 else 00128 { 00129 varnum = toint(command[0]); 00130 if(varnum<0 || varnum>=w) 00131 vmats[0]->printFields(cout); 00132 else if(command.size()==3) 00133 { 00134 low = toreal(command[1]); 00135 high = toreal(command[2]); 00136 } 00137 } 00138 } while(varnum<0 || varnum>=w); 00139 00140 00141 cout << "\n\n*************************************" << endl; 00142 cout << "** #" << varnum << ": " << vmats[0]->fieldName(varnum) << " **" << endl; 00143 cout << "*************************************" << endl; 00144 00145 Array<Mat> m(k); 00146 00147 for(int i=0; i<k; i++) 00148 { 00149 TVec<StatsCollector> stats = vmats[i]->getStats(); 00150 StatsCollector& st = stats[varnum]; 00151 m[i] = st.cdf(true); 00152 cout << "[ " << name[i] << " ]" << endl; 00153 cout << st << endl; 00154 } 00155 // cout << "RANGES: " << endl; 00156 // cout << ranges[varnum]; 00157 00158 if(low == -FLT_MAX) 00159 gp << "set xrange [*:*]" << endl; 00160 else 00161 gp << "set xrange [" << low << ":" << high << "]" << endl; 00162 00163 if(k>=4) 00164 gp.plot(m[0],"title '"+name[0]+"'", m[1], "title '" + name[1]+"'", m[2], "title '" + name[2]+"'", m[3], "title '"+name[3]+"'"); 00165 else if(k>=3) 00166 gp.plot(m[0],"title '"+name[0]+"'", m[1], "title '"+name[1]+"'", m[2], "title '"+name[2]+"'"); 00167 else if(k>=2) 00168 gp.plot(m[0],"title '"+name[0]+"'", m[1], "title '"+name[1]+"'"); 00169 else 00170 gp.plot(m[0],"title '"+name[0]+"'"); 00171 } 00172 } 00173 00174 void displayBasicStats(VMat vm) 00175 { 00176 int nfields = vm.width(); 00177 TVec<StatsCollector> stats = vm->getStats(); 00178 00179 cout << "# \t fieldname \t mean \t stddev \t min \t max \t count \t nmissing \t stderr" << endl; 00180 for(int k=0; k<nfields; k++) 00181 { 00182 cout << k << " \t" 00183 << vm->fieldName(k) << " \t" 00184 << stats[k].mean() << " \t" 00185 << stats[k].stddev() << " \t" 00186 << stats[k].min() << " \t" 00187 << stats[k].max() << " \t" 00188 << stats[k].n() << " \t" 00189 << stats[k].nmissing() << " \t" 00190 << stats[k].stderror() << " \t" 00191 << endl; 00192 } 00193 } 00194 00195 00196 void printDistanceStatistics(VMat vm, int inputsize) 00197 { 00198 int l = vm.length(); 00199 int w = vm.width(); 00200 Vec x1(w); 00201 Vec x2(w); 00202 StatsCollector collector(2); 00203 ProgressBar pb(cerr, "Computing distance statistics", l-1); 00204 for(int i=0; i<l-1; i++) 00205 { 00206 vm->getRow(i,x1); 00207 vm->getRow(i+1,x2); 00208 real d = L2distance(x1.subVec(0,inputsize),x2.subVec(0,inputsize)); 00209 collector.update(d); 00210 pb(i); 00211 } 00212 00213 cout << "Euclidean distance statistics: " << endl; 00214 cout << collector << endl; 00215 } 00216 00217 /* 00218 void printConditionalStats(VMat vm, int condfield) 00219 { 00220 cout << "*** Ranges ***" << endl; 00221 TVec<RealMapping> ranges = vm->getRanges(); 00222 PP<ConditionalStatsCollector> st = vm->getConditionalStats(condfield); 00223 int w = vm->width(); 00224 for(int i=0; i<w; i++) 00225 { 00226 cout << "Field #" << i << ": " << vm->fieldName(i) << endl; 00227 cout << "Ranges: " << ranges[i] << endl; 00228 } 00229 cout << "\n\n------------------------------------------------------------" << endl; 00230 cout << "** Raw counts conditioned on field #" << condfield << " (" << vm->fieldName(condfield) << ") **\n" << endl; 00231 for(int k=0; k<w; k++) 00232 { 00233 cout << "#" << k << " " << vm->fieldName(condfield) << endl; 00234 cout << st->counts[k] << endl; 00235 } 00236 00237 cout << "\n\n------------------------------------------------------------" << endl; 00238 cout << "** Joint probabilities (percentage) **\n" << endl; 00239 for(int k=0; k<w; k++) 00240 { 00241 TMat<int>& C = st->counts[k]; 00242 Mat m(C.length(), C.width()); 00243 m << C; 00244 m /= sum(m); 00245 m *= real(100); 00246 cout << "#" << k << " " << vm->fieldName(condfield) << endl; 00247 cout << m << endl; 00248 } 00249 00250 cout << "\n\n------------------------------------------------------------" << endl; 00251 cout << "** Conditional probabilities conditioned on << " << vm->fieldName(condfield) << " **\n" << endl; 00252 for(int k=0; k<w; k++) 00253 { 00254 TMat<int>& C = st->counts[k]; 00255 Mat m(C.length(), C.width()); 00256 m << C; 00257 normalizeRows(m); 00258 m *= real(100); 00259 cout << "#" << k << " " << vm->fieldName(condfield) << endl; 00260 cout << m << endl; 00261 } 00262 00263 cout << "\n\n------------------------------------------------------------" << endl; 00264 cout << "** Conditional probabilities conditioned on the other variables **\n" << endl; 00265 for(int k=0; k<w; k++) 00266 { 00267 TMat<int>& C = st->counts[k]; 00268 Mat m(C.length(), C.width()); 00269 m << C; 00270 normalizeColumns(m); 00271 m *= real(100); 00272 cout << "#" << k << " " << vm->fieldName(condfield) << endl; 00273 cout << m << endl; 00274 } 00275 00276 00277 } 00278 */ 00279 00280 /* 00281 int findNextIndexOfValue(VMat m, int col, real value, int startrow=0) 00282 { 00283 if(m->hasMetaDataDir()) 00284 { 00285 string fpath = apppend_slash(m->getMetaDataDir())+"CachedColumns/"+tostring(col); 00286 if(!file_exists(filepath)) 00287 00288 00289 } 00290 } 00291 */ 00292 00293 // returns false if the input is invalid and write in strReason the reason 00294 bool getList(char* str, int curj, const VMat& vm, Vec& outList, char* strReason) 00295 { 00296 vector<string>columnList; 00297 if (str[0] == '\0') 00298 { 00299 // nothing was inserted, then gets the current column 00300 char strj[10]; 00301 sprintf(strj, "%d", curj); 00302 columnList.push_back(strj); 00303 } 00304 else 00305 { 00306 columnList = split(str, " -,", true); 00307 } 00308 00309 vector<string>::iterator vsIt; 00310 00311 // checks for errors 00312 bool invalidInput = false; 00313 int colVal = 0; 00314 char separator = 0; 00315 00316 for (vsIt = columnList.begin(); vsIt != columnList.end(); vsIt++) 00317 { 00318 if (pl_isnumber(*vsIt)) 00319 { 00320 if (colVal > toint(*vsIt) && separator == '-') 00321 { 00322 invalidInput = true; 00323 strcpy(strReason, "Second element in range smaller than the first"); 00324 break; 00325 } 00326 colVal = toint(*vsIt); 00327 if (colVal < 0 || colVal >= vm->width()) 00328 { 00329 invalidInput = true; 00330 strcpy(strReason, "Invalid column number"); 00331 break; 00332 } 00333 } 00334 else 00335 { 00336 // there was already a separator! 00337 if (separator == '-') 00338 { 00339 invalidInput = true; 00340 strcpy(strReason, "Too many '-' separators"); 00341 break; 00342 } 00343 00344 separator = (*vsIt)[0]; 00345 if (separator != '-' && 00346 separator != ',') 00347 { 00348 invalidInput = true; 00349 strcpy(strReason, "Invalid column separator"); 00350 break; 00351 } 00352 } 00353 } 00354 00355 outList.clear(); 00356 if (separator == '-') 00357 { 00358 int start = toint(columnList.front()); 00359 int end = toint(columnList.back()); 00360 for (int colIdx = start; colIdx <= end; ++colIdx) 00361 outList.push_back(colIdx); 00362 } 00363 else if (separator == ',') 00364 { 00365 for (vsIt = columnList.begin(); vsIt != columnList.end(); ++vsIt) 00366 { 00367 if (pl_isnumber(*vsIt)) 00368 outList.push_back(toint(*vsIt)); 00369 } 00370 } 00371 else if (separator == 0) 00372 { 00373 outList.push_back(toint(columnList.front())); 00374 } 00375 00376 return invalidInput; 00377 } 00378 00379 void viewVMat(const VMat& vm) 00380 { 00381 initscr(); 00382 cbreak(); 00383 noecho(); 00384 keypad(stdscr,TRUE); 00385 00386 VMat vm_showed = vm; 00387 00388 int key = 0; 00389 bool view_strings = true; 00390 // If 'indent_strings_left' is set to false, then strings will be indented to the right. 00391 bool indent_strings_left = true; 00392 bool hide_sameval = false; // if true, values that are the same as the one of the previous vmat line will not be displayed, (three dot will be displayed instead). 00393 bool transposed = false; 00394 00395 int namewidth = 0; 00396 for(int j=0; j<vm->width(); j++) 00397 namewidth = max(namewidth, (int) vm->fieldName(j).size()); 00398 00399 int valwidth = 15; 00400 int valstrwidth = valwidth-1; 00401 00402 char* valstrformat = "%14s"; 00403 00404 int curi = 0; 00405 int curj = 0; 00406 int starti = 0; 00407 int startj = 0; 00408 00409 int vStartHelp = 0; 00410 00411 bool onError=false; 00412 00413 map<int,Vec> cached_columns; 00414 00415 try { 00416 00417 while(key != 'q' && key != 'Q') 00418 { 00419 erase(); 00420 00421 int leftcolwidth = transposed ?1+namewidth :10; 00422 00423 int nj = transposed ? LINES-3 : (COLS-leftcolwidth)/valwidth; 00424 int ni = transposed ? (COLS-leftcolwidth)/valwidth : LINES-4; 00425 00426 int endj = min(vm_showed->width(), startj+nj); 00427 int endi = min(vm_showed->length(), starti+ni); 00428 00429 int x=0, y=0; // (curses coordinates are (y,x) ) 00430 00431 // print field names 00432 for(int j=startj; j<endj; j++) 00433 { 00434 string s = vm_showed->fieldName(j); 00435 // if(j==curj) 00436 // attron(A_REVERSE); 00437 if(transposed) 00438 mvprintw(1+(j-startj),0,"%s", s.c_str()); 00439 else 00440 { 00441 x = 1+leftcolwidth+(j-startj)*valwidth; 00442 mvprintw(0, x, valstrformat, s.substr(0,valstrwidth).c_str() ); 00443 if((int)s.length() > valstrwidth) 00444 mvprintw(1, x, valstrformat, s.substr(valstrwidth,valstrwidth).c_str() ); 00445 } 00446 // attroff(A_REVERSE); 00447 } 00448 00449 Vec v(vm_showed.width()); 00450 Vec oldv(vm_showed.width()); 00451 00452 for(int i=starti; i<endi; i++) 00453 { 00454 if(transposed) 00455 { 00456 y = 0; 00457 x = 1+leftcolwidth+(i-starti)*valwidth; 00458 mvprintw(y,x,"%14d",i); 00459 } 00460 else 00461 { 00462 y = i-starti+2; 00463 x = 0; 00464 mvprintw(y,x,"%d",i); 00465 } 00466 00467 vm_showed->getRow(i,v); 00468 00469 for(int j=startj; j<endj; j++) 00470 { 00471 real val = v[j]; 00472 string s = vm_showed->getValString(j,val); 00473 if (!view_strings || s == "") 00474 s = tostring(val); 00475 else { 00476 // This is a string. Maybe we want to indent it to the right. 00477 // In this case we truncate it to its last characters. 00478 if (!indent_strings_left) { 00479 if (s.size() >= (size_t) valstrwidth) { 00480 s = s.substr(s.size() - valstrwidth, valstrwidth); 00481 } else { 00482 string added_spaces((size_t) (valstrwidth - s.size()), ' '); 00483 s = added_spaces + s; 00484 } 00485 } 00486 } 00487 00488 if(transposed) 00489 y = 1+(j-startj); 00490 else 00491 x = 1+leftcolwidth+(j-startj)*valwidth; 00492 00493 if( i == curi || (vm_showed.width() > 1 && j == curj) ) 00494 attron(A_REVERSE); 00495 //else if () 00496 // attron(A_REVERSE); 00497 00498 if(hide_sameval && i>starti && (val==oldv[j] || is_missing(val)&&is_missing(oldv[j])) ) 00499 mvprintw(y, x, valstrformat, "..."); 00500 else 00501 mvprintw(y, x, valstrformat, s.substr(0,valstrwidth).c_str()); 00502 00503 attroff(A_REVERSE); 00504 } 00505 oldv << v; 00506 } 00507 00508 string strval = vm_showed->getString(curi, curj); 00509 mvprintw(0,0,"Cols[%d-%d]", 0, vm_showed.width()-1); 00510 mvprintw(LINES-1,0," %dx%d line= %d col= %d %s = %s (%f)", 00511 vm_showed->length(), vm_showed->width(), 00512 curi, curj, vm_showed->fieldName(curj).c_str(), strval.c_str(), vm_showed(curi,curj)); 00513 00514 refresh(); 00515 if (!onError) 00516 key = getch(); 00517 else 00518 onError = false; 00519 00521 switch(key) 00522 { 00523 case KEY_LEFT: 00524 if(transposed) 00525 { 00526 if(curi>0) 00527 --curi; 00528 if(curi<starti) 00529 starti = curi; 00530 } 00531 else 00532 { 00533 if(curj>0) 00534 --curj; 00535 if(curj<startj) 00536 startj=curj; 00537 } 00538 break; 00540 case KEY_RIGHT: 00541 if(transposed) 00542 { 00543 if(curi<vm_showed->length()-1) 00544 ++curi; 00545 if(curi>=starti+ni) 00546 ++starti; 00547 } 00548 else 00549 { 00550 if(curj<vm_showed->width()-1) 00551 ++curj; 00552 if(curj>=startj+nj) 00553 ++startj; 00554 } 00555 break; 00557 case KEY_UP: 00558 if(transposed) 00559 { 00560 if(curj>0) 00561 --curj; 00562 if(curj<startj) 00563 startj=curj; 00564 } 00565 else 00566 { 00567 if(curi>0) 00568 --curi; 00569 if(curi<starti) 00570 starti = curi; 00571 } 00572 break; 00574 case KEY_DOWN: 00575 if(transposed) 00576 { 00577 if(curj<vm_showed->width()-1) 00578 ++curj; 00579 if(curj>=startj+nj) 00580 ++startj; 00581 } 00582 else 00583 { 00584 if(curi<vm_showed->length()-1) 00585 ++curi; 00586 if(curi>=starti+ni) 00587 ++starti; 00588 } 00589 break; 00591 case KEY_PPAGE: 00592 if(transposed) 00593 { 00594 curj -= nj; 00595 startj -= nj; 00596 if(startj<0) 00597 startj = 0; 00598 if(curj<0) 00599 curj = 0; 00600 } 00601 else 00602 { 00603 curi -= ni; 00604 starti -= ni; 00605 if(starti<0) 00606 starti = 0; 00607 if(curi<0) 00608 curi = 0; 00609 } 00610 break; 00612 case KEY_NPAGE: 00613 if(transposed) 00614 { 00615 curj += nj; 00616 startj += nj; 00617 if(curj>=vm_showed->width()) 00618 curj = vm_showed->width()-1; 00619 if(startj>vm_showed->width()-nj) 00620 startj = max(0,vm_showed->width()-nj); 00621 } 00622 else 00623 { 00624 curi += ni; 00625 starti += ni; 00626 if(curi>=vm_showed->length()) 00627 curi = vm_showed->length()-1; 00628 if(starti>vm_showed->length()-ni) 00629 starti = max(0,vm_showed->length()-ni); 00630 } 00631 break; 00633 case KEY_HOME: 00634 // not working on unix for the moment: see http://dickey.his.com/xterm/xterm.faq.html#xterm_pc_style 00635 if(transposed) 00636 { 00637 curi = 0; 00638 starti = 0; 00639 } 00640 else 00641 { 00642 curj = 0; 00643 startj = 0; 00644 } 00645 break; 00647 case KEY_END: 00648 // not working on unix for the moment: see http://dickey.his.com/xterm/xterm.faq.html#xterm_pc_style 00649 if(transposed) 00650 { 00651 curi = vm_showed->length()-1; 00652 starti = max(curi-ni + 1, 0); 00653 } 00654 else 00655 { 00656 curj = vm_showed->width()-1; 00657 startj = max(curj-nj + 1, 0); 00658 } 00659 break; 00661 case '.': 00662 hide_sameval = !hide_sameval; 00663 break; 00665 case 't': case 'T': 00666 transposed = !transposed; 00667 nj = transposed ? LINES-3 : (COLS-leftcolwidth)/valwidth; 00668 ni = transposed ? (COLS-leftcolwidth)/valwidth : LINES-4; 00669 starti = max(0,curi-ni/2); 00670 startj = max(0,curj-nj/2); 00671 //endj = min(vm_showed->width(), startj+nj); 00672 //endi = min(vm_showed->length(), starti+ni); 00673 break; 00675 case '/': // search for value 00676 { 00677 echo(); 00678 char strmsg[] = {"Search for value or string: "}; 00679 mvprintw(LINES-1,0,strmsg); 00680 // clear the rest of the line 00681 clrtoeol(); 00682 move(LINES-1, (int)strlen(strmsg)); 00683 char l[10]; 00684 getnstr(l, 10); 00685 string searchme = removeblanks(l); 00686 real searchval = vm_showed(curi,curj); 00687 if(searchme!="") 00688 { 00689 searchval = vm_showed->getStringVal(curj, searchme); 00690 if(is_missing(searchval)) 00691 { 00692 searchval = toreal(searchme); 00693 // This one gives a very bad error: to be changed 00694 if(is_missing(searchval)) 00695 PLERROR("Search item is neither a string with a valid mapping, nor convertible to a real"); 00696 } 00697 } 00698 00699 Vec cached; 00700 if(cached_columns.find(curj)!=cached_columns.end()) 00701 cached = cached_columns[curj]; 00702 else 00703 { 00704 mvprintw(LINES-1,0,"Building cache..."); 00705 // clear the rest of the line 00706 clrtoeol(); 00707 refresh(); 00708 cached.resize(vm_showed->length()); 00709 vm_showed->getColumn(curj,cached); 00710 cached_columns[curj] = cached; 00711 } 00712 00713 mvprintw(LINES-1,0,"Searching for value %f ...",searchval); 00714 clrtoeol(); 00715 refresh(); 00716 ++curi; // start searching from next row 00717 while(curi<vm_showed->length() && cached[curi]!=searchval) 00718 ++curi; 00719 if(curi>=vm_showed->length()) 00720 curi = 0; 00721 ni = transposed ? (COLS-leftcolwidth)/valwidth : LINES-4; 00722 starti = max(0,curi-ni/2); 00723 } 00724 break; 00726 case (int)'l': case (int)'L': 00727 { 00728 echo(); 00729 char strmsg[] = {"Goto line: "}; 00730 mvprintw(LINES-1,0,strmsg); 00731 clrtoeol(); 00732 move(LINES-1, (int)strlen(strmsg)); 00733 char l[10]; 00734 getnstr(l, 10); 00735 if(l[0] == '\0' || !pl_isnumber(l) || toint(l) < 0 || toint(l)>=vm_showed->length()) 00736 { 00737 mvprintw(LINES-1,0,"*** Invalid line number ***"); 00738 clrtoeol(); 00739 refresh(); 00740 // wait until the user types something 00741 key = getch(); 00742 onError = true; 00743 } 00744 else 00745 { 00746 curi= toint(l); 00747 starti = max(curi-ni + 1, 0); 00748 //starti = curi; 00749 } 00750 noecho(); 00751 } 00752 break; 00754 case (int)'c': case (int)'C': 00755 { 00756 echo(); 00757 char strmsg[] = {"Goto column: "}; 00758 mvprintw(LINES-1,0,strmsg); 00759 clrtoeol(); 00760 move(LINES-1, (int)strlen(strmsg)); 00761 char c[200]; 00762 getnstr(c, 10); 00763 string the_col = c; 00764 int col_num = -1; 00765 try { 00766 col_num = vm_showed->getFieldIndex(the_col); 00767 } catch (...) {} 00768 if(col_num < 0) 00769 { 00770 mvprintw(LINES-1,0,"*** Invalid column number ***"); 00771 clrtoeol(); 00772 refresh(); 00773 // wait until the user types something 00774 key = getch(); 00775 onError = true; 00776 } 00777 else 00778 { 00779 curj = col_num; 00780 startj = max(curj-nj + 1, 0); 00781 } 00782 noecho(); 00783 } 00784 break; 00786 case (int)'v': case (int)'V': 00787 { 00788 echo(); 00789 char strmsg[] = {"View dataset ('Enter' = reload last dataset): "}; 00790 mvprintw(LINES-1,0,strmsg); 00791 clrtoeol(); 00792 move(LINES-1, (int) strlen(strmsg)); 00793 char c[200]; 00794 getnstr(c, 200); 00795 string dataset = c; 00796 if (dataset == "") { 00797 // Reload last dataset. 00798 dataset = vmat_view_dataset; 00799 } 00800 VMat new_vm; 00801 bool error = false; 00802 try { 00803 new_vm = getDataSet(dataset); 00804 vmat_view_dataset = dataset; 00805 } catch(const PLearnError& e) { 00806 error = true; 00807 } 00808 if (error) { 00809 mvprintw(LINES-1,0,"*** Invalid dataset ***"); 00810 clrtoeol(); 00811 refresh(); 00812 // Wait until the user types something. 00813 key = getch(); 00814 onError = true; 00815 } else { 00816 // Display the new dataset. 00817 // First close the current display. 00818 mvprintw(LINES-1,0,""); 00819 clrtoeol(); 00820 refresh(); 00821 endwin(); 00822 // And launch the new one. 00823 viewVMat(new_vm); 00824 } 00825 } 00826 break; 00828 case (int)'e': case (int)'E': 00829 { 00830 echo(); 00831 char strmsg[100]; 00832 sprintf(strmsg, "Enter column(s) or range (ex: 7;1-20;7,8,12) to export (enter=%d): ", curj); 00833 mvprintw(LINES-1,0,strmsg); 00834 clrtoeol(); 00835 00836 move(LINES-1, (int)strlen(strmsg)); 00837 char strRange[50]; 00838 getnstr(strRange, 50); 00839 00840 Vec indexs; 00841 char strReason[100] = {"\0"}; 00842 bool invalidInput = getList(strRange, curj, vm_showed, indexs, strReason); 00843 00844 if (invalidInput) 00845 { 00846 mvprintw(LINES-1,0,"*** Invalid input: %s ***", strReason); 00847 clrtoeol(); 00848 refresh(); 00849 // wait until the user types something 00850 key = getch(); 00851 onError = true; 00852 } 00853 else 00854 { 00855 00856 char filemsg[] = {"Enter file name (enter=outCol.txt): "}; 00857 mvprintw(LINES-1,0,filemsg); 00858 clrtoeol(); 00859 00860 move(LINES-1, (int)strlen(filemsg)); 00861 char fname[200]; 00862 getnstr(fname, 200); 00863 00864 if (fname[0] == '\0') 00865 strcpy(fname, "outCol.txt"); 00866 00867 mvprintw(LINES-1,0,"Writing file '%s'...", fname); 00868 clrtoeol(); 00869 refresh(); 00870 00871 ofstream outFile(fname, ios::out); 00872 00873 for (Vec::iterator it = indexs.begin(); it != indexs.end(); ++it) 00874 { 00875 outFile << vm_showed->fieldName((int)*it) << '\t'; 00876 } 00877 outFile << endl; 00878 00879 outFile << vm_showed.columns(indexs); 00880 outFile.close(); 00881 00882 mvprintw(LINES-1,0,"*** Output written on: %s ***", fname); 00883 clrtoeol(); 00884 refresh(); 00885 // wait until the user types something 00886 key = getch(); 00887 00888 } 00889 00890 noecho(); 00891 } 00892 break; 00894 case (int)'r': case (int)'R': 00895 { 00896 echo(); 00897 char strmsg[100]; 00898 sprintf(strmsg, "Enter column(s) or range (ex: 7;1-20;7,8,12) to view (enter=%d): ", curj); 00899 mvprintw(LINES-1,0,strmsg); 00900 clrtoeol(); 00901 00902 move(LINES-1, (int)strlen(strmsg)); 00903 char c[50]; 00904 getnstr(c, 50); 00905 00906 Vec indexs; 00907 char strReason[100] = {"\0"}; 00908 bool invalidInput = getList(c, curj, vm_showed, indexs, strReason); 00909 00910 if (invalidInput) 00911 { 00912 mvprintw(LINES-1,0,"*** Invalid input: %s ***", strReason); 00913 clrtoeol(); 00914 refresh(); 00915 // wait until the user types something 00916 key = getch(); 00917 onError = true; 00918 } 00919 else 00920 { 00921 vm_showed = vm_showed.columns(indexs); 00922 if (curj>=vm_showed.width()) 00923 { 00924 curj = vm_showed.width()-1; 00925 startj = max(curj-nj + 1, 0); 00926 } 00927 } 00928 00929 noecho(); 00930 } 00931 break; 00933 case (int)'x': case (int)'X': 00934 // Hide the currently selected row. 00935 { 00936 //echo(); 00937 Vec index(vm_showed.width() - 1); 00938 for (int i = 0; i < curj; i++) { 00939 index[i] = i; 00940 } 00941 for (int i = curj + 1; i < vm_showed.width(); i++) { 00942 index[i - 1] = i; 00943 } 00944 vm_showed = vm_showed.columns(index); 00945 if (curj>=vm_showed.width()) { 00946 curj = vm_showed.width()-1; 00947 startj = max(curj-nj + 1, 0); 00948 } 00949 00950 // noecho(); 00951 } 00952 break; 00954 case (int)'a': case (int)'A': 00955 vm_showed = vm; 00956 break; 00958 case (int)'s': 00959 if (indent_strings_left) 00960 // Toggle display. 00961 view_strings = !view_strings; 00962 else { 00963 // Do not remove display if we only asked to change indentation. 00964 indent_strings_left = true; 00965 if (!view_strings) 00966 view_strings = true; 00967 } 00968 break; 00969 case (int)'S': 00970 // Same as above, except we indent to the right. 00971 if (!indent_strings_left) 00972 view_strings = !view_strings; 00973 else { 00974 indent_strings_left = false; 00975 if (!view_strings) 00976 view_strings = true; 00977 } 00978 break; 00980 case (int)'h': case (int)'H': 00981 erase(); 00982 00983 vStartHelp = 2; 00984 00985 mvprintw(0,COLS/2-6,"*** HELP ***"); 00986 00987 mvprintw(vStartHelp++,10,"KEYS:"); 00988 mvprintw(vStartHelp++,10," - up: move up one line"); 00989 mvprintw(vStartHelp++,10," - down: move down one line"); 00990 mvprintw(vStartHelp++,10," - right: move right one column"); 00991 mvprintw(vStartHelp++,10," - left: move left one column"); 00992 mvprintw(vStartHelp++,10," - page up: move up one screen"); 00993 mvprintw(vStartHelp++,10," - page down: move down one screen"); 00994 mvprintw(vStartHelp++,10," - home: move to the first column"); 00995 mvprintw(vStartHelp++,10," - end: move to the last column"); 00996 mvprintw(vStartHelp++,10," - 'r' or 'R': show only a range or a set of columns"); 00997 mvprintw(vStartHelp++,10," - 'x' or 'X': hide the currently selected column"); 00998 mvprintw(vStartHelp++,10," - 'a' or 'A': show all the columns"); 00999 mvprintw(vStartHelp++,10," - 'l' or 'L': prompt for a line number and go to that line"); 01000 mvprintw(vStartHelp++,10," - 'c' or 'C': prompt for a column number and go to that column"); 01001 mvprintw(vStartHelp++,10," - 's' or 'S': toggle display string fields as strings or numbers ('S' = right indentation)"); 01002 mvprintw(vStartHelp++,10," - 't' or 'T': toggle transposed display mode"); 01003 mvprintw(vStartHelp++,10," - 'e' or 'E': export a range or a set of columns to file"); 01004 mvprintw(vStartHelp++,10," - 'v' or 'V': prompt for another dataset to view"); 01005 mvprintw(vStartHelp++,10," - '.' : toggle displaying of ... for values that do not change"); 01006 mvprintw(vStartHelp++,10," - '/' : search for a value of the current field"); 01007 mvprintw(vStartHelp++,10," - 'h' or 'H': display this screen"); 01008 mvprintw(vStartHelp++,10," - 'q' or 'Q': quit program"); 01009 mvprintw(vStartHelp++,COLS/2-13,"(press any key to continue)"); 01010 01011 refresh(); 01012 getch(); 01013 01014 break; 01015 01016 case (int)'q': case (int)'Q': 01017 break; 01018 01020 default: 01021 mvprintw(LINES-1,0,"*** Invalid command (type 'h' for help) ***"); 01022 // clear the rest of the line 01023 clrtoeol(); 01024 01025 refresh(); 01026 01027 // wait until the user types something 01028 key = getch(); 01029 onError = true; 01030 01031 //sleep(1); 01032 break; 01033 } 01034 } 01035 } // end try 01036 catch(const PLearnError& e) 01037 { 01038 endwin(); 01039 throw(e); 01040 } 01041 01042 // make sure it is clean 01043 mvprintw(LINES-1,0,""); 01044 clrtoeol(); 01045 refresh(); 01046 01047 endwin(); 01048 } 01049 01050 /* OLD CODE 01051 01052 01053 void viewVMat(const VMat& vm, int lin, int col) 01054 { 01055 initscr(); 01056 cbreak(); 01057 noecho(); 01058 keypad(stdscr,TRUE); 01059 01060 int key= 0, curl= 0, curc= 0; 01061 bool view_strings= true; 01062 01063 while(key != 'q' && key != 'Q') 01064 { 01065 erase(); 01066 01067 for(int j= 0; (j+2)*10 < COLS && j+col<vm->width(); ++j) 01068 { 01069 string s= vm->fieldName(j+col); 01070 mvprintw(0,(j+1)*10-1," %9s", s.substr(0,9).c_str()); 01071 if(s.length() > 9) 01072 mvprintw(1,(j+1)*10-1," %9s", s.substr(9,9).c_str()); 01073 } 01074 for(int i= 0; i < LINES-3 && i+lin<vm->length(); ++i) 01075 { 01076 mvprintw(i+2,0,"%d", i+lin); 01077 for(int j= 0; (j+2)*10 < COLS && j+col<vm->width(); ++j) 01078 { 01079 if(i == curl || j == curc) 01080 attron(A_REVERSE); 01081 01082 real x= vm(i+lin, j+col); 01083 string s= vm->getValString(j+col, x); 01084 if(!view_strings || s == "") 01085 mvprintw(i+2,(j+1)*10-1," %9f", x); 01086 else 01087 mvprintw(i+2,(j+1)*10-1," %9s", s.substr(0,9).c_str()); 01088 01089 attroff(A_REVERSE); 01090 } 01091 } 01092 01093 //real x= vm(curl+lin, curc+col); 01094 //string strval= vm->getValString(curc+col, x); 01095 //if(!view_strings || strval == "") 01096 //strval= tostring(x); 01097 01098 string strval = vm->getString(curl+lin, curc+col); 01099 mvprintw(LINES-1,0," %dx%d line= %d col= %d [%s] val= %s", 01100 vm->length(), vm->width(), 01101 curl+lin, curc+col, vm->fieldName(curc+col).c_str(), strval.c_str()); 01102 01103 01104 refresh(); 01105 key= getch(); 01106 01107 switch(key) 01108 { 01109 case KEY_UP: 01110 if(0 < curl) --curl; 01111 else if(lin>0) --lin; 01112 break; 01113 case KEY_DOWN: 01114 if(curl < LINES-4 && curl+lin < vm->length()-1) ++curl; 01115 else if(lin < vm->length()-1) ++lin; 01116 if(curl+lin >= vm->length()) curl= vm->length()-lin-1; 01117 break; 01118 case KEY_PPAGE: 01119 lin-=LINES-3; 01120 if(lin < 0) lin= 0; 01121 break; 01122 case KEY_NPAGE: 01123 lin+=LINES-3; 01124 if(lin >= vm->length()) lin= vm->length()-1; 01125 if(curl+lin >= vm->length()) curl= vm->length()-lin-1; 01126 break; 01127 case KEY_LEFT: 01128 if(0 < curc) --curc; 01129 else if(col>0) --col; 01130 break; 01131 case KEY_RIGHT: 01132 if(curc < COLS/10-2 && curc+col < vm->width()-1) ++curc; 01133 else if(col < vm->width()-1) ++col; 01134 if(curc+col >= vm->width()) curc= vm->width()-col-1; 01135 break; 01136 case (int)'l': case (int)'L': 01137 { 01138 echo(); 01139 mvprintw(LINES-1,0,"Goto line: "); 01140 move(LINES-1, 11); 01141 char l[10]; 01142 getnstr(l, 10); 01143 if(!pl_isnumber(l) || toint(l) < 0 || toint(l)>=vm->length()) 01144 { 01145 mvprintw(LINES-1,0,"*** Invalid line number ***"); 01146 refresh(); 01147 sleep(1); 01148 } 01149 else 01150 lin= toint(l); 01151 noecho(); 01152 } 01153 break; 01154 case (int)'c': case (int)'C': 01155 { 01156 echo(); 01157 mvprintw(LINES-1,0,"Goto column: "); 01158 move(LINES-1, 13); 01159 char c[10]; 01160 getnstr(c, 10); 01161 if(!pl_isnumber(c) || toint(c) < 0 || toint(c)>=vm->width()) 01162 { 01163 mvprintw(LINES-1,0,"*** Invalid column number ***"); 01164 refresh(); 01165 sleep(1); 01166 } 01167 else 01168 col= toint(c); 01169 noecho(); 01170 } 01171 break; 01172 01173 case (int)'s': case (int)'S': 01174 if(view_strings) 01175 { 01176 mvprintw(LINES-1,0,"*** Strings already shown ***"); 01177 refresh(); 01178 sleep(1); 01179 } 01180 else 01181 view_strings= true; 01182 break; 01183 01184 case (int)'n': case (int)'N': 01185 if(!view_strings) 01186 { 01187 mvprintw(LINES-1,0,"*** Numbers already shown ***"); 01188 refresh(); 01189 sleep(1); 01190 } 01191 else 01192 view_strings= false; 01193 break; 01194 01195 case (int)'h': case (int)'H': 01196 erase(); 01197 01198 mvprintw(0,COLS/2-6,"*** HELP ***"); 01199 01200 mvprintw(2,10,"KEYS:"); 01201 mvprintw(3,10," - up: move up one line"); 01202 mvprintw(4,10," - down: move down one line"); 01203 mvprintw(5,10," - right: move right one column"); 01204 mvprintw(6,10," - left: move left one column"); 01205 mvprintw(7,10," - page up: move up one screen"); 01206 mvprintw(8,10," - page down: move down one screen"); 01207 mvprintw(9,10," - 'l' or 'L': prompt for a line number and go to that line"); 01208 mvprintw(10,10," - 'c' or 'C': prompt for a column number and go to that column"); 01209 mvprintw(11,10," - 's' or 'S': display string fields as strings"); 01210 mvprintw(12,10," - 'n' or 'N': display string fields as numbers"); 01211 mvprintw(13,10," - 'h' or 'H': display this screen"); 01212 mvprintw(14,10," - 'q' or 'Q': quit program"); 01213 01214 mvprintw(16,COLS/2-13,"(press any key to continue)"); 01215 01216 refresh(); 01217 getch(); 01218 01219 break; 01220 01221 case (int)'q': case (int)'Q': 01222 break; 01223 01224 default: 01225 mvprintw(LINES-1,0,"*** Invalid command (type 'h' for help) ***"); 01226 refresh(); 01227 sleep(1); 01228 break; 01229 } 01230 } 01231 01232 endwin(); 01233 } 01234 */ 01235 01236 void plotVMats(char* defs[], int ndefs) 01237 { 01238 /* defs[] is of format: 01239 { "<dataset0>", "<col0>[:<row0>:<nrows0>]", ..., "<datasetN>", "<colN>[:<rowN>:<nrowsN>]" } 01240 */ 01241 int nseries= ndefs/2; 01242 TmpFilenames tmpfnames(nseries, "/tmp/", "_vmat_plot_"); 01243 Array<VMat> vmats(nseries); 01244 Array<Vec> series(nseries); 01245 string gp_command= "plot "; 01246 for(int i= 0; i < nseries; ++i) 01247 { 01248 vmats[i]= getDataSet(string(defs[2*i])); 01249 01250 vector<string> spec= PLearn::split(defs[2*i+1], ":"); 01251 01252 series[i].resize(vmats[i].length()); 01253 vmats[i]->getColumn(toint(spec[0]),series[i]); 01254 01255 if(spec.size() == 3) 01256 { 01257 int row= toint(spec[1]), nrows= toint(spec[2]); 01258 if(row+nrows > series[i].length()) 01259 nrows= series[i].length()-row; 01260 series[i]= series[i].subVec(row, nrows); 01261 } 01262 else if(spec.size() != 1) 01263 PLERROR("in plotVMats: invalid spec for vmat %s: '%s'; sould be '<col>[:<row>:<nrows>]'.", 01264 defs[2*i], defs[2*i+1]); 01265 01266 saveGnuplot(tmpfnames[i].c_str(), series[i]); 01267 chmod(tmpfnames[i].c_str(),0777); 01268 gp_command+= " '" + tmpfnames[i] + "' title '" + defs[2*i] + ' ' + defs[2*i+1] + "' " + tostring(i+1) +", "; 01269 } 01270 gp_command.resize(gp_command.length()-2); 01271 01272 Gnuplot gp; 01273 gp << gp_command << endl; 01274 01275 cout << "Press any key to close GNUplot window and exit." << endl; 01276 cin.get(); 01277 } 01278 01279 int vmatmain(int argc, char** argv) 01280 { 01281 01282 if(argc<3) 01283 { 01284 cerr << 01285 "Usage: vmat info <dataset> \n" 01286 " Will info about dataset (size, etc..)\n" 01287 " or: vmat fields <dataset> [name_only] [transpose]\n" 01288 " To list the fields with their names (if 'name_only' is specified, the indexes won't be displayed,\n" 01289 " and if 'transpose' is also added, the fields will be listed on a single line)\n" 01290 " or: vmat fieldinfo <dataset> <fieldname_or_num>\n" 01291 " To display statistics for that field \n" 01292 " or: vmat cat <dataset> [<optional_vpl_filtering_code>]\n" 01293 " To display the dataset \n" 01294 " or: vmat sascat <dataset.vmat> <dataset.txt>\n" 01295 " To output in <filename.txt> the dataset in SAS-like tab-separated format with field names on the first line\n" 01296 " or: vmat view <vmat> \n" 01297 " Interactive display of a vmat. \n" 01298 " or: vmat stats <dataset> \n" 01299 " Will display basic statistics for each field \n" 01300 " or: vmat convert <source> <destination> \n" 01301 " To convert any dataset into a .amat .pmat or .dmat format \n" 01302 " The extension of the destination is used to determine the format you want \n" 01303 " or: vmat gendef <source> [binnum1 binnum2 ...] \n" 01304 " Generate stats for dataset (will put them in its associated metadatadir). \n" 01305 " or: vmat genvmat <source_dataset> <dest_vmat> [binned{num} | onehot{num} | normalized]\n" 01306 " Will generate a template .vmat file with all the fields of the source preprocessed\n" 01307 " with the processing you specify\n" 01308 " or: vmat genkfold <source_dataset> <fileprefix> <kvalue>\n" 01309 " Will generate <kvalue> pairs of .vmat that are splitted so they can be used for kfold trainings\n" 01310 " The first .vmat-pair will be named <fileprefix>_train_1.vmat (all source_dataset except the first 1/k)\n" 01311 " and <fileprefix>_test_1.vmat (the first 1/k of <source_dataset>\n" 01312 " or: vmat diff <dataset1> <dataset2> [tolerance] \n" 01313 " Will report all elements that differ by more than tolerance (defauts to 1e-6) \n" 01314 " or: vmat cdf <dataset> [<dataset> ...] \n" 01315 " To interactively display cumulative density function for each field \n" 01316 " along with its basic statistics \n" 01317 // " or: vmat cond <dataset> <condfield#> \n" 01318 // " Interactive display of coditional statistics conditioned on the \n" 01319 // " conditioning field <condfield#> \n" 01320 " or: vmat diststat <dataset> <inputsize>\n" 01321 " Will compute and output basic statistics on the euclidean distance \n" 01322 " between two consecutive input points \n" 01323 "<dataset> is a parameter understandable by getDataSet. This includes all matrix file\n" 01324 "file formats. Type 'vmat help dataset' so see what other <dataset> strings are possible." << endl; 01325 exit(0); 01326 } 01327 01328 string command = argv[1]; 01329 01330 if(command=="cdf") 01331 { 01332 Array<VMat> vmats; 01333 for(int i=2; i<argc; i++) 01334 { 01335 string dbname = argv[i]; 01336 VMat vm = getDataSet(dbname); 01337 vmats.append(vm); 01338 } 01339 interactiveDisplayCDF(vmats); 01340 } 01341 /* 01342 else if(command=="cond") 01343 { 01344 string dbname = argv[2]; 01345 VMat vm = getDataSet(dbname); 01346 cout << "** Using dataset: " << dbname << " **" << endl; 01347 cout << "Metadata for this dataset in: " << vm->getMetaDataDir() << endl; 01348 int condfield = atoi(argv[3]); 01349 printConditionalStats(vm, condfield); 01350 } 01351 */ 01352 else if(command=="convert") 01353 { 01354 string source = argv[2]; 01355 string destination = argv[3]; 01356 VMat vm = getDataSet(source); 01357 01358 string ext = extract_extension(destination); 01359 if(ext==".amat") 01360 vm->saveAMAT(destination); 01361 else if(ext==".pmat") 01362 vm->savePMAT(destination); 01363 else if(ext==".dmat") 01364 vm->saveDMAT(destination); 01365 else if(ext == ".csv") 01366 { 01367 ofstream out(destination.c_str()); 01368 for(int i=0; i < vm.length(); i++) 01369 { 01370 int last = vm.width()-1; 01371 for(int j=0; j < last; j++) 01372 out << vm(i, j) << "," << flush; 01373 out << vm(i, last) << endl; 01374 } 01375 out.close(); 01376 } 01377 else 01378 { 01379 cerr << "ERROR: can only convert to .amat .pmat .dmat or .csv" << endl 01380 << "Please specify a destination name with a valid extension " << endl; 01381 } 01382 } 01383 else if(command=="info") 01384 { 01385 string dbname = argv[2]; 01386 VMat vm = getDataSet(dbname); 01387 cout<<vm.length()<<" x "<<vm.width()<<endl; 01388 cout << "inputsize: " << vm->inputsize() << endl; 01389 cout << "targetsize: " << vm->targetsize() << endl; 01390 cout << "weightsize: " << vm->weightsize() << endl; 01391 VVMatrix * vvm = dynamic_cast<VVMatrix*>((VMatrix*)vm); 01392 if(vvm!=NULL) 01393 { 01394 cout<<"Last modification (including dependencies of .vmat): "<<vvm->getMtime()<<endl; 01395 bool ispre=vvm->isPrecomputedAndUpToDate(); 01396 cout<<"precomputed && uptodate : "; 01397 if(ispre) 01398 { 01399 cout <<"yes : " << vvm->getPrecomputedDataName()<<endl; 01400 cout<<"timestamp of precom. data : "<<getDataSetDate(vvm->getPrecomputedDataName())<<endl; 01401 } 01402 else cout <<"no"<<endl; 01403 } 01404 } 01405 else if(command=="fields") 01406 { 01407 bool add_info = true; 01408 bool transpose = false; 01409 if (argc >= 4) { 01410 add_info = !(string(argv[3]) == "name_only"); 01411 } 01412 if (argc >= 5) { 01413 transpose = (string(argv[4]) == "transpose"); 01414 } 01415 string dbname = argv[2]; 01416 VMat vm = getDataSet(dbname); 01417 if (add_info) { 01418 cout<<"FieldNames: "; 01419 if (!transpose) { 01420 cout << endl; 01421 } 01422 } 01423 for(int i=0;i<vm.width();i++) { 01424 if (add_info) { 01425 cout << i << ": "; 01426 } 01427 cout << vm->fieldName(i); 01428 if (transpose) { 01429 cout << " "; 01430 } else { 01431 cout << endl; 01432 } 01433 } 01434 if (transpose) { 01435 // It misses a carriage return after everything is displayed. 01436 cout << endl; 01437 } 01438 } 01439 else if(command=="fieldinfo") 01440 { 01441 string dbname = argv[2]; 01442 string fieldname_or_num = argv[3]; 01443 VMat vm = getDataSet(dbname); 01444 vm->printFieldInfo(cout, fieldname_or_num); 01445 } 01446 else if(command=="stats") 01447 { 01448 string dbname = argv[2]; 01449 VMat vm = getDataSet(dbname); 01450 displayBasicStats(vm); 01451 } 01452 else if(command=="gendef") 01453 { 01454 string dbname = argv[2]; 01455 TVec<int> bins(argc-3); 01456 for(int i=3;i<argc;i++) 01457 bins[i-3]=toint(argv[i]); 01458 01459 VMat vm = getDataSet(dbname); 01460 TVec<StatsCollector> sc = vm->getStats(); 01461 // write stats file in metadatadir 01462 string name = vm->getMetaDataDir()+"/stats.def"; 01463 ofstream sfile(name.c_str()); 01464 for(int i=0;i<sc.size();i++) 01465 { 01466 sfile<<"DEFINE @"<<vm->fieldName(i)<<".mean "<<tostring(sc[i].mean())<<endl; 01467 sfile<<"DEFINE @"<<vm->fieldName(i)<<".stddev "<<tostring(sc[i].stddev())<<endl; 01468 sfile<<"DEFINE @"<<vm->fieldName(i)<<".stderr "<<tostring(sc[i].stderror())<<endl; 01469 sfile<<"DEFINE @"<<vm->fieldName(i)<<".min "<<tostring(sc[i].min())<<endl; 01470 sfile<<"DEFINE @"<<vm->fieldName(i)<<".max "<<tostring(sc[i].max())<<endl; 01471 sfile<<"DEFINE @"<<vm->fieldName(i)<<".normalized @"<<vm->fieldName(i)<<" @"<<vm->fieldName(i)<<".mean - @"<< 01472 vm->fieldName(i)<<".stddev /"<<endl; 01473 sfile<<"DEFINE @"<<vm->fieldName(i)<<".sum "<<tostring(sc[i].sum())<<endl; 01474 sfile<<"DEFINE @"<<vm->fieldName(i)<<".sumsquare "<<tostring(sc[i].sumsquare())<<endl; 01475 sfile<<"DEFINE @"<<vm->fieldName(i)<<".variance "<<tostring(sc[i].variance())<<endl; 01476 } 01477 for(int i=0;i<bins.size();i++) 01478 { 01479 int b=bins[i]; 01480 string name = vm->getMetaDataDir()+"/bins"+tostring(b)+".def"; 01481 ofstream bfile(name.c_str()); 01482 RealMapping rm; 01483 for(int j=0;j<sc.size();j++) 01484 { 01485 rm = sc[j].getBinMapping(int(vm.length()/real(b)),int(vm.length()/real(b))); 01486 bfile<<"DEFINE @"<<vm->fieldName(j)<<".ranges"+tostring(b)+" "<<rm<<endl; 01487 bfile<<"DEFINE @"<<vm->fieldName(j)<<".ranges"+tostring(b)+".nbins "<<rm.size()<<endl; 01488 bfile<<"DEFINE @"<<vm->fieldName(j)<<".ranges"+tostring(b)+".nbins_m1 "<<rm.size()-1<<endl; 01489 bfile<<"DEFINE @"<<vm->fieldName(j)<<".binned"+tostring(b)+" @"<<vm->fieldName(j)<<" @" 01490 <<vm->fieldName(j)<<".ranges"+tostring(b)<<endl; 01491 bfile<<"DEFINE @"<<vm->fieldName(j)<<".onehot"+tostring(b)+" @"<<vm->fieldName(j)<<".binned" 01492 +tostring(b)+" @"<<vm->fieldName(j)<<".ranges"+tostring(b)+".nbins onehot"<<endl; 01493 01494 } 01495 } 01496 } 01497 else if(command=="genkfold") 01498 { 01499 if(argc<5) 01500 { 01501 cerr<<"usage vmat genkfold <source_dataset> <fileprefix> <kvalue>\n"; 01502 exit(1); 01503 } 01504 string dbname = argv[2]; 01505 string prefix = argv[3]; 01506 int kval=toint(argv[4]); 01507 VMat vm = getDataSet(dbname); 01508 for(int i=0;i<kval;i++) 01509 { 01510 ofstream out((prefix+"_train_"+tostring(i+1)+".vmat").c_str()); 01511 out<<"<SOURCES>"<<endl; 01512 int ntest = vm.length()/kval; 01513 int ntrain_before_test = i*ntest; 01514 int ntrain_after_test = vm.length()-(i+1)*ntest; 01515 if(ntrain_before_test>0) 01516 out<<dbname<<":0:"<<ntrain_before_test<<endl; 01517 if(ntrain_after_test>0) 01518 out<<dbname<<":"<<ntest+ntrain_before_test<<":"<<ntrain_after_test<<endl; 01519 out<<"</SOURCES>"<<endl; 01520 ofstream out2((prefix+"_test_"+tostring(i+1)+".vmat").c_str()); 01521 out2<<"<SOURCES>"<<endl; 01522 out2<<dbname<<":"<<ntrain_before_test<<":"<<ntest<<endl; 01523 out2<<"</SOURCES>"<<endl; 01524 } 01525 } 01526 else if(command=="genvmat") 01527 { 01528 if(argc<5) 01529 { 01530 cerr<<"usage vmat genvmat <source_dataset> <dest_vmat> (binned{num} | onehot{num} | normalized)\n"; 01531 exit(1); 01532 } 01533 string dbname = argv[2]; 01534 string destvmat = argv[3]; 01535 string type=argv[4]; 01536 int typen= 0; 01537 int bins= 0; 01538 if(type.find("binned")!=string::npos) 01539 { 01540 typen=0; 01541 bins=toint(type.substr(6)); 01542 } 01543 else if(type.find("onehot")!=string::npos) 01544 { 01545 typen=1; 01546 bins=toint(type.substr(6)); 01547 } 01548 else if(type.find("normalized")!=string::npos) 01549 typen=2; 01550 else PLERROR("Unknown operation: %s",type.c_str()); 01551 01552 VMat vm = getDataSet(dbname); 01553 ofstream out(destvmat.c_str()); 01554 01555 out<<"<SOURCES>"<<endl; 01556 out<<dbname<<endl; 01557 out<<"</SOURCES>"<<endl; 01558 out<<"<PROCESSING>"<<endl; 01559 out<<"INCLUDE "<<dbname+".metadata/stats.def"<<endl; 01560 if(typen!=2) 01561 out<<"INCLUDE "<<dbname+".metadata/bins"<<bins<<".def"<<endl; 01562 01563 for(int i=0;i<vm.width();i++) 01564 { 01565 switch(typen) 01566 { 01567 case 0: 01568 out<<"@"<<vm->fieldName(i)<<".binned"<<bins<<endl; 01569 out<<":"<<vm->fieldName(i)<<endl; 01570 break; 01571 case 1: 01572 out<<"@"<<vm->fieldName(i)<<".onehot"<<bins<<endl; 01573 out<<":"<<vm->fieldName(i)<<".:0:@"<<vm->fieldName(i)<<".ranges"<<bins<<".nbins_m1"<<endl; 01574 break; 01575 case 2: 01576 out<<"@"<<vm->fieldName(i)<<".normalized"<<endl; 01577 out<<":"<<vm->fieldName(i)<<endl; 01578 break; 01579 } 01580 01581 } 01582 out<<"</PROCESSING>"<<endl; 01583 out.close(); 01584 } 01585 else if(command=="diststat") 01586 { 01587 VMat vm = getDataSet(argv[2]); 01588 int inputsize = atoi(argv[3]); 01589 printDistanceStatistics(vm, inputsize); 01590 } 01591 else if(command=="diff") 01592 { 01593 VMat vm1 = getDataSet(argv[2]); 01594 VMat vm2 = getDataSet(argv[3]); 01595 double tol = 1e-6; 01596 if(argc>=5) 01597 tol = atof(argv[4]); 01598 print_diff(cout, vm1, vm2, tol); 01599 } 01600 else if(command=="cat") 01601 { 01602 if(argc!=4 && argc!=3) 01603 PLERROR("'vmat cat' must be used that way : vmat cat FILE [vplFilteringCode]"); 01604 string dbname = argv[2]; 01605 string code; 01606 VMat vm = getDataSet(dbname); 01607 Vec tmp(vm.width()); 01608 if(argc==4) 01609 01610 { 01611 code=argv[3]; 01612 01613 VMatLanguage vpl(vm); 01614 vector<string> fn; 01615 for(int i=0;i<vm->width();i++) 01616 fn.push_back(vm->fieldName(i)); 01617 vpl.compileString(code,fn); 01618 Vec answer(1); 01619 for(int i=0;i<vm.length();i++) 01620 { 01621 vpl.run(i,answer); 01622 if(answer[0]) { 01623 vm->getRow(i, tmp); 01624 cout<<tmp<<endl; 01625 } 01626 } 01627 } 01628 else 01629 for(int i=0;i<vm.length();i++) 01630 { 01631 vm->getRow(i,tmp); 01632 cout<<tmp<<endl; 01633 } 01634 } 01635 else if(command=="sascat") 01636 { 01637 if(argc!=4) 01638 PLERROR("'vmat sascat' must be used that way : vmat sascat <in-dataset> <out-filename.txt>"); 01639 string dbname = argv[2]; 01640 string outname = argv[3]; 01641 string code; 01642 VMat vm = getDataSet(dbname); 01643 ofstream out(outname.c_str()); 01644 for (int i=0;i<vm.width();i++) 01645 out << vm->fieldName(i) << "\t"; 01646 out << endl; 01647 for(int i=0;i<vm.length();i++) 01648 { 01649 for (int j=0;j<vm.width();j++) 01650 out << vm->getString(i,j) << "\t"; 01651 out<<endl; 01652 } 01653 } 01654 /* OLD CODE 01655 else if(command=="view") 01656 { 01657 if(argc > 5) 01658 PLERROR("Bad number of arguments. Syntax for option view: %s view <dbname> [<row> [<col>]]", argv[0]); 01659 VMat vm= getDataSet(string(argv[2])); 01660 viewVMat(vm, argc>=4? toint(argv[3]) : 0, argc==5? toint(argv[4]) : 0); 01661 } 01662 */ 01663 else if(command=="view") 01664 { 01665 vmat_view_dataset = string(argv[2]); 01666 VMat vm = getDataSet(vmat_view_dataset); 01667 viewVMat(vm); 01668 } 01669 else if(command=="plot") 01670 { 01671 if(0 != argc%2) 01672 PLERROR("Bad number of arguments. Syntax for option plot:\n" 01673 "%s plot <dbname0> <col0>[:<row0>:<nrows0>] {<dbnameN> <colN>[:<rowN>:<nrowsN>]}", argv[0]); 01674 plotVMats(argv+2, argc-2); 01675 } 01676 else if(command=="help") 01677 { 01678 cout << getDataSetHelp() << endl; 01679 } 01680 else 01681 PLERROR("Unknown command : %s",command.c_str()); 01682 return 0; 01683 } 01684 01685 } // end of namespace PLearn

Generated on Tue Aug 17 16:10:35 2004 for PLearn by doxygen 1.3.7