00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
#ifndef MatIO_INC
00045
#define MatIO_INC
00046
00047
#include <plearn/math/TMat.h>
00048
#include "fileutils.h"
00049
#include <stdlib.h>
00050
#include <plearn/base/stringutils.h>
00051
00052
namespace PLearn {
00053
using namespace std;
00054
00055
00058 inline Mat makeMat(
int length,
int width,
const string& values)
00059 {
Mat m(length,width); m << values;
return m; }
00060
00061 inline Vec makeVec(
int length,
const string& values)
00062 {
Vec v(length); v << values;
return v; }
00063
00066
void loadMat(
const string& filename, TMat<float>& mat);
00067
void loadMat(
const string& filename, TMat<double>& mat);
00068
void loadVec(
const string& filename, TVec<float>& vec);
00069
void loadVec(
const string& filename, TVec<double>& vec);
00070
00072
void savePVec(
const string& filename,
const TVec<float>& vec);
00073
void savePVec(
const string& filename,
const TVec<double>& vec);
00074
void loadPVec(
const string& filename, TVec<float>& vec);
00075
void loadPVec(
const string& filename, TVec<double>& vec);
00076
void savePMat(
const string& filename,
const TMat<float>& mat);
00077
void savePMat(
const string& filename,
const TMat<double>& mat);
00078
void loadPMat(
const string& filename, TMat<float>& mat);
00079
void loadPMat(
const string& filename, TMat<double>& mat);
00080
00082
00084
//! Load an ASCII matrix file into `mat` (both overloads are defined further
//! down in this header).
//!
//! The four-argument form also fills `fieldnames` with the column names and,
//! when `map_sr` is non-null, records one string->real map per column for the
//! non-numeric tokens found in the file. The two-argument form forwards to it
//! with a throw-away fieldnames vector and no `map_sr`.
template<class T>
void loadAscii(const string& filename, TMat<T>& mat, TVec<string>& fieldnames,
               TVec<map<string,real> >* map_sr = 0);

template<class T>
void loadAscii(const string& filename, TMat<T>& mat);
00086
00087
//! Helper used by loadAscii() when the file header did not contain a
//! "#size:" line: it is handed the already-open stream `in` and is expected
//! to report the matrix dimensions through `length` and `width`.
//! `could_be_old_amat` is passed by reference; loadAscii() sets it to false
//! as soon as it has seen a '#' header line. The parsing rules themselves
//! live in the implementation, which is not visible in this header.
void parseSizeFromRemainingLines(const string& filename, ifstream& in,
                                 bool& could_be_old_amat, int& length, int& width);
00088
00089
00090
00091
00092
00093
//! Write `mat` to `filename` as ASCII text (both overloads are defined
//! further down in this header). The three-argument form additionally writes
//! a "#: " header line with the given field names when `fieldnames` is
//! non-empty; the two-argument form writes no field names.
template<class T>
void saveAscii(const string& filename, const TMat<T>& mat,
               const TVec<string>& fieldnames);

template<class T>
void saveAscii(const string& filename, const TMat<T>& mat);
00096
00098
template<
class T>
void saveAscii(
const string& filename,
const TVec<T>& vec);
00099
template<
class T>
void loadAscii(
const string& filename, TVec<T>& vec);
00100
00102
void loadGnuplot(
const string& filename, Mat& mat);
00103
void saveGnuplot(
const string& filename,
const Vec& vec);
00104
void saveGnuplot(
const string& filename,
const Mat& mat);
00105
00107
00108
00113
void matlabSave(
const string& dir,
const string& plot_title,
const Vec& data,
00114
const Vec& add_col,
const Vec& bounds,
string lengend=
"",
bool save_plot=
true);
00115
void matlabSave(
const string& dir,
const string& plot_title,
00116
const Vec& xValues,
00117
const Vec& yValues,
const Vec& add_col,
const Vec& bounds,
string lengend=
"",
bool save_plot=
true);
00118
00120
void matlabSave(
const string& dir,
const string& plot_title,
const Mat& data,
00121
const Vec& add_col,
const Vec& bounds, TVec<string> legend=TVec<string>(),
bool save_plot=
true);
00122
00131
void matlabSave(
const string& dir,
const string& plot_title,
00132
const Vec& xValues,
00133
const Mat& yValues,
const Vec& add_col,
const Vec& bounds, TVec<string> legend=TVec<string>(),
bool save_plot=
true);
00135
00137
void loadAsciiWithoutSize(
const string& filename,
const Vec& vec);
00138
void saveAsciiWithoutSize(
const string& filename,
const Vec& vec);
00139
void loadAsciiWithoutSize(
const string& filename,
const Mat& mat);
00140
void saveAsciiWithoutSize(
const string& filename,
const Mat& mat);
00141
00143 Mat
loadSNMat(
const string& filename);
00144 Vec
loadSNVec(
const string& filename);
00145
void saveSNMat(
const string& filename,
const Mat& mat);
00146
void saveSNVec(
const string& filename,
const Vec& vec);
00147
00149 Mat
loadADMat(
const string& filename);
00150 Vec
loadADVec(
const string& filename);
00151
00167 Mat
loadUCIMLDB(
const string& filename,
char ****to_symbols=0,
int **to_n_symbols=0, TVec<int>* max_in_col = 0, TVec<string>* header_columns = 0);
00168
00179 Mat
loadSTATLOG(
const string& filename,
char ****to_symbols=0,
int **to_n_symbols=0);
00180
00189
//! Load a JPEG image into `rgbmat`; `row_size` is an output parameter and
//! `scale` defaults to 1.
//! NOTE(review): the layout of `rgbmat` and the exact meaning of `row_size`
//! and `scale` are not visible from this header -- confirm against the
//! implementation.
void loadJPEGrgb(const string& jpeg_filename, Mat& rgbmat, int& row_size, int scale = 1);
00190
00191
00192
00193
00194
template<
class T>
00195 void loadAscii(
const string& filename,
TMat<T>& mat,
TVec<string>& fieldnames,
TVec<map<string,real> >* map_sr = 0)
00196 {
00197 ifstream in(filename.c_str());
00198
if(!in)
00199
PLERROR(
"Could not open file %s for reading", filename.c_str());
00200
00201
int length = -1;
00202
int width = -1;
00203
bool could_be_old_amat=
true;
00204
00205 in >>
ws;
00206
string line;
00207
00208
while(in.peek()==
'#')
00209 {
00210 getline(in, line);
00211 could_be_old_amat =
false;
00212
00213 size_t pos=line.find(
":");
00214
if(pos!=string::npos)
00215 {
00216
string sub=line.substr(0,pos);
00217
if(sub==
"#size")
00218 {
00219
string siz=
removeblanks((line.substr(pos)).substr(1));
00220
vector<string> dim =
split(siz,
" ");
00221
if(dim.size()!=2)
PLERROR(
"I need exactly 2 dimensions for matrix");
00222 length =
toint(dim[0]);
00223 width =
toint(dim[1]);
00224 }
00225
else if(sub==
"#")
00226 {
00227
string fnl=line.substr(pos).substr(1);
00228 fieldnames =
split(fnl,
" ");
00229 width=fieldnames.
size();
00230 }
00231 }
00232 in >>
ws;
00233 }
00234
00235
if(length==-1)
00236
parseSizeFromRemainingLines(filename, in, could_be_old_amat, length, width);
00237
00238
if(length==-1)
00239
PLERROR(
"In loadAscii: trying to load but couldn't determine file format automatically for %s",filename.c_str());
00240
00241
if(width != -1 && width != fieldnames.
length())
00242 {
00243
if (fieldnames.
length() != 0)
00244
PLWARNING(
"In loadAscii: Number of fieldnames (%d) and width (%d) mismatch in file %s. "
00245
"Replacing fieldnames by 'Field-0', 'Field-1', ...",
00246 fieldnames.
length(), width, filename.c_str());
00247 fieldnames.
resize(width);
00248
for(
int i= 0; i < width; ++i)
00249 fieldnames[i]=
string(
"Field-") +
tostring(i);
00250 }
00251
00252
00253
00254 in.close();
00255 ifstream loadmat(filename.c_str());
00256
00257 mat.
resize(length,width);
00258
TVec<int> current_map(width);
00259 current_map.
fill(1001);
00260
TVec<T> current_max(width);
00261 current_max.
clear();
00262
00263
if (
map_sr) {
00264
map_sr->resize(width);
00265 }
00266
string inp_element;
00267
for(
int i=0; i<length; i++)
00268 {
00269 T* mat_i = mat[i];
00270
skipBlanksAndComments(loadmat);
00271
for(
int j=0; j<width; j++) {
00272
00273
if (loadmat) {
00274 loadmat >> inp_element;
00275
if (
pl_isnumber(inp_element)) {
00276 mat_i[j] = strtod(inp_element.c_str(), 0);
00277
if (
map_sr) {
00278 T
val = mat_i[j];
00279
00280
00281
if (
val > current_max[j])
00282 current_max[j] =
val;
00283
if (current_max[j] >= current_map[j])
00284 current_map[j] =
int(current_max[j] + 1);
00285 map<string,real>& m = (*map_sr)[j];
00286
for (map<string,real>::iterator it = m.begin(); it != m.end(); it++) {
00287
if (it->second ==
val) {
00288
00289
00290
00291
00292
int cur_max_plus_one = int(
real(current_max[j]) + 1);
00293
if (cur_max_plus_one > current_map[j]) {
00294 it->second = cur_max_plus_one;
00295 current_map[j] = cur_max_plus_one;
00296 }
else
00297 it->second = current_map[j];
00298 current_map[j]++;
00299
00300
for (
int k = 0;
k < i;
k++) {
00301
if (mat(
k, j) ==
val)
00302 mat(
k, j) = it->second;
00303 }
00304 }
00305 }
00306 }
00307 }
else {
00308
00309
if (
map_sr) {
00310
00311 map<string,real>& m = (*map_sr)[j];
00312 map<string,real>::iterator it = m.find(inp_element);
00313
if(it != m.end()) {
00314
00315 mat_i[j] = it->second;
00316 }
else {
00317
00318 (*map_sr)[j][inp_element] = current_map[j];
00319 mat_i[j] = current_map[j];
00320 current_map[j]++;
00321 }
00322 }
else
00323
PLERROR(
"In loadAscii - You need to provide 'map_sr' if you want to load an ASCII file with strings");
00324 }
00325 }
00326
if (!loadmat) {
00327 loadmat.clear();
00328 mat_i[j] =
MISSING_VALUE;
00329 }
00330 }
00331 }
00332 }
00333
00340
template<
class T>
00341 void loadAsciiSingleBinaryDescriptor(
const string& filename,
TMat<T>& mat)
00342 {
00343 ifstream in(filename.c_str());
00344
if(!in)
00345
PLERROR(
"In loadAsciiSingleBinaryDescriptor: Could not open file %s for reading", filename.c_str());
00346
00347
int length = -1;
00348
int width = -1;
00349
00350 in >>
ws;
00351
string line;
00352
00353
while(in.peek()==
'#')
00354 {
00355 getline(in, line);
00356
00357 size_t pos=line.find(
":");
00358
if(pos!=string::npos)
00359 {
00360
string sub=line.substr(0,pos);
00361
if(sub==
"#size")
00362 {
00363
string siz=
removeblanks((line.substr(pos)).substr(1));
00364
vector<string> dim =
split(siz,
" ");
00365
if(dim.size()!=2)
PLERROR(
"In loadAsciiSingleBinaryDescriptor: I need exactly 2 dimensions for matrix");
00366 length =
toint(dim[0]);
00367 width =
toint(dim[1]);
00368 }
00369 }
00370 in >>
ws;
00371 }
00372
00373
if(length==-1)
00374 {
00375
PLERROR(
"In loadAsciiSingleBinaryDescriptor: Be nice and specify a width and length");
00376 }
00377
00378
00379
00380 mat.
resize(length,width);
00381
string inp_element;
00382
for(
int i=0; i<length; i++)
00383 {
00384 T* mat_i = mat[i];
00385
skipBlanksAndComments(in);
00386 in >> inp_element;
00387 in >> inp_element;
00388
if (inp_element.length() != (
unsigned int) width) {
00389
PLERROR(
"In loadAsciiSingleBinaryDescriptor, a descriptor isn't the right size");
00390 }
00391
for(
int j=0; j<width; j++) {
00392
00393 mat_i[j] = strtod(inp_element.substr(j,1).c_str(), 0);
00394 }
00395 }
00396 }
00397
00398
template<
class T>
00399 void loadAscii(
const string& filename,
TVec<T>& vec)
00400 {
00401 ifstream in(filename.c_str());
00402
if(!in)
00403
PLERROR(
"In loadAscii could not open file %s for reading",filename.c_str());
00404
00405
int size = -1;
00406 in >> size;
00407
if (size<0 || size>1e10)
00408
PLERROR(
"In Vec::loadAscii the file is probably not in the right format: size=%d", size);
00409 vec.
resize(size);
00410
typename TVec<T>::iterator it = vec.
begin();
00411
typename TVec<T>::iterator itend = vec.
end();
00412
00413
00414
00415
string inp_element;
00416
for(; it!=itend; ++it) {
00417
00418
if (in) {
00419 in >> inp_element;
00420 *it = strtod(inp_element.c_str(), 0);
00421 }
00422
if (!in) {
00423 in.
clear();
00424 *it =
MISSING_VALUE;
00425 }
00426 }
00427 }
00428
00429
00430
template<
class T>
00431 void saveAscii(
const string& filename,
const TMat<T>& mat)
00432 {
00433
saveAscii(filename, mat,
TVec<string>());
00434 }
00435
00436
template<
class T>
00437 void saveAscii(
const string& filename,
const TMat<T>& mat,
const TVec<string>& fieldnames)
00438 {
00439 ofstream out(filename.c_str());
00440
if (!out)
00441
PLERROR(
"In saveAscii could not open file %s for writing",filename.c_str());
00442
00443 out <<
"#size: "<< mat.
length() <<
' ' << mat.
width() <<
endl;
00444 out.precision(15);
00445
if(fieldnames.
size()>0)
00446 {
00447 out <<
"#: ";
00448
for(
int k=0;
k<fieldnames.
size();
k++)
00449
00450 out <<
space_to_underscore(fieldnames[
k]) <<
' ';
00451 out <<
endl;
00452 }
00453
00454
for(
int i=0; i<mat.
length(); i++)
00455 {
00456
const T* row_i = mat[i];
00457
for(
int j=0; j<mat.width(); j++)
00458 out << row_i[j] <<
' ';
00459 out <<
'\n';
00460 }
00461 }
00462
00463
template<
class T>
00464 void saveAscii(
const string& filename,
const TVec<T>& vec)
00465 {
00466 ofstream out(filename.c_str());
00467
if (!out)
00468
PLERROR(
"In saveAscii: could not open file %s for writing",filename.c_str());
00469
00470 out << vec.
length() <<
endl;
00471 out.precision(15);
00472
00473
typename TVec<T>::iterator it = vec.
begin();
00474
typename TVec<T>::iterator itend = vec.
end();
00475
for(; it!=itend; ++it)
00476 out << *it <<
' ';
00477 out <<
endl;
00478 }
00479
00480
template<
class T>
00481 void loadAscii(
const string& filename,
TMat<T>& mat)
00482 {
00483
TVec<std::string> fn;
00484
loadAscii(filename,mat,fn);
00485 }
00486
00487 }
00488
00489
00490
#endif