00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00037
#ifndef ROWMAPSPARSEMATRIX
00038
#define ROWMAPSPARSEMATRIX
00039
00040
#include <map>
00041
#include "Mat.h"
00042
#include "TMat.h"
00043
#include "SparseMatrix.h"
00044
00045
namespace PLearn {
00046
using namespace std;
00047
00048
00068
template <
class T>
00069 class RowMapSparseMatrix {
00070
public:
00071 vector< map<int,T> > rows;
00072
protected:
00073 int _width;
00074
00075
public:
00076 bool save_binary;
00077
00078 RowMapSparseMatrix(
int n_rows=0,
int n_columns=0)
00079 : rows(n_rows), _width(n_columns), save_binary(true) {}
00080
00081 RowMapSparseMatrix(
string filename) {
load(filename); }
00082
00083 RowMapSparseMatrix(
const Mat& m,
bool fill_all=
true) : rows(m.length()), _width(m.width()), save_binary(true)
00084 {
00085
if (fill_all)
00086
for (
int i=0;i<
length();i++)
00087 {
00088
real* r=m[i];
00089 map<int,T>& row_i=rows[i];
00090
for (
int j=0;j<
width();j++)
00091 row_i[j]=T(r[j]);
00092 }
00093
else
00094
for (
int i=0;i<
length();i++)
00095 {
00096
real* r=m[i];
00097 map<int,T>& row_i=rows[i];
00098
for (
int j=0;j<
width();j++){
00099
if(T(r[j])!=0)
00100 row_i[j]=T(r[j]);
00101 }
00102 }
00103 }
00104
00106 RowMapSparseMatrix(
const SparseMatrix& sm,
int n_rows,
int n_cols) : rows(n_rows), _width(n_cols), save_binary(false) {
00107
00108
for (
int j = 0; j < n_cols; j++) {
00109
int bcol_j = (
int)sm.
beginRow[j];
00110
int ecol_j = (
int)sm.
endRow[j];
00111
for (
int row_i = bcol_j; row_i <= ecol_j; row_i++) {
00112
int i = (
int)sm.
row[row_i];
00113 (*this)(i, j) = (T)sm.
values[row_i];
00114 }
00115 }
00116
00117 }
00118
00120 void resize(
int n_rows,
int n_columns) {
00121 rows.resize(n_rows);
00122 _width=n_columns;
00123
clear();
00124 }
00125
00127 void clear() {
00128
int s=rows.size();
00129
for (
int i=0;i<s;i++)
00130 rows[i].clear();
00131 }
00132
00133 void clearRow(
int i)
00134 {
00135 rows[i].clear();
00136 }
00137
00138 T& operator()(
int i,
int j) {
00139
#ifdef BOUNDCHECK
00140
if (i<0 || i>=
length() && j<0 || j>=
width())
00141
PLERROR(
"RowMapSparseMatrix: out-of-bound access to (%d,%d), dims=(%d,%d)",
00142 i,j,
length(),
width());
00143
#endif
00144
return rows[i][j];
00145 }
00146
00147 const T& operator()(
int i,
int j)
const {
00148
#ifdef BOUNDCHECK
00149
if (i<0 || i>=
length() && j<0 || j>=
width())
00150
PLERROR(
"RowMapSparseMatrix: out-of-bound access to (%d,%d), dims=(%d,%d)",
00151 i,j,
length(),
width());
00152
#endif
00153
map<int,T>& row_i = rows[i];
00154
typename map<int,T>::const_iterator it = row_i.find(j);
00155
if (it==row_i.end())
00156
return 0;
00157
return it->second;
00158 }
00159
00172 map<int,T>& operator()(
int i) {
return rows[i]; }
00173
00175 int size()
const {
00176
int _size=0;
00177
for (
unsigned int i=0;i<rows.size();i++)
00178 _size += rows[i].size();
00179
return _size;
00180 }
00181 int length()
const {
return rows.size(); }
00182 int width()
const {
return _width; }
00183
00184 void save(
string filename)
const {
00185 ofstream out(filename.c_str());
write(out);
00186 }
00187 void saveAscii(
string filename) {
00188
bool old_mode=save_binary;
00189 save_binary =
false;
save(filename);
00190 save_binary=old_mode;
00191 }
00193 void load(
string filename) { ifstream in(filename.c_str());
read(in); }
00194 void read(istream& in) {
00195
readHeader(in,
"RowMapSparseMatrix");
00196
int n_bytes=0;
00197
readField(in,
"sizeof(T)",n_bytes,4);
00198
readField(in,
"save_binary",save_binary);
00199
if (n_bytes!=
sizeof(T) && save_binary)
00200
PLERROR(
"RowMapSparseMatrix<T>::read, input matrix has sizeof(T)=%d, expected %d",n_bytes,
sizeof(T));
00201
int n_rows;
00202
readField(in,
"n_rows",n_rows);
00203 rows.resize(n_rows);
00204
readField(in,
"width",_width);
00205
for (
int i=0;i<n_rows;i++)
00206 {
00207 map<int,T>& row_i = rows[i];
00208
int n_elements;
00209
if (save_binary)
00210 {
00211
binread(in,n_elements);
00212
for (
int j=0;j<n_elements;j++)
00213 {
00214
int col;
00215 T value;
00216
binread(in,col);
00217
binread(in,value);
00218 row_i[col]=value;
00219 }
00220 }
else
00221 {
00222
PLearn::read(in,n_elements);
00223
for (
int j=0;j<n_elements;j++)
00224 {
00225
int col;
00226 T value;
00227
PLearn::read(in,col);
00228
PLearn::read(in,value);
00229 row_i[col]=value;
00230 }
00231 }
00232 }
00233
readFooter(in,
"RowMapSparseMatrix");
00234 }
00235
00237 void write(ostream& out)
const {
00238
writeHeader(out,
"RowMapSparseMatrix");
00239
int sizeofT =
sizeof(T);
00240
writeField(out,
"sizeof(T)",sizeofT);
00241
writeField(out,
"save_binary",save_binary);
00242
writeField(out,
"n_rows",rows.size());
00243
writeField(out,
"width",_width);
00244
int s=rows.size();
00245
for (
int i=0;i<s;i++)
00246 {
00247
const map<int,T>& row_i = rows[i];
00248
typename map<int,T>::const_iterator it = row_i.begin();
00249
typename map<int,T>::const_iterator end = row_i.end();
00250
int n_elements = row_i.size();
00251
if (save_binary)
00252 {
00253
binwrite(out,n_elements);
00254
for (;it!=end;++it)
00255 {
00256
binwrite(out,it->first);
00257
binwrite(out,it->second);
00258 }
00259 }
00260
else
00261 {
00262
PLearn::write(out,n_elements);
00263
for (;it!=end;++it)
00264 {
00265
PLearn::write(out,it->first);
00266
PLearn::write(out,it->second);
00267 }
00268 }
00269 }
00270
writeFooter(out,
"RowMapSparseMatrix");
00271 }
00272
00277 void saveNonZeroElements(
string filename)
const {
00278 ofstream out(filename.c_str());
00279 out <<
length() <<
" " <<
width() <<
" " <<
size() <<
endl;
00280
int s=rows.size();
00281
for (
int i=0;i<s;i++)
00282 {
00283
const map<int,T>& row_i = rows[i];
00284
typename map<int,T>::const_iterator it = row_i.begin();
00285
typename map<int,T>::const_iterator end = row_i.end();
00286
for (;it!=end;++it)
00287 out << i <<
" " << it->first <<
" " << it->second <<
endl;
00288 }
00289 }
00290
00293 void product(
const Vec& x,
Vec& y) {
00294
real* _y=y.
data();
00295
real* _x=
x.data();
00296
#ifdef BOUNDCHECK
00297
if (y.
length()!=
length() ||
x.length()!=
width())
00298
PLERROR(
"RowMapSparseMatrix::product: inconsistent arguments (%d,%d), dims=(%d,%d)",
00299
x.length(),y.
length(),
length(),
width());
00300
#endif
00301
int s=rows.size();
00302
for (
int i=0;i<s;i++)
00303 {
00304
real res=0;
00305 map<int,T>& row_i = rows[i];
00306
typename map<int,T>::const_iterator it = row_i.begin();
00307
typename map<int,T>::const_iterator end = row_i.end();
00308
for (;it!=end;++it)
00309 res += it->second * _x[it->first];
00310 _y[i] = res;
00311 }
00312 }
00313
00314 Mat toMat()
00315 {
00316
Mat res(
length(),
width());
00317
for (
int i=0;i<
length();i++)
00318 {
00319 map<int,T>& row_i = rows[i];
00320
typename map<int,T>::const_iterator it = row_i.begin();
00321
typename map<int,T>::const_iterator end = row_i.end();
00322
real* res_i=res[i];
00323
for (;it!=end;++it)
00324 res_i[it->first] = it->second;
00325 }
00326
return res;
00327 }
00328
00329 bool isSymmetric(
real tolerance=0)
00330 {
00331
for (
int i=0;i<
length();i++)
00332 {
00333 map<int,T>& row_i = rows[i];
00334
typename map<int,T>::const_iterator it = row_i.begin();
00335
typename map<int,T>::const_iterator end = row_i.end();
00336
for (;it!=end;++it)
00337
if (it->first!=i)
00338 {
00339 T& other_guy = (*this)(it->first,i);
00340
if (fabs(other_guy - it->second)>tolerance)
00341
return false;
00342 }
00343 }
00344
return true;
00345 }
00346
00354 bool fillSymmetricPart(
real tolerance=0)
00355 {
00356
for (
int i=0;i<
length();i++)
00357 {
00358 map<int,T>& row_i = rows[i];
00359
typename map<int,T>::const_iterator it = row_i.begin();
00360
typename map<int,T>::const_iterator end = row_i.end();
00361
for (;it!=end;++it)
00362 {
00363
int j=it->first;
00364
if (j!=i)
00365 {
00367 map<int,T>& row_j= rows[j];
00368
typename map<int,T>::iterator symm_it =row_j.find(i);
00369
if (symm_it==row_j.end())
00371 row_j[i]=it->second;
00372
else
00374
if (fabs(symm_it->second - it->second)>tolerance)
00375
return false;
00376 }
00377 }
00378 }
00379
return true;
00380 }
00381
00383 void diag(
Vec& d)
00384 {
00385
real* d_=d.
data();
00386
for (
int i=0;i<
length();i++)
00387 d_[i] = (*this)(i,i);
00388 }
00389
00392 void diagonalOfSquare(
Vec& d)
00393 {
00394
real* d_=d.
data();
00395
for (
int i=0;i<
length();i++)
00396 {
00397
real sum2=0;
00398 map<int,T>& row_i = rows[i];
00399
typename map<int,T>::const_iterator it = row_i.begin();
00400
typename map<int,T>::const_iterator end = row_i.end();
00401
for (;it!=end;++it)
00402 sum2 += it->second * it->second;
00403 d_[i] = sum2;
00404 }
00405 }
00406
00408 real dotRow(
int i,
Vec v)
00409 {
00410
#ifdef BOUNDCHECK
00411
if (v.
length()!=
width())
00412
PLERROR(
"RowMapSparseMatrix::dotRow(%d,v), v.length_=%d != matrix width=%d",
00413 i,v.
length(),
width());
00414
#endif
00415
real s = 0;
00416
real* v_=v.
data();
00417 map<int,T>& row_i = rows[i];
00418
typename map<int,T>::const_iterator it = row_i.begin();
00419
typename map<int,T>::const_iterator end = row_i.end();
00420
for (;it!=end;++it)
00421 s += it->second * v_[it->first];
00422
return s;
00423 }
00424
00426 real dotColumn(
int j,
Vec v)
00427 {
00428
#ifdef BOUNDCHECK
00429
if (v.
length()!=
length())
00430
PLERROR(
"RowMapSparseMatrix::dotColumn(%d,v), v.length_=%d != matrix length=%d",
00431 j,v.
length(),
length());
00432
#endif
00433
PLWARNING(
"RowMapSparseMatrix is not appropriate to perform dotColumn operations");
00434
real s=0;
00435
real* v_=v.
data();
00436
for (
int i=0;i<v.
length();v++)
00437 s += (*this)(i,j) * v_[i];
00438
return s;
00439 }
00440
00442 void transposeProduct(
RowMapSparseMatrix& m,
bool verbose =
false)
00443 {
00444
RowMapSparseMatrix<T>&
self = (*this);
00445
int n =
self.length();
00446
RowMapSparseMatrix<T> mt(n, n);
00447
transpose(m, mt);
00448
int nnz = 0;
00449
for (
int i = 0; i < n; i++)
00450 {
00451
if (verbose)
00452 {
00453
if (i % 10 == 0 && i != 0) {
00454 cout <<
"[" << i <<
"]" <<
" ";
00455
if (i % 100 == 0) cout <<
endl;
00456
else cout.
flush();
00457 }
00458 }
00459
for (
int j = 0; j < n; j++)
00460 {
00461 T
val = multiplyVecs(mt(i), mt(j));
00462
if (
val != 0)
00463 {
00464 nnz++;
00465
self(i, j) =
val;
00466 }
00467 }
00468 }
00469
if (verbose)
00470 cout <<
endl;
00471 }
00472
00475 void add2Rows(
Vec row,
bool only_on_non_zeros=
true)
00476 {
00477
if (!only_on_non_zeros)
00478
PLERROR(
"RowMapSparseMatrix::add2Rows(Vec,bool) works only with bool=true");
00479
real* r=row.
data();
00480
for (
int i=0;i<
length();i++)
00481 {
00482 map<int,T>& row_i = rows[i];
00483
typename map<int,T>::iterator it = row_i.begin();
00484
typename map<int,T>::const_iterator end = row_i.end();
00485
for (;it!=end;++it)
00486 it->second += r[it->first];
00487 }
00488 }
00491 void add2Columns(
Vec col,
bool only_on_non_zeros=
true)
00492 {
00493
if (!only_on_non_zeros)
00494
PLERROR(
"RowMapSparseMatrix::add2Columns(Vec,bool) works only with bool=true");
00495
for (
int i=0;i<
length();i++)
00496 {
00497
real col_i=col[i];
00498 map<int,T>& row_i = rows[i];
00499
typename map<int,T>::iterator it = row_i.begin();
00500
typename map<int,T>::const_iterator end = row_i.end();
00501
for (;it!=end;++it)
00502 it->second += col_i;
00503 }
00504 }
00507 void add(
real scalar,
bool only_on_non_zeros=
true)
00508 {
00509
if (!only_on_non_zeros)
00510
PLERROR(
"RowMapSparseMatrix::add(real,bool) works only with bool=true");
00511
for (
int i=0;i<
length();i++)
00512 {
00513 map<int,T>& row_i = rows[i];
00514
typename map<int,T>::iterator it = row_i.begin();
00515
typename map<int,T>::const_iterator end = row_i.end();
00516
for (;it!=end;++it)
00517 it->second += scalar;
00518 }
00519 }
00520
00526 void averageAcrossRowsAndColumns(
Vec avg_across_rows,
Vec avg_across_columns,
00527
bool only_on_non_zeros=
true)
00528 {
00529
if (!only_on_non_zeros)
00530
PLERROR(
"RowMapSparseMatrix::averageAcrossRowsAndColumns works only with only_on_non_zeros=true");
00531 avg_across_rows.
resize(
width());
00532 avg_across_columns.
resize(
length());
00533 avg_across_rows.
clear();
00534 avg_across_columns.
clear();
00535
TVec<int> column_counts(
width());
00536
for (
int i=0;i<
length();i++)
00537 {
00538
real& avg_cols_i=avg_across_columns[i];
00539
real* avg_rows = avg_across_rows.
data();
00540 map<int,T>& row_i = rows[i];
00541
typename map<int,T>::const_iterator it = row_i.begin();
00542
typename map<int,T>::const_iterator end = row_i.end();
00543
int n=0;
00544
for (;it!=end;++it)
00545 {
00546 avg_cols_i += it->second;
00547
int j=it->first;
00548 avg_rows[j] += it->second;
00549 n++;
00550 column_counts[j]++;
00551 }
00552 avg_cols_i /= n;
00553 }
00554
for (
int j=0;j<
width();j++)
00555 avg_across_rows[j] /= column_counts[j];
00556 }
00557
00563 real sumRow(
int i)
00564 {
00565
real s=0;
00566 map<int,T>& row_i = rows[i];
00567
typename map<int,T>::const_iterator it = row_i.begin();
00568
typename map<int,T>::const_iterator end = row_i.end();
00569
for (;it!=end;++it)
00570 s += it->second;
00571
return s;
00572 }
00573
00574
00576 void operator*=(
real scalar)
00577 {
00578
for (
int i=0;i<
length();i++)
00579 {
00580 map<int,T>& row_i = rows[i];
00581
typename map<int,T>::iterator it = row_i.begin();
00582
typename map<int,T>::const_iterator end = row_i.end();
00583
for (;it!=end;++it)
00584 it->second *= scalar;
00585 }
00586 }
00587
00589 static void transpose(
RowMapSparseMatrix<T>& src,
RowMapSparseMatrix<T>& dest)
00590 {
00591 dest.
clear();
00592
for (
int i = 0; i < src.
length(); i++)
00593 {
00594 map<int, T>& row = src(i);
00595
typename map<int, T>::iterator beg = row.begin();
00596
typename map<int, T>::iterator end = row.end();
00597
while (beg != end)
00598 {
00599
int col = beg->first;
00600 T
val = beg->second;
00601 dest(col, i) =
val;
00602 ++beg;
00603 }
00604 }
00605 }
00606
00608 static real multiplyVecs(map<int, T>& map1, map<int, T>& map2)
00609 {
00610
if (map1.size() == 0 || map2.size() == 0)
00611
return 0;
00612
typename map<int, T>::iterator beg1 = map1.begin();
00613
typename map<int, T>::iterator beg2 = map2.begin();
00614
typename map<int, T>::iterator end1 = map1.end();
00615
typename map<int, T>::iterator end2 = map2.end();
00616
int col1, col2;
00617 T val1, val2,
sum = 0;
00618
bool fend1 = (beg1 == end1), fend2 = (beg2 == end2);
00619
int OUT = getMaxColumnIndex(map1, map2) + 1;
00620
00621
while (!fend1 || !fend2) {
00622
if (!fend1)
00623 col1 = beg1->first;
00624
else
00625 col1 = OUT;
00626
if (!fend2)
00627 col2 = beg2->first;
00628
else
00629 col2 = OUT;
00630 val1 = beg1->second;
00631 val2 = beg2->second;
00632
if (col1 == col2) {
00633
sum += (val1 * val2);
00634 ++beg1;
00635
if (beg1 == end1) fend1 =
true;
00636 ++beg2;
00637
if (beg2 == end2) fend2 =
true;
00638 }
else if (col1 < col2) {
00639
00640 ++beg1;
00641
if (beg1 == end1) fend1 =
true;
00642 }
else if (col1 > col2) {
00643
00644 ++beg2;
00645
if (beg2 == end2) fend2 =
true;
00646 }
00647 }
00648
return sum;
00649 }
00650
00651 static real euclidianDistance(map<int, real>& map1, map<int, real>& map2)
00652 {
00653
if (map1.size() == 0 || map2.size() == 0)
00654
return 0;
00655 map<int, real>::iterator beg1 = map1.begin();
00656 map<int, real>::iterator beg2 = map2.begin();
00657 map<int, real>::iterator end1 = map1.end();
00658 map<int, real>::iterator end2 = map2.end();
00659
int col1, col2;
00660
real val1, val2, diff,
sum = 0;
00661
bool fend1 = (beg1 == end1), fend2 = (beg2 == end2);
00662
int OUT = getMaxColumnIndex(map1, map2) + 1;
00663
00664
while (!fend1 || !fend2)
00665 {
00666
if (!fend1)
00667 col1 = beg1->first;
00668
else
00669 col1 = OUT;
00670
if (!fend2)
00671 col2 = beg2->first;
00672
else
00673 col2 = OUT;
00674 val1 = beg1->second;
00675 val2 = beg2->second;
00676
if (col1 == col2)
00677 {
00678 diff = val1 - val2;
00679
sum += (diff * diff);
00680 beg1++;
00681
if (beg1 == end1) fend1 =
true;
00682 beg2++;
00683
if (beg2 == end2) fend2 =
true;
00684 }
else if (col1 < col2)
00685 {
00686 diff = val1;
00687
sum += (diff * diff);
00688 beg1++;
00689
if (beg1 == end1) fend1 =
true;
00690 }
else if (col1 > col2)
00691 {
00692 diff = val2;
00693
sum += (diff * diff);
00694 beg2++;
00695
if (beg2 == end2) fend2 =
true;
00696 }
00697 }
00698
00700
return sum;
00701 }
00702
00704 static void substractVecs(map<int, real>& map1, map<int, real>& map2,
Vec& dest)
00705 {
00706 map<int, real>::iterator beg1 = map1.begin();
00707 map<int, real>::iterator beg2 = map2.begin();
00708 map<int, real>::iterator end1 = map1.end();
00709 map<int, real>::iterator end2 = map2.end();
00710
int col1, col2;
00711
real val1, val2;
00712
bool fend1 = (beg1 == end1), fend2 = (beg2 == end2);
00713
int OUT = getMaxColumnIndex(map1, map2) + 1;
00714
00715
while (!fend1 || !fend2)
00716 {
00717
if (!fend1)
00718 col1 = beg1->first;
00719
else
00720 col1 = OUT;
00721
if (!fend2)
00722 col2 = beg2->first;
00723
else
00724 col2 = OUT;
00725 val1 = beg1->second;
00726 val2 = beg2->second;
00727
if (col1 == col2)
00728 {
00729 dest[col1] = val1 - val2;
00730 beg1++;
00731
if (beg1 == end1) fend1 =
true;
00732 beg2++;
00733
if (beg2 == end2) fend2 =
true;
00734 }
else if (col1 < col2)
00735 {
00736 dest[col1] = val1;
00737 beg1++;
00738
if (beg1 == end1) fend1 =
true;
00739 }
else if (col1 > col2)
00740 {
00741 dest[col2] = 0 - val2;
00742 beg2++;
00743
if (beg2 == end2) fend2 =
true;
00744 }
00745 }
00746 }
00747
00750 static int getMaxColumnIndex(map<int, T>& map1, map<int, T>& map2)
00751 {
00752 map<int, real>::iterator end1 = map1.end();
00753 map<int, real>::iterator end2 = map2.end();
00754 --end1;
00755 --end2;
00756
return MAX(end1->first, end2->first);
00757 }
00758
00763 void exportToMatlabReadableFormat(
string filename)
00764 {
00765 ofstream out(filename.c_str());
00766
#ifdef USEDOUBLE
00767
out.precision(20);
00768
#else
00769
out.precision(6);
00770
#endif
00771
for (
unsigned int i = 0; i < rows.size(); i++)
00772 {
00773
typename map<int, T>::iterator beg = rows[i].begin();
00774
typename map<int, T>::iterator end = rows[i].end();
00775
while (beg != end)
00776 {
00777 out << i + 1 <<
" " << beg->first + 1 <<
" " << beg->second <<
endl;
00778 beg++;
00779 }
00780 }
00783
int l =
length();
00784
int w =
width();
00785 l--; w--;
00786 map<int, T>& row_l=rows[l];
00787
if (row_l.find(w) == row_l.end())
00788 out << l + 1 <<
" " << w + 1 <<
" " << 0 <<
endl;
00789 }
00790
00793 real density()
00794 {
00795
return size() /
real(
length() *
width());
00796 }
00797
00798 };
00799
00800
template <
class T>
00801 void product(
RowMapSparseMatrix<T>& M,
const Vec& x,
Vec& y) { M.
product(
x,y); }
00802
00803 }
00804
00805
#endif