00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00037
#include "SmoothedProbSparseMatrix.h"
00038
00039
namespace PLearn {
00040
00041 SmoothedProbSparseMatrix::SmoothedProbSparseMatrix(
int n_rows,
int n_cols,
string name,
int mode,
bool double_access)
00042 :
ProbSparseMatrix(n_rows, n_cols, name,
mode, double_access)
00043 {
00044
smoothingMethod = 0;
00045 }
00046
00047
00048
00049
00050
00051
00052 void SmoothedProbSparseMatrix::normalizeCondLaplace(
ProbSparseMatrix& nXY,
bool clear_nXY)
00053 {
00054
int nXY_height = nXY.
getHeight();
00055
int nXY_width = nXY.
getWidth();
00056
smoothingMethod = 1;
00057
if (mode ==
ROW_WISE && (nXY.
getMode() ==
ROW_WISE || nXY.
isDoubleAccessible()))
00058 {
00059
clear();
00060
normalizationSum.
resize(nXY_height);
00061
for (
int i = 0; i < nXY_height; i++)
00062 {
00063
00064
real sum_row_i = nXY.
sumRow(i)+nXY_width;
00065
00066
normalizationSum[i] = sum_row_i;
00067 map<int, real>& row_i = nXY.
getRow(i);
00068
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it)
00069 {
00070
int j = it->first;
00071
real nij = it->second;
00072
00073
if (nij > 0.0){
00074
real pij = (nij +1)/ sum_row_i;
00075
set(i, j, pij);
00076 }
00077 }
00078 }
00079
if (clear_nXY)
00080 nXY.
clear();
00081 }
else if (mode ==
COLUMN_WISE && (nXY.
getMode() ==
COLUMN_WISE || nXY.
isDoubleAccessible())){
00082
clear();
00083
normalizationSum.
resize(nXY_width);
00084
for (
int j = 0; j < nXY_width; j++){
00085
00086
real sum_col_j = nXY.
sumCol(j)+nXY_height;
00087
00088
normalizationSum[j] = sum_col_j;
00089 map<int, real>& col_j = nXY.
getCol(j);
00090
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00091
int i = it->first;
00092
real nij = it->second;
00093
00094
if (nij > 0.0){
00095
set(i, j, (nij+1) / sum_col_j);
00096 }
00097 }
00098 }
00099
if (clear_nXY)
00100 nXY.
clear();
00101 }
else{
00102
PLERROR(
"pXY and nXY accessibility modes must match");
00103 }
00104 }
00105
00106 void SmoothedProbSparseMatrix::normalizeCondBackoff(
ProbSparseMatrix& nXY,
real disc,
Vec& bDist,
bool clear_nXY,
bool shadow)
00107 {
00108
00109
00110
00111
int i,j;
00112
real nij,pij;
00113
real minval,discval;
00114
int nXY_height = nXY.
getHeight();
00115
int nXY_width = nXY.
getWidth();
00116
00117
if(shadow){
00118
smoothingMethod = 2;
00119 }
else{
00120
smoothingMethod = 3;
00121 }
00122
00123
00124
00125
00126
00127
backoffDist.
resize(bDist.
size());
00128
backoffDist << bDist;
00129
if (mode ==
ROW_WISE && (nXY.
getMode() ==
ROW_WISE || nXY.
isDoubleAccessible())){
00130
clear();
00131
if (
backoffDist.
size()!=nXY_width)
PLERROR(
"Wrong dimension for backoffDistribution");
00132
normalizationSum.
resize(nXY_height);
normalizationSum.
clear();
00133
backoffNormalization.
resize(nXY_height);
backoffNormalization.
clear();
00134
discountedMass.
resize(nXY_height);
discountedMass.
clear();
00135
for (
int i = 0; i < nXY_height; i++){
00136
00137
real sum_row_i = nXY.
sumRow(i);
00138
if (sum_row_i==0){
00139
00140
normalizationSum[j]=nXY_width;
00141 }
else{
00142
00143
normalizationSum[i] = sum_row_i;
00144 }
00145
backoffNormalization[i]= 1.0;
00146 map<int, real>& row_i = nXY.
getRow(i);
00147
00148 minval=FLT_MAX;
00149
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it){
00150 minval= it->second<minval?it->second:minval;
00151 }
00152 discval = minval*disc;
00153
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it){
00154 j = it->first;
00155 nij = it->second;
00156
if(nij>discval){
00157
discountedMass[i]+=discval;
00158
00159 pij = (nij -discval)/ sum_row_i;
00160
if (pij<0)
PLERROR(
"modified count < 0 in Backoff Smoothing SmoothedProbSparseMatrix %s",nXY.
getName().c_str());
00161
00162
backoffNormalization[i]-=
backoffDist[j];
00163 }
else{
00164 pij = nij/ sum_row_i;
00165 }
00166
00167
set(i, j,pij);
00168
00169 }
00170
if(
discountedMass[i]==0)
PLERROR(
"Discounted mass is null but count are not null in %s line %d",nXY.
getName().c_str(),i);
00171 }
00172
00173
if (clear_nXY)nXY.
clear();
00174 }
else if (mode ==
COLUMN_WISE && (nXY.
getMode() ==
COLUMN_WISE || nXY.
isDoubleAccessible())){
00175
clear();
00176
normalizationSum.
resize(nXY_width);
normalizationSum.
clear();
00177
backoffNormalization.
resize(nXY_width);
backoffNormalization.
clear();
00178
discountedMass.
resize(nXY_width);
discountedMass.
clear();
00179
for ( j = 0; j < nXY_width; j++){
00180
00181
real sum_col_j = nXY.
sumCol(j);
00182
if (sum_col_j==0){
00183
00184
normalizationSum[j]=nXY_height;
00185
continue;
00186 }
else{
00187
00188
normalizationSum[j] = sum_col_j;
00189 }
00190
00191
backoffNormalization[j]= 1.0;
00192 map<int, real>& col_j = nXY.
getCol(j);
00193
00194 minval=FLT_MAX;
00195
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00196 minval= (it->second<minval && it->second!=0) ?it->second:minval;
00197 }
00198 discval = minval*disc;
00199
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00200 i = it->first;
00201 nij = it->second;
00202
if(nij>discval){
00203
discountedMass[j]+=discval;
00204
00205 pij = (nij -discval)/ sum_col_j;
00206
if (pij<0)
PLERROR(
"modified count < 0 in Backoff Smoothing SmoothedProbSparseMatrix %s : i=%d j=%d p=%f",nXY.
getName().c_str(),i,j,pij);
00207
00208
backoffNormalization[j]-=
backoffDist[i];
00209 }
else{
00210 pij = nij / sum_col_j;
00211 }
00212
if(pij<=0 || pij>1)
PLERROR(
"Invalide smoothed probability %f in %s",pij,nXY.
getName().c_str());
00213
set(i, j, pij);
00214 }
00215
if(
discountedMass[j]==0){
00216
PLERROR(
"Discounted mass is null but count are not null in %s col %d",nXY.
getName().c_str(),j);
00217 }
00218 }
00219
00220
if (clear_nXY)nXY.
clear();
00221 }
else{
00222
PLERROR(
"pXY and nXY accessibility modes must match");
00223 }
00224 }
00225
00226 real SmoothedProbSparseMatrix::get(
int i,
int j)
00227 {
00228
#ifdef BOUNDCHECK
00229
if (i < 0 || i >= height || j < 0 || j >= width)
00230
PLERROR(
"SmoothedProbSparseMatrix.get : out-of-bound access to (%d, %d), dims = (%d, %d)", i, j, height, width);
00231
#endif
00232
00233
00234
if(
smoothingMethod==0){
00235
return ProbSparseMatrix::get(i,j);
00236 }
00237
if (mode ==
ROW_WISE){
00238 map<int, real>& row_i = rows[i];
00239 map<int, real>::iterator it = row_i.find(j);
00240
if (it == row_i.end()){
00241
00242
if (
discountedMass[i]==0)
return 1/
normalizationSum[i];
00243
00244
if(
smoothingMethod==1)
return 1/
normalizationSum[i];
00245
00246
if(
smoothingMethod==2)
return discountedMass[i]*
backoffDist[j]/(
normalizationSum[i]*
backoffNormalization[i]);
00247
00248
if(
smoothingMethod==3)
return discountedMass[i]*backoffDist[j]/(
normalizationSum[i]);
00249 }
else{
00250
00251
if(
smoothingMethod==3)
return it->second+
discountedMass[i]*
backoffDist[j]/(
normalizationSum[i]);
00252
return it->second;
00253 }
00254 }
else{
00255 map<int, real>& col_j = cols[j];
00256 map<int, real>::iterator it = col_j.find(i);
00257
if (it == col_j.end()){
00258
00259
if (
discountedMass[j]==0)
return 1/
normalizationSum[j];
00260
00261
if(
smoothingMethod==1)
return 1/
normalizationSum[j];
00262
00263
if(
smoothingMethod==2)
return discountedMass[j]*
backoffDist[i]/(
normalizationSum[j]*
backoffNormalization[j]);
00264
00265
if(
smoothingMethod==3)
return discountedMass[j]*backoffDist[i]/(
normalizationSum[j]);
00266 }
else{
00267
00268
if(
smoothingMethod==3)
return it->second+
discountedMass[j]*
backoffDist[i]/(
normalizationSum[j]);
00269
return it->second;
00270 }
00271 }
00272
return;
00273 }
00274
00275
00276 bool SmoothedProbSparseMatrix::checkCondProbIntegrity()
00277 {
00278
real sum = 0.0;
00279
real backsum;
00280
int null_size;
00281
if (
normalizationSum.
size()==0)
return false;
00282
00283
if (mode ==
ROW_WISE){
00284
for (
int i = 0; i < height; i++){
00285 map<int, real>& row_i = rows[i];
00286
00287
if(
smoothingMethod==1)
sum = (width-row_i.size())/
normalizationSum[i];
00288
else if(
smoothingMethod==2||
smoothingMethod==3)
sum =
discountedMass[i]/
normalizationSum[i];
00289 backsum=0;
00290
00291
00292
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it){
00293
sum += it->second;
00294
if (
smoothingMethod==2)backsum +=
backoffDist[it->first];
00295 }
00296
if (
smoothingMethod==2){
00297
if (fabs(1.0 - backsum -
backoffNormalization[i]) > 1e-4 ){
00298 cout <<
" Inconsistent backoff normalization " << i <<
" : "<<
backoffNormalization[i]<<
" "<< backsum;
00299
return false;
00300 }
00301 }
00302
if (fabs(
sum - 1.0) > 1e-4 && (
sum > 0.0))
00303 cout <<
" Inconsistent line " << i <<
" sum = "<<
sum;
00304
return false;
00305 }
00306
return true;
00307 }
else{
00308
for (
int j = 0; j < width; j++){
00309 map<int, real>& col_j = cols[j];
00310
if(
smoothingMethod==1)
sum = (height-col_j.size())/
normalizationSum[j];
00311
else if(
smoothingMethod==2 ||
smoothingMethod==3)
sum =
discountedMass[j]/
normalizationSum[j];
00312
00313 backsum=0;
00314
00315
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00316
sum += it->second;
00317
if (
smoothingMethod==2)backsum +=
backoffDist[it->first];
00318 }
00319
if (
smoothingMethod==2){
00320
if (fabs(1.0 - backsum -
backoffNormalization[j]) > 1e-4 ){
00321 cout <<
" Inconsistent backoff normalization " << j <<
" : "<<
backoffNormalization[j]<<
" "<< backsum;
00322
return false;
00323 }
00324 }
00325
if(fabs(
sum - 1.0) > 1e-4 && (
sum > 0.0)){
00326 cout <<
" Inconsistent column " << j <<
" sum = "<<
sum;
00327
return false;
00328 }
00329 }
00330
return true;
00331 }
00332 }
00333
00334 void SmoothedProbSparseMatrix::write(
PStream& out)
const
00335
{
00336 ProbSparseMatrix::write(out);
00337
string class_name =
getClassName();
00338
switch(out.
outmode)
00339 {
00340
case PStream::raw_ascii :
00341
case PStream::pretty_ascii :
00342
PLERROR(
"raw/pretty_ascii write not implemented in %s", class_name.c_str());
00343
break;
00344
case PStream::raw_binary :
00345
PLERROR(
"raw_binary write not implemented in %s", class_name.c_str());
00346
break;
00347
case PStream::plearn_binary :
00348
case PStream::plearn_ascii :
00349 out.
write(class_name +
"(");
00350 out <<
smoothingMethod;
00351 out <<
normalizationSum;
00352 out <<
backoffDist;
00353 out <<
backoffNormalization;
00354 out <<
discountedMass;
00355 out.
write(
")\n");
00356
break;
00357
default:
00358
PLERROR(
"unknown outmode in %s::write(PStream& out)", class_name.c_str());
00359
break;
00360 }
00361 }
00362
00363 void SmoothedProbSparseMatrix::read(
PStream& in)
00364 {
00365
ProbSparseMatrix::read(in);
00366
string class_name =
getClassName();
00367
switch (in.
inmode)
00368 {
00369
case PStream::raw_ascii :
00370
PLERROR(
"raw_ascii read not implemented in %s", class_name.c_str());
00371
break;
00372
case PStream::raw_binary :
00373
PLERROR(
"raw_binary read not implemented in %s", class_name.c_str());
00374
break;
00375
case PStream::plearn_ascii :
00376
case PStream::plearn_binary :
00377 {
00378 in.
skipBlanksAndCommentsAndSeparators();
00379
string word(class_name.size() + 1,
' ');
00380
for (
unsigned int i = 0; i < class_name.size() + 1; i++)
00381 in.
get(word[i]);
00382
if (word != class_name +
"(")
00383
PLERROR(
"in %s::(PStream& in), '%s' is not a proper header", class_name.c_str(), word.c_str());
00384 in.
skipBlanksAndCommentsAndSeparators();
00385 in >>
smoothingMethod;
00386 in.
skipBlanksAndCommentsAndSeparators();
00387 in >>
normalizationSum;
00388 in.
skipBlanksAndCommentsAndSeparators();
00389 in >>
backoffDist;
00390 in.
skipBlanksAndCommentsAndSeparators();
00391 in >>
backoffNormalization;
00392 in.
skipBlanksAndCommentsAndSeparators();
00393 in >>
discountedMass;
00394 in.
skipBlanksAndCommentsAndSeparators();
00395
int c = in.
get();
00396
if(c !=
')')
00397
PLERROR(
"in %s::(PStream& in), expected a closing parenthesis, found '%c'", class_name.c_str(), c);
00398 }
00399
break;
00400
default:
00401
PLERROR(
"unknown inmode in %s::write(PStream& out)", class_name.c_str());
00402
break;
00403 }
00404 }
00405
00406 void ComplementedProbSparseMatrix::complement(
ProbSparseMatrix& nXY,
bool clear_nXY)
00407 {
00408
int nXY_height = nXY.
getHeight();
00409
int nXY_width = nXY.
getWidth();
00410
00411
rowSum.
resize(nXY_height);
00412
columnSum.
resize(nXY_width);
00413
if (mode ==
ROW_WISE && (nXY.
getMode() ==
ROW_WISE || nXY.
isDoubleAccessible())){
00414
for (
int i = 0; i < nXY_height; i++){
00415 map<int, real>& row_i = nXY.
getRow(i);
00416
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it){
00417
rowSum[i]+=it->second;
00418
columnSum[it->first]+=it->second;
00419
grandSum+=it->second;
00420 }
00421 }
00422
00423
if (clear_nXY)
00424 nXY.
clear();
00425 }
else if (mode ==
COLUMN_WISE && (nXY.
getMode() ==
COLUMN_WISE || nXY.
isDoubleAccessible())){
00426
00427
for (
int j = 0; j < nXY_width; j++){
00428 map<int, real>& col_j = nXY.
getCol(j);
00429
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00430
rowSum[it->first]+=it->second;
00431
columnSum[j]+=it->second;
00432
grandSum+=it->second;
00433 }
00434 }
00435
if (clear_nXY)
00436 nXY.
clear();
00437 }
else{
00438
PLERROR(
"pXY and nXY accessibility modes must match");
00439 }
00440 }
00441
00442 real ComplementedProbSparseMatrix::get(
int i,
int j)
00443 {
00444
#ifdef BOUNDCHECK
00445
if (i < 0 || i >= height || j < 0 || j >= width)
00446
PLERROR(
"SmoothedProbSparseMatrix.get : out-of-bound access to (%d, %d), dims = (%d, %d)", i, j, height, width);
00447
#endif
00448
if (mode ==
ROW_WISE){
00449 map<int, real>& row_i = rows[i];
00450 map<int, real>::iterator it = row_i.find(j);
00451
if (it == row_i.end()){
00452
return rowSum[j]/(
grandSum-
columnSum[i]);
00453 }
else{
00454
return (
rowSum[j]-it->second)/(
grandSum-
columnSum[i]);
00455 }
00456 }
else{
00457 map<int, real>& col_j = cols[j];
00458 map<int, real>::iterator it = col_j.find(i);
00459
if (it == col_j.end()){
00460
return columnSum[j]/(
grandSum-
rowSum[i]);
00461 }
else{
00462
00463
return (
columnSum[j]-it->second)/(
grandSum-
rowSum[i]);
00464 }
00465 }
00466 }
00467
00468 bool ComplementedProbSparseMatrix::checkCondProbIntegrity()
00469 {
00470
real sum = 0.0;
00471
00472
int null_size;
00473
00474
00475
if (mode ==
ROW_WISE){
00476
for (
int i = 0; i < height; i++){
00477 map<int, real>& row_i = rows[i];
00478
00479
for (map<int, real>::iterator it = row_i.begin(); it != row_i.end(); ++it){
00480
00481 }
00482
00483
if (fabs(
sum - 1.0) > 1e-4 && (
sum > 0.0))
00484 cout <<
" Inconsistent line " << i <<
" sum = "<<
sum;
00485
return false;
00486 }
00487
return true;
00488 }
else{
00489
for (
int j = 0; j < width; j++){
00490 map<int, real>& col_j = cols[j];
00491
00492
for (map<int, real>::iterator it = col_j.begin(); it != col_j.end(); ++it){
00493
00494 }
00495
00496
if(fabs(
sum - 1.0) > 1e-4 && (
sum > 0.0)){
00497 cout <<
" Inconsistent column " << j <<
" sum = "<<
sum;
00498
return false;
00499 }
00500 }
00501
return true;
00502 }
00503 }
00504
00505
00506 }