00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00049
00050
#include "pl_io.h"
00051
#include <plearn/base/plerror.h>
00052
00053
#include <plearn/base/byte_order.h>
00054
00055
namespace PLearn {
00056
using namespace std;
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 inline void write_compr_mode_and_size(ostream& out,
unsigned char mode,
int size)
00077 {
00078
#ifdef BOUNDCHECK
00079
if(size<0 || size>=(1<<30))
00080
PLERROR(
"In write_compr_mode_and_size: size out of bounds");
00081
#endif
00082
unsigned int imode = (
unsigned int)
mode;
00083
if(size<(1<<6))
00084 {
00085
unsigned char sizenum = (
unsigned char) size | (
unsigned char) (imode<<6);
00086
binwrite(out, sizenum);
00087 }
00088
else if(size<(1<<14))
00089 {
00090
unsigned short sizenum = (
unsigned short) size | (
unsigned short) (imode<<14);
00091
unsigned char header = 0x00;
00092
binwrite(out,
header);
00093
binwrite(out, sizenum);
00094 }
00095
else
00096 {
00097
unsigned int sizenum = (
unsigned int) size | (
unsigned int) (imode<<30);
00098
unsigned char header = 0xC0;
00099
binwrite(out,
header);
00100
binwrite(out, sizenum);
00101 }
00102 }
00103
00104 inline void read_compr_mode_and_size(istream& in,
unsigned char& mode,
int& size)
00105 {
00106
unsigned char sizenum_byte;
00107
binread(in, sizenum_byte);
00108
if(sizenum_byte==0x00)
00109 {
00110
unsigned short sizenum;
00111
binread(in, sizenum);
00112
mode = (
unsigned char)(sizenum>>14);
00113 size =
int(sizenum & (
unsigned short)0x3FFF);
00114 }
00115
else if(sizenum_byte==0xC0)
00116 {
00117
unsigned int sizenum;
00118
binread(in, sizenum);
00119
mode = (
unsigned char)(sizenum>>30);
00120 size =
int(sizenum & (
unsigned int)0x3FFFFFFF);
00121 }
00122
else
00123 {
00124
mode = sizenum_byte>>6;
00125 size =
int(sizenum_byte & (
unsigned char)0x3F);
00126 }
00127 }
00128
00129 void binread_compressed(istream& in,
double* data,
int l)
00130 {
00131
unsigned char mode;
00132
int n;
00133
double* p = data;
00134
char cval;
00135
while(l>0)
00136 {
00137
read_compr_mode_and_size(in,
mode, n);
00138
00139 l -= n;
00140
switch(
mode)
00141 {
00142
case 0:
00143
while(n--)
00144 *p++ = 0;
00145
break;
00146
case 1:
00147
while(n--)
00148 *p++ = 0;
00149 *p++ = 1;
00150 --l;
00151
break;
00152
case 2:
00153
while(n--)
00154 {
00155
binread(in,cval);
00156 *p++ = double(cval);
00157 }
00158
break;
00159
case 3:
00160
binread(in,p,n);
00161 p += n;
00162
break;
00163
default:
00164
PLERROR(
"BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?");
00165 }
00166 }
00167
00168
if(l!=0)
00169
PLERROR(
"In binread_compressed : l is not 0 at exit of function, wrong data?");
00170 }
00171
00172 void binwrite_compressed(ostream& out,
const double* data,
int l)
00173 {
00174
double val = 0.;
00175
while(l)
00176 {
00177
val = *data;
00178
if(
val==0.)
00179 {
00180
int n=0;
00181
while(l && *data==0.)
00182 { ++n; ++data; --l; }
00183
if(l && *data==1.)
00184 {
00185
write_compr_mode_and_size(out, 1, n);
00186 ++data; --l;
00187 }
00188
else
00189
write_compr_mode_and_size(out, 0, n);
00190 }
00191
else if(
val==1.)
00192 {
00193
write_compr_mode_and_size(out, 1, 0);
00194 ++data; --l;
00195 }
00196
else if( double(
char(
val))==
val )
00197 {
00198
const double* start = data;
00199
int n=0;
00200
while(l && double(
char(
val=*data))==
val &&
val!=0 &&
val!=1)
00201 { ++n; ++data; --l; }
00202
write_compr_mode_and_size(out, 2, n);
00203
while(n--)
00204
binwrite(out,
char(*start++));
00205 }
00206
else
00207 {
00208
const double* start = data;
00209
int n=0;
00210
while(l && (
val=*data)!=0 &&
val!=1 && double(
char(
val))!=
val)
00211 { ++n; ++data; --l; }
00212
write_compr_mode_and_size(out, 3, n);
00213
binwrite(out,start,n);
00214 }
00215 }
00216 }
00217 void binread_compressed(istream& in,
float* data,
int l)
00218 {
00219
unsigned char mode;
00220
int n;
00221
float* p = data;
00222
while(l>0)
00223 {
00224
read_compr_mode_and_size(in,
mode, n);
00225
00226
if(
mode==0 ||
mode==1)
00227 {
00228
while(n--)
00229 { *p++ = 0; --l; }
00230
if(
mode==1)
00231 { *p++ = 1; --l; }
00232 }
00233
else if(
mode==2)
00234 {
00235
char val;
00236
while(n--)
00237 {
00238
binread(in,
val);
00239 *p++ = float(
val);
00240 --l;
00241 }
00242 }
00243
else if(
mode==3)
00244 {
00245
binread(in,p,n);
00246 p += n;
00247 l -= n;
00248 }
00249
else
00250
PLERROR(
"BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?");
00251 }
00252
00253
if(l!=0)
00254
PLERROR(
"In binread_compressed : l is not 0 at exit of function, wrong data?");
00255 }
00256
00257 void binwrite_compressed(ostream& out,
const float* data,
int l)
00258 {
00259
float val = 0.;
00260
while(l)
00261 {
00262
val = *data;
00263
if(
val==0.)
00264 {
00265
int n=0;
00266
while(l && *data==0.)
00267 { ++n; ++data; --l; }
00268
if(l && *data==1.)
00269 {
00270
write_compr_mode_and_size(out, 1, n);
00271 ++data; --l;
00272 }
00273
else
00274
write_compr_mode_and_size(out, 0, n);
00275 }
00276
else if(
val==1.)
00277 {
00278
write_compr_mode_and_size(out, 1, 0);
00279 ++data; --l;
00280 }
00281
else if( float(
char(
val))==
val )
00282 {
00283
const float* start = data;
00284
int n=0;
00285
while(l && float(
char(
val=*data))==
val &&
val!=0 &&
val!=1)
00286 { ++n; ++data; --l; }
00287
write_compr_mode_and_size(out, 2, n);
00288
while(n--)
00289
binwrite(out,
char(*start++));
00290 }
00291
else
00292 {
00293
const float* start = data;
00294
int n=0;
00295
while(l && (
val=*data)!=0 &&
val!=1 && float(
char(
val))!=
val)
00296 { ++n; ++data; --l; }
00297
write_compr_mode_and_size(out, 3, n);
00298
binwrite(out,start,n);
00299 }
00300 }
00301 }
00302
00303
00304
00305
00306
00307 inline void read_compr_mode_and_size(FILE* in,
unsigned char& mode,
int& size)
00308 {
00309
unsigned char sizenum_byte;
00310
binread(in, sizenum_byte);
00311
if(sizenum_byte==0x00)
00312 {
00313
unsigned short sizenum;
00314
binread(in, sizenum);
00315
mode = (
unsigned char)(sizenum>>14);
00316 size =
int(sizenum & (
unsigned short)0x3FFF);
00317 }
00318
else if(sizenum_byte==0xC0)
00319 {
00320
unsigned int sizenum;
00321
binread(in, sizenum);
00322
mode = (
unsigned char)(sizenum>>30);
00323 size =
int(sizenum & (
unsigned int)0x3FFFFFFF);
00324 }
00325
else
00326 {
00327
mode = sizenum_byte>>6;
00328 size =
int(sizenum_byte & (
unsigned char)0x3F);
00329 }
00330 }
00331
00332 void binread_compressed(FILE* in,
double* data,
int l)
00333 {
00334
unsigned char mode;
00335
int n;
00336
double* p = data;
00337
char cval;
00338
while(l>0)
00339 {
00340
read_compr_mode_and_size(in,
mode, n);
00341
00342 l -= n;
00343
switch(
mode)
00344 {
00345
case 0:
00346
while(n--)
00347 *p++ = 0;
00348
break;
00349
case 1:
00350
while(n--)
00351 *p++ = 0;
00352 *p++ = 1;
00353 --l;
00354
break;
00355
case 2:
00356
while(n--)
00357 {
00358
binread(in,cval);
00359 *p++ = double(cval);
00360 }
00361
break;
00362
case 3:
00363
binread(in,p,n);
00364 p += n;
00365
break;
00366
default:
00367
PLERROR(
"BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?");
00368 }
00369 }
00370
00371
if(l!=0)
00372
PLERROR(
"In binread_compressed : l is not 0 at exit of function, wrong data?");
00373 }
00374
00375 void binwrite_compressed(FILE* out,
const double* data,
int l)
00376 {
00377
PLERROR(
"Not implemented");
00378 }
00379
00380 void binread_compressed(FILE* in,
float* data,
int l)
00381 {
00382
unsigned char mode;
00383
int n;
00384
float* p = data;
00385
while(l>0)
00386 {
00387
read_compr_mode_and_size(in,
mode, n);
00388
00389
if(
mode==0 ||
mode==1)
00390 {
00391
while(n--)
00392 { *p++ = 0; --l; }
00393
if(
mode==1)
00394 { *p++ = 1; --l; }
00395 }
00396
else if(
mode==2)
00397 {
00398
char val;
00399
while(n--)
00400 {
00401
binread(in,
val);
00402 *p++ = float(
val);
00403 --l;
00404 }
00405 }
00406
else if(
mode==3)
00407 {
00408
binread(in,p,n);
00409 p += n;
00410 l -= n;
00411 }
00412
else
00413
PLERROR(
"BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?");
00414 }
00415
00416
if(l!=0)
00417
PLERROR(
"In binread_compressed : l is not 0 at exit of function, wrong data?");
00418 }
00419
00420 void binwrite_compressed(FILE* out,
const float* data,
int l)
00421 {
00422
PLERROR(
"Not implemented");
00423 }
00424
00425
00426
00427
00428
// In-memory variant of the run-header writer: stores the header for a run
// (2-bit `mode` + run length `size`) at `out` and advances `out` past the
// bytes written.
//
// The encoding depends on the magnitude of `size`:
//   size <  2^6  : a single byte, mode in the two high bits, size in the low six;
//   size <  2^14 : marker byte 0x00, then the bytes of an unsigned short with
//                  mode in its two high bits and size in the low fourteen;
//   otherwise    : marker byte 0xC0, then the bytes of an unsigned int with
//                  mode in its two high bits and size in the low thirty.
// Multi-byte fields are copied in the host's native byte order.
//
// When BOUNDCHECK is defined, a size outside [0, 2^30) is reported with
// PLERROR.
//
// The multi-byte fields are copied with memcpy. The earlier implementation
// went through a union whose char array was declared with 2 elements while
// elements 2 and 3 were also accessed.
inline void write_compr_mode_and_size_ptr(char*& out, unsigned char mode, int size)
{
#ifdef BOUNDCHECK
    if (size < 0 || size >= (1 << 30))
        PLERROR("In write_compr_mode_and_size: size out of bounds");
#endif

    const unsigned int imode = (unsigned int) mode;

    if (size < (1 << 6))
    {
        const unsigned char sizenum = (unsigned char) size | (unsigned char) (imode << 6);
        (*out++) = sizenum;
    }
    else if (size < (1 << 14))
    {
        const unsigned short packed = (unsigned short) size | (unsigned short) (imode << 14);
        const unsigned char header = 0x00;
        (*out++) = header;
        memcpy(out, &packed, sizeof(packed));
        out += sizeof(packed);
    }
    else
    {
        const unsigned int packed = (unsigned int) size | (unsigned int) (imode << 30);
        const unsigned char header = 0xC0;
        (*out++) = header;
        memcpy(out, &packed, sizeof(packed));
        out += sizeof(packed);
    }
}
00462
// In-memory variant of the run-header reader: decodes one header starting at
// `in`, stores the 2-bit mode in `mode` and the run length in `size`, and
// advances `in` past the bytes consumed.
//
// The first byte selects the layout:
//   0x00 : two more bytes form an unsigned short; mode is its top 2 bits,
//          size its low 14;
//   0xC0 : four more bytes form an unsigned int; mode is its top 2 bits,
//          size its low 30;
//   else : the byte itself holds mode in its top 2 bits and size in its low 6.
// Multi-byte fields are taken in the host's native byte order.
inline void read_compr_mode_and_size_ptr(char*& in, unsigned char& mode, int& size)
{
    const unsigned char first_byte = (unsigned char)(*in++);

    if (first_byte != 0x00 && first_byte != 0xC0)
    {
        // Short form: everything is in the byte just read.
        mode = first_byte >> 6;
        size = int(first_byte & (unsigned char)0x3F);
        return;
    }

    if (first_byte == 0x00)
    {
        unsigned short packed;
        memcpy(&packed, in, 2);
        in += 2;
        mode = (unsigned char)(packed >> 14);
        size = int(packed & (unsigned short)0x3FFF);
        return;
    }

    unsigned int packed;
    memcpy(&packed, in, 4);
    in += 4;
    mode = (unsigned char)(packed >> 30);
    size = int(packed & (unsigned int)0x3FFFFFFF);
}
00492
00493
00494 void uncompress_vec(
char* comprbuf,
double* data,
int l,
bool double_stored_as_float)
00495 {
00496
unsigned char mode;
00497
int n;
00498
double* p = data;
00499
while(l>0)
00500 {
00501
read_compr_mode_and_size_ptr(comprbuf,
mode, n);
00502
00503
if(
mode==0 ||
mode==1)
00504 {
00505
while(n--)
00506 { *p++ = 0; --l; }
00507
if(
mode==1)
00508 { *p++ = 1; --l; }
00509 }
00510
else if(
mode==2)
00511 {
00512
char val;
00513
while(n--)
00514 {
00515
val=(*comprbuf++);
00516 *p++ = double(
val);
00517 --l;
00518 }
00519 }
00520
else if(
mode==3)
00521 {
00522 memcpy(p,comprbuf,
sizeof(
double)*n);
00523 comprbuf+=
sizeof(
double)*n;
00524 p += n;
00525 l -= n;
00526 }
00527
else
00528
PLERROR(
"BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?");
00529 }
00530
00531
if(l!=0)
00532
PLERROR(
"In binread_compressed : l is not 0 at exit of function, wrong data?");
00533 }
00534
00535 void compress_vec(
char* comprbuf,
const double* data,
int l,
bool double_stored_as_float)
00536 {
00537
00538
double val = 0.;
00539
while(l)
00540 {
00541
val = *data;
00542
if(
val==0.)
00543 {
00544
int n=0;
00545
while(l && *data==0.)
00546 { ++n; ++data; --l; }
00547
if(l && *data==1.)
00548 {
00549
write_compr_mode_and_size_ptr(comprbuf, 1, n);
00550 ++data; --l;
00551 }
00552
else
00553
write_compr_mode_and_size_ptr(comprbuf, 0, n);
00554 }
00555
else if(
val==1.)
00556 {
00557
write_compr_mode_and_size_ptr(comprbuf, 1, 0);
00558 ++data; --l;
00559 }
00560
else if( double(
char(
val))==
val )
00561 {
00562
const double* start = data;
00563
int n=0;
00564
while(l && double(
char(
val=*data))==
val &&
val!=0 &&
val!=1)
00565 { ++n; ++data; --l; }
00566
write_compr_mode_and_size_ptr(comprbuf, 2, n);
00567
while(n--)
00568 (*comprbuf++) = char(*start++);
00569 }
00570
else
00571 {
00572
const double* start = data;
00573
int n=0;
00574
while(l && (
val=*data)!=0 &&
val!=1 && double(
char(
val))!=
val)
00575 { ++n; ++data; --l; }
00576
write_compr_mode_and_size_ptr(comprbuf, 3, n);
00577 memcpy(comprbuf,start,n*
sizeof(
double));
00578 comprbuf += n*
sizeof(
double);
00579 }
00580 }
00581 }
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623 size_t
new_read_compressed(FILE* in,
real* vec,
int l,
bool swap_endians)
00624 {
00625 size_t nbytes = 0;
00626
unsigned char mode;
00627
unsigned int N = 0;
00628
00629
while(l)
00630 {
00631
mode = (
unsigned char)(getc(in));
00632 ++nbytes;
00633
unsigned char N1 = (
mode & 0x1F);
00634
switch(N1)
00635 {
00636
case 0:
00637 N1 = (
unsigned char)(getc(in));
00638 ++nbytes;
00639 N = N1;
00640
break;
00641
case 30:
00642
unsigned short N2;
00643 fread(&N2,2,1,in);
00644
if(swap_endians)
00645
endianswap(&N2);
00646 nbytes += 2;
00647 N = N2;
00648
break;
00649
case 31:
00650 fread(&N,4,1,in);
00651
if(swap_endians)
00652
endianswap(&N);
00653 nbytes += 4;
00654
break;
00655
default:
00656 N = N1;
00657 }
00658
00659
if(
mode & (
unsigned char)(0x80))
00660 {
00661 l -= N;
00662
while(N--)
00663 *vec++ = 0;
00664 N = 1;
00665 }
00666
00667
if(!l)
00668
break;
00669
00670 l -= N;
00671
mode = ((
mode & ~0x80) >> 5);
00672
switch(
mode)
00673 {
00674
case 0:
00675 {
00676
while(N--)
00677 *vec++ = 1;
00678 }
00679
break;
00680
case 1:
00681 {
00682
signed char val;
00683 nbytes += N;
00684
while(N--)
00685 {
00686
val = (
signed char)(getc(in));
00687
if(
val==-128)
00688 *vec++ =
MISSING_VALUE;
00689
else
00690 *vec++ =
val;
00691 }
00692 }
00693
break;
00694
case 2:
00695 {
00696
float val;
00697 nbytes += N<<2;
00698
while(N--)
00699 {
00700 fread(&
val,
sizeof(
float),1,in);
00701
if(swap_endians)
00702
endianswap(&
val);
00703 *vec++ =
val;
00704 }
00705 }
00706
break;
00707
case 3:
00708 {
00709 nbytes += N<<3;
00710 fread(vec,
sizeof(
double),N,in);
00711
if(swap_endians)
00712
endianswap(vec,N);
00713 vec += N;
00714 }
00715 }
00716 }
00717
return nbytes;
00718 }
00719
00720 unsigned char new_get_compr_data_type(
double x,
double tolerance)
00721 {
00722
if(
is_missing(
x))
00723
return 1;
00724
else if(
x==1.)
00725
return 0;
00726
else if(double(
char(
x))==
x &&
x!=-128)
00727
return 1;
00728
else if(fabs(
double(
float(
x))-
x)<=tolerance)
00729
return 2;
00730
return 3;
00731 }
00732
00733 unsigned char new_get_compr_data_type(
float x)
00734 {
00735
if(
is_missing(
x))
00736
return 1;
00737
else if(
x==1.)
00738
return 0;
00739
else if(float(
char(
x))==
x)
00740
return 1;
00741
return 2;
00742 }
00743
// Writes the header of one record to `out` and returns the number of bytes
// that header occupies.
//
// Header byte layout:
//   bit 7     : set when `insert_zeroes` is true;
//   bits 5..6 : `data_type`;
//   bits 0..4 : count code for N:
//                 1..29 : N itself (1-byte header),
//                 0     : N follows as one byte   (N == 0 or 30 <= N <= UCHAR_MAX),
//                 30    : N follows as an unsigned short (N <= USHRT_MAX),
//                 31    : N follows as an unsigned int.
//
// N == 0 is written in the explicit one-byte-count form. In the short form its
// count code would be 0, which is the code that announces a following count
// byte, so a reader would take the next payload byte for the count.
size_t new_write_mode_and_size(FILE* out, bool insert_zeroes, unsigned int N, unsigned char data_type)
{
    size_t nbytes = 0;
    unsigned char mode = data_type << 5;
    if (insert_zeroes)
        mode |= (unsigned char)0x80;

    if (N > 0 && N < 30)
    {
        mode |= (unsigned char)N;
        putc(mode, out);
        nbytes = 1;
    }
    else if (N <= UCHAR_MAX)
    {
        // Count code 0: the count is the next byte (this also covers N == 0).
        putc(mode, out);
        putc((unsigned char)N, out);
        nbytes = 2;
    }
    else if (N <= USHRT_MAX)
    {
        mode |= (unsigned char)30;
        putc(mode, out);
        unsigned short N2 = (unsigned short)N;
        fwrite(&N2, sizeof(unsigned short), 1, out);
        nbytes = 3;
    }
    else
    {
        mode |= (unsigned char)31;
        putc(mode, out);
        unsigned int N4 = (unsigned int)N;
        fwrite(&N4, sizeof(unsigned int), 1, out);
        nbytes = 5;
    }
    return nbytes;
}
00781
00782 size_t
new_write_raw_data_as(FILE* out,
real *vec,
int l,
unsigned char data_type)
00783 {
00784 size_t nbytes = 0;
00785
switch(data_type)
00786 {
00787
case 1:
00788 nbytes = l;
00789
while(l--)
00790 {
00791
real val = *vec++;
00792
if(
is_missing(
val))
00793 putc(0x80,out);
00794
else
00795 putc((
unsigned char)static_cast<signed char>(
val),out);
00796 }
00797
break;
00798
case 2:
00799 nbytes = l*
sizeof(
float);
00800
while(l--)
00801 {
00802
float val = static_cast<float>(*vec++);
00803 fwrite(&
val,
sizeof(
float),1,out);
00804 }
00805
break;
00806
case 3:
00807 nbytes = l*
sizeof(
double);
00808
while(l--)
00809 {
00810
double val = static_cast<double>(*vec++);
00811 fwrite(&
val,
sizeof(
double),1,out);
00812 }
00813
break;
00814 }
00815
return nbytes;
00816 }
00817
00818
00819 size_t
new_write_compressed(FILE* out,
real* vec,
int l,
double tolerance,
bool swap_endians)
00820 {
00821
if(swap_endians)
00822
PLERROR(
"swap_endians in new_write_compressed not yet supported (currently only supported by new_read_compresed");
00823
00824 size_t nbytes = 0;
00825
00826
while(l)
00827 {
00828
int nzeroes = 0;
00829
while(l && *vec==0.)
00830 {
00831 ++nzeroes;
00832 ++vec;
00833 --l;
00834 }
00835
00836
int nvals = 0;
00837
unsigned char data_type = 0;
00838
if(l)
00839 {
00840
real* ptr = vec;
00841 data_type =
new_get_compr_data_type(*ptr, tolerance);
00842 ++nvals;
00843 ++ptr;
00844 --l;
00845
while(l && *ptr!=0. &&
new_get_compr_data_type(*ptr, tolerance)==data_type)
00846 {
00847 ++nvals;
00848 ++ptr;
00849 --l;
00850 }
00851 }
00852
00853
00854
00855
00856
if(nzeroes)
00857 {
00858
00859 nbytes +=
new_write_mode_and_size(out,
true, nzeroes, data_type);
00860
if(nvals)
00861 {
00862 nbytes +=
new_write_raw_data_as(out, vec, 1, data_type);
00863 ++vec;
00864 --nvals;
00865 }
00866 }
00867
00868
if(nvals)
00869 {
00870 nbytes +=
new_write_mode_and_size(out,
false, nvals, data_type);
00871 nbytes +=
new_write_raw_data_as(out, vec, nvals, data_type);
00872 vec += nvals;
00873 }
00874
00875 }
00876
return nbytes;
00877 }
00878
00879 }