PLearn: pl_io.cc Source File

00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999,2000 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // Copyright (C) 2002 Frederic Morin, Xavier Saint-Mleux, Pascal Vincent 00007 // 00008 00009 // Redistribution and use in source and binary forms, with or without 00010 // modification, are permitted provided that the following conditions are met: 00011 // 00012 // 1. Redistributions of source code must retain the above copyright 00013 // notice, this list of conditions and the following disclaimer. 00014 // 00015 // 2. Redistributions in binary form must reproduce the above copyright 00016 // notice, this list of conditions and the following disclaimer in the 00017 // documentation and/or other materials provided with the distribution. 00018 // 00019 // 3. The name of the authors may not be used to endorse or promote 00020 // products derived from this software without specific prior written 00021 // permission. 00022 // 00023 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00024 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00025 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00026 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00028 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00029 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00030 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00031 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00032 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 // 00034 // This file is part of the PLearn library. For more information on the PLearn 00035 // library, go to the PLearn Web site at www.plearn.org 00036 00037 00038 00039 00040 /* ******************************************************* 00041 * $Id: pl_io.cc,v 1.8 2004/07/21 16:30:51 chrish42 Exp $ 00042 * AUTHORS: Pascal Vincent 00043 * This file is part of the PLearn library. 00044 ******************************************************* */ 00045 00046 00049 //#include <limits> 00050 #include "pl_io.h" 00051 #include <plearn/base/plerror.h> 00052 //#include "pl_math.h" 00053 #include <plearn/base/byte_order.h> 00054 00055 namespace PLearn { 00056 using namespace std; 00057 00058 00059 /* compressed array of float format 00060 All data on disk are to be in little-endian format. 00061 bit patterns described from a reading perspective 00062 00063 Attempt to read sizenum as 1 byte 00064 Read 1 byte. 00065 If the bit pattern is 00000000 (0x00) then the sizenum is the following 2-byte unsigned short : read it 00066 If the bit pattern is 11000000 (0xC0)then the sizenum is the following 4-byte unsigned int : read it 00067 Otherwise the sizenum is the unsigned byte just read 00068 00069 The two most significant bits of the sizenum are the mode bits, the remaining bits give a size 00070 Mode 00 (decimal 0) means: insert 'size' zeros 00071 Mode 01 (decimal 1) means: insert 'size' zeros and then a 1 00072 Mode 10 (decimal 2) means: insert 'size' values to be found are in the following 'size' signed bytes 00073 Mode 11 (decimal 3) means: insert 'size' values to be found are in the following 'size' floating points (different implementations for single and for double precision below 00074 */ 00075 00076 inline void write_compr_mode_and_size(ostream& out, unsigned char mode, int size) 00077 { 00078 #ifdef BOUNDCHECK 00079 if(size<0 || size>=(1<<30)) 00080 PLERROR("In write_compr_mode_and_size: size out of bounds"); 00081 #endif 00082 unsigned int imode = (unsigned int) mode; 00083 if(size<(1<<6)) 00084 { 00085 unsigned char sizenum = (unsigned char) size | (unsigned char) (imode<<6); 00086 binwrite(out, sizenum); 00087 } 00088 else if(size<(1<<14)) 00089 { 00090 unsigned short sizenum = (unsigned short) size | (unsigned short) (imode<<14); 00091 unsigned char header = 0x00; 00092 binwrite(out, header); 00093 binwrite(out, sizenum); 00094 } 00095 else 00096 { 00097 unsigned int sizenum = (unsigned int) size | (unsigned int) (imode<<30); 00098 unsigned char header = 0xC0; 00099 binwrite(out, header); 00100 binwrite(out, sizenum); 00101 } 00102 } 00103 00104 inline void read_compr_mode_and_size(istream& in, unsigned char& mode, int& size) 00105 { 00106 unsigned char sizenum_byte; 00107 binread(in, sizenum_byte); 00108 if(sizenum_byte==0x00) // sizenum is an unsigned short 00109 { 00110 unsigned short sizenum; 00111 binread(in, sizenum); 00112 mode = (unsigned char)(sizenum>>14); 00113 size = int(sizenum & (unsigned short)0x3FFF); 00114 } 00115 else if(sizenum_byte==0xC0) // sizenum is an unsigned int 00116 { 00117 unsigned int sizenum; 00118 binread(in, sizenum); 00119 mode = (unsigned char)(sizenum>>30); 00120 size = int(sizenum & (unsigned int)0x3FFFFFFF); 00121 } 00122 else // sizenum is the byte we just read 00123 { 00124 mode = sizenum_byte>>6; 00125 size = int(sizenum_byte & (unsigned char)0x3F); 00126 } 00127 } 00128 00129 void binread_compressed(istream& in, double* data, int l) 00130 { 00131 unsigned char mode; 00132 int n; 00133 double* p = data; 00134 char cval; 00135 while(l>0) 00136 { 00137 read_compr_mode_and_size(in, mode, n); 00138 //cerr << "mode: " << int(mode) << " size: " << n << endl; 00139 l -= n; 00140 switch(mode) 00141 { 00142 case 0: 00143 while(n--) 00144 *p++ = 0; 00145 break; 00146 case 1: 00147 while(n--) 00148 *p++ = 0; 00149 *p++ = 1; 00150 --l; 00151 break; 00152 case 2: 00153 while(n--) 00154 { 00155 binread(in,cval); 00156 *p++ = double(cval); 00157 } 00158 break; 00159 case 3: 00160 binread(in,p,n); 00161 p += n; 00162 break; 00163 default: 00164 PLERROR("BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?"); 00165 } 00166 } 00167 00168 if(l!=0) 00169 PLERROR("In binread_compressed : l is not 0 at exit of function, wrong data?"); 00170 } 00171 00172 void binwrite_compressed(ostream& out, const double* data, int l) 00173 { 00174 double val = 0.; 00175 while(l) 00176 { 00177 val = *data; 00178 if(val==0.) 00179 { 00180 int n=0; 00181 while(l && *data==0.) 00182 { ++n; ++data; --l; } 00183 if(l && *data==1.) 00184 { 00185 write_compr_mode_and_size(out, 1, n); 00186 ++data; --l; 00187 } 00188 else 00189 write_compr_mode_and_size(out, 0, n); 00190 } 00191 else if(val==1.) 00192 { 00193 write_compr_mode_and_size(out, 1, 0); 00194 ++data; --l; 00195 } 00196 else if( double(char(val))==val ) 00197 { 00198 const double* start = data; 00199 int n=0; 00200 while(l && double(char(val=*data))==val && val!=0 && val!=1) 00201 { ++n; ++data; --l; } 00202 write_compr_mode_and_size(out, 2, n); 00203 while(n--) 00204 binwrite(out,char(*start++)); 00205 } 00206 else 00207 { 00208 const double* start = data; 00209 int n=0; 00210 while(l && (val=*data)!=0 && val!=1 && double(char(val))!=val) 00211 { ++n; ++data; --l; } 00212 write_compr_mode_and_size(out, 3, n); 00213 binwrite(out,start,n); 00214 } 00215 } 00216 } 00217 void binread_compressed(istream& in, float* data, int l) 00218 { 00219 unsigned char mode; 00220 int n; 00221 float* p = data; 00222 while(l>0) 00223 { 00224 read_compr_mode_and_size(in, mode, n); 00225 //cerr << "mode: " << int(mode) << " size: " << n << endl; 00226 if(mode==0 || mode==1) 00227 { 00228 while(n--) 00229 { *p++ = 0; --l; } 00230 if(mode==1) 00231 { *p++ = 1; --l; } 00232 } 00233 else if(mode==2) 00234 { 00235 char val; 00236 while(n--) 00237 { 00238 binread(in,val); 00239 *p++ = float(val); 00240 --l; 00241 } 00242 } 00243 else if(mode==3) 00244 { 00245 binread(in,p,n); 00246 p += n; 00247 l -= n; 00248 } 00249 else 00250 PLERROR("BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?"); 00251 } 00252 00253 if(l!=0) 00254 PLERROR("In binread_compressed : l is not 0 at exit of function, wrong data?"); 00255 } 00256 00257 void binwrite_compressed(ostream& out, const float* data, int l) 00258 { 00259 float val = 0.; 00260 while(l) 00261 { 00262 val = *data; 00263 if(val==0.) 00264 { 00265 int n=0; 00266 while(l && *data==0.) 00267 { ++n; ++data; --l; } 00268 if(l && *data==1.) 00269 { 00270 write_compr_mode_and_size(out, 1, n); 00271 ++data; --l; 00272 } 00273 else 00274 write_compr_mode_and_size(out, 0, n); 00275 } 00276 else if(val==1.) 00277 { 00278 write_compr_mode_and_size(out, 1, 0); 00279 ++data; --l; 00280 } 00281 else if( float(char(val))==val ) 00282 { 00283 const float* start = data; 00284 int n=0; 00285 while(l && float(char(val=*data))==val && val!=0 && val!=1) 00286 { ++n; ++data; --l; } 00287 write_compr_mode_and_size(out, 2, n); 00288 while(n--) 00289 binwrite(out,char(*start++)); 00290 } 00291 else 00292 { 00293 const float* start = data; 00294 int n=0; 00295 while(l && (val=*data)!=0 && val!=1 && float(char(val))!=val) 00296 { ++n; ++data; --l; } 00297 write_compr_mode_and_size(out, 3, n); 00298 binwrite(out,start,n); 00299 } 00300 } 00301 } 00302 00303 // ******************************************** 00304 // *** compressed vector to and from FILE* *** 00305 // ******************************************** 00306 00307 inline void read_compr_mode_and_size(FILE* in, unsigned char& mode, int& size) 00308 { 00309 unsigned char sizenum_byte; 00310 binread(in, sizenum_byte); 00311 if(sizenum_byte==0x00) // sizenum is an unsigned short 00312 { 00313 unsigned short sizenum; 00314 binread(in, sizenum); 00315 mode = (unsigned char)(sizenum>>14); 00316 size = int(sizenum & (unsigned short)0x3FFF); 00317 } 00318 else if(sizenum_byte==0xC0) // sizenum is an unsigned int 00319 { 00320 unsigned int sizenum; 00321 binread(in, sizenum); 00322 mode = (unsigned char)(sizenum>>30); 00323 size = int(sizenum & (unsigned int)0x3FFFFFFF); 00324 } 00325 else // sizenum is the byte we just read 00326 { 00327 mode = sizenum_byte>>6; 00328 size = int(sizenum_byte & (unsigned char)0x3F); 00329 } 00330 } 00331 00332 void binread_compressed(FILE* in, double* data, int l) 00333 { 00334 unsigned char mode; 00335 int n; 00336 double* p = data; 00337 char cval; 00338 while(l>0) 00339 { 00340 read_compr_mode_and_size(in, mode, n); 00341 //cerr << "mode: " << int(mode) << " size: " << n << endl; 00342 l -= n; 00343 switch(mode) 00344 { 00345 case 0: 00346 while(n--) 00347 *p++ = 0; 00348 break; 00349 case 1: 00350 while(n--) 00351 *p++ = 0; 00352 *p++ = 1; 00353 --l; 00354 break; 00355 case 2: 00356 while(n--) 00357 { 00358 binread(in,cval); 00359 *p++ = double(cval); 00360 } 00361 break; 00362 case 3: 00363 binread(in,p,n); 00364 p += n; 00365 break; 00366 default: 00367 PLERROR("BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?"); 00368 } 00369 } 00370 00371 if(l!=0) 00372 PLERROR("In binread_compressed : l is not 0 at exit of function, wrong data?"); 00373 } 00374 00375 void binwrite_compressed(FILE* out, const double* data, int l) 00376 { 00377 PLERROR("Not implemented"); 00378 } 00379 00380 void binread_compressed(FILE* in, float* data, int l) 00381 { 00382 unsigned char mode; 00383 int n; 00384 float* p = data; 00385 while(l>0) 00386 { 00387 read_compr_mode_and_size(in, mode, n); 00388 //cerr << "mode: " << int(mode) << " size: " << n << endl; 00389 if(mode==0 || mode==1) 00390 { 00391 while(n--) 00392 { *p++ = 0; --l; } 00393 if(mode==1) 00394 { *p++ = 1; --l; } 00395 } 00396 else if(mode==2) 00397 { 00398 char val; 00399 while(n--) 00400 { 00401 binread(in,val); 00402 *p++ = float(val); 00403 --l; 00404 } 00405 } 00406 else if(mode==3) 00407 { 00408 binread(in,p,n); 00409 p += n; 00410 l -= n; 00411 } 00412 else 00413 PLERROR("BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?"); 00414 } 00415 00416 if(l!=0) 00417 PLERROR("In binread_compressed : l is not 0 at exit of function, wrong data?"); 00418 } 00419 00420 void binwrite_compressed(FILE* out, const float* data, int l) 00421 { 00422 PLERROR("Not implemented"); 00423 } 00424 00425 // ******************************************** 00426 // *** compressed vector to and from memory *** 00427 // ******************************************** 00428 00429 inline void write_compr_mode_and_size_ptr(char*& out, unsigned char mode, int size) 00430 { 00431 union {unsigned short s;char cs[2];} unis; 00432 union {unsigned int i;char ci[2];} unii; 00433 #ifdef BOUNDCHECK 00434 if(size<0 || size>=(1<<30)) 00435 PLERROR("In write_compr_mode_and_size: size out of bounds"); 00436 #endif 00437 unsigned int imode = (unsigned int) mode; 00438 if(size<(1<<6)) 00439 { 00440 unsigned char sizenum = (unsigned char) size | (unsigned char) (imode<<6); 00441 (*out++) = sizenum; 00442 } 00443 else if(size<(1<<14)) 00444 { 00445 unis.s = (unsigned short) size | (unsigned short) (imode<<14); 00446 unsigned char header = 0x00; 00447 (*out++) = header; 00448 (*out++) = unis.cs[0]; 00449 (*out++) = unis.cs[1]; 00450 } 00451 else 00452 { 00453 unii.i = (unsigned int) size | (unsigned int) (imode<<30); 00454 unsigned char header = 0xC0; 00455 (*out++) = header; 00456 (*out++) = unii.ci[0]; 00457 (*out++) = unii.ci[1]; 00458 (*out++) = unii.ci[2]; 00459 (*out++) = unii.ci[3]; 00460 } 00461 } 00462 00463 inline void read_compr_mode_and_size_ptr(char*& in, unsigned char& mode, int& size) 00464 { 00465 union {unsigned short s;char cs[2];} unis; 00466 union {unsigned int i;char ci[4];} unii; 00467 00468 unsigned char sizenum_byte; 00469 sizenum_byte = (*in++); 00470 if(sizenum_byte==0x00) // sizenum is an unsigned short 00471 { 00472 unis.cs[0] = (*in++); 00473 unis.cs[1] = (*in++); 00474 mode = (unsigned char)(unis.s>>14); 00475 size = int(unis.s & (unsigned short)0x3FFF); 00476 } 00477 else if(sizenum_byte==0xC0) // sizenum is an unsigned int 00478 { 00479 unii.ci[0] = (*in++); 00480 unii.ci[1] = (*in++); 00481 unii.ci[2] = (*in++); 00482 unii.ci[3] = (*in++); 00483 mode = (unsigned char)(unii.i>>30); 00484 size = int(unii.i & (unsigned int)0x3FFFFFFF); 00485 } 00486 else // sizenum is the byte we just read 00487 { 00488 mode = sizenum_byte>>6; 00489 size = int(sizenum_byte & (unsigned char)0x3F); 00490 } 00491 } 00492 00493 00494 void uncompress_vec(char* comprbuf, double* data, int l, bool double_stored_as_float) 00495 { 00496 unsigned char mode; 00497 int n; 00498 double* p = data; 00499 while(l>0) 00500 { 00501 read_compr_mode_and_size_ptr(comprbuf, mode, n); 00502 //cerr << "mode: " << int(mode) << " size: " << n << endl; 00503 if(mode==0 || mode==1) 00504 { 00505 while(n--) 00506 { *p++ = 0; --l; } 00507 if(mode==1) 00508 { *p++ = 1; --l; } 00509 } 00510 else if(mode==2) 00511 { 00512 char val; 00513 while(n--) 00514 { 00515 val=(*comprbuf++); 00516 *p++ = double(val); 00517 --l; 00518 } 00519 } 00520 else if(mode==3) 00521 { 00522 memcpy(p,comprbuf,sizeof(double)*n); 00523 comprbuf+=sizeof(double)*n; 00524 p += n; 00525 l -= n; 00526 } 00527 else 00528 PLERROR("BUG IN binread_compressed: mode is only 2 bits, so how can it be other than 0,1,2,3 ?"); 00529 } 00530 00531 if(l!=0) 00532 PLERROR("In binread_compressed : l is not 0 at exit of function, wrong data?"); 00533 } 00534 00535 void compress_vec(char* comprbuf, const double* data, int l, bool double_stored_as_float) 00536 { 00537 // char* comprbufold=comprbuf; 00538 double val = 0.; 00539 while(l) 00540 { 00541 val = *data; 00542 if(val==0.) 00543 { 00544 int n=0; 00545 while(l && *data==0.) 00546 { ++n; ++data; --l; } 00547 if(l && *data==1.) 00548 { 00549 write_compr_mode_and_size_ptr(comprbuf, 1, n); 00550 ++data; --l; 00551 } 00552 else 00553 write_compr_mode_and_size_ptr(comprbuf, 0, n); 00554 } 00555 else if(val==1.) 00556 { 00557 write_compr_mode_and_size_ptr(comprbuf, 1, 0); 00558 ++data; --l; 00559 } 00560 else if( double(char(val))==val ) 00561 { 00562 const double* start = data; 00563 int n=0; 00564 while(l && double(char(val=*data))==val && val!=0 && val!=1) 00565 { ++n; ++data; --l; } 00566 write_compr_mode_and_size_ptr(comprbuf, 2, n); 00567 while(n--) 00568 (*comprbuf++) = char(*start++); 00569 } 00570 else 00571 { 00572 const double* start = data; 00573 int n=0; 00574 while(l && (val=*data)!=0 && val!=1 && double(char(val))!=val) 00575 { ++n; ++data; --l; } 00576 write_compr_mode_and_size_ptr(comprbuf, 3, n); 00577 memcpy(comprbuf,start,n*sizeof(double)); 00578 comprbuf += n*sizeof(double); 00579 } 00580 } 00581 } 00582 00583 00584 // ******************************************************** 00585 // ******************************************************** 00586 // ********* NEW COMPRESSION FORMAT ********** 00587 // ******************************************************** 00588 // ******************************************************** 00589 00590 /* 00591 00592 Format description: 00593 00594 A succession of [ mode-byte, optionally followed by specificaitons of length N, followed by data ] 00595 00596 The way N is encoded will be explained later. 00597 00598 The bits of the mode-byte are interpreted as follows: 00599 * Most significant bit: 00600 0 : insert the following N values of type T 00601 1 : insert N zeroes and then the following single value of type T 00602 00603 * Next 2 bits indicate the data type T 00604 00 (=0): ones (that's just 1.0, no further data is given to provide the value) 00605 01 (=1): small 1 byte signed integers in the range [-127, +127], or missing values (indicated by -128: bit pattern 0x80) 00606 10 (=2): 4 byte float 00607 11 (=3): 8 byte double 00608 00609 In all but the 00 case, 1 or N corresponding values of type T are 00610 to be read in the stream (after possibly reading N) 00611 00612 * Next 5 bits (values between 0 .. 31) indicate how to get the number N, 00613 00614 1..29: N is that particular value (between 1 and 29) 00615 0 : N is given in the following 1 byte unsigned char 00616 30: N is given in the following 2 byte unsigned short 00617 31: N is given in the following 4 byte unsigned int 00618 00619 00620 */ 00621 00622 00623 size_t new_read_compressed(FILE* in, real* vec, int l, bool swap_endians) 00624 { 00625 size_t nbytes = 0; // number of bytes read 00626 unsigned char mode; // the mode byte 00627 unsigned int N = 0; // N (number of 0s or values to insert) 00628 00629 while(l) 00630 { 00631 mode = (unsigned char)(getc(in)); 00632 ++nbytes; 00633 unsigned char N1 = (mode & 0x1F); 00634 switch(N1) 00635 { 00636 case 0: // N is the 1 byte to follow 00637 N1 = (unsigned char)(getc(in)); 00638 ++nbytes; 00639 N = N1; 00640 break; 00641 case 30: // N is the 2 bytes to follow 00642 unsigned short N2; 00643 fread(&N2,2,1,in); 00644 if(swap_endians) 00645 endianswap(&N2); 00646 nbytes += 2; 00647 N = N2; 00648 break; 00649 case 31: // N is the 4 bytes to follow 00650 fread(&N,4,1,in); 00651 if(swap_endians) 00652 endianswap(&N); 00653 nbytes += 4; 00654 break; 00655 default: // N is N1 00656 N = N1; 00657 } 00658 00659 if(mode & (unsigned char)(0x80)) // most significant bit is on 00660 { // insert N zeros 00661 l -= N; 00662 while(N--) 00663 *vec++ = 0; 00664 N = 1; 00665 } 00666 00667 if(!l) // vec ends with zeroes, so there's no extra single value to append. We're done! 00668 break; 00669 00670 l -= N; 00671 mode = ((mode & ~0x80) >> 5); // get the 2 bits we're interested in 00672 switch(mode) 00673 { 00674 case 0: // type ones 00675 { 00676 while(N--) 00677 *vec++ = 1; 00678 } 00679 break; 00680 case 1: // type signed char (or missing value if -128) 00681 { 00682 signed char val; 00683 nbytes += N; 00684 while(N--) 00685 { 00686 val = (signed char)(getc(in)); 00687 if(val==-128) 00688 *vec++ = MISSING_VALUE; 00689 else 00690 *vec++ = val; 00691 } 00692 } 00693 break; 00694 case 2: // type float 00695 { 00696 float val; 00697 nbytes += N<<2; 00698 while(N--) 00699 { 00700 fread(&val,sizeof(float),1,in); 00701 if(swap_endians) 00702 endianswap(&val); 00703 *vec++ = val; 00704 } 00705 } 00706 break; 00707 case 3: // type double 00708 { 00709 nbytes += N<<3; 00710 fread(vec,sizeof(double),N,in); 00711 if(swap_endians) 00712 endianswap(vec,N); 00713 vec += N; 00714 } 00715 } 00716 } 00717 return nbytes; 00718 } 00719 00720 unsigned char new_get_compr_data_type(double x, double tolerance) 00721 { 00722 if(is_missing(x)) 00723 return 1; 00724 else if(x==1.) 00725 return 0; 00726 else if(double(char(x))==x && x!=-128) 00727 return 1; 00728 else if(fabs(double(float(x))-x)<=tolerance) 00729 return 2; 00730 return 3; 00731 } 00732 00733 unsigned char new_get_compr_data_type(float x) 00734 { 00735 if(is_missing(x)) 00736 return 1; 00737 else if(x==1.) 00738 return 0; 00739 else if(float(char(x))==x) 00740 return 1; 00741 return 2; 00742 } 00743 00745 size_t new_write_mode_and_size(FILE* out, bool insert_zeroes, unsigned int N, unsigned char data_type) 00746 { 00747 size_t nbytes = 0; // nbytes written 00748 unsigned char mode = data_type<<5; 00749 if(insert_zeroes) 00750 mode |= (unsigned char)0x80; 00751 if(N<30) 00752 { 00753 mode |= (unsigned char)N; 00754 putc(mode,out); 00755 nbytes = 1; 00756 } 00757 else if(N<=UCHAR_MAX) 00758 { 00759 putc(mode,out); 00760 putc((unsigned char)N,out); 00761 nbytes = 2; 00762 } 00763 else if(N<=USHRT_MAX) 00764 { 00765 mode |= (unsigned char)30; 00766 putc(mode,out); 00767 unsigned short N2 = (unsigned short)N; 00768 fwrite(&N2,sizeof(unsigned short),1,out); 00769 nbytes = 3; 00770 } 00771 else // (N<=UINT_MAX) 00772 { 00773 mode |= (unsigned char)31; 00774 putc(mode,out); 00775 unsigned int N4 = (unsigned int)N; 00776 fwrite(&N4,sizeof(unsigned int),1,out); 00777 nbytes = 5; 00778 } 00779 return nbytes; 00780 } 00781 00782 size_t new_write_raw_data_as(FILE* out, real *vec, int l, unsigned char data_type) 00783 { 00784 size_t nbytes = 0; // nbytes written 00785 switch(data_type) 00786 { 00787 case 1: 00788 nbytes = l; 00789 while(l--) 00790 { 00791 real val = *vec++; 00792 if(is_missing(val)) 00793 putc(0x80,out); 00794 else 00795 putc((unsigned char)static_cast<signed char>(val),out); 00796 } 00797 break; 00798 case 2: 00799 nbytes = l*sizeof(float); 00800 while(l--) 00801 { 00802 float val = static_cast<float>(*vec++); 00803 fwrite(&val,sizeof(float),1,out); 00804 } 00805 break; 00806 case 3: 00807 nbytes = l*sizeof(double); 00808 while(l--) 00809 { 00810 double val = static_cast<double>(*vec++); 00811 fwrite(&val,sizeof(double),1,out); 00812 } 00813 break; 00814 } 00815 return nbytes; 00816 } 00817 00818 // Warning: this is low-level code written for efficiency 00819 size_t new_write_compressed(FILE* out, real* vec, int l, double tolerance, bool swap_endians) 00820 { 00821 if(swap_endians) 00822 PLERROR("swap_endians in new_write_compressed not yet supported (currently only supported by new_read_compresed"); 00823 00824 size_t nbytes = 0; // number of bytes written 00825 00826 while(l) 00827 { 00828 int nzeroes = 0; 00829 while(l && *vec==0.) 00830 { 00831 ++nzeroes; 00832 ++vec; 00833 --l; 00834 } 00835 00836 int nvals = 0; 00837 unsigned char data_type = 0; 00838 if(l) 00839 { 00840 real* ptr = vec; 00841 data_type = new_get_compr_data_type(*ptr, tolerance); 00842 ++nvals; 00843 ++ptr; 00844 --l; 00845 while(l && *ptr!=0. && new_get_compr_data_type(*ptr, tolerance)==data_type) 00846 { 00847 ++nvals; 00848 ++ptr; 00849 --l; 00850 } 00851 } 00852 00853 // Now we know nzeroes, nvals, and data_type 00854 // So let's encode it: 00855 00856 if(nzeroes) // we have zeroes 00857 { 00858 // write the code for zeroes followed by a single value 00859 nbytes += new_write_mode_and_size(out, true, nzeroes, data_type); 00860 if(nvals) // write the following single value 00861 { 00862 nbytes += new_write_raw_data_as(out, vec, 1, data_type); 00863 ++vec; 00864 --nvals; 00865 } 00866 } 00867 00868 if(nvals) // we have some remaining values 00869 { 00870 nbytes += new_write_mode_and_size(out, false, nvals, data_type); 00871 nbytes += new_write_raw_data_as(out, vec, nvals, data_type); 00872 vec += nvals; 00873 } 00874 00875 } // end of for(;;) 00876 return nbytes; 00877 } 00878 00879 } // end of namespace PLearn