00001 // -*- C++ -*- 00002 00003 // VecCompressor.cc 00004 // Copyright (C) 2001 Pascal Vincent 00005 // Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 00036 /* ******************************************************* 00037 * $Id: VecCompressor.cc,v 1.1 2002/10/03 07:35:28 plearner Exp $ 00038 ******************************************************* */ 00039 00040 #include "VecCompressor.h" 00041 00042 namespace PLearn <% 00043 using namespace std; 00044 00045 // ******************* 00046 // ** VecCompressor ** 00047 00048 signed char* VecCompressor::compressVec(const Vec& v, signed char* data) 00049 { 00050 real* vdata = v.data(); 00051 signed char* ptr = data; 00052 00053 // mode can be '0' for zeroes, 'F' for floats, 'I' for small integers (signed chars) 00054 // If mode is '0' abs(count) indicates the number of zeroes, 00055 // a positive sign indicates switch to 'F' mode 00056 // a negative sign indicates switch to 'I' mode 00057 // a 0 count means insert 127 zeros and stay in zero mode 00058 // If mode is 'F' abs(count) indicates the number of floats that follow 00059 // a positive sign indicates switch to 'I' mode 00060 // a negative sign indicates switch to '0' mode 00061 // a 0 count means insert 127 floats and stay in float mode 00062 // If mode is 'I' abs(count) indicates the number of small integers that follow 00063 // a positive sign indicates switch to 'F' mode 00064 // a negative sign indicates switch to '0' mode 00065 // a 0 count means insert 127 small integers and stay in 'I' mode 00066 00067 int l = v.length(); 00068 00069 int i=0; 00070 real val = vdata[i]; 00071 00072 signed char mode = 'F'; 00073 if(val==0.) 00074 mode = '0'; 00075 else if(issmallint(val)) 00076 mode = 'I'; 00077 // else 'F' 00078 00079 int count = 0; 00080 int istart = 0; 00081 float fval = 0.; 00082 signed char* pfval = (signed char*)&fval; 00083 00084 *ptr++ = mode; 00085 00086 while(i<l) 00087 { 00088 switch(mode) 00089 { 00090 case '0': 00091 istart = i; 00092 while(i<l && is0(vdata[i])) 00093 i++; 00094 count = i - istart; 00095 while(count>127) 00096 { 00097 *ptr++ = 0; 00098 count -= 127; 00099 } 00100 if(i>=l || issmallint(vdata[i])) 00101 { 00102 *ptr++ = (signed char)(-count); 00103 mode = 'I'; 00104 } 00105 else 00106 { 00107 *ptr++ = (signed char)count; 00108 mode = 'F'; 00109 } 00110 break; 00111 00112 case 'I': 00113 istart = i; 00114 while(i<l && isI(vdata[i])) 00115 i++; 00116 count = i - istart; 00117 while(count>127) 00118 { 00119 *ptr++ = 0; 00120 int n = 127; 00121 while(n--) 00122 *ptr++ = (signed char)vdata[istart++]; 00123 count -= 127; 00124 } 00125 if(i>=l || is0(vdata[i])) 00126 { 00127 *ptr++ = (signed char)(-count); 00128 mode = '0'; 00129 } 00130 else // next value is a floating point 00131 { 00132 *ptr++ = (signed char)count; 00133 mode = 'F'; 00134 } 00135 while(count--) 00136 *ptr++ = (signed char)vdata[istart++]; 00137 break; 00138 00139 case 'F': 00140 istart = i; 00141 val = vdata[i]; 00142 while(i<l && isF(vdata[i])) 00143 i++; 00144 count = i - istart; 00145 while(count>127) 00146 { 00147 *ptr++ = 0; 00148 int n = 127; 00149 while(n--) 00150 { 00151 fval = (float)vdata[istart++]; 00152 *ptr++ = pfval[0]; 00153 *ptr++ = pfval[1]; 00154 *ptr++ = pfval[2]; 00155 *ptr++ = pfval[3]; 00156 } 00157 count -= 127; 00158 } 00159 if(i>=l || is0(vdata[i])) 00160 { 00161 *ptr++ = (signed char)(-count); 00162 mode = '0'; 00163 } 00164 else 00165 { 00166 *ptr++ = (signed char)count; 00167 mode = 'I'; 00168 } 00169 while(count--) 00170 { 00171 fval = (float)vdata[istart++]; 00172 *ptr++ = pfval[0]; 00173 *ptr++ = pfval[1]; 00174 *ptr++ = pfval[2]; 00175 *ptr++ = pfval[3]; 00176 } 00177 } 00178 } 00179 return ptr; 00180 } 00181 00182 void VecCompressor::uncompressVec(signed char* data, const Vec& v) 00183 { 00184 // mode can be '0' for zeroes, 'F' for floats, 'I' for small integers (signed chars) 00185 // If mode is '0' abs(count) indicates the number of zeroes, 00186 // a positive sign indicates switch to 'F' mode 00187 // a negative sign indicates switch to 'I' mode 00188 // a 0 count means insert 127 zeros and stay in zero mode 00189 // If mode is 'F' abs(count) indicates the number of floats that follow 00190 // a positive sign indicates switch to 'I' mode 00191 // a negative sign indicates switch to '0' mode 00192 // a 0 count means insert 127 floats and stay in float mode 00193 // If mode is 'I' abs(count) indicates the number of small integers that follow 00194 // a positive sign indicates switch to 'F' mode 00195 // a negative sign indicates switch to '0' mode 00196 // a 0 count means insert 127 small integers and stay in 'I' mode 00197 00198 real* vptr = v.data(); 00199 real* vptrend = vptr+v.length(); 00200 signed char* ptr = data; 00201 signed char mode = *ptr++; 00202 float fval = 0.; 00203 signed char* pfval = (signed char*)&fval; 00204 signed char count; 00205 while(vptr!=vptrend) 00206 { 00207 count = *ptr++; 00208 switch(mode) 00209 { 00210 case '0': 00211 if(count<0) 00212 { 00213 mode = 'I'; 00214 count = -count; 00215 } 00216 else if(count>0) 00217 mode = 'F'; 00218 else 00219 count = 127; 00220 00221 while(count--) 00222 *vptr++ = 0.; 00223 break; 00224 00225 case 'I': 00226 if(count<0) 00227 { 00228 mode = '0'; 00229 count = -count; 00230 } 00231 else if(count>0) 00232 mode = 'F'; 00233 else 00234 count = 127; 00235 00236 while(count--) 00237 *vptr++ = real(*ptr++); 00238 break; 00239 00240 case 'F': 00241 if(count<0) 00242 { 00243 mode = '0'; 00244 count = -count; 00245 } 00246 else if(count>0) 00247 mode = 'I'; 00248 else 00249 count = 127; 00250 00251 while(count--) 00252 { 00253 pfval[0] = *ptr++; 00254 pfval[1] = *ptr++; 00255 pfval[2] = *ptr++; 00256 pfval[3] = *ptr++; 00257 *vptr++ = real(fval); 00258 } 00259 break; 00260 00261 default: 00262 PLERROR("Problem in VecCompressor::uncompressVec this should not happen!!! (wrong data format?)"); 00263 } 00264 } 00265 } 00266 00267 void VecCompressor::writeCompressedVec(ostream& out, const Vec& v) 00268 { 00269 real* vdata = v.data(); 00270 00271 // mode can be '0' for zeroes, 'F' for floats, 'I' for small integers (signed chars) 00272 // If mode is '0' abs(count) indicates the number of zeroes, 00273 // a positive sign indicates switch to 'F' mode 00274 // a negative sign indicates switch to 'I' mode 00275 // a 0 count means insert 127 zeros and stay in zero mode 00276 // If mode is 'F' abs(count) indicates the number of floats that follow 00277 // a positive sign indicates switch to 'I' mode 00278 // a negative sign indicates switch to '0' mode 00279 // a 0 count means insert 127 floats and stay in float mode 00280 // If mode is 'I' abs(count) indicates the number of small integers that follow 00281 // a positive sign indicates switch to 'F' mode 00282 // a negative sign indicates switch to '0' mode 00283 // a 0 count means insert 127 small integers and stay in 'I' mode 00284 00285 int l = v.length(); 00286 00287 int i=0; 00288 real val = vdata[i]; 00289 00290 signed char mode = 'F'; 00291 if(val==0.) 00292 mode = '0'; 00293 else if(issmallint(val)) 00294 mode = 'I'; 00295 // else 'F' 00296 00297 int count = 0; 00298 int istart = 0; 00299 float fval = 0.; 00300 00301 write_sbyte(out,mode); 00302 00303 while(i<l) 00304 { 00305 switch(mode) 00306 { 00307 case '0': 00308 istart = i; 00309 while(i<l && is0(vdata[i])) 00310 i++; 00311 count = i - istart; 00312 while(count>127) 00313 { 00314 write_sbyte(out,0); 00315 count -= 127; 00316 } 00317 if(i>=l || issmallint(vdata[i])) 00318 { 00319 write_sbyte(out,-count); 00320 mode = 'I'; 00321 } 00322 else 00323 { 00324 write_sbyte(out,count); 00325 mode = 'F'; 00326 } 00327 break; 00328 00329 case 'I': 00330 istart = i; 00331 while(i<l && isI(vdata[i])) 00332 i++; 00333 count = i - istart; 00334 while(count>127) 00335 { 00336 write_sbyte(out,0); 00337 int n = 127; 00338 while(n--) 00339 write_sbyte(out,(signed char)vdata[istart++]); 00340 count -= 127; 00341 } 00342 if(i>=l || is0(vdata[i])) 00343 { 00344 write_sbyte(out,-count); 00345 mode = '0'; 00346 } 00347 else // next value is a floating point 00348 { 00349 write_sbyte(out,count); 00350 mode = 'F'; 00351 } 00352 while(count--) 00353 write_sbyte(out,(signed char)vdata[istart++]); 00354 break; 00355 00356 case 'F': 00357 istart = i; 00358 while(i<l && isF(vdata[i])) 00359 i++; 00360 count = i - istart; 00361 while(count>127) 00362 { 00363 write_sbyte(out,0); 00364 int n = 127; 00365 while(n--) 00366 { 00367 fval = (float)vdata[istart++]; 00368 out.write((char*)&fval,4); 00369 } 00370 count -= 127; 00371 } 00372 if(i>=l || is0(vdata[i])) 00373 { 00374 write_sbyte(out,-count); 00375 mode = '0'; 00376 } 00377 else 00378 { 00379 write_sbyte(out,count); 00380 mode = 'I'; 00381 } 00382 while(count--) 00383 { 00384 fval = (float)vdata[istart++]; 00385 out.write((char*)&fval,4); 00386 } 00387 } 00388 } 00389 } 00390 00391 void VecCompressor::readCompressedVec(istream& in, const Vec& v) 00392 { 00393 // mode can be '0' for zeroes, 'F' for floats, 'I' for small integers (signed chars) 00394 // If mode is '0' abs(count) indicates the number of zeroes, 00395 // a positive sign indicates switch to 'F' mode 00396 // a negative sign indicates switch to 'I' mode 00397 // a 0 count means insert 127 zeros and stay in zero mode 00398 // If mode is 'F' abs(count) indicates the number of floats that follow 00399 // a positive sign indicates switch to 'I' mode 00400 // a negative sign indicates switch to '0' mode 00401 // a 0 count means insert 127 floats and stay in float mode 00402 // If mode is 'I' abs(count) indicates the number of small integers that follow 00403 // a positive sign indicates switch to 'F' mode 00404 // a negative sign indicates switch to '0' mode 00405 // a 0 count means insert 127 small integers and stay in 'I' mode 00406 00407 real* vptr = v.data(); 00408 real* vptrend = vptr+v.length(); 00409 signed char mode = read_sbyte(in); 00410 float fval = 0.; 00411 signed char count; 00412 00413 while(vptr!=vptrend) 00414 { 00415 count = read_sbyte(in); 00416 // cerr << int(count) << ' '; 00417 switch(mode) 00418 { 00419 case '0': 00420 if(count<0) 00421 { 00422 mode = 'I'; 00423 count = -count; 00424 } 00425 else if(count>0) 00426 mode = 'F'; 00427 else 00428 count = 127; 00429 00430 while(count--) 00431 *vptr++ = 0.; 00432 break; 00433 00434 case 'I': 00435 if(count<0) 00436 { 00437 mode = '0'; 00438 count = -count; 00439 } 00440 else if(count>0) 00441 mode = 'F'; 00442 else 00443 count = 127; 00444 00445 while(count--) 00446 *vptr++ = real(read_sbyte(in)); 00447 break; 00448 00449 case 'F': 00450 if(count<0) 00451 { 00452 mode = '0'; 00453 count = -count; 00454 } 00455 else if(count>0) 00456 mode = 'I'; 00457 else 00458 count = 127; 00459 00460 while(count--) 00461 { 00462 in.read((char*)&fval,4); 00463 *vptr++ = real(fval); 00464 } 00465 break; 00466 00467 default: 00468 PLERROR("Problem in VecCompressor::readCompressedVec this should not happen!!! (wrong data format?)"); 00469 } 00470 } 00471 // cerr << endl; 00472 } 00473 00474 00475 /* 00476 00477 Format description: 00478 00479 A succession of [ mode-byte, optionally followed by specificaitons of length N, followed by data ] 00480 00481 The way N is encoded will be explained later. 00482 00483 The bits of the mode-byte are interpreted as follows: 00484 * Most significant bit: 00485 0 : insert the following N values of type T 00486 1 : insert N zeroes and then the following single value of type T 00487 00488 * Next 2 bits indicate the data type T 00489 00 : ones (that's just 1.0, no further data is given to provide the value) 00490 01 : small 1 byte signed integers 00491 10 : 4 byte float 00492 11 : 8 byte double 00493 00494 In all but the 00 case, 1 or N corresponding values of type T are to be read in the stream (after possibly reading N) 00495 00496 * Next 5 bits (values between 0 .. 31) indicate how to get the number N, 00497 00498 0 : N is given in the following 1 byte unsigned char 00499 1..29: N is that particular value (between 1 and 29) 00500 30: N is given in the following 2 byte unsigned short 00501 31: N is given in the following 4 byte unsigned int 00502 00503 00504 */ 00505 00506 00507 /* 00508 size_t my_read_compressed(istream& in, double* vec, int l) 00509 { 00510 size_t nbytes = 0; // number of bytes read 00511 unsigned char mode; // the mode byte 00512 unsigned int N = 0; // N (number of 0s or values to insert) 00513 00514 while(l) 00515 { 00516 in.get(mode); 00517 ++nbytes; 00518 unsigned char N1 = (mode & 07); 00519 switch(N1) 00520 { 00521 case 0: // N is the 1 byte to follow 00522 in.get(N1); 00523 ++nbytes; 00524 N = N1; 00525 break; 00526 case 30: // N is the 2 bytes to follow 00527 unsigned short N2; 00528 in.read((char*)&N2,2); 00529 nbytes += 2; 00530 N = N2; 00531 break; 00532 case 31: // N is the 4 bytes to follow 00533 in.read((char*)&N,4); 00534 nbytes += 4; 00535 break; 00536 default: // N is N1 00537 N = N1; 00538 } 00539 00540 if(mode & (unsigned char)0x80) // most significant bit is on 00541 { // insert N zeros 00542 l -= N; 00543 while(N--) 00544 *vec++ = 0; 00545 N = 1; 00546 } 00547 00548 if(!l) // vec ends with zeroes, so there's no extra single value to append. We're done! 00549 break; 00550 00551 l -= N; 00552 mode = ((mode & ~0x80) >> 5); // get the 2 bits we're interested in 00553 switch(mode) 00554 { 00555 case 0: // type ones 00556 { 00557 while(N--) 00558 *vec++ = 1; 00559 } 00560 break; 00561 case 1: // type signed char 00562 { 00563 signed char val; 00564 nbytes += N; 00565 while(N--) 00566 { 00567 in.get(val); 00568 *vec++ = val; 00569 } 00570 } 00571 break; 00572 case 2: // type float 00573 { 00574 float val; 00575 nbytes += N<<2; 00576 while(N--) 00577 { 00578 in.read((char*)&val,sizeof(float)); 00579 *vec++ = val; 00580 } 00581 } 00582 break; 00583 case 3: // type double 00584 { 00585 nbytes += N<<3; 00586 in.read((char*)vec, N<<3); 00587 vec += N; 00588 } 00589 } 00590 } 00591 } 00592 00593 unsigned char get_compr_data_type(double x, bool force_float) 00594 { 00595 if(x==1.) 00596 return 0; 00597 else if(double(char(x))==x) 00598 return 1; 00599 else if(force_float || double(float(x))==x) 00600 return 2; 00601 return 3; 00602 } 00603 00604 unsigned char get_compr_data_type(float x) 00605 { 00606 if(x==1.) 00607 return 0; 00608 else if(float(char(x))==x) 00609 return 1; 00610 return 2; 00611 } 00612 00613 #include <limits> 00614 00616 size_t my_write_mode_and_size(ostream& out, bool insert_zeroes, unsigned int N, unsigned char data_type) 00617 { 00618 size_t nbytes = 0; // nbytes written 00619 unsigned char mode = data_type<<5; 00620 if(insert_zeroes) 00621 mode |= (unsigned char)0x80; 00622 if(N<30) 00623 { 00624 mode |= (unsigned char)N; 00625 out.put(mode); 00626 nbytes = 1; 00627 } 00628 else if(N<=UCHAR_MAX) 00629 { 00630 out.put(mode); 00631 out.put((unsigned char)N); 00632 nbytes = 2; 00633 } 00634 else if(N<=USHRT_MAX) 00635 { 00636 mode |= (unsigned char)30; 00637 out.put(mode); 00638 unsigned short N2 = (unsigned short)N; 00639 out.write((char*)&N2,sizeof(unsigned short)); 00640 nbytes = 3; 00641 } 00642 else // (N<=UINT_MAX) 00643 { 00644 mode |= (unsigned char)31; 00645 out.put(mode); 00646 unsigned int N4 = (unsigned int)N; 00647 out.write((char*)&N4,sizeof(unsigned int)); 00648 nbytes = 5; 00649 } 00650 return nbytes; 00651 } 00652 00653 size_t my_write_raw_data_as(ostream& out, double *vec, int l, unsigned char data_type) 00654 { 00655 size_t nbytes = 0; // nbytes written 00656 switch(data_type) 00657 { 00658 case 1: 00659 nbytes = l; 00660 while(l--) 00661 out.put((char)*vec++); 00662 break; 00663 case 2: 00664 nbytes = l*sizeof(float); 00665 while(l--) 00666 { 00667 float val = float(*vec++); 00668 out.write((char*)&val,sizeof(float)); 00669 } 00670 break; 00671 case 3: 00672 nbytes = l*sizeof(double); 00673 out.write((char*)vec,nbytes); 00674 break; 00675 } 00676 return nbytes; 00677 } 00678 00679 // Warning: this is low-level code written for efficiency 00680 size_t my_write_compressed(ostream& out, double* vec, int l, bool force_float=false) 00681 { 00682 size_t nbytes = 0; // number of bytes read 00683 00684 for(;;) 00685 { 00686 int nzeroes = 0; 00687 while(l && *vec==0.) 00688 { 00689 ++nzeroes; 00690 ++vec; 00691 --l; 00692 } 00693 00694 int nvals = 0; 00695 unsigned char data_type = 0; 00696 if(l) 00697 { 00698 double* ptr = vec; 00699 data_type = get_compr_data_type(*ptr, force_float); 00700 ++nvals; 00701 ++ptr; 00702 --l; 00703 while(l && *ptr!=0. && get_compr_data_type(*ptr)==data_type) 00704 { 00705 ++nvals; 00706 ++ptr; 00707 --l; 00708 } 00709 } 00710 00711 // Now we know nzeroes, nvals, and data_type 00712 // So let's encode it: 00713 00714 if(nzeroes) // we have zeroes 00715 { 00716 // write the code for zeroes followed by a single value 00717 nbytes += my_write_mode_and_size(out, true, nzeroes, data_type); 00718 if(nvals) // write the following single value 00719 { 00720 nbytes += my_write_raw_data_as(out, vec, 1, data_type); 00721 ++vec; 00722 --nvals; 00723 } 00724 } 00725 00726 if(nvals) // we have some remaining values 00727 { 00728 nbytes += my_write_mode_and_size(out, false, nvals, data_type); 00729 nbytes += my_write_raw_data_as(out, vec, nvals, data_type); 00730 } 00731 00732 } // end of for(;;) 00733 return nbytes; 00734 } 00735 00736 */ 00737 00738 %> // end of namespcae PLearn