00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #define _GNU_SOURCE 1
00036
00037
#include "SimpleDB.h"
00038
#include <plearn/math/random.h>
00039
#include <plearn/base/stringutils.h>
00040
00041
namespace PLearn {
00042
using namespace std;
00043
00044
00045
00046
00047 const char MissingString =
'\0';
00048 const unsigned char MissingCharacter = (
unsigned char)SCHAR_MIN;
00049 const signed char MissingSignedChar = (
signed char)SCHAR_MIN;
00050 const short MissingShort = SHRT_MIN;
00051 const int MissingInt = INT_MIN;
00052 const float MissingFloat =
MISSING_VALUE;
00053 const double MissingDouble =
MISSING_VALUE;
00054 const PDate MissingDate;
00055
00056
00057
00058
00059 bool Schema::findColumn(
const string& name,
int& position,
int& start,
00060
int& precision)
const
00061
{
00062 const_iterator it = begin(), end = this->end();
00063 position = start = precision = 0;
00064
00065
for (; it != end; start += it->precision, ++it, ++position)
00066
if (it->name == name) {
00067 precision = it->precision;
00068
break;
00069 }
00070
return (it == end)?
false :
true;
00071 }
00072
00073
00074 FieldPtr Schema::findColumn(
int position)
const
00075
{
00076
int orig_position = position;
00077 ptrdiff_t offset_= 0;
00078 const_iterator it=begin(), end=this->end();
00079
for (; position && it != end; --position, ++it)
00080 offset_+= it->precision;
00081
if (it == end)
00082
PLERROR(
"Column %d does not exist in schema",
00083 orig_position);
00084
return FieldPtr(orig_position, offset_);
00085 }
00086
00087
00088 FieldPtr Schema::findColumn(
const string& name)
const
00089
{
00090
int position, start, precision;
00091
bool found =
findColumn(name, position, start, precision);
00092
if (!found)
00093
PLERROR(
"Column %s does not exist in schema",
00094 name.c_str());
00095
return FieldPtr(position, start);
00096 }
00097
00098
00099
00100
00101 FieldValue::FieldValue()
00102 : field_type_(
Unknown), precision_(0)
00103 {}
00104
00105 FieldValue::FieldValue(
const FieldValue& fv)
00106 : field_type_(fv.field_type_), precision_(fv.precision_)
00107 {
00108
switch (
field_type_) {
00109
case Unknown:
00110
break;
00111
00112
case StringType:
00113 {
00114
int stringlen = (
int)
strlen(fv.
string_val_);
00115
if (stringlen+1 !=
precision_)
00116
PLERROR(
"Strings in a database field must contain a terminating null");
00117
string_val_ =
new char[
precision_];
00118 strcpy(
string_val_, fv.
string_val_);
00119 }
00120
break;
00121
00122
case CharacterType:
00123
case SignedCharType:
00124
case ShortType:
00125
case IntType:
00126
long_val_ = fv.
long_val_;
00127
break;
00128
00129
case FloatType:
00130
case DoubleType:
00131
double_val_ = fv.
double_val_;
00132
break;
00133
00134
case DateType:
00135
date_val_ = fv.
date_val_;
00136
break;
00137 }
00138 }
00139
00140 FieldValue::~FieldValue()
00141 {
00142
switch(
field_type_) {
00143
case StringType:
00144
delete[]
string_val_;
00145
00146
case Unknown:
00147
case CharacterType:
00148
case SignedCharType:
00149
case ShortType:
00150
case IntType:
00151
case FloatType:
00152
case DoubleType:
00153
case DateType:
00154
break;
00155 }
00156 }
00157
00158 FieldValue::FieldValue(
const char* str)
00159 : field_type_(
StringType), precision_(strlen(str)+1)
00160 {
00161
string_val_ =
new char[
precision_];
00162 strcpy(
string_val_, str);
00163 }
00164
00165 FieldValue::FieldValue(
unsigned char x)
00166 : field_type_(
CharacterType),
00167 precision_(
Field("",
CharacterType).precision),
00168 long_val_(
x)
00169 {}
00170
00171 FieldValue::FieldValue(
signed char x)
00172 : field_type_(
SignedCharType),
00173 precision_(
Field("",
SignedCharType).precision),
00174 long_val_(
x)
00175 {}
00176
00177 FieldValue::FieldValue(
short x)
00178 : field_type_(
ShortType),
00179 precision_(
Field("",
ShortType).precision),
00180 long_val_(
x)
00181 {}
00182
00183 FieldValue::FieldValue(
int x)
00184 : field_type_(
IntType),
00185 precision_(
Field("",
IntType).precision),
00186 long_val_(
x)
00187 {}
00188
00189 FieldValue::FieldValue(
float x)
00190 : field_type_(
FloatType),
00191 precision_(
Field("",
FloatType).precision),
00192 double_val_(
x)
00193 {}
00194
00195 FieldValue::FieldValue(
double x)
00196 : field_type_(
DoubleType),
00197 precision_(
Field("",
DoubleType).precision),
00198 double_val_(
x)
00199 {}
00200
00201 FieldValue::FieldValue(
const PDate& x)
00202 : field_type_(
DateType),
00203 precision_(
Field("",
DateType).precision)
00204 {
00205
date_val_.
year =
x.year;
00206
date_val_.
month =
x.month;
00207
date_val_.
day =
x.day;
00208 }
00209
00210 bool FieldValue::isMissing()
const
00211
{
00212
switch (
field_type_) {
00213
case Unknown:
return true;
00214
case StringType:
return !
string_val_ ||
string_val_[0] ==
MissingString;
00215
case CharacterType:
return (
unsigned char)
long_val_ ==
MissingCharacter;
00216
case SignedCharType:
return (
signed char)
long_val_ ==
MissingSignedChar;
00217
case ShortType:
return (
short)
long_val_ ==
MissingShort;
00218
case IntType:
return (
int)
long_val_ ==
MissingInt;
00219
case FloatType:
00220
case DoubleType:
return isnan(
double_val_);
00221
case DateType:
00222
return date_val_.
year ==
MissingDate.
year &&
00223
date_val_.
month ==
MissingDate.
month &&
00224
date_val_.
day ==
MissingDate.
day;
00225 }
00226
return false;
00227 }
00228
00229 void FieldValue::setMissing()
00230 {
00231
switch (
field_type_) {
00232
case Unknown:
break;
00233
case StringType:
if (
string_val_)
string_val_[0] =
MissingString;
break;
00234
case CharacterType:
long_val_ = long(
MissingCharacter);
break;
00235
case SignedCharType:
long_val_ = long(
MissingSignedChar);
break;
00236
case ShortType:
long_val_ = long(
MissingShort);
break;
00237
case IntType:
long_val_ = long(
MissingInt);
break;
00238
case FloatType:
00239
case DoubleType:
double_val_ =
MissingDouble;
break;
00240
case DateType:
00241
date_val_.
year =
MissingDate.
year;
00242
date_val_.
month =
MissingDate.
month;
00243
date_val_.
day =
MissingDate.
day;
00244
break;
00245 }
00246 }
00247
00248 string FieldValue::toString()
const
00249
{
00250
if (
isMissing())
00251
return "";
00252
switch (
field_type_) {
00253
case Unknown:
return "";
00254
case StringType:
return space_to_underscore(
string_val_);
00255
case CharacterType:
return string(1,
char(
long_val_));
00256
case SignedCharType:
00257
case ShortType:
00258
case IntType:
return tostring(long_val_);
00259
case FloatType:
00260
case DoubleType:
return tostring(
double_val_);
00261
case DateType:
00262
return PDate(
date_val_.
year,
date_val_.
month,
date_val_.
day).info();
00263 }
00264
return "";
00265 }
00266
00267 double FieldValue::toDouble()
const
00268
{
00269
if (
isMissing())
00270
return MISSING_VALUE;
00271
switch (
field_type_) {
00272
case Unknown:
00273
return MISSING_VALUE;
00274
00275
case StringType:
00276
case CharacterType:
00277
PLERROR(
"Cannot convert string or character field to double");
00278
break;
00279
00280
case SignedCharType:
00281
case ShortType:
00282
case IntType:
00283
return double(
long_val_);
00284
00285
case FloatType:
00286
case DoubleType:
00287
return double_val_;
00288
00289
case DateType:
00290
return double(
date_to_float(
PDate(
date_val_.
year,
date_val_.
month,
00291
date_val_.
day)));
00292 }
00293
return MISSING_VALUE;
00294 }
00295
00296 PDate FieldValue::toDate()
const
00297
{
00298
switch(
field_type_) {
00299
case DateType:
00300
return PDate(
date_val_.
year,
date_val_.
month,
date_val_.
day);
00301
00302
default:
00303
PLERROR(
"Cannot convert non-date field type to a date");
00304 }
00305
return PDate();
00306 }
00307
00308 FieldValue& FieldValue::operator=(
FieldValue rhs)
00309 {
00310
swap(rhs);
00311
return *
this;
00312 }
00313
00314 bool FieldValue::operator==(
const FieldValue& rhs)
const
00315
{
00316
const FieldValue& lhs = *
this;
00317
const FieldType& lhs_type =
field_type_;
00318
const FieldType& rhs_type = rhs.
field_type_;
00319
00320
00321
if (lhs_type ==
StringType && rhs_type ==
StringType)
00322
return !strcmp(lhs.
string_val_, rhs.
string_val_);
00323
else if (lhs_type ==
StringType || rhs_type ==
StringType)
00324
PLERROR(
"A string can be compared for equality only with another string");
00325
00326
00327
else if (lhs_type ==
DateType && rhs_type ==
DateType)
00328
return
00329
PDate(lhs.
date_val_.
year, lhs.
date_val_.
month, lhs.
date_val_.
day) ==
00330
PDate(rhs.
date_val_.
year, rhs.
date_val_.
month, rhs.
date_val_.
day);
00331
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00332
PLERROR(
"A date can be compared for equality only with another date");
00333
00334
00335
else if (lhs.
isIntegral() && rhs.
isIntegral())
00336
return lhs.
long_val_ == rhs.
long_val_;
00337
00338
00339
else if (lhs.
isFloating() && rhs.
isFloating())
00340
return lhs.
double_val_ == rhs.
double_val_;
00341
00342
00343
else if (lhs.
isIntegral() && rhs.
isFloating())
00344
return lhs.
long_val_ == rhs.
double_val_;
00345
else if (lhs.
isFloating() && lhs.
isIntegral())
00346
return lhs.
double_val_ == rhs.
long_val_;
00347
00348
00349
else
00350
PLERROR(
"Unrecognized case in equality testing between FieldValues");
00351
00352
return false;
00353 }
00354
00355 bool FieldValue::operator<(
const FieldValue& rhs)
const
00356
{
00357
const FieldValue& lhs = *
this;
00358
const FieldType& lhs_type =
field_type_;
00359
const FieldType& rhs_type = rhs.
field_type_;
00360
00361
00362
if (lhs_type ==
StringType && rhs_type ==
StringType)
00363
return strcmp(lhs.
string_val_, rhs.
string_val_) < 0;
00364
else if (lhs_type ==
StringType || rhs_type ==
StringType)
00365
PLERROR(
"A string can be relationally compared only with another string");
00366
00367
00368
else if (lhs_type ==
DateType && rhs_type ==
DateType)
00369
return
00370
PDate(lhs.
date_val_.
year, lhs.
date_val_.
month, lhs.
date_val_.
day) <
00371
PDate(rhs.
date_val_.
year, rhs.
date_val_.
month, rhs.
date_val_.
day);
00372
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00373
PLERROR(
"A date can be relationally compared only with another date");
00374
00375
00376
else if (lhs.
isIntegral() && rhs.
isIntegral())
00377
return lhs.
long_val_ < rhs.
long_val_;
00378
00379
00380
else if (lhs.
isFloating() && rhs.
isFloating())
00381
return lhs.
double_val_ < rhs.
double_val_;
00382
00383
00384
else if (lhs.
isIntegral() && rhs.
isFloating())
00385
return lhs.
long_val_ < rhs.
double_val_;
00386
else if (lhs.
isFloating() && lhs.
isIntegral())
00387
return lhs.
double_val_ < rhs.
long_val_;
00388
00389
00390
else
00391
PLERROR(
"Unrecognized case in relational testing between FieldValues");
00392
00393
return false;
00394 }
00395
00396 FieldValue FieldValue::operator+(
const FieldValue& rhs)
const
00397
{
00398
const FieldValue& lhs = *
this;
00399
const FieldType& lhs_type =
field_type_;
00400
const FieldType& rhs_type = rhs.
field_type_;
00401
00402
00403
if (lhs_type ==
StringType || rhs_type ==
StringType)
00404
PLERROR(
"Strings cannot be added");
00405
else if (lhs_type ==
CharacterType || rhs_type ==
CharacterType)
00406
PLERROR(
"Characters cannot be added");
00407
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00408
PLERROR(
"Dates cannot be added");
00409
00410
00411
else if (lhs.
isIntegral() && rhs.
isIntegral())
00412
return FieldValue(
int(lhs.
long_val_ + rhs.
long_val_));
00413
else if (lhs.
isFloating() && rhs.
isFloating())
00414
return FieldValue(
double(lhs.
double_val_ + rhs.
double_val_));
00415
00416
00417
else if (lhs.
isIntegral() && rhs.
isFloating())
00418
return FieldValue(
double(lhs.
long_val_ + rhs.
double_val_));
00419
else if (lhs.
isFloating() && rhs.
isIntegral())
00420
return FieldValue(
double(lhs.
double_val_ + rhs.
long_val_));
00421
00422
00423
else
00424
PLERROR(
"Unrecognized case in addition between FieldValues");
00425
00426
return FieldValue();
00427 }
00428
00429 FieldValue FieldValue::operator-(
const FieldValue& rhs)
const
00430
{
00431
const FieldValue& lhs = *
this;
00432
const FieldType& lhs_type =
field_type_;
00433
const FieldType& rhs_type = rhs.
field_type_;
00434
00435
00436
if (lhs_type ==
StringType || rhs_type ==
StringType)
00437
PLERROR(
"Strings cannot be subtracted");
00438
else if (lhs_type ==
CharacterType || rhs_type ==
CharacterType)
00439
PLERROR(
"Characters cannot be subtracted");
00440
00441
00442
else if (lhs_type ==
DateType && rhs_type ==
DateType)
00443
return FieldValue(
int(
00444
PDate(lhs.
date_val_.
year, lhs.
date_val_.
month, lhs.
date_val_.
day) -
00445
PDate(rhs.
date_val_.
year, rhs.
date_val_.
month, rhs.
date_val_.
day)));
00446
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00447
PLERROR(
"A date and a non-date cannot be subtracted");
00448
00449
00450
else if (lhs.
isIntegral() && rhs.
isIntegral())
00451
return FieldValue(
int(lhs.
long_val_ - rhs.
long_val_));
00452
else if (lhs.
isFloating() && rhs.
isFloating())
00453
return FieldValue(
double(lhs.
double_val_ - rhs.
double_val_));
00454
00455
00456
else if (lhs.
isIntegral() && rhs.
isFloating())
00457
return FieldValue(
double(lhs.
long_val_ - rhs.
double_val_));
00458
else if (lhs.
isFloating() && rhs.
isIntegral())
00459
return FieldValue(
double(lhs.
double_val_ - rhs.
long_val_));
00460
00461
00462
else
00463
PLERROR(
"Unrecognized case in subtraction between FieldValues");
00464
00465
return FieldValue();
00466 }
00467
00468 FieldValue FieldValue::operator*(
const FieldValue& rhs)
const
00469
{
00470
const FieldValue& lhs = *
this;
00471
const FieldType& lhs_type =
field_type_;
00472
const FieldType& rhs_type = rhs.
field_type_;
00473
00474
00475
if (lhs_type ==
StringType || rhs_type ==
StringType)
00476
PLERROR(
"Strings cannot be multiplied");
00477
else if (lhs_type ==
CharacterType || rhs_type ==
CharacterType)
00478
PLERROR(
"Characters cannot be multiplied");
00479
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00480
PLERROR(
"Dates cannot be multiplied");
00481
00482
00483
else if (lhs.
isIntegral() && rhs.
isIntegral())
00484
return FieldValue(
int(lhs.
long_val_ * rhs.
long_val_));
00485
else if (lhs.
isFloating() && rhs.
isFloating())
00486
return FieldValue(
double(lhs.
double_val_ * rhs.
double_val_));
00487
00488
00489
else if (lhs.
isIntegral() && rhs.
isFloating())
00490
return FieldValue(
double(lhs.
long_val_ * rhs.
double_val_));
00491
else if (lhs.
isFloating() && rhs.
isIntegral())
00492
return FieldValue(
double(lhs.
double_val_ * rhs.
long_val_));
00493
00494
00495
else
00496
PLERROR(
"Unrecognized case in multiplication between FieldValues");
00497
00498
return FieldValue();
00499 }
00500
00501 FieldValue FieldValue::operator/(
const FieldValue& rhs)
const
00502
{
00503
const FieldValue& lhs = *
this;
00504
const FieldType& lhs_type =
field_type_;
00505
const FieldType& rhs_type = rhs.
field_type_;
00506
00507
00508
if (lhs_type ==
StringType || rhs_type ==
StringType)
00509
PLERROR(
"Strings cannot be divided");
00510
else if (lhs_type ==
CharacterType || rhs_type ==
CharacterType)
00511
PLERROR(
"Characters cannot be divided");
00512
else if (lhs_type ==
DateType || rhs_type ==
DateType)
00513
PLERROR(
"Dates cannot be divided");
00514
00515
00516
else if (lhs.
isIntegral() && rhs.
isIntegral())
00517
return FieldValue(
int(lhs.
long_val_ / rhs.
long_val_));
00518
else if (lhs.
isFloating() && rhs.
isFloating())
00519
return FieldValue(
double(lhs.
double_val_ / rhs.
double_val_));
00520
00521
00522
else if (lhs.
isIntegral() && rhs.
isFloating())
00523
return FieldValue(
double(lhs.
long_val_ / rhs.
double_val_));
00524
else if (lhs.
isFloating() && rhs.
isIntegral())
00525
return FieldValue(
double(lhs.
double_val_ / rhs.
long_val_));
00526
00527
00528
else
00529
PLERROR(
"Unrecognized case in division between FieldValues");
00530
00531
return FieldValue();
00532 }
00533
00534 void FieldValue::swap(
FieldValue& rhs)
00535 {
00536 std::swap(
field_type_, rhs.
field_type_);
00537 std::swap(
precision_, rhs.
precision_);
00538
switch(
field_type_) {
00539
case Unknown:
break;
00540
case StringType: std::swap(
string_val_, rhs.
string_val_);
break;
00541
case CharacterType:
00542
case SignedCharType:
00543
case ShortType:
00544
case IntType: std::swap(
long_val_, rhs.
long_val_);
break;
00545
case FloatType:
00546
case DoubleType: std::swap(
double_val_, rhs.
double_val_);
break;
00547
case DateType: std::swap(
date_val_, rhs.
date_val_);
break;
00548 }
00549 }
00550
00551 ostream&
operator<<(ostream& os,
const FieldValue& ft)
00552 {
00553
00554
return os << ft.
toString();
00555 }
00556
00557
00558
00559
00560 bool RowIterator::isMissing()
const
00561
{
00562
if (
const char*
x =
asString())
00563
return x[0] ==
MissingString;
00564
else if (
const unsigned char*
x =
asCharacter())
00565
return x[0] ==
MissingCharacter;
00566
else if (
const signed char*
x =
asSignedChar())
00567
return x[0] ==
MissingSignedChar;
00568
else if (
const short*
x =
asShort())
00569
return *
x ==
MissingShort;
00570
else if (
const int*
x =
asInt())
00571
return *
x ==
MissingInt;
00572
else if (
const float*
x =
asFloat())
00573
return isnan(*
x);
00574
else if (
const double*
x =
asDouble())
00575
return isnan(*
x);
00576
else if (
const PDate*
x =
asDate())
00577
return *
x ==
MissingDate;
00578
else
00579
return false;
00580 }
00581
00582 void RowIterator::setMissing()
00583 {
00584
if (
char*
x =
asString())
00585 *
x =
MissingString;
00586
else if (
unsigned char*
x =
asCharacter())
00587 *
x =
MissingCharacter;
00588
else if (
signed char*
x =
asSignedChar())
00589 *
x =
MissingSignedChar;
00590
else if (
short*
x =
asShort())
00591 *
x =
MissingShort;
00592
else if (
int*
x =
asInt())
00593 *
x =
MissingInt;
00594
else if (
float*
x =
asFloat())
00595 *
x =
MissingFloat;
00596
else if (
double*
x =
asDouble())
00597 *
x =
MissingDouble;
00598
else if (
PDate*
x =
asDate())
00599 *
x =
MissingDate;
00600 }
00601
00602 int RowIterator::char_width()
const
00603
{
00604
int w = 0;
00605
if (
isString())
00606 w =
precision()-1;
00607
else if (
isCharacter())
00608 w = 1;
00609
else if (
isSignedChar())
00610 w = 4;
00611
else if (
isShort())
00612 w = 6;
00613
else if (
isInt())
00614 w = 11;
00615
else if (
isFloat())
00616 w = 8;
00617
else if (
isDouble())
00618 w = 8;
00619
else if (
isDate())
00620 w = 10;
00621
else
00622
PLERROR(
"Unknown type for iterator, field %d (%s)",
curfield,
name().
c_str());
00623
00624
return std::max(
int(w),
int(
name().size()));
00625 }
00626
00627 double RowIterator::toDouble()
const
00628
{
00629
if (
isMissing())
00630
return MISSING_VALUE;
00631
if (
asString())
00632
PLERROR(
"Cannot convert string to double");
00633
if (
asCharacter())
00634
PLERROR(
"Cannot convert character to double");
00635
if (
const signed char*
x =
asSignedChar())
00636
return double(*
x);
00637
if (
const short*
x =
asShort())
00638
return double(*
x);
00639
if (
const int*
x =
asInt())
00640
return double(*
x);
00641
if (
const float*
x =
asFloat())
00642
return double(*
x);
00643
if (
const double*
x =
asDouble())
00644
return *
x;
00645
if (
const PDate*
x =
asDate())
00646
return double(
date_to_float(*
x));
00647
return MISSING_VALUE;
00648 }
00649
00650 string RowIterator::toString()
const
00651
{
00652
if (
isMissing())
00653
return "";
00654
if (
const char*
x =
asString())
00655
return space_to_underscore(
x);
00656
if (
const unsigned char*
x =
asCharacter())
00657
return string(1,
char(*
x));
00658
if (
const signed char*
x =
asSignedChar())
00659
return tostring(
int(*
x));
00660
if (
const short*
x =
asShort())
00661
return tostring(*
x);
00662
if (
const int*
x =
asInt())
00663
return tostring(*
x);
00664
if (
const float*
x =
asFloat())
00665
return tostring(*
x);
00666
if (
const double*
x =
asDouble())
00667
return tostring(*
x);
00668
if (
const PDate*
x =
asDate())
00669
return x->info();
00670
return "";
00671 }
00672
00673 double todouble(
const RowIterator& it)
00674 {
00675
return it.
toDouble();
00676 }
00677
00678 string tostring(
const RowIterator& it)
00679 {
00680
return it.
toString();
00681 }
00682
00683
00684
00685
00686 FieldRowRef::operator FieldValue()
const
00687
{
00688
if (
const char*
x =
it_.
asString())
00689
return FieldValue(
x);
00690
if (
const unsigned char*
x =
it_.
asCharacter())
00691
return FieldValue(*
x);
00692
if (
const signed char*
x =
it_.
asSignedChar())
00693
return FieldValue(*
x);
00694
if (
const short*
x =
it_.
asShort())
00695
return FieldValue(*
x);
00696
if (
const int*
x =
it_.
asInt())
00697
return FieldValue(*
x);
00698
if (
const float*
x =
it_.
asFloat())
00699
return FieldValue(*
x);
00700
if (
const double*
x =
it_.
asDouble())
00701
return FieldValue(*
x);
00702
if (
const PDate*
x =
it_.
asDate())
00703
return FieldValue(*
x);
00704
return FieldValue();
00705 }
00706
00707
00708
00709
00710 FieldRowRef& FieldRowRef::operator=(
const FieldValue& rhs)
00711 {
00712
00713
if (
char*
x =
it_.
asString()) {
00714 strncpy(
x, rhs.
toString().c_str(),
it_.
precision());
00715
x[
it_.
precision()-1] =
'\0';
00716 }
00717
else if (
unsigned char*
x =
it_.
asCharacter()) {
00718
if (rhs.
isIntegral())
00719 *
x = (
unsigned char)rhs.
long_val_;
00720
else if (rhs.
isFloating())
00721 *
x = (
unsigned char)rhs.
double_val_;
00722
else
00723
PLERROR(
"Cannot convert a string or a date into an unsigned character");
00724 }
00725
else if (
signed char*
x =
it_.
asSignedChar()) {
00726
if (rhs.
isIntegral())
00727 *
x = (
signed char)rhs.
long_val_;
00728
else if (rhs.
isFloating())
00729 *
x = (
signed char)rhs.
double_val_;
00730
else
00731
PLERROR(
"Cannot convert a string or a date into a signed character");
00732 }
00733
else if (
short*
x =
it_.
asShort()) {
00734
if (rhs.
isIntegral())
00735 *
x = (
short)rhs.
long_val_;
00736
else if (rhs.
isFloating())
00737 *
x = (
short)rhs.
double_val_;
00738
else
00739
PLERROR(
"Cannot convert a string or a date into a short");
00740 }
00741
else if (
int*
x =
it_.
asInt()) {
00742
if (rhs.
isIntegral())
00743 *
x = (
int)rhs.
long_val_;
00744
else if (rhs.
isFloating())
00745 *
x = (
int)rhs.
double_val_;
00746
else
00747
PLERROR(
"Cannot convert a string or a date into an int");
00748 }
00749
else if (
float*
x =
it_.
asFloat()) {
00750
if (rhs.
isIntegral())
00751 *
x = (
float)rhs.
long_val_;
00752
else if (rhs.
isFloating())
00753 *
x = (
float)rhs.
double_val_;
00754
else
00755
PLERROR(
"Cannot convert a string or a date into a float");
00756 }
00757
else if (
double*
x =
it_.
asDouble()) {
00758
if (rhs.
isIntegral())
00759 *
x = (
double)rhs.
long_val_;
00760
else if (rhs.
isFloating())
00761 *
x = (
double)rhs.
double_val_;
00762
else
00763
PLERROR(
"Cannot convert a string or a date into a double");
00764 }
00765
else if (
PDate*
x =
it_.
asDate()) {
00766
if (rhs.
isDate())
00767 *
x =
PDate(rhs.
date_val_.year, rhs.
date_val_.month,
00768 rhs.
date_val_.day);
00769
else
00770
PLERROR(
"Cannot convert a non-date into a date");
00771 }
00772
else
00773
PLERROR(
"Unrecognized case in assignment in FieldRowRef from FieldValue");
00774
00775
return *
this;
00776 }
00777
00778
00779
00780
00781 Row::Row(
const Schema* s) : schema(s)
00782 {
00783
00784
int n=0;
00785 Schema::const_iterator it =
schema->begin(),
end =
schema->end();
00786
for ( ; it !=
end; ++it ) {
00787 n += it->precision;
00788 }
00789
rawrow.resize(n,
'\0');
00790 }
00791
00792 void Row::sanitize()
const
00793
{
00794
00795
00796
00797
00798
00799
00800
00801
Row* This = const_cast<Row*>(
this);
00802
iterator it = This->
begin(),
end = This->
end();
00803
for ( ; it !=
end; ++it ) {
00804
if (
char *
x = it.
asString()) {
00805
int prec = it.
precision();
00806
bool clearing =
false;
00807
for ( ; prec; ++
x, --prec)
00808
if (clearing)
00809 *
x =
'\0';
00810
else if (*
x ==
'\0')
00811 clearing =
true;
00812 }
00813 }
00814 }
00815
00816 Row::iterator Row::operator[](
int fieldNumber)
00817 {
00818
iterator it=this->
begin(),
end=this->
end();
00819
for (; fieldNumber && it != end; --fieldNumber, ++it)
00820 ;
00821
return it;
00822 }
00823
00824 Row::iterator Row::operator[](
string fieldName)
00825 {
00826
iterator it=this->
begin(),
end=this->
end();
00827 Schema::const_iterator scit=
schema->begin(), scend=
schema->end();
00828
for(; it != end && scit != scend; ++it, ++scit)
00829
if (scit->name == fieldName)
00830
break;
00831
return it;
00832 }
00833
00834 void printFieldName(ostream& o,
const Row::iterator& field)
00835 {
00836 o.setf(ios::right, ios::adjustfield);
00837 o.fill(
' ');
00838 o.width(field.
char_width());
00839 o << field.
name().c_str();
00840 }
00841
00842 void printFieldNames(ostream& o,
const Row& rowc)
00843 {
00844
Row& row = const_cast<Row&>(rowc);
00845
Row::const_iterator it = row.
begin(), end = row.
end();
00846
00847
while(it!=end)
00848 {
00849
printFieldName(o,it);
00850 o <<
" | ";
00851 ++it;
00852 }
00853 o <<
endl;
00854 }
00855
00856 ostream&
operator<<(ostream& o,
const Row::iterator& field)
00857 {
00858 o.setf(ios::right, ios::adjustfield);
00859 o.fill(
' ');
00860 o.width(field.
char_width());
00861
00862
00863
00864
if (field.
isMissing())
00865 o <<
" ";
00866
else if (
const char*
x = field.
asString())
00867 o <<
x;
00868
else if (
const unsigned char*
x = field.
asCharacter())
00869 {
00870
if (isprint(*
x))
00871 {
00872
00873
00874 o.width(0);
00875 o <<
center(
string(1,*
x),field.
char_width());
00876 }
00877
else
00878 {
00879 o.setf(ios::left, ios::adjustfield);
00880 o.width(0);
00881 o <<
"0x";
00882 o.width(field.
char_width()-2);
00883 o << hex <<
int(*
x) << dec;
00884 o.setf(ios::right, ios::adjustfield);
00885 }
00886 }
00887
else if (
const signed char*
x = field.
asSignedChar())
00888 o <<
int(*
x);
00889
else if (
const short*
x = field.
asShort())
00890 o << *
x;
00891
else if (
const int*
x = field.
asInt())
00892 o << *
x;
00893
else if (
const float*
x = field.
asFloat())
00894 {
00895 o.setf(ios::fmtflags(0), ios::floatfield);
00896 o.precision(6);
00897 o << *
x;
00898 }
00899
else if (
const double*
x = field.
asDouble())
00900 {
00901 o.setf(ios::fmtflags(0), ios::floatfield);
00902 o.precision(6);
00903 o << *
x;
00904 }
00905
else if (
const PDate*
x = field.
asDate())
00906 {
00907 o.width(0);
00908 o <<
center(
x->info(),field.
char_width());
00909 }
00910
else
00911
PLERROR(
"Unknown field type");
00912
00913
return o;
00914 }
00915
00916 ostream&
operator<<(ostream& o,
const Row& rowc)
00917 {
00918
Row& row = const_cast<Row&>(rowc);
00919
Row::const_iterator it = row.
begin(), end = row.
end();
00920
00921
while(it!=end)
00922 {
00923 o << it <<
" | ";
00924 ++it;
00925 }
00926 o <<
endl;
00927
return o;
00928 }
00929
00930
00931
00932
00933
00934
00935
00936
00937 void halfShuffleRows(
SDB& sdb)
00938 {
00939
Row rowi(&sdb.
getSchema());
00940
Row rowj(&sdb.
getSchema());
00941
int length =
int(sdb.
length());
00942
for(
int k=1;
k<length/2;
k+=2)
00943 {
00944
if(
k%100000==1)
00945 cerr <<
k <<
endl;
00946 sdb.
getInRow(
k,rowi);
00947 sdb.
getInRow(length-
k,rowj);
00948 sdb.
setRow(rowi,length-
k);
00949 sdb.
setRow(rowj,
k);
00950 }
00951 }
00952
00953
00954 void randomShuffleRows(
SDB& sdb)
00955 {
00956
Row rowi(&sdb.
getSchema());
00957
Row rowj(&sdb.
getSchema());
00958
int length =
int(sdb.
length());
00959
for(
int i=0; i<sdb.
length(); i++)
00960 {
00961
if(i%1000==0)
00962 cerr << i <<
endl;
00963
int j = i+int(
uniform_sample()*(length-i));
00964 sdb.
getInRow(i,rowi);
00965 sdb.
getInRow(j,rowj);
00966 sdb.
setRow(rowi,j);
00967 sdb.
setRow(rowj,i);
00968 }
00969 }
00970
00971 }