Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

SDBVMat.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 // SDBVMat.cc: Implementation of VMat/SDB Interface 00003 // 00004 // Copyright (C) 2000 Nicolas Chapados 00005 // 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are met: 00008 // 00009 // 1. Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // 00012 // 2. Redistributions in binary form must reproduce the above copyright 00013 // notice, this list of conditions and the following disclaimer in the 00014 // documentation and/or other materials provided with the distribution. 00015 // 00016 // 3. The name of the authors may not be used to endorse or promote 00017 // products derived from this software without specific prior written 00018 // permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00021 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00022 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00023 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00025 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00026 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00027 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00028 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00029 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 // 00031 // This file is part of the PLearn library. For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 #include <plearn/math/TMat_maths.h> // for binary_search 00035 //#include "stringutils.h" 00036 #include "SDBVMat.h" 00037 00038 // norman: profiler deactivated for win32 00039 #ifndef WIN32 00040 #include <plearn/sys/Profiler.h> 00041 #endif 00042 00043 namespace PLearn { 00044 using namespace std; 00045 00046 00047 //##### SDBVMOutputCoder #################################################### 00048 00049 SDBVMOutputCoder::SDBVMOutputCoder(SDBVMOutputCoding oc, 00050 real missing_values_mapping) 00051 : output_coding_(oc), num_classes_(0), 00052 missing_values_mapping_(missing_values_mapping) 00053 {} 00054 00055 SDBVMOutputCoder::~SDBVMOutputCoder() 00056 {} 00057 00058 SDBVMOutputCoding SDBVMOutputCoder::getOutputCoding() const 00059 { 00060 return output_coding_; 00061 } 00062 00063 void SDBVMOutputCoder::setNumClasses(int n_classes) 00064 { 00065 num_classes_ = n_classes; 00066 } 00067 00068 int SDBVMOutputCoder::getNumClasses() const 00069 { 00070 return num_classes_; 00071 } 00072 00073 void SDBVMOutputCoder::setMissingValuesMapping(real missing_values_mapping) 00074 { 00075 missing_values_mapping_ = missing_values_mapping; 00076 } 00077 00078 real SDBVMOutputCoder::getMissingValuesMapping() const 00079 { 00080 return missing_values_mapping_; 00081 } 00082 00083 void SDBVMOutputCoder::setOutput(real output_value, const Vec& output_field) const 00084 { 00085 if (is_missing(output_value)) 00086 output_value = missing_values_mapping_; 00087 int output_int = int(output_value); 00088 00089 switch (output_coding_) { 00090 case SDBVMNumeric: 00091 output_field[0] = output_value; 00092 break; 00093 00094 case SDBVMOneHot: 00095 // Additional coding: if we get a MISSING_VALUE at this point, fill 00096 // in the output_field with MISSING_VALUEs. 00097 if (is_missing(output_value)) 00098 output_field.fill(MISSING_VALUE); 00099 else { 00100 if (output_value < 0 || output_value >= num_classes_) 00101 PLERROR("In SDBVMOutputCoder::setOutput: " 00102 "Output value not in the range [0,%d]", num_classes_-1); 00103 00104 output_field.fill(0.0); 00105 output_field[output_int] = 1.0; 00106 } 00107 break; 00108 00109 case SDBVMOneHotMinus1: 00110 // Additional coding: if we get a MISSING_VALUE at this point, fill 00111 // in the output_field with MISSING_VALUEs. 00112 if (is_missing(output_value)) 00113 output_field.fill(MISSING_VALUE); 00114 else { 00115 00116 /*NOTE: this used to be a BUG!!! the following line was 00117 moved up from within 'if (output_int > 0)'. 00118 when output_int was ==0, output_field would stay as is, 00119 but should have been filled w/ zeros. 00120 */ 00121 output_field.fill(0.0); 00122 if (output_value < 0 || output_value >= num_classes_) 00123 PLERROR("In SDBVMOutputCoder::setOutput: " 00124 "Output value not in the range [0,%d]", num_classes_-1); 00125 if (output_int > 0) { 00126 --output_int; 00127 //output_field.fill(0.0); 00128 output_field[output_int] = 1.0; 00129 } 00130 } 00131 break; 00132 00133 default: 00134 PLERROR("In SDBVMOutputCoder::setOutput: " 00135 "Unknown coding type: %d", int(output_coding_)); 00136 } 00137 } 00138 00139 int SDBVMOutputCoder::fieldWidth() const 00140 { 00141 switch (output_coding_) { 00142 case SDBVMNumeric: 00143 return 1; 00144 00145 case SDBVMOneHot: 00146 if (num_classes_ == 0) 00147 PLERROR("In SDBVMOutputCoder::fieldWidth: " 00148 "number of output classes not specified"); 00149 return num_classes_; 00150 00151 case SDBVMOneHotMinus1: 00152 if (num_classes_ == 0) 00153 PLERROR("In SDBVMOutputCoder::fieldWidth: " 00154 "number of output classes not specified"); 00155 return num_classes_ - 1; 00156 00157 default: 00158 PLERROR("In SDBVMOutputCoder::fieldWidth: " 00159 "Unknown coding type: %d", int(output_coding_)); 00160 } 00161 return 0; 00162 } 00163 00164 template <class Mapping> 00165 int SDBVMOutputCoder::getNumClasses(const Mapping& mapping, 00166 real other_values_mapping, 00167 real missing_values_mapping) 00168 { 00169 typename Mapping::const_iterator it = mapping.begin(), end = mapping.end(); 00170 int max_of_map = INT_MIN; 00171 bool all_int = true; 00172 00173 // Find the maximum of the mapped values 00174 for (; it != end; ++it) { 00175 if (all_int && it->second >= 0 && it->second == int(it->second)) 00176 max_of_map = std::max(max_of_map, int(it->second)); 00177 else 00178 all_int = false; 00179 } 00180 00181 return handleOtherAndMissing(all_int, max_of_map, 00182 other_values_mapping, missing_values_mapping); 00183 } 00184 00185 int SDBVMOutputCoder::handleOtherAndMissing(bool all_int, 00186 int candidate_max, 00187 real other_values_mapping, 00188 real missing_values_mapping) 00189 { 00190 // Handle other_values and missing_values mappings. Causes for 00191 // dismissal include: infinites, NaN's, non-integers 00192 if (all_int && finite(other_values_mapping) && 00193 other_values_mapping == int(other_values_mapping)) 00194 candidate_max = std::max(candidate_max, int(other_values_mapping)); 00195 else 00196 // if other_values_mapping is MISSING_VALUE, leave candidate_max as 00197 // is and don't disable one-hot coding 00198 if (!is_missing(other_values_mapping)) 00199 all_int = false; 00200 00201 if (all_int && finite(missing_values_mapping) && 00202 missing_values_mapping == int(missing_values_mapping)) 00203 candidate_max = std::max(candidate_max, int(missing_values_mapping)); 00204 else 00205 // if other_values_mapping is MISSING_VALUE, leave candidate_max as 00206 // is and don't disable one-hot coding 00207 if (!is_missing(missing_values_mapping)) 00208 all_int = false; 00209 00210 return all_int? candidate_max+1 : 0; 00211 } 00212 00213 00214 //##### SDBVMatrix ######################################################## 00215 00216 SDBVMatrix::SDBVMatrix(const string& dbname, bool detect_missing) 00217 : sdb_(dbname, "", SDB::readonly), detect_missing_(detect_missing) 00218 { 00219 row_ = Row(&sdb_.getSchema()); 00220 length_ = sdb_.size(); 00221 width_ = 0; 00222 if (sdb_.hasStats()) 00223 sdb_.loadStats(); 00224 else { 00225 sdb_.computeStats(); 00226 sdb_.saveStats(); 00227 } 00228 } 00229 00230 void SDBVMatrix::appendField(const string& name, SDBVMField* new_field) 00231 { 00232 int fieldwidth = new_field->fieldWidth(); 00233 vector<string> fieldnames = split(name); 00234 if(fieldwidth>1 && int(fieldnames.size())==fieldwidth) 00235 { 00236 for(unsigned int i=0; i<fieldnames.size(); i++) 00237 { 00238 width_++; 00239 declareField(width_-1,fieldnames[i],new_field->fieldType()); 00240 } 00241 } 00242 else 00243 { 00244 for(int k=0; k<fieldwidth; k++) 00245 { 00246 width_++; 00247 declareField(width_-1,name,new_field->fieldType()); 00248 } 00249 } 00250 00251 fields_.push_back(PSDBVMField(new_field)); 00252 current_row.resize(width_); 00253 } 00254 00255 void SDBVMatrix::getRow(int i, Vec v) const 00256 { 00257 //prof.start("getrow"); 00258 //prof.start("getrow1"); 00259 sdb_.getInRow(i, row_); 00260 //prof.end("getrow1"); 00261 FieldsVector::const_iterator it = fields_.begin(), end = fields_.end(); 00262 int curpos=0, curwidth; 00263 for (int f=0; it != end; ++it, curpos+=curwidth, f++) { 00264 curwidth = (*it)->fieldWidth(); 00265 Vec output = v.subVec(curpos,curwidth); 00266 //prof.start("getrow2"); 00267 (*it)->convertField(sdb_, row_, output); 00268 //prof.end("getrow2"); 00269 if (detect_missing_ && output.hasMissing()) 00270 PLWARNING("SDBVMatrix::getRow(%d,v) has missing value for %d-th field (columns %d-%d)", 00271 i,f,curpos,curpos+curwidth-1); 00272 } 00273 //prof.end("getrow"); 00274 } 00275 00276 00277 //##### SDBVMField ######################################################## 00278 00279 SDBVMOutputCoding 00280 SDBVMField::getOutputCoding() const 00281 { 00282 return SDBVMUnknownCoding; 00283 } 00284 00285 void SDBVMField::convertMissing(const Vec& output) const 00286 { 00287 for (int i=0;i<output.length();i++) 00288 if (is_missing(output[i])) 00289 output[i]=missing_values_mapping_; 00290 } 00291 00292 00293 //##### SDBVMFieldAsIs #################################################### 00294 00295 void SDBVMFieldAsIs::convertField(const SDBWithStats& sdb, const Row& row, 00296 const Vec& output) const 00297 { 00298 output[0] = real(source_.getValue(sdb,row)); 00299 convertMissing(output); 00300 } 00301 00302 int SDBVMFieldAsIs::fieldWidth() const 00303 { 00304 return 1; 00305 } 00306 00307 00308 00309 00310 //##### SDBVMFieldNormalize ############################################### 00311 00312 void SDBVMFieldNormalize::convertField(const SDBWithStats& sdb, 00313 const Row& row, const Vec& output) const 00314 { 00315 real x = source_.getValue(sdb,row); 00316 const FieldStat& stat = source_.getFieldStat(sdb,row); 00317 output[0] = (x - stat.mean()) / stat.stddev(); 00318 convertMissing(output); 00319 } 00320 00321 int SDBVMFieldNormalize::fieldWidth() const 00322 { 00323 return 1; 00324 } 00325 00326 00327 00328 00329 //##### SDBVMFieldDivSigma ################################################ 00330 00331 void SDBVMFieldDivSigma::convertField(const SDBWithStats& sdb, 00332 const Row& row, const Vec& output) const 00333 { 00334 real x = source_.getValue(sdb,row); 00335 const FieldStat& stat = source_.getFieldStat(sdb,row); 00336 output[0] = x / stat.stddev(); 00337 convertMissing(output); 00338 } 00339 00340 int SDBVMFieldDivSigma::fieldWidth() const 00341 { 00342 return 1; 00343 } 00344 00345 00346 00347 //##### SDBVMFieldAffine ################################################## 00348 00349 void SDBVMFieldAffine::convertField(const SDBWithStats& sdb, 00350 const Row& row, const Vec& output) const 00351 { 00352 real v = source_.getValue(sdb,row); 00353 //if (is_missing(v)) 00354 // output[0] = MISSING_VALUE; 00355 //else 00356 output[0] = a_ * v + b_; 00357 convertMissing(output); 00358 } 00359 00360 int SDBVMFieldAffine::fieldWidth() const 00361 { 00362 return 1; 00363 } 00364 00365 00366 00367 00368 //##### SDBVMFieldPosAffine ############################################### 00369 00370 void SDBVMFieldPosAffine::convertField(const SDBWithStats& sdb, 00371 const Row& row, const Vec& output) const 00372 { 00373 output[0] = a_ * std::max(real(source_.getValue(sdb,row)), real(0.)) + b_; 00374 convertMissing(output); 00375 } 00376 00377 int SDBVMFieldPosAffine::fieldWidth() const 00378 { 00379 return 1; 00380 } 00381 00382 00383 00384 //##### SDBVMFieldSignedPower ############################################# 00385 00386 void SDBVMFieldSignedPower::convertField(const SDBWithStats& sdb, 00387 const Row& row, const Vec& output) const 00388 { 00389 real x = source_.getValue(sdb,row); 00390 real sign_x = x>=0.0 ? 1.0 : -1.0; 00391 output[0] = sign_x * mypow(x*sign_x, a_); 00392 convertMissing(output); 00393 } 00394 00395 int SDBVMFieldSignedPower::fieldWidth() const 00396 { 00397 return 1; 00398 } 00399 00400 00401 00402 //##### SDBVMFieldFunc1 ################################################### 00403 00404 void SDBVMFieldFunc1::convertField(const SDBWithStats& sdb, 00405 const Row& row, const Vec& output) const 00406 { 00407 Vec input(1); 00408 input[0] = source_.getValue(sdb,row); 00409 output << func_(input); 00410 convertMissing(output); 00411 } 00412 00413 int SDBVMFieldFunc1::fieldWidth() const 00414 { 00415 return func_->outputsize; 00416 } 00417 00418 00419 00420 00421 //##### SDBVMFieldFunc2 ################################################### 00422 00423 void SDBVMFieldFunc2::convertField(const SDBWithStats& sdb, 00424 const Row& row, const Vec& output) const 00425 { 00426 Vec input1(1), input2(1); 00427 input1[0] = real(source1_.getValue(sdb,row)); 00428 input2[0] = real(source2_.getValue(sdb,row)); 00429 output[0] = func_(input1, input2); 00430 convertMissing(output); 00431 } 00432 00433 int SDBVMFieldFunc2::fieldWidth() const 00434 { 00435 return 1; 00436 } 00437 00438 00439 00440 00441 //##### SDBVMFieldDate #################################################### 00442 00443 void SDBVMFieldDate::convertField(const SDBWithStats& sdb, 00444 const Row& row, const Vec& output) const 00445 { 00446 real realval = source_.getValue(sdb,row); 00447 if(is_missing(realval)) { 00448 output[0] = missing_values_mapping_; 00449 output[1] = missing_values_mapping_; 00450 output[2] = missing_values_mapping_; 00451 } 00452 else { 00453 PDate d = float_to_date(realval); 00454 output[0] = real(d.year); 00455 output[1] = real(d.month); 00456 output[2] = real(d.day); 00457 } 00458 } 00459 00460 int SDBVMFieldDate::fieldWidth() const 00461 { 00462 return 3; 00463 } 00464 00465 00466 00467 00468 //##### SDBVMFieldDay ##################################################### 00469 00470 void SDBVMFieldDay::convertField(const SDBWithStats& sdb, 00471 const Row& row, const Vec& output) const 00472 { 00473 real realval = source_.getValue(sdb,row); 00474 PDate d = float_to_date(realval); 00475 // compute a normalized day ranging approximately in [-1,1] 00476 // for (1980 - 2000). 00477 output[0] = ((d.year-1990)*365+(d.month-1)*30+(d.day-1))/3650.0; 00478 convertMissing(output); 00479 } 00480 00481 int SDBVMFieldDay::fieldWidth() const 00482 { 00483 return 1; 00484 } 00485 00486 00487 00488 //##### SDBVMFieldMonths ##################################################### 00489 00490 void SDBVMFieldMonths::convertField(const SDBWithStats& sdb, 00491 const Row& row, const Vec& output) const 00492 { 00493 real realval = source_.getValue(sdb,row); 00494 PDate d = float_to_date(realval); 00495 output[0] = d.year*12 + (d.month-1); 00496 convertMissing(output); 00497 } 00498 00499 int SDBVMFieldMonths::fieldWidth() const 00500 { 00501 return 1; 00502 } 00503 00504 00505 //##### SDBVMFieldDateDiff ################################################### 00506 00507 void SDBVMFieldDateDiff::convertField(const SDBWithStats& sdb, 00508 const Row& row, const Vec& output) const 00509 { 00510 // WARNING. Convoluted logic ahead. Should fix this. 00511 00512 FieldValue v1 = source1_.getValue(sdb,row); 00513 if (!v1.isMissing() && !date1_threshold_.isMissing() && 00514 v1 <= date1_threshold_) 00515 v1.setMissing(); 00516 00517 PDate d1 = v1.toDate(); 00518 00519 FieldValue v2; 00520 PDate d2 = refdate; 00521 output[0] = MISSING_VALUE; // default value 00522 00523 if(!d1.isMissing() && refdate.isMissing()) 00524 { 00525 v2 = source2_.getValue(sdb,row); 00526 if (!v2.isMissing() && !date2_threshold_.isMissing() && 00527 v2 <= date2_threshold_) 00528 v2.setMissing(); 00529 00530 if(v2.isDate()) 00531 d2 = v2.toDate(); 00532 else if(v2.isIntegral()) { 00533 if (!v2.isMissing()) { 00534 int value2 = int(v2); 00535 switch(unit) { 00536 case 'D': 00537 output[0] = d1.toJulianDay() - value2; 00538 break; 00539 case 'M': 00540 output[0] = d1.month - value2; 00541 break; 00542 case 'Y': 00543 output[0] = d1.year - value2; 00544 break; 00545 default: 00546 PLERROR("In SDBVMFieldDateDiff: unrecognized unit %c", unit); 00547 } 00548 return; 00549 } 00550 } 00551 else 00552 PLERROR("In SDBVMFieldDateDiff convertField: second field is neither " 00553 "a date nor an integer type!"); 00554 } 00555 00556 // Hack for consistency: (We found a date with year==-32764 (sdb select policy_start_date,drv_last_suspension_end,drv_suspension_days FROM icbc3:2530858:1 ) ) 00557 if(!d1.isMissing() && d1.year<1900) 00558 d1.setMissing(); 00559 if(!d2.isMissing() && d2.year<1900) 00560 d2.setMissing(); 00561 if(!d1.isMissing() && !d2.isMissing()) 00562 { 00563 switch(unit) 00564 { 00565 case 'D': 00566 output[0] = d1-d2; 00567 break; 00568 case 'M': 00569 output[0] = (d1.year*12+d1.month) - (d2.year*12+d2.month); 00570 break; 00571 case 'Y': 00572 output[0] = d1.year - d2.year; 00573 break; 00574 default: 00575 PLERROR("In SDBVMFieldDateDiff: unrecognized unit %c", unit); 00576 } 00577 } 00578 } 00579 00580 int SDBVMFieldDateDiff::fieldWidth() const 00581 { 00582 return 1; 00583 } 00584 00585 00586 //##### SDBVMFieldDiscrete ################################################## 00587 00588 SDBVMFieldDiscrete::SDBVMFieldDiscrete(SDBVMSource source, int num_classes, 00589 real missing_values_mapping, SDBVMOutputCoding oc, VMField::FieldType ft) 00590 : inherited(source,missing_values_mapping,ft), num_classes_(num_classes), 00591 output_coder_(new SDBVMOutputCoder(oc, missing_values_mapping)) 00592 { 00593 output_coder_->setNumClasses(num_classes); 00594 } 00595 00596 // Version of the constructor that takes an OutputCoder object 00597 SDBVMFieldDiscrete::SDBVMFieldDiscrete(SDBVMSource source, SDBVMOutputCoder* oc, 00598 int num_classes, real missing_values_mapping, VMField::FieldType ft) 00599 : inherited(source, missing_values_mapping,ft), 00600 num_classes_(num_classes), output_coder_(oc) 00601 { 00602 output_coder_->setNumClasses(num_classes); 00603 } 00604 00605 void SDBVMFieldDiscrete::convertField(const SDBWithStats& sdb, const Row& row, 00606 const Vec& output) const 00607 { 00608 real value = getDiscreteValue(sdb, row); 00609 output_coder_->setOutput(value, output); 00610 } 00611 00612 int SDBVMFieldDiscrete::fieldWidth() const 00613 { 00614 return output_coder_->fieldWidth(); 00615 } 00616 00617 SDBVMOutputCoding SDBVMFieldDiscrete::getOutputCoding() const 00618 { 00619 return output_coder_->getOutputCoding(); 00620 } 00621 00622 void SDBVMFieldDiscrete::setNumClasses(int num_classes) 00623 { 00624 num_classes_ = num_classes; 00625 output_coder_->setNumClasses(num_classes); 00626 } 00627 00628 00629 //##### SDBVMFieldDateGreater ################################################### 00630 00631 void SDBVMFieldDateGreater::convertField(const SDBWithStats& sdb, 00632 const Row& row, const Vec& output) const 00633 { 00634 PDate d = source_.getValue(sdb,row); 00635 00636 if (d>ref) 00637 output[0]=1; 00638 else 00639 output[0]=0; 00640 } 00641 00642 int SDBVMFieldDateGreater::fieldWidth() const 00643 { 00644 return 1; 00645 } 00646 00647 real SDBVMFieldDateGreater::getDiscreteValue(const SDBWithStats& sdb, const 00648 Row& row) const 00649 { 00650 // WARNING: IS THIS CORRECT (Pascal?) 00651 FieldValue v = source_.getValue(sdb,row); 00652 if(v.isMissing()) 00653 return missing_values_mapping_; 00654 return v.toDate()>ref ?1 :0; 00655 } 00656 00657 //##### SDBVMFieldCodeAsIs ################################################ 00658 00659 real SDBVMFieldCodeAsIs::getDiscreteValue(const SDBWithStats& sdb, 00660 const Row& row) const 00661 { 00662 FieldValue v = source_.getValue(sdb,row); 00663 return v.isMissing() ?missing_values_mapping_ :real(v); 00664 } 00665 00666 00667 //##### SDBVMFieldRemapReals ############################################## 00668 00669 SDBVMFieldRemapReals::RealMap 00670 SDBVMFieldRemapReals::getRealMapping(const string& mappings) 00671 { 00672 RealMap real_mapping; 00673 00674 if(!mappings.empty()) { 00675 istrstream in(mappings.c_str()); 00676 real realkey, value; 00677 00678 for(;;) { 00679 in >> realkey >> value; 00680 if (!in) 00681 break; 00682 real_mapping[realkey] = value; 00683 } 00684 } 00685 return real_mapping; 00686 } 00687 00688 SDBVMFieldRemapReals::SDBVMFieldRemapReals(SDBVMSource source, 00689 const string& mappings, 00690 real other_values_mapping, 00691 real missing_values_mapping, 00692 SDBVMOutputCoding oc, 00693 VMField::FieldType ft) 00694 : inherited(source, 0, missing_values_mapping, oc, ft), 00695 real_mapping_(getRealMapping(mappings)), 00696 other_values_mapping_(other_values_mapping) 00697 { 00698 // Set the base-class num_classes_ 00699 setNumClasses(SDBVMOutputCoder::getNumClasses( 00700 real_mapping_, other_values_mapping, missing_values_mapping)); 00701 } 00702 00703 SDBVMFieldRemapReals::SDBVMFieldRemapReals(SDBVMSource source, 00704 const FieldStat& field_stat, 00705 real other_values_mapping, 00706 real missing_values_mapping, 00707 SDBVMOutputCoding oc, 00708 VMField::FieldType ft) 00709 : inherited(source, 0, missing_values_mapping, oc, ft), 00710 other_values_mapping_(other_values_mapping) 00711 { 00712 map<string,int>::iterator it = field_stat.symbolid.begin(), 00713 end = field_stat.symbolid.end(); 00714 for( ; it != end; ++it) 00715 real_mapping_[real(todouble(it->first))] = it->second; 00716 00717 // Set the base-class num_classes_ 00718 setNumClasses(SDBVMOutputCoder::getNumClasses( 00719 real_mapping_, other_values_mapping, missing_values_mapping)); 00720 } 00721 00722 real SDBVMFieldRemapReals::getDiscreteValue(const SDBWithStats& sdb, 00723 const Row& row) const 00724 { 00725 FieldValue v = source_.getValue(sdb,row); 00726 if(v.isMissing()) 00727 return missing_values_mapping_; 00728 00729 real realval = real(v); 00730 RealMap::const_iterator found = real_mapping_.find(realval); 00731 if (found != real_mapping_.end()) 00732 realval = found->second; 00733 else 00734 realval = other_values_mapping_; 00735 return realval; 00736 } 00737 00738 00739 //##### NGSDBVMFieldRemapStrings ############################################ 00740 00741 SDBVMFieldRemapStrings::StringMap 00742 SDBVMFieldRemapStrings::getStringMapping(const string& mappings) 00743 { 00744 StringMap string_mapping; 00745 00746 if(!mappings.empty()) { 00747 istrstream in(mappings.c_str()); 00748 string stringkey; 00749 real value; 00750 00751 for(;;) { 00752 in >> stringkey >> value; 00753 if (!in) 00754 break; 00755 string_mapping[stringkey] = value; 00756 } 00757 } 00758 return string_mapping; 00759 } 00760 00761 SDBVMFieldRemapStrings::SDBVMFieldRemapStrings(SDBVMSource source, 00762 const string& mappings, 00763 real other_values_mapping, 00764 real missing_values_mapping, 00765 SDBVMOutputCoding oc, 00766 VMField::FieldType ft) 00767 : inherited(source, 0, missing_values_mapping, oc, ft), 00768 string_mapping_(getStringMapping(mappings)), 00769 other_values_mapping_(other_values_mapping) 00770 { 00771 // Set the base-class num_classes_ 00772 setNumClasses(SDBVMOutputCoder::getNumClasses( 00773 string_mapping_, other_values_mapping, missing_values_mapping)); 00774 } 00775 00776 SDBVMFieldRemapStrings::SDBVMFieldRemapStrings(SDBVMSource source, 00777 const FieldStat& field_stat, 00778 real other_values_mapping, 00779 real missing_values_mapping, 00780 SDBVMOutputCoding oc, 00781 VMField::FieldType ft) 00782 : inherited(source, 0, missing_values_mapping, oc, ft), 00783 other_values_mapping_(other_values_mapping) 00784 { 00785 map<string,int>::iterator it = field_stat.symbolid.begin(), 00786 end = field_stat.symbolid.end(); 00787 for( ; it != end; ++it) 00788 string_mapping_[it->first] = it->second; 00789 00790 // Set the base-class num_classes_ 00791 setNumClasses(SDBVMOutputCoder::getNumClasses( 00792 string_mapping_, other_values_mapping, missing_values_mapping)); 00793 } 00794 00795 real SDBVMFieldRemapStrings::getDiscreteValue(const SDBWithStats& sdb, 00796 const Row& row) const 00797 { 00798 real realval; 00799 FieldValue v = source_.getValue(sdb,row); 00800 if(v.isMissing()) 00801 /*NOTE: this used to be a BUG!!! the following used to be 00802 realval = missing_values_mapping_; 00803 but realval was overriden with other_values_mapping_ 00804 a few lines later... and never returned 00805 */ 00806 return missing_values_mapping_; 00807 string s = space_to_underscore(string(v)); 00808 StringMap::const_iterator found = string_mapping_.find(s); 00809 if (found != string_mapping_.end()) 00810 realval = found->second; 00811 else 00812 realval = other_values_mapping_; 00813 return realval; 00814 } 00815 00816 00817 //##### NGSDBVMFieldRemapIntervals ########################################## 00818 00819 void 00820 SDBVMFieldRemapIntervals::getIntervals(const string& mappings, 00821 bool& all_int, real& max_of_map, 00822 Vec& intervals_x, Vec& intervals_y) 00823 { 00824 all_int = true; 00825 max_of_map = -FLT_MAX; 00826 istrstream in(mappings.c_str()); 00827 real xi, yi; 00828 intervals_x.resize(10); 00829 intervals_y.resize(11); 00830 00831 int i; 00832 for(i=0; ; ++i) { 00833 in >> yi; 00834 if(!in) 00835 PLERROR("In NGSDBVMFieldRemapIntervals::getIntervals: " 00836 "mappings should have an odd number of elements, found %d", 00837 2*i); 00838 intervals_y[i] = yi; 00839 if (all_int && yi >= 0 && yi == int(yi)) 00840 max_of_map = std::max(max_of_map, yi); 00841 else 00842 all_int = false; 00843 00844 in >> xi; 00845 if(!in) 00846 break; 00847 if (i>0 && intervals_x[i-1]>=xi) 00848 PLERROR("In NGSDBVMFieldRemapIntervals::getIntervals: " 00849 "mappings needs x_{i-1}<x_i, found x[%d]=%f, x[%d]=%f", 00850 i-1,intervals_x[i-1],i,xi); 00851 intervals_x[i] = xi; 00852 if (intervals_x.length()==i+1) { 00853 intervals_x.resize(2*i); 00854 intervals_y.resize(2*i+1); 00855 } 00856 } 00857 00858 /*NOTE: this used to be a BUG!!! the following used to be: 00859 intervals_x.resize(i-1); 00860 intervals_y.resize(i); 00861 which made both vectors 1 too short... 00862 ...no value would ever fall in the last class 00863 */ 00864 intervals_x.resize(i); 00865 intervals_y.resize(i+1); 00866 } 00867 00868 SDBVMFieldRemapIntervals::SDBVMFieldRemapIntervals(SDBVMSource source, 00869 const string& mappings, 00870 real other_values_mapping, 00871 real missing_values_mapping, 00872 SDBVMOutputCoding oc, 00873 VMField::FieldType ft) 00874 : inherited(source, 0, missing_values_mapping, oc, ft), 00875 other_values_mapping_(other_values_mapping) 00876 { 00877 real max_of_map; 00878 bool all_int; 00879 getIntervals(mappings, all_int, max_of_map, intervals_x_, intervals_y_); 00880 00881 // Initialize base-class member 00882 setNumClasses(SDBVMOutputCoder::handleOtherAndMissing( 00883 all_int, int(max_of_map), other_values_mapping, missing_values_mapping)); 00884 } 00885 00886 real SDBVMFieldRemapIntervals::getDiscreteValue(const SDBWithStats& sdb, 00887 const Row& row) const 00888 { 00889 FieldValue v = source_.getValue(sdb,row); 00890 if(v.isMissing()) 00891 return missing_values_mapping_; 00892 else 00893 return intervals_y_[1+int(binary_search(intervals_x_,real(v)))]; 00894 } 00895 00896 00897 //##### SDBVMFieldMultiDiscrete ############################################# 00898 00899 SDBVMFieldMultiDiscrete::SDBVMFieldMultiDiscrete(const FieldArray& fields, 00900 real missing_values_mapping, 00901 SDBVMOutputCoding oc, 00902 VMField::FieldType ft) 00903 : inherited(FieldPtr() /* "null pointer" */, 0, 00904 missing_values_mapping, oc, ft) 00905 { 00906 setFields(fields); 00907 } 00908 00909 void SDBVMFieldMultiDiscrete::setFields(const FieldArray& fields) 00910 { 00911 fields_ = fields; 00912 int n = fields.size(); 00913 field_multipliers_.resize(n); 00914 setNumClasses(0); 00915 00916 // Compute all partial products from i to n 00917 if (n>0) { 00918 int prod = 1; 00919 field_multipliers_[n-1] = 1; 00920 for (int i=n-2; i>=0; --i) { 00921 prod *= fields[i+1]->getNumClasses(); 00922 field_multipliers_[i] = prod; 00923 } 00924 prod *= fields[0]->getNumClasses(); 00925 setNumClasses(prod); 00926 } 00927 } 00928 00929 real SDBVMFieldMultiDiscrete::getDiscreteValue(const SDBWithStats& sdb, 00930 const Row& row) const 00931 { 00932 int index = 0; 00933 int n = fields_.size(); 00934 for (int i=0; i<n; ++i) { 00935 real value = fields_[i]->getDiscreteValue(sdb,row); 00936 if (value != int(value) || value < 0) 00937 PLERROR("SDBVMFieldMultiDiscrete::getDiscreteValue: negative or " 00938 "non-integer value %f returned for field %d", value, i); 00939 index += int(value)*int(field_multipliers_[i]); 00940 } 00941 return real(index); 00942 } 00943 00944 // int SDBVMFieldMultiDiscrete::fieldWidth() const 00945 // { 00946 // return 1; 00947 // } 00948 00949 00950 00951 //##### SDBVMFieldICBCTargets ############################################### 00952 00953 SDBVMFieldICBCTargets::SDBVMFieldICBCTargets(Schema schema, bool use_roadstar, 00954 bool add_claims_sum_column, bool rescale_by_interval, 00955 bool rescale_by_start_date, Mat& start_date_rescaling_values, const string& targetname) 00956 : inherited(0,VMField::Continuous), use_roadstar_(use_roadstar), 00957 add_claims_sum_column_(add_claims_sum_column), 00958 rescale_by_interval_(rescale_by_interval), 00959 rescale_by_start_date_(rescale_by_start_date), 00960 start_date_rescaling_values_(start_date_rescaling_values), 00961 targetname_(targetname), 00962 start_date_(schema("policy_start_date")), 00963 end_date_(schema("policy_end_date")), 00964 bodily_injury_incurred_((targetname_=="ALL" || targetname_=="bodily_injury_incurred") ? 00965 schema("bodily_injury_incurred") : FieldPtr()), 00966 property_damage_incurred_((targetname_=="ALL" || targetname_=="sum_all_but_BI" 00967 || targetname_=="property_damage_incurred") ? 00968 schema("property_damage_incurred") : FieldPtr()), 00969 accident_death_incurred_((targetname_=="ALL" || targetname_=="sum_all_but_BI" 00970 || targetname_=="accident_death_incurred") ? 00971 schema("accident_death_incurred") : FieldPtr()), 00972 collision_lou_incurred_((targetname_=="ALL" || targetname_=="sum_all_but_BI" 00973 || targetname_=="collision_lou_incurred") ? 00974 schema("collision_lou_incurred") : FieldPtr()), 00975 comprehensive_incurred_((targetname_=="ALL" || targetname_=="sum_all_but_BI" 00976 || targetname_=="comprehensive_incurred") ? 00977 schema("comprehensive_incurred") : FieldPtr()), 00978 bodily_injury_count_((targetname_=="ALLcounts" || targetname_=="bodily_injury_count") ? 00979 schema("bodily_injury_count") : FieldPtr()), 00980 property_damage_count_((targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" 00981 || targetname_=="property_damage_count") ? 00982 schema("property_damage_count") : FieldPtr()), 00983 accident_death_count_((targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" 00984 || targetname_=="accident_death_count") ? 00985 schema("accident_death_count") : FieldPtr()), 00986 collision_lou_count_((targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" 00987 || targetname_=="collision_lou_count") ? 00988 schema("collision_lou_count") : FieldPtr()), 00989 comprehensive_count_((targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" 00990 || targetname_=="comprehensive_count") ? 00991 schema("comprehensive_count") : FieldPtr()), 00992 bodily_injury_severity_((targetname_=="ALLseverities" || targetname_=="bodily_injury_severity") ? 00993 schema("bodily_injury_severity") : FieldPtr()), 00994 property_damage_severity_((targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" 00995 || targetname_=="property_damage_severity") ? 00996 schema("property_damage_severity") : FieldPtr()), 00997 accident_death_severity_((targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" 00998 || targetname_=="accident_death_severity") ? 00999 schema("accident_death_severity") : FieldPtr()), 01000 collision_lou_severity_((targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" 01001 || targetname_=="collision_lou_severity") ? 01002 schema("collision_lou_severity") : FieldPtr()), 01003 comprehensive_severity_((targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" 01004 || targetname_=="comprehensive_severity") ? 01005 schema("comprehensive_severity") : FieldPtr()) 01006 // roadstar_incurred_((use_roadstar && (targetname_ == "ALL" || targetname == "roadstar_incurred")) ? 01007 // schema("roadstar_incurred") : FieldPtr()) 01008 01009 { 01010 reference_start_date_year_ = rescale_by_start_date_ ? (int) start_date_rescaling_values_[0][0] : 0; 01011 reference_start_date_month_ = rescale_by_start_date_ ? (int) start_date_rescaling_values_[0][1] : 0; 01012 } 01013 01014 void SDBVMFieldICBCTargets::convertField(const SDBWithStats& sdb, 01015 const Row& row, const Vec& output) const 01016 { 01017 int i = 0; 01018 PDate start_date = start_date_.getValue(sdb,row).toDate(); 01019 PDate end_date = end_date_.getValue(sdb,row).toDate(); 01020 // ******* BIG HACK TO GET AROUND A NASTY INCOMPREHENSIBLE BUG **** 01021 // (with the value read from file at this point not corresponding 01022 // to the value as seen otherwise in the file for the date of certain records) 01023 if (start_date.year<1970) start_date.year = end_date.year-1; 01024 // **************************************************************** 01025 real normalization = 0.001; 01026 real duration = (end_date - start_date)/365.25; 01027 01028 if (rescale_by_interval_) { 01029 if (is_missing(duration) || duration<=0) 01030 { 01031 cout << "start_date = " << start_date << endl; 01032 cout << "end_date = " << end_date << endl; 01033 PLERROR("SDBVMFieldICBCTargets: incorrect dates"); 01034 } 01035 normalization = 0.001/duration; 01036 } 01037 if (rescale_by_start_date_) { 01038 int row_index = (start_date.year - reference_start_date_year_ - 1) * 12 01039 + 12 - reference_start_date_month_ + start_date.month; 01040 if (targetname_=="ALL" || targetname_=="bodily_injury_incurred") 01041 { 01042 if (start_date_rescaling_values_[row_index][2] == 0) 01043 PLERROR("Trying to divide by zero"); 01044 output[i++] = real(bodily_injury_incurred_.getValue(sdb,row))*normalization / 01045 start_date_rescaling_values_[row_index][2]; 01046 } 01047 if (targetname_=="ALL" || targetname_=="property_damage_incurred") 01048 { 01049 if (start_date_rescaling_values_[row_index][3] == 0) 01050 PLERROR("Trying to divide by zero"); 01051 output[i++] = real(property_damage_incurred_.getValue(sdb,row))*normalization / 01052 start_date_rescaling_values_[row_index][3]; 01053 } 01054 if (targetname_=="ALL" || targetname_=="accident_death_incurred") 01055 { 01056 if (start_date_rescaling_values_[row_index][4] == 0) 01057 PLERROR("Trying to divide by zero"); 01058 output[i++] = real(accident_death_incurred_.getValue(sdb,row))*normalization / 01059 start_date_rescaling_values_[row_index][4]; 01060 } 01061 if (targetname_=="ALL" || targetname_=="collision_lou_incurred") 01062 { 01063 if (start_date_rescaling_values_[row_index][5] == 0) 01064 PLERROR("Trying to divide by zero"); 01065 output[i++] = real(collision_lou_incurred_.getValue(sdb,row))*normalization / 01066 start_date_rescaling_values_[row_index][5]; 01067 } 01068 if (targetname_=="ALL" || targetname_=="comprehensive_incurred") 01069 { 01070 if (start_date_rescaling_values_[row_index][6] == 0) 01071 PLERROR("Trying to divide by zero"); 01072 output[i++] = real(comprehensive_incurred_.getValue(sdb,row))*normalization / 01073 start_date_rescaling_values_[row_index][6]; 01074 } 01075 if (targetname_=="sum_all_but_BI") 01076 { 01077 if (start_date_rescaling_values_[row_index][3] == 0 || 01078 start_date_rescaling_values_[row_index][4] == 0 || 01079 start_date_rescaling_values_[row_index][5] == 0 || 01080 start_date_rescaling_values_[row_index][6] == 0) 01081 PLERROR("Trying to divide by zero"); 01082 output[i++] = (real(property_damage_incurred_.getValue(sdb,row))/ 01083 start_date_rescaling_values_[row_index][3] + 01084 real(accident_death_incurred_.getValue(sdb,row))/ 01085 start_date_rescaling_values_[row_index][4] + 01086 real(collision_lou_incurred_.getValue(sdb,row))/ 01087 start_date_rescaling_values_[row_index][5] + 01088 real(comprehensive_incurred_.getValue(sdb,row))/ 01089 start_date_rescaling_values_[row_index][6])*normalization; 01090 } 01091 } 01092 else { // no rescale by start date 01093 if (targetname_=="ALL" || targetname_=="bodily_injury_incurred") 01094 output[i++] = real(bodily_injury_incurred_.getValue(sdb,row))*normalization; 01095 if (targetname_=="ALL" || targetname_=="property_damage_incurred") 01096 output[i++] = real(property_damage_incurred_.getValue(sdb,row))*normalization; 01097 if (targetname_=="ALL" || targetname_=="accident_death_incurred") 01098 output[i++] = real(accident_death_incurred_.getValue(sdb,row))*normalization; 01099 if (targetname_=="ALL" || targetname_=="collision_lou_incurred") 01100 output[i++] = real(collision_lou_incurred_.getValue(sdb,row))*normalization; 01101 if (targetname_=="ALL" || targetname_=="comprehensive_incurred") 01102 output[i++] = real(comprehensive_incurred_.getValue(sdb,row))*normalization; 01103 if (targetname_=="sum_all_but_BI") 01104 output[i++] = (real(property_damage_incurred_.getValue(sdb,row))+ 01105 real(accident_death_incurred_.getValue(sdb,row))+ 01106 real(collision_lou_incurred_.getValue(sdb,row))+ 01107 real(comprehensive_incurred_.getValue(sdb,row)))*normalization; 01108 01109 //counts 01110 if (targetname_=="ALLcounts" || targetname_=="bodily_injury_count") 01111 output[i++] = real(bodily_injury_count_.getValue(sdb,row)); 01112 if (targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" || targetname_=="property_damage_count") 01113 output[i++] = real(property_damage_count_.getValue(sdb,row)); 01114 if (targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" || targetname_=="accident_death_count") 01115 output[i++] = real(accident_death_count_.getValue(sdb,row)); 01116 if (targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" || targetname_=="collision_lou_count") 01117 output[i++] = real(collision_lou_count_.getValue(sdb,row)); 01118 if (targetname_=="ALLcounts" || targetname_=="all_counts_but_BI" || targetname_=="comprehensive_count") 01119 output[i++] = real(comprehensive_count_.getValue(sdb,row)); 01120 01121 01122 //severities 01123 if (targetname_=="ALLseverities" || targetname_=="bodily_injury_severity") 01124 { 01125 int n = int(real(bodily_injury_count_.getValue(sdb,row))); 01126 output[i++] = n>0? real(bodily_injury_incurred_.getValue(sdb,row)) / n: 0; 01127 } 01128 if (targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" || targetname_=="property_damage_severity") 01129 { 01130 int n = int(real(property_damage_count_.getValue(sdb,row))); 01131 output[i++] = n>0? real(property_damage_incurred_.getValue(sdb,row)) / n: 0; 01132 } 01133 if (targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" || targetname_=="accident_death_severity") 01134 { 01135 int n = int(real(accident_death_count_.getValue(sdb,row))); 01136 output[i++] = n>0? real(accident_death_incurred_.getValue(sdb,row)) / n: 0; 01137 } 01138 if (targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" || targetname_=="collision_lou_severity") 01139 { 01140 int n = int(real(collision_lou_count_.getValue(sdb,row))); 01141 output[i++] = n>0? real(collision_lou_incurred_.getValue(sdb,row)) / n: 0; 01142 } 01143 if (targetname_=="ALLseverities" || targetname_=="all_severities_but_BI" || targetname_=="comprehensive_severity") 01144 { 01145 int n = int(real(comprehensive_count_.getValue(sdb,row))); 01146 output[i++] = n>0? real(comprehensive_incurred_.getValue(sdb,row)) / n: 0; 01147 } 01148 01149 //severity weights 01150 if (targetname_=="ALLseverities") 01151 output[i++] = real(bodily_injury_count_.getValue(sdb,row))>0? 1 : 0; 01152 if (targetname_=="ALLseverities" || targetname_=="all_severities_but_BI") 01153 { 01154 output[i++] = real(property_damage_count_.getValue(sdb,row))>0? 1 : 0; 01155 output[i++] = real(accident_death_count_.getValue(sdb,row))>0? 1 : 0; 01156 output[i++] = real(collision_lou_count_.getValue(sdb,row))>0? 1 : 0; 01157 output[i++] = real(comprehensive_count_.getValue(sdb,row))>0? 1 : 0; 01158 } 01159 01160 } 01161 if (add_claims_sum_column_) { 01162 if (targetname_=="ALL") 01163 output[i] = output[0] + output[1] + output[2] + output[3] + output[4]; 01164 else 01165 output[i] = output[0]; 01166 i++; 01167 } 01168 if (rescale_by_interval_) 01169 output[i] = duration; // weight that should be given to squared loss 01170 // REPLACE MISSING VALUES BY ZEROS 01171 convertMissing(output); 01172 } 01173 01174 01175 //##### SDBVMFieldHasClaim ############################################### 01176 01177 void SDBVMFieldHasClaim::convertField(const SDBWithStats& sdb, 01178 const Row& row, Vec& output) const 01179 { 01180 real a,b,c,d,e,f; 01181 a = real(row.bind(bodily_injury_incurred_).toDouble()); 01182 b = real(row.bind(property_damage_incurred_).toDouble()); 01183 c = real(row.bind(accident_death_incurred_).toDouble()); 01184 d = real(row.bind(collision_lou_incurred_).toDouble()); 01185 e = real(row.bind(comprehensive_incurred_).toDouble()); 01186 f = real(row.bind(roadstar_incurred_).toDouble()); 01187 output[0] = (a!=0) || (b!=0) || (c!=0) || (d!=0) || (e!=0) || (f!=0); 01188 // REPLACE MISSING VALUES BY ZEROS 01189 convertMissing(output); 01190 } 01191 01192 //##### SDBVMFieldSumClaims ############################################### 01193 01194 void SDBVMFieldSumClaims::convertField(const SDBWithStats& sdb, 01195 const Row& row, Vec& output) const 01196 { 01197 real a,b,c,d,e,f; 01198 a = real(row.bind(bodily_injury_incurred_).toDouble()); 01199 b = real(row.bind(property_damage_incurred_).toDouble()); 01200 c = real(row.bind(accident_death_incurred_).toDouble()); 01201 d = real(row.bind(collision_lou_incurred_).toDouble()); 01202 e = real(row.bind(comprehensive_incurred_).toDouble()); 01203 f = real(row.bind(roadstar_incurred_).toDouble()); 01204 output[0] = a+b+c+d+e+f; 01205 // REPLACE MISSING VALUES BY ZEROS 01206 convertMissing(output); 01207 } 01208 01209 01210 int ICBCpartition(const Vec& claims, real threshold) 01211 { 01212 bool flag_big = 0; 01213 bool flag_pos = 0; 01214 bool flag_neg = 0; 01215 01216 for (int j=0; j<claims.length(); j++) 01217 { 01218 if (claims[j]>threshold) {flag_big=1;} 01219 else if (claims[j]>0) {flag_pos=1;} 01220 else if (claims[j]<0) {flag_neg=1;} 01221 } 01222 01223 if (flag_big) return 3; 01224 else if (flag_pos) return 2; 01225 else if (flag_neg) return 0; 01226 else return 1; 01227 } 01228 01229 01230 //##### SDBVMFieldICBCClassification ############################################### 01231 01232 SDBVMFieldICBCClassification::SDBVMFieldICBCClassification(Schema schema, const string& fieldname,const string& tmap_file) 01233 : inherited(0), 01234 bodily_injury_incurred_(schema("bodily_injury_incurred")), 01235 property_damage_incurred_(schema("property_damage_incurred")), 01236 accident_death_incurred_(schema("accident_death_incurred")), 01237 collision_lou_incurred_(schema("collision_lou_incurred")), 01238 comprehensive_incurred_(schema("comprehensive_incurred")), 01239 policy_start_date_(schema("policy_start_date")), 01240 fieldname_(fieldname) 01241 { 01242 01243 // is this used anyway? threshold seems to be hardcoded in ConvertField 01244 if (fieldname == "") 01245 threshold = 10000; 01246 else if (fieldname == "condprob3") 01247 threshold = 10000; 01248 else if (fieldname == "bodily_injury_incurred") 01249 threshold = 50000; 01250 else if (fieldname == "property_damage_incurred") 01251 threshold = 4000; 01252 else if (fieldname == "accident_death_incurred") 01253 threshold = 12000; 01254 else if (fieldname == "collision_lou_incurred") 01255 threshold = 5000; 01256 else if (fieldname == "comprehensive_incurred") 01257 threshold = 1000; 01258 else if (fieldname == "sum_all_but_BI") 01259 threshold = 10000; 01260 else 01261 PLERROR("Bad field fieldname"); 01262 } 01263 01264 void SDBVMFieldICBCClassification::convertField(const SDBWithStats& sdb, 01265 const Row& row, const Vec& output) const 01266 { 01267 int threshold = 10000; 01268 Vec claims(1); 01269 if (fieldname_ == "") 01270 { 01271 claims.resize(5); 01272 claims[0] = real(bodily_injury_incurred_.getValue(sdb,row)); 01273 claims[1] = real(property_damage_incurred_.getValue(sdb,row)); 01274 claims[2] = real(accident_death_incurred_.getValue(sdb,row)); 01275 claims[3] = real(collision_lou_incurred_.getValue(sdb,row)); 01276 claims[4] = real(comprehensive_incurred_.getValue(sdb,row)); 01277 } 01278 // ugly hack 01279 else if (fieldname_ == "condprob3") 01280 { 01281 claims[0] = real(property_damage_incurred_.getValue(sdb,row)) 01282 + real(accident_death_incurred_.getValue(sdb,row)) 01283 + real(collision_lou_incurred_.getValue(sdb,row)) 01284 + real(comprehensive_incurred_.getValue(sdb,row)); 01285 output[0]=claims[0]<=0?0:1; 01286 01287 return; 01288 } 01289 01290 else if (fieldname_ == "bodily_injury_incurred") 01291 claims[0] = real(bodily_injury_incurred_.getValue(sdb,row)); 01292 else if (fieldname_ == "property_damage_incurred") 01293 claims[0] = real(property_damage_incurred_.getValue(sdb,row)); 01294 else if (fieldname_ == "accident_death_incurred") 01295 claims[0] = real(accident_death_incurred_.getValue(sdb,row)); 01296 else if (fieldname_ == "collision_lou_incurred") 01297 claims[0] = real(collision_lou_incurred_.getValue(sdb,row)); 01298 else if (fieldname_ == "comprehensive_incurred") 01299 claims[0] = real(comprehensive_incurred_.getValue(sdb,row)); 01300 else if (fieldname_ == "sum_all_but_BI") 01301 claims[0] = real(property_damage_incurred_.getValue(sdb,row)) + 01302 real(accident_death_incurred_.getValue(sdb,row)) + 01303 real(collision_lou_incurred_.getValue(sdb,row)) + 01304 real(comprehensive_incurred_.getValue(sdb,row)); 01305 output[0] = ICBCpartition(claims, threshold); 01306 01307 } 01308 01309 01310 01311 01312 01313 01314 01315 01316 01317 01318 01319 01320 01321 } // end of namespace PLearn

Generated on Tue Aug 17 16:04:41 2004 for PLearn by doxygen 1.3.7