PLearn: EmbeddedSequentialLearner.cc Source File

00001 // -*- C++ -*- 00002 00003 // EmbeddedSequentialLearner.cc 00004 // 00005 // Copyright (C) 2003 Rejean Ducharme, Yoshua Bengio 00006 // Copyright (C) 2003 Pascal Vincent 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 #include "EmbeddedSequentialLearner.h" 00039 #include <plearn/vmat/TemporalHorizonVMatrix.h> 00040 #include <plearn/io/TmpFilenames.h> 00041 #include <plearn/vmat/VMat_maths.h> 00042 00043 namespace PLearn { 00044 using namespace std; 00045 00046 00047 PLEARN_IMPLEMENT_OBJECT(EmbeddedSequentialLearner, "ONE LINE DESCR", "NO HELP"); 00048 00049 EmbeddedSequentialLearner::EmbeddedSequentialLearner() 00050 {} 00051 00052 void EmbeddedSequentialLearner::makeDeepCopyFromShallowCopy(CopiesMap& copies) 00053 { 00054 inherited::makeDeepCopyFromShallowCopy(copies); 00055 deepCopyField(learner, copies); 00056 } 00057 00058 void EmbeddedSequentialLearner::build_() 00059 { 00060 if (learner.isNull()) 00061 PLERROR("EmbeddedSequentialLearner::build()_ - learner attribute is NULL"); 00062 00063 learner->build(); 00064 00065 forget(); 00066 } 00067 00068 void EmbeddedSequentialLearner::build() 00069 { 00070 inherited::build(); 00071 build_(); 00072 } 00073 00074 void EmbeddedSequentialLearner::declareOptions(OptionList& ol) 00075 { 00076 declareOption(ol, "learner", &EmbeddedSequentialLearner::learner, 00077 OptionBase::buildoption, "The underlying learner \n"); 00078 00079 inherited::declareOptions(ol); 00080 } 00081 00082 void EmbeddedSequentialLearner::train() 00083 { 00084 // TODO: this code should be moved to overrided setTrainingSet and setTrainStatsCollector (Pascal&Nicolas) 00085 00086 int t = train_set.length(); 00087 if (t >= last_train_t+train_step) 00088 { 00089 VMat aligned_set = new TemporalHorizonVMatrix(train_set, horizon, targetsize()); // last training pair is (t-1-horizon,t-1) 00090 int start = (max_train_len<0) ? 0 : max(0,aligned_set.length()-max_train_len); 00091 int len = aligned_set.length()-start; 00092 TmpFilenames tmpfile; 00093 // TODO: Remove the ugly, grotesque, brittle and unnecessay use of an "indexfile" (Nicolas&Pascal) 00094 string index_fname = tmpfile.addFilename(); 00095 VMat aligned_set_non_missing = filter(aligned_set.subMatRows(start,len), index_fname); 00096 learner->setTrainingSet(aligned_set_non_missing); 00097 learner->setTrainStatsCollector(train_stats); 00098 learner->train(); 00099 last_train_t = t; 00100 } 00101 00102 // BUG? what about setting last_call_train_t ??? 00103 } 00104 00105 void EmbeddedSequentialLearner::test(VMat testset, PP<VecStatsCollector> test_stats, 00106 VMat testoutputs, VMat testcosts) const 00107 { 00108 int l = testset.length(); 00109 Vec input, target; 00110 static Vec dummy_input; 00111 real weight; 00112 00113 Vec output(testoutputs ?outputsize() :0); 00114 Vec costs(nTestCosts()); 00115 00116 //testset->defineSizes(inputsize(),targetsize(),weightsize()); 00117 00118 //test_stats.forget(); 00119 00120 // We DON'T allow in-sample testing; hence, we test either from the end of the 00121 // last test, or the end of the training set. The last_train_t MINUS 1 is because 00122 // we allow the last training day to be part of the test set. Example: using 00123 // today's price, we can train a model and then use it to make a prediction that 00124 // has today's price as input (all that WITHOUT CHEATING or breaking the Criminal 00125 // Code.) 00126 int start = MAX(last_train_t-1,last_test_t); 00127 ProgressBar* pb = NULL; 00128 if(report_progress) 00129 pb = new ProgressBar("Testing learner",l-start); 00130 for (int t=start; t<testset.length(); t++) 00131 { 00132 testset.getExample(t, input, target, weight); 00133 //testset.getSample(t-last_call_train_t+1, input, dummy_target, weight); 00134 //testset.getSample(t-last_call_train_t+1+horizon, dummy_input, target, dummy_weight); 00135 00136 if (!input.hasMissing()) 00137 { 00138 Vec output = predictions(t); 00139 learner->computeOutput(input, output); 00140 if (testoutputs) testoutputs->appendRow(output); 00141 } 00142 if (t>=horizon) 00143 { 00144 Vec output = predictions(t-horizon); 00145 if (!target.hasMissing() && !output.hasMissing()) 00146 { 00147 Vec error_t = errors(t); 00148 learner->computeCostsFromOutputs(dummy_input, output, target, error_t); 00149 if (testcosts) testcosts->appendRow(error_t); 00150 test_stats->update(error_t); 00151 } 00152 //learner->computeOutputAndCosts(input, target, weight, output, costs); 00153 //predictions(t) << output; 00154 //errors(t+horizon) << costs; 00155 00156 if (pb) 00157 pb->update(t-start); 00158 } 00159 } 00160 last_test_t = testset.length(); 00161 00162 test_stats->finalize(); 00163 00164 if (pb) 00165 delete pb; 00166 } 00167 00168 void EmbeddedSequentialLearner::forget() 00169 { 00170 // BUG? call inherited::forget(); ??? 00171 learner->forget(); 00172 } 00173 00174 void EmbeddedSequentialLearner::computeOutput(const Vec& input, Vec& output) 00175 { learner->computeOutput(input, output); } 00176 00177 void EmbeddedSequentialLearner::computeCostsFromOutputs(const Vec& input, const Vec& output, 00178 const Vec& target, Vec& costs) 00179 { learner->computeCostsFromOutputs(input, output, target, costs); } 00180 00181 void EmbeddedSequentialLearner::computeOutputAndCosts(const Vec& input, const Vec& target, 00182 Vec& output, Vec& costs) 00183 { learner->computeOutputAndCosts(input, target, output, costs); } 00184 00185 void EmbeddedSequentialLearner::computeCostsOnly(const Vec& input, const Vec& target, Vec& costs) 00186 { learner->computeCostsOnly(input, target, costs); } 00187 00188 TVec<string> EmbeddedSequentialLearner::getTestCostNames() const 00189 { return learner->getTestCostNames(); } 00190 00191 TVec<string> EmbeddedSequentialLearner::getTrainCostNames() const 00192 { return learner->getTrainCostNames(); } 00193 00194 00195 } // end of namespace PLearn 00196