00001 // -*- C++ -*- 00002 00003 // VVMatrix.h 00004 // Copyright (C) 2002 Pascal Vincent and Julien Keable 00005 // 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are met: 00008 // 00009 // 1. Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // 00012 // 2. Redistributions in binary form must reproduce the above copyright 00013 // notice, this list of conditions and the following disclaimer in the 00014 // documentation and/or other materials provided with the distribution. 00015 // 00016 // 3. The name of the authors may not be used to endorse or promote 00017 // products derived from this software without specific prior written 00018 // permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00021 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00022 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00023 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00025 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00026 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00027 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00028 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00029 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 // 00031 // This file is part of the PLearn library. For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 /* ******************************************************* 00035 * $Id: VVMatrix.h,v 1.8 2004/07/26 20:12:44 tihocan Exp $ 00036 * This file is part of the PLearn library. 00037 ******************************************************* */ 00038 00039 #ifndef VVMatrix_INC 00040 #define VVMatrix_INC 00041 00042 #include "VMat.h" 00043 00044 namespace PLearn { 00045 using namespace std; 00046 00048 class VVMatrix: public VMatrix 00049 { 00050 00051 private: 00052 00053 typedef VMatrix inherited; 00054 00055 protected: 00056 00057 string code; 00058 VMat the_mat; 00059 00060 static void declareOptions(OptionList& ol); 00061 00063 // returns the precomputed version if it can 00064 static VMat createPreproVMat(const string & filename); 00065 00066 // generate a file (ivfname) containing indexes of rows of 'source' that remain after filtering with 00067 // the *every* possible step that changes the index of rows (i.e : prefilter, shuffle.. postfiltering) 00068 // -- Not optimal, since it will first *precompute* if any postfilter is required 00069 static void generateVMatIndex(VMat source, const string& meta_data_dir, 00070 const string & filename, time_t date_of_code,const string & in, 00071 size_t idx_prefilter, size_t cidx_prefilter, 00072 size_t idx_postfilter, size_t cidx_postfilter, 00073 size_t idx_process, size_t cidx_process, 00074 size_t idx_shuffle, size_t cidx_shuffle, 00075 size_t idx_join, size_t cidx_join); 00076 // returns the result from the join operation 00077 static void processJoinSection(const vector<string> & code, VMat & tmpsource); 00078 // returns a 2d-array that contains the structure of the source datasets that will be concatenated 00079 static vector<vector<string> > extractSourceMatrix(const string & str,const string& filename); 00080 // generate a file (ivfname) containing indexes of rows of 'source' that remain after filtering with 'code' 00081 static void generateFilterIndexFile(VMat source, const string & code, const string& ivfname); 00082 00083 public: 00084 00085 // public build options 00086 string the_filename; 00087 00088 public: 00089 00090 PLEARN_DECLARE_OBJECT(VVMatrix); 00091 00092 virtual void build(); 00093 00094 const string & getCode(){return code;} 00095 00097 static time_t getDateOfVMat(const string& filename); 00098 00102 static VMat buildFilteredVMatFromVPL(VMat source, const string & code, const string& ivfname, time_t date_of_code); 00103 00105 bool isPrecomputedAndUpToDate(); 00106 00107 // returns a filename for the precomputed dataset (which you could load for example with getDataSet) 00108 string getPrecomputedDataName(); 00109 00110 VVMatrix(const string& filename_):the_filename(filename_){build_();} 00111 VVMatrix(){}; 00112 00113 // **************************************************** 00114 // POSSIBLE "cache" IMPROVEMENT.. need to check it out with pascal 00115 // would it be a good idea to systematically wrap "the_mat" with a RowBufferedVMatrix ? 00116 00117 // string maps are those loaded from the .vmat metadatadir, not those of the source vmatrix anymore 00118 // could be changed.. 00119 00120 // virtual string getValString(int col, real val) const; 00121 // virtual real getStringVal(int col, const string & str) const; 00122 // virtual string getString(int row,int col) const; 00123 // virtual const hash_map<string,real>& getStringToRealMapping(int col) const; 00124 00125 virtual real get(int i, int j) const {return the_mat->get(i,j);} 00126 virtual void getSubRow(int i, int j, Vec v) const {the_mat->getSubRow(i,j,v);} 00127 00129 virtual void makeDeepCopyFromShallowCopy(map<const void*, void*>& copies); 00130 00131 private: 00132 00133 void build_(); 00134 00135 }; 00136 00137 DECLARE_OBJECT_PTR(VVMatrix); 00138 00139 } // end of namespace PLearn 00140 00141 00142 #endif // VVMatrix_INC