PLearn: SparseVMatrix.cc Source File

// -*- C++ -*-

// PLearn (A C++ Machine Learning Library)
// Copyright (C) 1998 Pascal Vincent
// Copyright (C) 1999-2001 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal
// Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library.
For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 /* ******************************************************* 00038 * $Id: SparseVMatrix.cc,v 1.6 2004/07/09 19:42:23 tihocan Exp $ 00039 ******************************************************* */ 00040 00041 #include "SparseVMatrix.h" 00042 00043 namespace PLearn { 00044 using namespace std; 00045 00046 00049 PLEARN_IMPLEMENT_OBJECT(SparseVMatrix, "ONE LINE DESC", "NO HELP"); 00050 00051 SparseVMatrix::SparseVMatrix(const string& filename) 00052 : nelements(0), positions(0), values(0), rows(0) 00053 { 00054 load(filename); 00055 } 00056 00057 SparseVMatrix::SparseVMatrix(VMat m) 00058 : inherited(m.length(),m.width()), nelements(0), positions(0), values(0), rows(0) 00059 { 00060 fieldinfos = m->getFieldInfos(); // Copy the field infos 00061 00062 if(m.width()>USHRT_MAX) 00063 PLERROR("In SparseVMatrix constructor: m.width()=%d can't be greater than USHRT_MAX=%d",m.width(),USHRT_MAX); 00064 Vec v(m.width()); 00065 real* vptr = v.data(); 00066 00067 // First count nelements 00068 nelements = 0; 00069 if(m->hasStats()) // use the stats! 00070 { 00071 for(int j=0; j<m.width(); j++) 00072 { 00073 const VMFieldStat& st = m->fieldStat(j); 00074 nelements += st.nmissing() + st.npositive() + st.nnegative(); 00075 } 00076 } 00077 else // let's count them ourself 00078 { 00079 for(int i=0; i<m.length(); i++) 00080 { 00081 m->getRow(i,v); 00082 for(int j=0; j<v.length(); j++) 00083 if(vptr[j]!=0.) 
00084 nelements++; 00085 } 00086 } 00087 00088 // Now allocate space for those elements 00089 if(nelements>0) 00090 { 00091 positions = new unsigned short[nelements]; 00092 values = new float[nelements]; 00093 int l=length(); 00094 rows = new SparseVMatrixRow[l]; 00095 00096 int pos = 0; 00097 // Fill the representation 00098 for(int i=0; i<m.length(); i++) 00099 { 00100 m->getRow(i,v); 00101 SparseVMatrixRow& r = rows[i]; 00102 r.row_startpos = pos; 00103 int nelem = 0; 00104 for(int j=0; j<v.length(); j++) 00105 if(vptr[j]!=0.) 00106 { 00107 positions[pos] = j; 00108 values[pos] = (float)vptr[j]; 00109 pos++; 00110 nelem++; 00111 } 00112 r.nelements = nelem; 00113 } 00114 } 00115 } 00116 00117 void 00118 SparseVMatrix::build() 00119 { 00120 inherited::build(); 00121 build_(); 00122 } 00123 00124 void 00125 SparseVMatrix::build_() 00126 { 00127 // TODO 00128 } 00129 00130 void 00131 SparseVMatrix::declareOptions(OptionList &ol) 00132 { 00133 inherited::declareOptions(ol); 00134 } 00135 00136 void SparseVMatrix::getNewRow(int i, const Vec& v) const 00137 { 00138 #ifdef BOUNDCHECK 00139 if(i<0 || i>=length()) 00140 PLERROR("In SparseVMatrix::getNewRow, row number i=%d OUT OF BOUNDS (matrix is %dx%d)",i,length(),width()); 00141 if(v.length()!=width()) 00142 PLERROR("In SparseVMatrix::getNewRow, length of v (%d) is different from width of VMatris (%d)",v.length(),width()); 00143 #endif 00144 00145 if(nelements==0) 00146 v.clear(); 00147 else 00148 { 00149 SparseVMatrixRow row_i = rows[i]; 00150 float* valueptr = values + row_i.row_startpos; 00151 unsigned short* positionptr = positions + row_i.row_startpos; 00152 int n = row_i.nelements; 00153 00154 real* vdata = v.data(); 00155 00156 int j = 0; 00157 while(n--) 00158 { 00159 int nextpos = (int) *positionptr++; 00160 real nextval = (real) *valueptr++; 00161 while(j<nextpos) 00162 vdata[j++] = 0.; 00163 vdata[j++] = nextval; 00164 } 00165 while(j<v.length()) 00166 vdata[j++] = 0.; 00167 } 00168 } 00169 00170 real 
SparseVMatrix::dot(int i1, int i2, int inputsize) const 00171 { 00172 #ifdef BOUNDCHECK 00173 if(i1<0 || i1>=length() || i2<0 || i2>=length() || inputsize>width()) 00174 PLERROR("IN SparseVMatrix::dot OUT OF BOUNDS"); 00175 #endif 00176 00177 if(nelements==0) 00178 return 0.; 00179 00180 SparseVMatrixRow row_1 = rows[i1]; 00181 float* valueptr_1 = values + row_1.row_startpos; 00182 unsigned short* positionptr_1 = positions + row_1.row_startpos; 00183 int n_1 = row_1.nelements; 00184 00185 SparseVMatrixRow row_2 = rows[i2]; 00186 float* valueptr_2 = values + row_2.row_startpos; 00187 unsigned short* positionptr_2 = positions + row_2.row_startpos; 00188 int n_2 = row_2.nelements; 00189 00190 real res = 0.; 00191 00192 while(n_1 && n_2) 00193 { 00194 if(*positionptr_1>=inputsize) 00195 break; 00196 if(*positionptr_1==*positionptr_2) 00197 { 00198 res += (*valueptr_1)*(*valueptr_2); 00199 positionptr_1++; 00200 valueptr_1++; 00201 n_1--; 00202 positionptr_2++; 00203 valueptr_2++; 00204 n_2--; 00205 } 00206 else if(*positionptr_1<*positionptr_2) 00207 { 00208 positionptr_1++; 00209 valueptr_1++; 00210 n_1--; 00211 } 00212 else 00213 { 00214 positionptr_2++; 00215 valueptr_2++; 00216 n_2--; 00217 } 00218 } 00219 00220 return res; 00221 } 00222 00223 real SparseVMatrix::dot(int i, const Vec& v) const 00224 { 00225 #ifdef BOUNDCHECK 00226 if(i<0 || i>=length() || v.length()>width()) 00227 PLERROR("IN SparseVMatrix::dot OUT OF BOUNDS"); 00228 #endif 00229 00230 if(nelements==0) 00231 return 0.; 00232 00233 SparseVMatrixRow row_i = rows[i]; 00234 float* valueptr = values + row_i.row_startpos; 00235 unsigned short* positionptr = positions + row_i.row_startpos; 00236 int n = row_i.nelements; 00237 00238 real* vdata = v.data(); 00239 real res = 0.; 00240 00241 while(n--) 00242 { 00243 int nextpos = (int) *positionptr++; 00244 real nextval = (real) *valueptr++; 00245 if(nextpos>=v.length()) 00246 break; 00247 res += nextval*vdata[nextpos]; 00248 } 00249 return res; 00250 } 00251 
/* 00252 void SparseVMatrix::write(ostream& out) const 00253 { 00254 writeHeader(out,"SparseVMatrix"); 00255 writeField(out,"length",length_); 00256 writeField(out,"width",width_); 00257 writeField(out,"fieldinfos",fieldinfos); 00258 writeField(out,"fieldstats",fieldstats); 00259 writeField(out,"nelements",nelements); 00260 write_ushort(out,positions,nelements,false); 00261 write_float(out,values,nelements,false); 00262 for(int i=0; i<length(); i++) 00263 { 00264 write_int(out,rows[i].nelements); 00265 write_int(out,rows[i].row_startpos); 00266 } 00267 writeFooter(out,"SparseVMatrix"); 00268 } 00269 00270 void SparseVMatrix::oldread(istream& in) 00271 { 00272 readHeader(in,"SparseVMatrix"); 00273 readField(in,"length",length_); 00274 readField(in,"width",width_); 00275 readField(in,"fieldinfos",fieldinfos); 00276 fieldinfos.resize(0); // to fix current bug in setting fieldinfos 00277 readField(in,"fieldstats",fieldstats); 00278 00279 if(nelements>0) 00280 { 00281 delete[] positions; 00282 delete[] values; 00283 delete[] rows; 00284 } 00285 readField(in,"nelements",nelements); 00286 positions = new unsigned short[nelements]; 00287 values = new float[nelements]; 00288 rows = new SparseVMatrixRow[length()]; 00289 00290 read_ushort(in,positions,nelements,false); 00291 read_float(in,values,nelements,false); 00292 for(int i=0; i<length(); i++) 00293 { 00294 rows[i].nelements = read_int(in); 00295 rows[i].row_startpos = read_int(in); 00296 } 00297 readFooter(in,"SparseVMatrix"); 00298 } 00299 */ 00300 SparseVMatrix::~SparseVMatrix() 00301 { 00302 if(nelements>0) 00303 { 00304 delete[] positions; 00305 delete[] values; 00306 delete[] rows; 00307 } 00308 } 00309 00310 00311 } // end of namespcae PLearn