PLearn: TMat_sort.h Source File

00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 00039 /* ******************************************************* 00040 * $Id: TMat_sort.h,v 1.1 2004/04/17 00:44:55 plearner Exp $ 00041 * AUTHORS: Pascal Vincent & Yoshua Bengio & Rejean Ducharme 00042 * This file is part of the PLearn library. 00043 ******************************************************* */ 00044 00047 #ifndef TMat_sort_INC 00048 #define TMat_sort_INC 00049 00050 #include <algorithm> 00051 #include "TVec_impl.h" 00052 #include "TMat_impl.h" 00053 00054 namespace PLearn { 00055 using namespace std; 00056 00057 00058 template<class T> class SelectedIndicesCmp 00059 { 00060 private: 00061 TVec<int> indices; 00062 bool use_less_than; 00063 00064 public: 00065 typedef T first_argument_type; 00066 typedef T second_argument_type; 00067 typedef bool result_type; 00068 00069 SelectedIndicesCmp(TVec<int> the_indices, bool use_lexical_less_than=true) 00070 :indices(the_indices), use_less_than(use_lexical_less_than) 00071 {} 00072 00073 bool operator()(const T& x, const T& y) 00074 { 00075 int n = indices.length(); 00076 if(use_less_than) // lexical < 00077 { 00078 for(int k=0; k<n; k++) 00079 { 00080 int i = indices[k]; 00081 if(x[i]<y[i]) 00082 return true; 00083 else if(x[i]>y[i]) 00084 return false; 00085 } 00086 return false; 00087 } 00088 else // lexical > 00089 { 00090 for(int k=0; k<n; k++) 00091 { 00092 int i = indices[k]; 00093 if(x[i]>y[i]) 00094 return true; 00095 else if(x[i]<y[i]) 00096 return false; 00097 } 00098 return false; 00099 } 00100 } 00101 }; 00102 00103 template<class T> 00104 void sortRows(TMat<T>& mat, const TVec<int>& key_columns, bool increasing_order=true) 00105 { 00106 SelectedIndicesCmp< Array<T> > cmp(key_columns,increasing_order); 00107 sort(mat.rows_as_arrays_begin(), mat.rows_as_arrays_end(), cmp); 00108 } 00109 00110 00112 template<class T> 00113 inline void sortElements(const TVec<T>& vec) 00114 { sort(vec.begin(),vec.end()); } 00115 00116 00125 template<class T> 00126 void partialSortRows(const TMat<T>& mat, int k, int sortk=1, int col=0) 00127 { 00128 vector< pair<T,int> > order(mat.length()); 00129 typename vector< pair<T,int> >::iterator it = order.begin(); 00130 T* ptr = mat.data()+col; 00131 for(int i=0; i<mat.length(); ++i, ptr+=mat.mod(), ++it) 00132 { 00133 it->first = *ptr; 00134 it->second = i; 00135 } 00136 00137 typename vector< pair<T,int> >::iterator middle = order.begin(); 00138 for(int i=0; i<k; ++i, ++middle); 00139 00140 partial_sort(order.begin(),middle,order.end()); 00141 00142 // Build the new destination position array 00143 // (destpos is the inverse map of order.second) 00144 vector<int> destpos(mat.length()); 00145 for(int i=0; i<mat.length(); ++i) 00146 destpos[order[i].second] = i; 00147 00148 // Put elements wich are in the rows k to mat.length()-1 at their place if 00149 // their destpos is in the range 0 to k-1. If not, we leave them there. 00150 for(int startpos = mat.length()-1; startpos>=k; startpos--) 00151 { 00152 int dest = destpos[startpos]; 00153 if(dest!=-1) 00154 { 00155 while(dest<k) 00156 { 00157 mat.swapRows(startpos,dest); 00158 int newdest = destpos[dest]; 00159 destpos[dest] = -1; 00160 dest = newdest; 00161 } 00162 destpos[startpos] = -1; 00163 } 00164 } 00165 00166 if(sortk) { 00167 // Put the k firsts rows in the right order 00168 for(int startpos = 0; startpos<k; startpos++) 00169 { 00170 int dest = destpos[startpos]; 00171 if(dest!=-1) 00172 { 00173 while(dest!=startpos) 00174 { 00175 mat.swapRows(startpos,dest); 00176 int newdest = destpos[dest]; 00177 destpos[dest] = -1; 00178 dest = newdest; 00179 } 00180 destpos[startpos] = -1; 00181 } 00182 } 00183 } 00184 } 00185 00193 template<class T> 00194 void sortRows(const TMat<T>& mat, int col=0, bool increasing_order=true) 00195 { 00196 vector< pair<T,int> > order(mat.length()); 00197 typename vector< pair<T,int> >::iterator it = order.begin(); 00198 T* ptr = mat.data()+col; 00199 for(int i=0; i<mat.length(); ++i, ptr+=mat.mod(), ++it) 00200 { 00201 it->first = *ptr; 00202 it->second = i; 00203 } 00204 00205 if(increasing_order) 00206 sort(order.begin(),order.end()); 00207 else 00208 sort(order.begin(), order.end(), greater< pair<T,int> >() ); 00209 00210 // Build the new destination position array 00211 // (destpos is the inverse map of order.second) 00212 vector<int> destpos(mat.length()); 00213 for(int i=0; i<mat.length(); ++i) 00214 destpos[order[i].second] = i; 00215 00216 // Now put the full rows in order... 00217 for(int startpos = 0; startpos<mat.length(); startpos++) 00218 { 00219 int dest = destpos[startpos]; 00220 if(dest!=-1) 00221 { 00222 while(dest!=startpos) 00223 { 00224 mat.swapRows(startpos,dest); 00225 int newdest = destpos[dest]; 00226 destpos[dest] = -1; 00227 dest = newdest; 00228 } 00229 destpos[startpos] = -1; 00230 } 00231 } 00232 } 00233 00234 00235 00236 /* Not taken from Numerical Recipies: This is a quickly written and hihghly unefficient algorithm! */ 00237 /* Sorts the columns of the given matrix, in ascending order of its rownum row's elements */ 00238 template<class T> 00239 void sortColumns(const TMat<T>& mat, int rownum) 00240 { 00241 int j,jj,i; 00242 T min; 00243 T tmp; 00244 int mincol; 00245 00246 if(rownum>=mat.length()) 00247 PLERROR("In sortColumns: no rownumumn %d in matrix (%dx%d)",rownum,mat.width(),mat.length()); 00248 00249 for(j=0; j<mat.width(); j++) 00250 { 00251 // look, starting from col j, for the col with the minimum rownum element 00252 mincol = j; 00253 min = mat(rownum,j); 00254 for(jj=j; jj<mat.width(); jj++) 00255 { 00256 if(mat(rownum,jj)<min) 00257 { 00258 min = mat(rownum,jj); 00259 mincol = jj; 00260 } 00261 } 00262 if(mincol>j) /* Found a col with inferior value */ 00263 { 00264 /* So let's exchange cols j and mincol */ 00265 for(i=0; i<mat.length(); i++) 00266 { 00267 tmp = mat(i,j); 00268 mat(i,j) = mat(i,mincol); 00269 mat(i,mincol) = tmp; 00270 } 00271 } 00272 } 00273 } 00274 00275 00276 00277 // returns the position k of x in src such that src[k] <= x < src[k+1] 00278 // (returns -1 if x < src[0] or N-1 if x >= src[N-1]) 00279 // BE CAREFULL: src must be sort BEFORE the fonction binary_search is called 00280 template<class T> 00281 int binary_search(const TVec<T>& src, T x) 00282 { 00283 const int len = src.length(); 00284 00285 if (x < src[0]) return -1; 00286 else if (x >= src[len-1]) return len-1; 00287 00288 int start = 0; 00289 int end = len-1; 00290 for (;;) { 00291 int k = (start+end)/2; 00292 if (x >= src[k] && x < src[k+1]) { 00293 if (src[k] == src[k+1]) { 00294 start = k; 00295 end = k+1; 00296 while (start>0 && src[start-1]==src[start]) start--; 00297 while (end<len-1 && src[end]==src[end+1]) end++; 00298 k = (start+end)/2; 00299 } 00300 return k; 00301 } 00302 else if (x > src[k]) 00303 start = k+1; 00304 else 00305 end = k; 00306 } 00307 } 00308 00309 // Binary search according to the given column c of a matrix. The column 00310 // MUST BE SORTED. Return the row k such that src(k,c) <= x < src(k+1). 00311 // Returns -1 if x < src(0,c) or N-1 if x >= src(N-1,c). 00312 template <class T> 00313 int binary_search(const TMat<T>& src, int c, T x) 00314 { 00315 const int len = src.length(); 00316 00317 if (x < src(0,c)) 00318 return -1; 00319 else if (x >= src(len-1,c)) 00320 return len-1; 00321 00322 int start = 0, end = len-1; 00323 for (;;) { 00324 int k = (start+end)/2; 00325 if (x >= src(k,c) && x < src(k+1,c)) { 00326 if (src(k,c) == src(k+1,c)) { 00327 start = k; 00328 end = k+1; 00329 while (start > 0 && src(start-1,c) == src(start,c)) 00330 --start; 00331 while (end < len-1 && src(end,c) == src(end+1,c)) 00332 ++end; 00333 k = (start+end)/2; 00334 } 00335 return k; 00336 } 00337 else if (x > src(k,c)) 00338 start = k+1; 00339 else 00340 end = k; 00341 } 00342 } 00343 00344 00345 } // end of namespace PLearn 00346 00347 #endif // TMat_sort_INC