PLearn: TMat_maths_specialisation.h Source File

00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 2002 Pascal Vincent 00005 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are met: 00008 // 00009 // 1. Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // 00012 // 2. Redistributions in binary form must reproduce the above copyright 00013 // notice, this list of conditions and the following disclaimer in the 00014 // documentation and/or other materials provided with the distribution. 00015 // 00016 // 3. The name of the authors may not be used to endorse or promote 00017 // products derived from this software without specific prior written 00018 // permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00021 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00022 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00023 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00025 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00026 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00027 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00028 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00029 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 // 00031 // This file is part of the PLearn library. For more information on the PLearn 00032 // library, go to the PLearn Web site at www.plearn.org 00033 00034 00035 00036 00037 /* ******************************************************* 00038 * $Id: TMat_maths_specialisation.h,v 1.8 2004/07/21 16:30:53 chrish42 Exp $ 00039 * AUTHORS: Pascal Vincent & Yoshua Bengio & Rejean Ducharme 00040 * This file is part of the PLearn library. 00041 ******************************************************* */ 00042 00045 #ifndef TMat_maths_specialisation_INC 00046 #define TMat_maths_specialisation_INC 00047 00048 #include "TMat.h" 00049 00050 namespace PLearn { 00051 using namespace std; 00052 00053 00054 //#define USE_BLAS_SPECIALISATIONS 00055 00056 #ifdef USE_BLAS_SPECIALISATIONS 00057 #include "blas_proto.h" 00058 00059 00060 /* 00061 // These seem to generate a "Illegal instruction core dump" ! 00062 // So I'll stick with the memcpy versions in general.h 00063 inline float* copy(float* first, float* last, float* dest) 00064 { 00065 int n = last-first; 00066 int one = 1; 00067 scopy_(&n, first, &one, dest, &one); 00068 return dest+n; 00069 } 00070 00071 inline double* copy(double* first, double* last, double* dest) 00072 { 00073 int n = last-first; 00074 int one = 1; 00075 dcopy_(&n, first, &one, dest, &one); 00076 return dest+n; 00077 } 00078 */ 00079 00080 /* 00081 inline void multiplyAcc(const TVec<double>& vec, const TVec<double>& x, double scale) 00082 { 00083 int n = vec.length(); 00084 #ifdef BOUNDCHECK 00085 if(vec.length()!=x.length()) 00086 PLERROR("In multiplyAcc this has length_=%d and x has length_=%d", vec.length(),n); 00087 #endif 00088 int one = 1; 00089 daxpy_( &n, &scale, x.data(), &one, vec.data(), &one); 00090 } 00091 00092 inline void operator+=(const TVec<double>& vec, const TVec<double>& x) 00093 { multiplyAcc(vec,x,1.); } 00094 */ 00095 00096 // C = alpha A.B + beta C 00097 // ( Will use the transpose of A and/or B instead, if you set the correpsonding flags to true) 00098 inline void productScaleAcc(const TMat<double>& C, const TMat<double>& A, bool transposeA, const TMat<double>& B, bool transposeB, double alpha, double beta) 00099 { 00100 int l1, w1, l2, w2; 00101 char transa, transb; 00102 if(transposeA) 00103 { 00104 l1 = A.width(); 00105 w1 = A.length(); 00106 transa = 'T'; 00107 } 00108 else 00109 { 00110 l1 = A.length(); 00111 w1 = A.width(); 00112 transa = 'N'; 00113 } 00114 if(transposeB) 00115 { 00116 l2 = B.width(); 00117 w2 = B.length(); 00118 transb = 'T'; 00119 } 00120 else 00121 { 00122 l2 = B.length(); 00123 w2 = B.width(); 00124 transb = 'N'; 00125 } 00126 00127 #ifdef BOUNDCHECK 00128 if (w1!=l2 || C.length()!=l1 || C.width()!=w2) 00129 PLERROR("productScaleAcc, incompatible arguments (%dx%d) <- %s(%dx%d) . %s(%dx%d)", 00130 C.length(), C.width(), 00131 transposeA?"transpose":"", A.length(), A.width(), 00132 transposeB?"transpose":"", B.length(), B.width()); 00133 #endif 00134 00135 int lda = A.mod(); 00136 int ldb = B.mod(); 00137 int ldc = C.mod(); 00138 dgemm_(&transb, &transa, &w2, &l1, &w1, &alpha, B.data(), &ldb, A.data(), &lda, &beta, C.data(), &ldc); 00139 } 00140 00142 inline void productScaleAcc(const TVec<double>& y, const TMat<double>& A, bool transposeA, const TVec<double>& x, double alpha, double beta) 00143 { 00144 #ifdef BOUNDCHECK 00145 if(!transposeA) 00146 { 00147 if(A.length()!=y.length() || A.width()!=x.length()) 00148 PLERROR("productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d) . Vec(%d)", y.length(), A.length(), A.width(), x.length()); 00149 } 00150 else 00151 { 00152 if(A.length()!=x.length() || A.width()!=y.length()) 00153 PLERROR("productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d)' . Vec(%d)", y.length(), A.length(), A.width(), x.length()); 00154 } 00155 #endif 00156 00157 int one = 1; 00158 char trans = transposeA ?'N' :'T'; 00159 int lda = A.mod(); 00160 int m = A.width(); 00161 int n = A.length(); 00162 dgemv_(&trans, &m, &n, &alpha, A.data(), &lda, x.data(), &one, &beta, y.data(), &one); 00163 } 00164 00166 inline void externalProductScaleAcc(const TMat<double>& A, const TVec<double>& x, const TVec<double>& y, double alpha) 00167 { 00168 #ifdef BOUNDCHECK 00169 if(A.length()!=x.length() || A.width()!=y.length()) 00170 PLERROR("In externalProductScaleAcc, incompatible dimensions: Mat(%d,%d) <- Vec(%d).Vec(%d)'",A.length(), A.width(), x.length(), y.length()); 00171 #endif 00172 int one = 1; 00173 int lda = A.mod(); 00174 int m = A.width(); 00175 int n = A.length(); 00176 dger_(&m, &n, &alpha, y.data(), &one, x.data(), &one, A.data(), &lda); 00177 } 00178 00179 inline void externalProductAcc(const TMat<double>& A, const TVec<double>& x, const TVec<double>& y) 00180 { externalProductScaleAcc(A, x, y, 1.); } 00181 00182 inline void product(const TVec<double>& vec, const TMat<double>& m, const TVec<double>& v) 00183 { productScaleAcc(vec, m, false, v, 1., 0.); } 00184 00185 inline void productAcc(const TVec<double>& vec, const TMat<double>& m, const TVec<double>& v) 00186 { productScaleAcc(vec, m, false, v, 1., 1.); } 00187 00188 inline void transposeProduct(const TVec<double>& vec, const TMat<double>& m, const TVec<double>& v) 00189 { productScaleAcc(vec, m, true, v, 1., 0.); } 00190 00191 inline void transposeProductAcc(const TVec<double>& vec, const TMat<double>& m, const TVec<double>& v) 00192 { productScaleAcc(vec, m, true, v, 1., 1.); } 00193 00194 inline void product(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00195 { productScaleAcc(mat, m1, false, m2, false, 1., 0.); } 00196 00197 inline void transposeTransposeProduct(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00198 { productScaleAcc(mat, m1, true, m2, true, 1., 0.); } 00199 00200 inline void transposeProduct(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00201 { productScaleAcc(mat, m1, true, m2, false, 1., 0.); } 00202 00203 inline void productTranspose(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00204 { productScaleAcc(mat, m1, false, m2, true, 1., 0.); } 00205 00206 inline void productAcc(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00207 { productScaleAcc(mat, m1, false, m2, false, 1., 1.); } 00208 00209 inline void transposeTransposeProductAcc(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00210 { productScaleAcc(mat, m1, true, m2, true, 1., 1.); } 00211 00212 inline void transposeProductAcc(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00213 { productScaleAcc(mat, m1, true, m2, false, 1., 1.); } 00214 00215 inline void productTransposeAcc(const TMat<double>& mat, const TMat<double>& m1, const TMat<double>& m2) 00216 { productScaleAcc(mat, m1, false, m2, true, 1., 1.); } 00217 00218 00219 00220 // float 00221 00222 00223 // C = alpha A.B + beta C 00224 // ( Will use the transpose of A and/or B instead, if you set the correpsonding flags to true) 00225 inline void productScaleAcc(const TMat<float>& C, const TMat<float>& A, bool transposeA, const TMat<float>& B, bool transposeB, float alpha, float beta) 00226 { 00227 int l1, w1, l2, w2; 00228 char transa, transb; 00229 if(transposeA) 00230 { 00231 l1 = A.width(); 00232 w1 = A.length(); 00233 transa = 'T'; 00234 } 00235 else 00236 { 00237 l1 = A.length(); 00238 w1 = A.width(); 00239 transa = 'N'; 00240 } 00241 if(transposeB) 00242 { 00243 l2 = B.width(); 00244 w2 = B.length(); 00245 transb = 'T'; 00246 } 00247 else 00248 { 00249 l2 = B.length(); 00250 w2 = B.width(); 00251 transb = 'N'; 00252 } 00253 00254 #ifdef BOUNDCHECK 00255 if (w1!=l2 || C.length()!=l1 || C.width()!=w2) 00256 PLERROR("productScaleAcc, incompatible arguments (%dx%d) <- %s(%dx%d) . %s(%dx%d)", 00257 C.length(), C.width(), 00258 transposeA?"transpose":"", A.length(), A.width(), 00259 transposeB?"transpose":"", B.length(), B.width()); 00260 #endif 00261 00262 int lda = A.mod(); 00263 int ldb = B.mod(); 00264 int ldc = C.mod(); 00265 sgemm_(&transb, &transa, &w2, &l1, &w1, &alpha, B.data(), &ldb, A.data(), &lda, &beta, C.data(), &ldc); 00266 } 00267 00269 inline void productScaleAcc(const TVec<float>& y, const TMat<float>& A, bool transposeA, const TVec<float>& x, float alpha, float beta) 00270 { 00271 #ifdef BOUNDCHECK 00272 if(!transposeA) 00273 { 00274 if(A.length()!=y.length() || A.width()!=x.length()) 00275 PLERROR("productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d) . Vec(%d)", y.length(), A.length(), A.width(), x.length()); 00276 } 00277 else 00278 { 00279 if(A.length()!=x.length() || A.width()!=y.length()) 00280 PLERROR("productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d)' . Vec(%d)", y.length(), A.length(), A.width(), x.length()); 00281 } 00282 #endif 00283 00284 /* int one = 1; 00285 char trans = transposeA ?'N' :'T'; 00286 int lda = A.mod(); 00287 int m = A.width(); 00288 int n = A.length(); 00289 static int ndbg=0; 00290 00291 extern bool debug_; 00292 if (debug_ && ndbg<3) 00293 { 00294 cout << "A=" << A << " * " << x << " ==> " << y << endl; 00295 ndbg++; 00296 } 00297 sgemv_(&trans, &m, &n, &alpha, A.data(), &lda, x.data(), &one, &beta, y.data(), &one); 00298 if (debug_ && ndbg<3) 00299 { 00300 cout << "A=" << A << " * " << x << " ==> " << y << endl; 00301 ndbg++; 00302 } 00303 */ 00304 } 00305 00307 inline void externalProductScaleAcc(const TMat<float>& A, const TVec<float>& x, const TVec<float>& y, float alpha) 00308 { 00309 #ifdef BOUNDCHECK 00310 if(A.length()!=x.length() || A.width()!=y.length()) 00311 PLERROR("In externalProductScaleAcc, incompatible dimensions: Mat(%d,%d) <- Vec(%d).Vec(%d)'",A.length(), A.width(), x.length(), y.length()); 00312 #endif 00313 int one = 1; 00314 int lda = A.mod(); 00315 int m = A.width(); 00316 int n = A.length(); 00317 sger_(&m, &n, &alpha, y.data(), &one, x.data(), &one, A.data(), &lda); 00318 } 00319 00320 inline void externalProductAcc(const TMat<float>& A, const TVec<float>& x, const TVec<float>& y) 00321 { externalProductScaleAcc(A, x, y, 1.); } 00322 00323 inline void product(const TVec<float>& vec, const TMat<float>& m, const TVec<float>& v) 00324 { productScaleAcc(vec, m, false, v, 1., 0.); } 00325 00326 inline void productAcc(const TVec<float>& vec, const TMat<float>& m, const TVec<float>& v) 00327 { productScaleAcc(vec, m, false, v, 1., 1.); } 00328 00329 inline void transposeProduct(const TVec<float>& vec, const TMat<float>& m, const TVec<float>& v) 00330 { productScaleAcc(vec, m, true, v, 1., 0.); } 00331 00332 inline void transposeProductAcc(const TVec<float>& vec, const TMat<float>& m, const TVec<float>& v) 00333 { productScaleAcc(vec, m, true, v, 1., 1.); } 00334 00335 inline void product(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00336 { productScaleAcc(mat, m1, false, m2, false, 1., 0.); } 00337 00338 inline void transposeTransposeProduct(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00339 { productScaleAcc(mat, m1, true, m2, true, 1., 0.); } 00340 00341 inline void transposeProduct(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00342 { productScaleAcc(mat, m1, true, m2, false, 1., 0.); } 00343 00344 inline void productTranspose(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00345 { productScaleAcc(mat, m1, false, m2, true, 1., 0.); } 00346 00347 inline void productAcc(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00348 { productScaleAcc(mat, m1, false, m2, false, 1., 1.); } 00349 00350 inline void transposeTransposeProductAcc(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00351 { productScaleAcc(mat, m1, true, m2, true, 1., 1.); } 00352 00353 inline void transposeProductAcc(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00354 { productScaleAcc(mat, m1, true, m2, false, 1., 1.); } 00355 00356 inline void productTransposeAcc(const TMat<float>& mat, const TMat<float>& m1, const TMat<float>& m2) 00357 { productScaleAcc(mat, m1, false, m2, true, 1., 1.); } 00358 00359 00360 #endif // USE_BLAS_SPECIALISATIONS 00361 00362 // strange little functions of Yoshua for optimized computations in neural nets 00363 00364 #define UNFOLD 00365 00366 // dot product, assumes that s is already initialized 00367 // return s + sum_{i=0}^{n-1} x[i]*y[i] 00368 inline real dot_product(real s,real* x,real* y,int n) 00369 { 00370 #ifdef UNFOLD 00371 int n4 = (n >> 2) << 2; 00372 int i=0; 00373 for (;i<n4;i+=4) 00374 { 00375 real s1 = x[i] * y[i]; 00376 real s2 = x[i+1] * y[i+1]; 00377 real s3 = x[i+2] * y[i+2]; 00378 real s4 = x[i+3] * y[i+3]; 00379 s += s1+s2+s3+s4; 00380 } 00381 for (;i<n;i++) 00382 s += x[i] * y[i]; 00383 #else 00384 for (int i=0;i<n;i++) 00385 s += *x++ * *y++; 00386 #endif 00387 return s; 00388 } 00389 00390 // norman: sse is not supported in WIN32 00391 #if defined(SGI) && !defined(WIN32) 00392 #include <plearn/sys/sse.h> 00393 #endif //ndef SGI 00394 00395 //#define BUNDLE 00396 // dx[j] += sum_i dy[i]*w(i,j) 00397 // w(i,j) -= learning_rate*(dy[i]*x[j] + weight_decay*w(i,j)) 00398 inline void bprop_update_layer(real* dy, real* x, real* dx, real* w, 00399 int n_y, int n_x, real learning_rate, 00400 real weight_decay) 00401 { 00402 #ifdef BUNDLE 00403 int nx8 = (n_x >> 3) << 3; 00404 int j8=0; 00405 real* xj = x; 00406 real* dxj = dx; 00407 int delta_w1 = n_x - 8; 00408 int delta_w2 = n_y*n_x - 8; 00409 real* w_ij = w; 00410 for (;j8<nx8;j8+=8,xj+=8,dxj+=8,w_ij-=delta_w2) 00411 { 00412 real* dy_ = dy; 00413 for (int i=0;i<n_y;i++) 00414 { 00415 real* next_w = w_ij + delta_w1; 00416 prefetchnta(*next_w); 00417 real* x_j = xj; 00418 real* dx_j = dxj; 00419 real d_y = dy_[i]; 00420 *dx_j += d_y * *w_ij; 00421 *w_ij -= learning_rate*(d_y * *x_j + weight_decay * *w_ij); 00422 dx_j[1] += d_y * w_ij[1]; 00423 w_ij[1] -= learning_rate*(d_y * x_j[1] + weight_decay * w_ij[1]); 00424 dx_j[2] += d_y * w_ij[2]; 00425 w_ij[2] -= learning_rate*(d_y * x_j[2] + weight_decay * w_ij[2]); 00426 dx_j[3] += d_y * w_ij[3]; 00427 w_ij[3] -= learning_rate*(d_y * x_j[3] + weight_decay * w_ij[3]); 00428 dx_j[4] += d_y * w_ij[4]; 00429 w_ij[4] -= learning_rate*(d_y * x_j[4] + weight_decay * w_ij[4]); 00430 dx_j[5] += d_y * w_ij[5]; 00431 w_ij[5] -= learning_rate*(d_y * x_j[5] + weight_decay * w_ij[5]); 00432 dx_j[6] += d_y * w_ij[6]; 00433 w_ij[6] -= learning_rate*(d_y * x_j[6] + weight_decay * w_ij[6]); 00434 dx_j[7] += d_y * w_ij[7]; 00435 w_ij[7] -= learning_rate*(d_y * x_j[7] + weight_decay * w_ij[7]); 00436 w_ij = next_w; 00437 } 00438 } 00439 for (int i=0;i<n_y;i++) 00440 { 00441 real dy_i = dy[i]; 00442 real *dx_j = dx; 00443 real *x_j = x; 00444 for (int j=j8;j<n_x;j++) 00445 { 00446 *dx_j++ += dy_i * *w; 00447 *w++ -= learning_rate*(dy_i * *x_j++ + weight_decay * *w); 00448 } 00449 } 00450 00451 #else 00452 #ifdef UNFOLD 00453 int nx4 = (n_x >> 2) << 2; 00454 real *w_i = w; 00455 for (int i=0;i<n_y;i++,w_i+=n_x) 00456 { 00457 real dy_i = dy[i]; 00458 real *dx_j = dx; 00459 real *x_j = x; 00460 int j=0; 00461 for (;j<nx4;j+=4) 00462 { 00463 real w_ij0 = w_i[j]; 00464 real w_ij1 = w_i[j+1]; 00465 real w_ij2 = w_i[j+2]; 00466 real w_ij3 = w_i[j+3]; 00467 dx_j[j] += dy_i * w_ij0; 00468 dx_j[j+1] += dy_i * w_ij1; 00469 dx_j[j+2] += dy_i * w_ij2; 00470 dx_j[j+3] += dy_i * w_ij3; 00471 w_i[j] -= learning_rate*(dy_i * x_j[j] + weight_decay * w_ij0); 00472 w_i[j+1] -= learning_rate*(dy_i * x_j[j+1] + weight_decay * w_ij1); 00473 w_i[j+2] -= learning_rate*(dy_i * x_j[j+2] + weight_decay * w_ij2); 00474 w_i[j+3] -= learning_rate*(dy_i * x_j[j+3] + weight_decay * w_ij3); 00475 } 00476 for (;j<n_x;j++) 00477 { 00478 real w_ij = w_i[j]; 00479 dx_j[j] += dy_i * w_ij; 00480 w_i[j] -= learning_rate*(dy_i * x_j[j] + weight_decay * w_ij); 00481 } 00482 } 00483 #else 00484 for (int i=0;i<n_y;i++) 00485 { 00486 real dy_i = dy[i]; 00487 real *dx_j = dx; 00488 real *x_j = x; 00489 for (int j=0;j<n_x;j++) 00490 { 00491 *dx_j++ += dy_i * *w; 00492 *w++ -= learning_rate*(dy_i * *x_j++ + weight_decay * *w); 00493 } 00494 } 00495 #endif 00496 #endif 00497 } 00498 00499 } // end of namespace PLearn 00500 00501 #endif 00502 00503 00504 00505 00506 00507