00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00045
#ifndef TMat_maths_specialisation_INC
00046
#define TMat_maths_specialisation_INC
00047
00048
#include "TMat.h"
00049
00050
namespace PLearn {
00051
using namespace std;
00052
00053
00054
00055
00056
#ifdef USE_BLAS_SPECIALISATIONS
00057
#include "blas_proto.h"
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
inline void productScaleAcc(
const TMat<double>& C,
const TMat<double>& A,
bool transposeA,
const TMat<double>& B,
bool transposeB,
double alpha,
double beta)
00099 {
00100
int l1, w1, l2, w2;
00101
char transa, transb;
00102
if(transposeA)
00103 {
00104 l1 = A.width();
00105 w1 = A.length();
00106 transa =
'T';
00107 }
00108
else
00109 {
00110 l1 = A.length();
00111 w1 = A.width();
00112 transa =
'N';
00113 }
00114
if(transposeB)
00115 {
00116 l2 = B.width();
00117 w2 = B.length();
00118 transb =
'T';
00119 }
00120
else
00121 {
00122 l2 = B.length();
00123 w2 = B.width();
00124 transb =
'N';
00125 }
00126
00127
#ifdef BOUNDCHECK
00128
if (w1!=l2 || C.length()!=l1 || C.width()!=w2)
00129
PLERROR(
"productScaleAcc, incompatible arguments (%dx%d) <- %s(%dx%d) . %s(%dx%d)",
00130 C.length(), C.width(),
00131 transposeA?
"transpose":
"", A.length(), A.width(),
00132 transposeB?
"transpose":
"", B.length(), B.width());
00133
#endif
00134
00135
int lda = A.mod();
00136
int ldb = B.mod();
00137
int ldc = C.mod();
00138
dgemm_(&transb, &transa, &w2, &l1, &w1, &alpha, B.data(), &ldb, A.data(), &lda, &beta, C.data(), &ldc);
00139 }
00140
00142
inline void productScaleAcc(
const TVec<double>& y,
const TMat<double>& A,
bool transposeA,
const TVec<double>& x,
double alpha,
double beta)
00143 {
00144
#ifdef BOUNDCHECK
00145
if(!transposeA)
00146 {
00147
if(A.length()!=y.length() || A.width()!=
x.length())
00148
PLERROR(
"productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d) . Vec(%d)", y.length(), A.length(), A.width(),
x.length());
00149 }
00150
else
00151 {
00152
if(A.length()!=
x.length() || A.width()!=y.length())
00153
PLERROR(
"productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d)' . Vec(%d)", y.length(), A.length(), A.width(),
x.length());
00154 }
00155
#endif
00156
00157
int one = 1;
00158
char trans = transposeA ?
'N' :
'T';
00159
int lda = A.mod();
00160
int m = A.width();
00161
int n = A.length();
00162
dgemv_(&trans, &m, &n, &alpha, A.data(), &lda,
x.data(), &one, &beta, y.data(), &one);
00163 }
00164
00166
inline void externalProductScaleAcc(
const TMat<double>& A,
const TVec<double>& x,
const TVec<double>& y,
double alpha)
00167 {
00168
#ifdef BOUNDCHECK
00169
if(A.length()!=
x.length() || A.width()!=y.length())
00170
PLERROR(
"In externalProductScaleAcc, incompatible dimensions: Mat(%d,%d) <- Vec(%d).Vec(%d)'",A.length(), A.width(),
x.length(), y.length());
00171
#endif
00172
int one = 1;
00173
int lda = A.mod();
00174
int m = A.width();
00175
int n = A.length();
00176
dger_(&m, &n, &alpha, y.data(), &one,
x.data(), &one, A.data(), &lda);
00177 }
00178
00179
inline void externalProductAcc(
const TMat<double>& A,
const TVec<double>& x,
const TVec<double>& y)
00180 {
externalProductScaleAcc(A, x, y, 1.); }
00181
00182
inline void product(
const TVec<double>& vec,
const TMat<double>& m,
const TVec<double>& v)
00183 { productScaleAcc(vec, m,
false, v, 1., 0.); }
00184
00185
inline void productAcc(
const TVec<double>& vec,
const TMat<double>& m,
const TVec<double>& v)
00186 { productScaleAcc(vec, m,
false, v, 1., 1.); }
00187
00188
inline void transposeProduct(
const TVec<double>& vec,
const TMat<double>& m,
const TVec<double>& v)
00189 { productScaleAcc(vec, m,
true, v, 1., 0.); }
00190
00191
inline void transposeProductAcc(
const TVec<double>& vec,
const TMat<double>& m,
const TVec<double>& v)
00192 { productScaleAcc(vec, m,
true, v, 1., 1.); }
00193
00194
inline void product(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00195 { productScaleAcc(mat, m1,
false, m2,
false, 1., 0.); }
00196
00197
inline void transposeTransposeProduct(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00198 { productScaleAcc(mat, m1,
true, m2,
true, 1., 0.); }
00199
00200
inline void transposeProduct(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00201 { productScaleAcc(mat, m1,
true, m2,
false, 1., 0.); }
00202
00203
inline void productTranspose(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00204 { productScaleAcc(mat, m1,
false, m2,
true, 1., 0.); }
00205
00206
inline void productAcc(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00207 { productScaleAcc(mat, m1,
false, m2,
false, 1., 1.); }
00208
00209
inline void transposeTransposeProductAcc(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00210 { productScaleAcc(mat, m1,
true, m2,
true, 1., 1.); }
00211
00212
inline void transposeProductAcc(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00213 { productScaleAcc(mat, m1,
true, m2,
false, 1., 1.); }
00214
00215
inline void productTransposeAcc(
const TMat<double>& mat,
const TMat<double>& m1,
const TMat<double>& m2)
00216 { productScaleAcc(mat, m1,
false, m2,
true, 1., 1.); }
00217
00218
00219
00220
00221
00222
00223
00224
00225
inline void productScaleAcc(
const TMat<float>& C,
const TMat<float>& A,
bool transposeA,
const TMat<float>& B,
bool transposeB,
float alpha,
float beta)
00226 {
00227
int l1, w1, l2, w2;
00228
char transa, transb;
00229
if(transposeA)
00230 {
00231 l1 = A.width();
00232 w1 = A.length();
00233 transa =
'T';
00234 }
00235
else
00236 {
00237 l1 = A.length();
00238 w1 = A.width();
00239 transa =
'N';
00240 }
00241
if(transposeB)
00242 {
00243 l2 = B.width();
00244 w2 = B.length();
00245 transb =
'T';
00246 }
00247
else
00248 {
00249 l2 = B.length();
00250 w2 = B.width();
00251 transb =
'N';
00252 }
00253
00254
#ifdef BOUNDCHECK
00255
if (w1!=l2 || C.length()!=l1 || C.width()!=w2)
00256
PLERROR(
"productScaleAcc, incompatible arguments (%dx%d) <- %s(%dx%d) . %s(%dx%d)",
00257 C.length(), C.width(),
00258 transposeA?
"transpose":
"", A.length(), A.width(),
00259 transposeB?
"transpose":
"", B.length(), B.width());
00260
#endif
00261
00262
int lda = A.mod();
00263
int ldb = B.mod();
00264
int ldc = C.mod();
00265
sgemm_(&transb, &transa, &w2, &l1, &w1, &alpha, B.data(), &ldb, A.data(), &lda, &beta, C.data(), &ldc);
00266 }
00267
00269
inline void productScaleAcc(
const TVec<float>& y,
const TMat<float>& A,
bool transposeA,
const TVec<float>& x,
float alpha,
float beta)
00270 {
00271
#ifdef BOUNDCHECK
00272
if(!transposeA)
00273 {
00274
if(A.length()!=y.length() || A.width()!=
x.length())
00275
PLERROR(
"productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d) . Vec(%d)", y.length(), A.length(), A.width(),
x.length());
00276 }
00277
else
00278 {
00279
if(A.length()!=
x.length() || A.width()!=y.length())
00280
PLERROR(
"productScaleAcc, incompatible arguments Vec(%d) <- Mat(%d,%d)' . Vec(%d)", y.length(), A.length(), A.width(),
x.length());
00281 }
00282
#endif
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 }
00305
00307
inline void externalProductScaleAcc(
const TMat<float>& A,
const TVec<float>& x,
const TVec<float>& y,
float alpha)
00308 {
00309
#ifdef BOUNDCHECK
00310
if(A.length()!=
x.length() || A.width()!=y.length())
00311
PLERROR(
"In externalProductScaleAcc, incompatible dimensions: Mat(%d,%d) <- Vec(%d).Vec(%d)'",A.length(), A.width(),
x.length(), y.length());
00312
#endif
00313
int one = 1;
00314
int lda = A.mod();
00315
int m = A.width();
00316
int n = A.length();
00317
sger_(&m, &n, &alpha, y.data(), &one,
x.data(), &one, A.data(), &lda);
00318 }
00319
00320
inline void externalProductAcc(
const TMat<float>& A,
const TVec<float>& x,
const TVec<float>& y)
00321 {
externalProductScaleAcc(A, x, y, 1.); }
00322
00323
inline void product(
const TVec<float>& vec,
const TMat<float>& m,
const TVec<float>& v)
00324 { productScaleAcc(vec, m,
false, v, 1., 0.); }
00325
00326
inline void productAcc(
const TVec<float>& vec,
const TMat<float>& m,
const TVec<float>& v)
00327 { productScaleAcc(vec, m,
false, v, 1., 1.); }
00328
00329
inline void transposeProduct(
const TVec<float>& vec,
const TMat<float>& m,
const TVec<float>& v)
00330 { productScaleAcc(vec, m,
true, v, 1., 0.); }
00331
00332
inline void transposeProductAcc(
const TVec<float>& vec,
const TMat<float>& m,
const TVec<float>& v)
00333 { productScaleAcc(vec, m,
true, v, 1., 1.); }
00334
00335
inline void product(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00336 { productScaleAcc(mat, m1,
false, m2,
false, 1., 0.); }
00337
00338
inline void transposeTransposeProduct(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00339 { productScaleAcc(mat, m1,
true, m2,
true, 1., 0.); }
00340
00341
inline void transposeProduct(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00342 { productScaleAcc(mat, m1,
true, m2,
false, 1., 0.); }
00343
00344
inline void productTranspose(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00345 { productScaleAcc(mat, m1,
false, m2,
true, 1., 0.); }
00346
00347
inline void productAcc(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00348 { productScaleAcc(mat, m1,
false, m2,
false, 1., 1.); }
00349
00350
inline void transposeTransposeProductAcc(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00351 { productScaleAcc(mat, m1,
true, m2,
true, 1., 1.); }
00352
00353
inline void transposeProductAcc(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00354 { productScaleAcc(mat, m1,
true, m2,
false, 1., 1.); }
00355
00356
inline void productTransposeAcc(
const TMat<float>& mat,
const TMat<float>& m1,
const TMat<float>& m2)
00357 { productScaleAcc(mat, m1,
false, m2,
true, 1., 1.); }
00358
00359
00360
#endif // USE_BLAS_SPECIALISATIONS
00361
00362
00363
00364 #define UNFOLD
00365
00366
00367
00368 inline real dot_product(
real s,
real* x,
real* y,
int n)
00369 {
00370
#ifdef UNFOLD
00371
int n4 = (n >> 2) << 2;
00372
int i=0;
00373
for (;i<n4;i+=4)
00374 {
00375
real s1 =
x[i] * y[i];
00376
real s2 =
x[i+1] * y[i+1];
00377
real s3 =
x[i+2] * y[i+2];
00378
real s4 =
x[i+3] * y[i+3];
00379 s += s1+s2+s3+s4;
00380 }
00381
for (;i<n;i++)
00382 s +=
x[i] * y[i];
00383
#else
00384
for (
int i=0;i<n;i++)
00385 s += *
x++ * *y++;
00386
#endif
00387
return s;
00388 }
00389
00390
00391
#if defined(SGI) && !defined(WIN32)
00392
#include <plearn/sys/sse.h>
00393
#endif //ndef SGI
00394
00395
00396
00397
00398 inline void bprop_update_layer(
real* dy,
real* x,
real* dx,
real* w,
00399
int n_y,
int n_x,
real learning_rate,
00400
real weight_decay)
00401 {
00402
#ifdef BUNDLE
00403
int nx8 = (n_x >> 3) << 3;
00404
int j8=0;
00405
real* xj =
x;
00406
real* dxj = dx;
00407
int delta_w1 = n_x - 8;
00408
int delta_w2 = n_y*n_x - 8;
00409
real* w_ij = w;
00410
for (;j8<nx8;j8+=8,xj+=8,dxj+=8,w_ij-=delta_w2)
00411 {
00412
real* dy_ = dy;
00413
for (
int i=0;i<n_y;i++)
00414 {
00415
real* next_w = w_ij + delta_w1;
00416
prefetchnta(*next_w);
00417
real* x_j = xj;
00418
real* dx_j = dxj;
00419
real d_y = dy_[i];
00420 *dx_j += d_y * *w_ij;
00421 *w_ij -= learning_rate*(d_y * *x_j + weight_decay * *w_ij);
00422 dx_j[1] += d_y * w_ij[1];
00423 w_ij[1] -= learning_rate*(d_y * x_j[1] + weight_decay * w_ij[1]);
00424 dx_j[2] += d_y * w_ij[2];
00425 w_ij[2] -= learning_rate*(d_y * x_j[2] + weight_decay * w_ij[2]);
00426 dx_j[3] += d_y * w_ij[3];
00427 w_ij[3] -= learning_rate*(d_y * x_j[3] + weight_decay * w_ij[3]);
00428 dx_j[4] += d_y * w_ij[4];
00429 w_ij[4] -= learning_rate*(d_y * x_j[4] + weight_decay * w_ij[4]);
00430 dx_j[5] += d_y * w_ij[5];
00431 w_ij[5] -= learning_rate*(d_y * x_j[5] + weight_decay * w_ij[5]);
00432 dx_j[6] += d_y * w_ij[6];
00433 w_ij[6] -= learning_rate*(d_y * x_j[6] + weight_decay * w_ij[6]);
00434 dx_j[7] += d_y * w_ij[7];
00435 w_ij[7] -= learning_rate*(d_y * x_j[7] + weight_decay * w_ij[7]);
00436 w_ij = next_w;
00437 }
00438 }
00439
for (
int i=0;i<n_y;i++)
00440 {
00441
real dy_i = dy[i];
00442
real *dx_j = dx;
00443
real *x_j =
x;
00444
for (
int j=j8;j<n_x;j++)
00445 {
00446 *dx_j++ += dy_i * *w;
00447 *w++ -= learning_rate*(dy_i * *x_j++ + weight_decay * *w);
00448 }
00449 }
00450
00451
#else
00452
#ifdef UNFOLD
00453
int nx4 = (n_x >> 2) << 2;
00454
real *w_i = w;
00455
for (
int i=0;i<n_y;i++,w_i+=n_x)
00456 {
00457
real dy_i = dy[i];
00458
real *dx_j = dx;
00459
real *x_j =
x;
00460
int j=0;
00461
for (;j<nx4;j+=4)
00462 {
00463
real w_ij0 = w_i[j];
00464
real w_ij1 = w_i[j+1];
00465
real w_ij2 = w_i[j+2];
00466
real w_ij3 = w_i[j+3];
00467 dx_j[j] += dy_i * w_ij0;
00468 dx_j[j+1] += dy_i * w_ij1;
00469 dx_j[j+2] += dy_i * w_ij2;
00470 dx_j[j+3] += dy_i * w_ij3;
00471 w_i[j] -= learning_rate*(dy_i * x_j[j] + weight_decay * w_ij0);
00472 w_i[j+1] -= learning_rate*(dy_i * x_j[j+1] + weight_decay * w_ij1);
00473 w_i[j+2] -= learning_rate*(dy_i * x_j[j+2] + weight_decay * w_ij2);
00474 w_i[j+3] -= learning_rate*(dy_i * x_j[j+3] + weight_decay * w_ij3);
00475 }
00476
for (;j<n_x;j++)
00477 {
00478
real w_ij = w_i[j];
00479 dx_j[j] += dy_i * w_ij;
00480 w_i[j] -= learning_rate*(dy_i * x_j[j] + weight_decay * w_ij);
00481 }
00482 }
00483
#else
00484
for (
int i=0;i<n_y;i++)
00485 {
00486
real dy_i = dy[i];
00487
real *dx_j = dx;
00488
real *x_j =
x;
00489
for (
int j=0;j<n_x;j++)
00490 {
00491 *dx_j++ += dy_i * *w;
00492 *w++ -= learning_rate*(dy_i * *x_j++ + weight_decay * *w);
00493 }
00494 }
00495
#endif
00496
#endif
00497
}
00498
00499 }
00500
00501
#endif
00502
00503
00504
00505
00506
00507