sse.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
#ifndef _SSE_H
00037
#define _SSE_H
00038
00039
00040
00041
00042
00043
00044
00045
/*
 * 128-bit SSE operand, viewed as four single-precision floats.
 * The type is aligned on a 16-byte boundary.
 */
typedef union {
	float	sf[4];
} __attribute__ ((aligned (16))) sse_t;
00049
00050
00051
00052
/*
 * Execute CPUID for `leaf` (sub-leaf 0) and store EAX, EBX, ECX and EDX
 * in regs[0..3].  On non-x86 targets all four words are set to zero.
 */
static inline void
sse_cpuid(unsigned int leaf, unsigned int regs[4])
{
#if defined(__i386__) || defined(__x86_64__)
	__asm__ __volatile__ ("cpuid"
			      : "=a" (regs[0]), "=b" (regs[1]),
				"=c" (regs[2]), "=d" (regs[3])
			      : "0" (leaf), "2" (0u));
#else
	(void) leaf;
	regs[0] = regs[1] = regs[2] = regs[3] = 0u;
#endif
}

/*
 * Return non-zero when the CPUID instruction can be used.
 *
 * On 32-bit x86 this toggles the ID bit (0x200000) of EFLAGS and checks
 * whether the change sticks.  x86-64 always has CPUID; any other target
 * reports 0.
 */
static inline int
sse_has_cpuid(void)
{
#if defined(__x86_64__)
	return 1;
#elif defined(__i386__)
	unsigned int after, before;

	__asm__ __volatile__ ("pushfl\n\t"
			      "popl %0\n\t"
			      "movl %0, %1\n\t"
			      "xorl $0x200000, %0\n\t"
			      "pushl %0\n\t"
			      "popfl\n\t"
			      "pushfl\n\t"
			      "popl %0\n\t"
			      : "=&r" (after), "=&r" (before)
			      : /* no inputs */
			      : "cc");
	return ((after ^ before) & 0x200000u) != 0;
#else
	return 0;
#endif
}

/*
 * Decode standard CPUID leaf 1 (EDX feature word).
 *
 * Returns 9 when `want_sse` is non-zero and the SSE bit (bit 25) is set,
 * otherwise 1 when the MMX bit (bit 23) is set, otherwise 0.  Returns 0
 * without querying when leaf 1 is above `max_leaf`.
 */
static inline int
sse_std_features(unsigned int max_leaf, int want_sse)
{
	unsigned int regs[4];

	if (max_leaf < 1u)
		return 0;

	sse_cpuid(1u, regs);
	if (want_sse && (regs[3] & 0x02000000u))
		return 9;
	return (regs[3] & 0x00800000u) ? 1 : 0;
}

/*
 * Detect the multimedia instruction sets of the running CPU.
 *
 * Returns a bit mask, 0 meaning "nothing supported / unknown vendor":
 *	0x1	MMX
 *	0x2	extended MMX (Cyrix)
 *	0x4	3DNow! (AMD)
 *	0x8	SSE
 * so the possible values are 0, 1, 3, 5, 9 and, for an AMD part that has
 * both 3DNow! and SSE, 13.  Test individual bits rather than comparing
 * against a single value.
 *
 * Only the "GenuineIntel", "AuthenticAMD" and "CyrixInstead" vendors are
 * recognised, as in the original assembly.
 *
 * Changes relative to the original single-asm implementation:
 *  - no global asm labels, so the function can be inlined more than once;
 *  - no conflicting "=a" output / "eax" clobber;
 *  - builds for x86-64 as well as i386 (and returns 0 elsewhere);
 *  - the AMD extended-leaf check is an unsigned comparison (the signed
 *    `jl` against 0x80000000 could never be taken);
 *  - the Cyrix extended feature bits are read from EDX, not EAX;
 *  - SSE is also reported for AMD processors;
 *  - `static inline` instead of `inline extern`, so that including this
 *    header from several translation units neither duplicates nor
 *    loses the out-of-line definition.
 */
static inline int
mm_support(void)
{
	unsigned int regs[4];
	unsigned int max_std;
	int rval;

	if (!sse_has_cpuid())
		return 0;

	/* Leaf 0: EAX = highest standard leaf, EBX:EDX:ECX = vendor string. */
	sse_cpuid(0u, regs);
	max_std = regs[0];

	/* "GenuineIntel": SSE first, then plain MMX. */
	if (regs[1] == 0x756e6547u && regs[3] == 0x49656e69u &&
	    regs[2] == 0x6c65746eu)
		return sse_std_features(max_std, 1);

	/* "AuthenticAMD" */
	if (regs[1] == 0x68747541u && regs[3] == 0x69746e65u &&
	    regs[2] == 0x444d4163u) {
		/* Without extended leaf 0x80000001 fall back to plain MMX. */
		sse_cpuid(0x80000000u, regs);
		if (regs[0] < 0x80000001u)
			return sse_std_features(max_std, 0);

		sse_cpuid(0x80000001u, regs);
		if (!(regs[3] & 0x00800000u))		/* MMX */
			return 0;
		rval = (regs[3] & 0x80000000u) ? 5 : 1;	/* 3DNow! */

		/* SSE is advertised in the standard feature leaf. */
		if (sse_std_features(max_std, 1) & 0x8)
			rval |= 0x8;
		return rval;
	}

	/* "CyrixInstead" */
	if (regs[1] == 0x69727943u && regs[3] == 0x736e4978u &&
	    regs[2] == 0x64616574u) {
		/*
		 * As in the original code, the extended leaf is only
		 * consulted when the highest standard leaf is exactly 2.
		 */
		if (max_std != 2u)
			return sse_std_features(max_std, 0);

		sse_cpuid(0x80000001u, regs);
		if (!(regs[3] & 0x00800000u))		/* MMX */
			return 0;
		return (regs[3] & 0x01000000u) ? 3 : 1;	/* extended MMX */
	}

	return 0;
}
00219
00220
00221
00222
/*
 * Report whether the SSE bit (0x8) is set in the mask returned by
 * mm_support().  Returns 1 when it is set and 0 otherwise.
 *
 * Declared `static inline` because the definition lives in a header.
 */
static inline int
sse_ok(void)
{
	return (mm_support() & 0x8) != 0;
}
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
#ifdef SSE_TRACE
00239
00240
00241
00242
00243
#include <stdio.h>
00244
00245
using namespace std;
00246
00247
00248
#define sse_i2r(op, imm, reg) \
00249
{ \
00250
sse_t sse_trace; \
00251
sse_trace.uq = (imm); \
00252
fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \
00253
sse_trace.d[1], sse_trace.d[0]); \
00254
__asm__ __volatile__ ("movq %%" #reg ", %0" \
00255
: "=X" (sse_trace) \
00256
: ); \
00257 fprintf(stderr, #reg "=0x%08x%08x) => ", \
00258 sse_trace.d[1], sse_trace.d[0]); \
00259 __asm__ __volatile__ (#op " %0, %%" #reg \
00260 : \
00261 : "X" (imm)); \
00262 __asm__ __volatile__ ("movq %%" #reg ", %0" \
00263 : "=X" (sse_trace) \
00264 : ); \
00265 fprintf(stderr, #reg "=0x%08x%08x\n", \
00266 sse_trace.d[1], sse_trace.d[0]); \
00267 }
00268
00269
#define sse_m2r(op, mem, reg) \
00270
{ \
00271
sse_t sse_trace; \
00272
sse_trace = (mem); \
00273
fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \
00274
sse_trace.d[1], sse_trace.d[0]); \
00275
__asm__ __volatile__ ("movq %%" #reg ", %0" \
00276
: "=X" (sse_trace) \
00277
: ); \
00278 fprintf(stderr, #reg "=0x%08x%08x) => ", \
00279 sse_trace.d[1], sse_trace.d[0]); \
00280 __asm__ __volatile__ (#op " %0, %%" #reg \
00281 : \
00282 : "X" (mem)); \
00283 __asm__ __volatile__ ("movq %%" #reg ", %0" \
00284 : "=X" (sse_trace) \
00285 : ); \
00286 fprintf(stderr, #reg "=0x%08x%08x\n", \
00287 sse_trace.d[1], sse_trace.d[0]); \
00288 }
00289
00290
#define sse_r2m(op, reg, mem) \
00291
{ \
00292
sse_t sse_trace; \
00293
__asm__ __volatile__ ("movq %%" #reg ", %0" \
00294
: "=X" (sse_trace) \
00295
: ); \
00296 fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \
00297 sse_trace.d[1], sse_trace.d[0]); \
00298 sse_trace = (mem); \
00299 fprintf(stderr, #mem "=0x%08x%08x) => ", \
00300 sse_trace.d[1], sse_trace.d[0]); \
00301 __asm__ __volatile__ (#op " %%" #reg ", %0" \
00302 : "=X" (mem) \
00303 : ); \
00304 sse_trace = (mem); \
00305 fprintf(stderr, #mem "=0x%08x%08x\n", \
00306 sse_trace.d[1], sse_trace.d[0]); \
00307 }
00308
00309
#define sse_r2r(op, regs, regd) \
00310
{ \
00311
sse_t sse_trace; \
00312
__asm__ __volatile__ ("movq %%" #regs ", %0" \
00313
: "=X" (sse_trace) \
00314
: ); \
00315 fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \
00316 sse_trace.d[1], sse_trace.d[0]); \
00317 __asm__ __volatile__ ("movq %%" #regd ", %0" \
00318 : "=X" (sse_trace) \
00319 : ); \
00320 fprintf(stderr, #regd "=0x%08x%08x) => ", \
00321 sse_trace.d[1], sse_trace.d[0]); \
00322 __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
00323 __asm__ __volatile__ ("movq %%" #regd ", %0" \
00324 : "=X" (sse_trace) \
00325 : ); \
00326 fprintf(stderr, #regd "=0x%08x%08x\n", \
00327 sse_trace.d[1], sse_trace.d[0]); \
00328 }
00329
00330
#define sse_m2m(op, mems, memd) \
00331
{ \
00332
sse_t sse_trace; \
00333
sse_trace = (mems); \
00334
fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \
00335
sse_trace.d[1], sse_trace.d[0]); \
00336
sse_trace = (memd); \
00337
fprintf(stderr, #memd "=0x%08x%08x) => ", \
00338
sse_trace.d[1], sse_trace.d[0]); \
00339
__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
00340
#op " %1, %%mm0\n\t" \
00341
"movq %%mm0, %0" \
00342
: "=X" (memd) \
00343
: "X" (mems)); \
00344
sse_trace = (memd); \
00345
fprintf(stderr, #memd "=0x%08x%08x\n", \
00346
sse_trace.d[1], sse_trace.d[0]); \
00347
}
00348
00349
#else
00350
00351
00352
00353
/*
 * Generic single-instruction helpers (non-tracing build).
 *
 * `op` is the instruction mnemonic and register arguments are bare
 * register names (xmm0, mm1, eax, ...); both are pasted into the
 * assembler template.  The macros do not tell the compiler which
 * registers they modify.
 *
 * The operand constraints are "i" for immediates and "m" for memory.
 * The former "X" constraint accepts any operand whatsoever, which allows
 * the compiler to substitute a register where the instruction needs an
 * immediate or a memory reference.
 */

/* op $imm, %reg -- imm must be a compile-time constant. */
#define	sse_i2r(op, imm, reg) \
	__asm__ __volatile__ (#op " %0, %%" #reg \
			      : /* no outputs */ \
			      : "i" (imm))

/* op mem, %reg -- mem must be an lvalue. */
#define	sse_m2r(op, mem, reg) \
	__asm__ __volatile__ (#op " %0, %%" #reg \
			      : /* no outputs */ \
			      : "m" (mem))

/* op %reg, mem -- mem must be an lvalue. */
#define	sse_r2m(op, reg, mem) \
	__asm__ __volatile__ (#op " %%" #reg ", %0" \
			      : "=m" (mem) \
			      : /* no inputs */ )

/* op %regs, %regd */
#define	sse_r2r(op, regs, regd) \
	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
00371
/*
 * op $imm, %regs, %regd
 *
 * AT&T syntax places the immediate first; the previous template emitted
 * it last, which the assembler rejects.  imm must be a compile-time
 * constant.
 */
#define	sse_r2ri(op, regs, regd, imm) \
	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
			      : /* no outputs */ \
			      : "i" (imm))
00376
00377
/*
 * memd = memd <op> mems, computed in %xmm0:
 *	movups memd, %xmm0 ; op mems, %xmm0 ; movups %xmm0, memd
 *
 * memd is read and written, hence the "+m" constraint (it used to be
 * declared output-only although the first instruction loads it).
 * `xmmreg` is accepted for interface compatibility but, as before, the
 * scratch register is always %xmm0.
 */
#define	sse_m2m(op, mems, memd, xmmreg) \
	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
			      #op " %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "+m" (memd) \
			      : "m" (mems))
00384
/*
 * op $subop, mem, %reg
 *
 * `subop` is pasted into the template as text, so it must be a literal
 * constant.  The previous template emitted it last and without the `$`
 * prefix, which is not valid AT&T syntax.
 */
#define	sse_m2ri(op, mem, reg, subop) \
	__asm__ __volatile__ (#op " $" #subop ", %0, %%" #reg \
			      : /* no outputs */ \
			      : "m" (mem))
00389
/*
 * memd = memd <op, subop> mems, computed in %xmm0:
 *	movups memd, %xmm0 ; op $subop, mems, %xmm0 ; movups %xmm0, memd
 *
 * `subop` is pasted into the template as text, so it must be a literal
 * constant.  `xmmreg` is accepted for interface compatibility; the
 * scratch register is always %xmm0.
 *
 * Fixes: the immediate now comes first (AT&T order), the final store
 * names %xmm0 instead of %mm0, and memd is declared read-write.
 */
#define	sse_m2mi(op, mems, memd, xmmreg, subop) \
	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
			      #op " $" #subop ", %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "+m" (memd) \
			      : "m" (mems))
00396
#endif
00397
00398
00399
00400
00401
00402
/*
 * Data movement wrappers.
 *
 * The *_m2r / *_r2m / *_r2r forms expand to the generic sse_*() helpers.
 * The two-argument forms copy memory to memory through %xmm0; they used
 * to name %mm0, which these instructions do not accept.
 */

/* movaps: four packed floats, aligned. */
#define	movaps_m2r(var, reg)	sse_m2r(movaps, var, reg)
#define	movaps_r2m(reg, var)	sse_r2m(movaps, reg, var)
#define	movaps_r2r(regs, regd)	sse_r2r(movaps, regs, regd)
#define	movaps(vars, vard) \
	__asm__ __volatile__ ("movaps %1, %%xmm0\n\t" \
			      "movaps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars))

/* movntps: store an XMM register to memory. */
#define	movntps_r2m(xmmreg, var)	sse_r2m(movntps, xmmreg, var)

/* movntq: store an MMX register to memory. */
#define	movntq_r2m(mmreg, var)	sse_r2m(movntq, mmreg, var)

/* movups: four packed floats, unaligned. */
#define	movups_m2r(var, reg)	sse_m2r(movups, var, reg)
#define	movups_r2m(reg, var)	sse_r2m(movups, reg, var)
#define	movups_r2r(regs, regd)	sse_r2r(movups, regs, regd)
#define	movups(vars, vard) \
	__asm__ __volatile__ ("movups %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars))

/* movhlps / movlhps: register-to-register half moves. */
#define	movhlps_r2r(regs, regd)	sse_r2r(movhlps, regs, regd)

#define	movlhps_r2r(regs, regd)	sse_r2r(movlhps, regs, regd)

/* movhps: high half of an XMM register. */
#define	movhps_m2r(var, reg)	sse_m2r(movhps, var, reg)
#define	movhps_r2m(reg, var)	sse_r2m(movhps, reg, var)
#define	movhps(vars, vard) \
	__asm__ __volatile__ ("movhps %1, %%xmm0\n\t" \
			      "movhps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars))

/* movlps: low half of an XMM register. */
#define	movlps_m2r(var, reg)	sse_m2r(movlps, var, reg)
#define	movlps_r2m(reg, var)	sse_r2m(movlps, reg, var)
#define	movlps(vars, vard) \
	__asm__ __volatile__ ("movlps %1, %%xmm0\n\t" \
			      "movlps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars))

/* movss: single scalar float. */
#define	movss_m2r(var, reg)	sse_m2r(movss, var, reg)
#define	movss_r2m(reg, var)	sse_r2m(movss, reg, var)
#define	movss_r2r(regs, regd)	sse_r2r(movss, regs, regd)
#define	movss(vars, vard) \
	__asm__ __volatile__ ("movss %1, %%xmm0\n\t" \
			      "movss %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars))
00482
00483
00484
00485
/*
 * Shuffle, conversion and word insert/extract wrappers.
 *
 * Fixes relative to the previous definitions:
 *  - cvtps2pi_r2r() and cvttps2pi_r2r() passed their registers to
 *    sse_r2r() in swapped order, making the MMX register the source;
 *  - cvttss2si_m2r() and cvttss2si_r2r() emitted cvtss2si.
 */

/* pshufw: `index` is the immediate shuffle selector. */
#define	pshufw_m2r(var, reg, index)	sse_m2ri(pshufw, var, reg, index)
#define	pshufw_r2r(regs, regd, index)	sse_r2ri(pshufw, regs, regd, index)

/* shufps: `index` is the immediate shuffle selector. */
#define	shufps_m2r(var, reg, index)	sse_m2ri(shufps, var, reg, index)
#define	shufps_r2r(regs, regd, index)	sse_r2ri(shufps, regs, regd, index)

/* cvtpi2ps: source is memory or an MMX register, destination an XMM register. */
#define	cvtpi2ps_m2r(var, xmmreg)	sse_m2r(cvtpi2ps, var, xmmreg)
#define	cvtpi2ps_r2r(mmreg, xmmreg)	sse_r2r(cvtpi2ps, mmreg, xmmreg)

/* cvtps2pi: source is memory or an XMM register, destination an MMX register. */
#define	cvtps2pi_m2r(var, mmreg)	sse_m2r(cvtps2pi, var, mmreg)
#define	cvtps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvtps2pi, xmmreg, mmreg)

/* cvttps2pi: same operands as cvtps2pi. */
#define	cvttps2pi_m2r(var, mmreg)	sse_m2r(cvttps2pi, var, mmreg)
#define	cvttps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvttps2pi, xmmreg, mmreg)

/* cvtsi2ss: source is memory or a general register, destination an XMM register. */
#define	cvtsi2ss_m2r(var, xmmreg)	sse_m2r(cvtsi2ss, var, xmmreg)
#define	cvtsi2ss_r2r(reg, xmmreg)	sse_r2r(cvtsi2ss, reg, xmmreg)

/* cvtss2si: source is memory or an XMM register, destination a general register. */
#define	cvtss2si_m2r(var, reg)	sse_m2r(cvtss2si, var, reg)
#define	cvtss2si_r2r(xmmreg, reg)	sse_r2r(cvtss2si, xmmreg, reg)

/* cvttss2si: same operands as cvtss2si. */
#define	cvttss2si_m2r(var, reg)	sse_m2r(cvttss2si, var, reg)
#define	cvttss2si_r2r(xmmreg, reg)	sse_r2r(cvttss2si, xmmreg, reg)

/* pextrw: `field` is the immediate word index. */
#define	pextrw_r2r(mmreg, reg, field)	sse_r2ri(pextrw, mmreg, reg, field)

/* pinsrw: `field` is the immediate word index. */
#define	pinsrw_r2r(reg, mmreg, field)	sse_r2ri(pinsrw, reg, mmreg, field)
00540
00541
00542
00543
00544
00545
/*
 * movmskps %xmmreg, %reg
 *
 * The tracing variant logs the call first.  It used to lack the `;`
 * after the asm statement and therefore did not compile.
 */
#ifdef	SSE_TRACE
#define	movmskps(xmmreg, reg) \
	do { \
		fprintf(stderr, "movmskps()\n"); \
		__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg); \
	} while (0)
#else
#define	movmskps(xmmreg, reg) \
	__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
#endif
00555
00556
00557
00558
00559
/*
 * pmovmskb %mmreg, %reg
 *
 * Both variants used to emit (and log) movmskps instead of pmovmskb,
 * and the tracing variant lacked the `;` after the asm statement.
 */
#ifdef	SSE_TRACE
#define	pmovmskb(mmreg, reg) \
	do { \
		fprintf(stderr, "pmovmskb()\n"); \
		__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg); \
	} while (0)
#else
#define	pmovmskb(mmreg, reg) \
	__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
#endif
00569
00570
00571
00572
/*
 * maskmovq %mmregs, %fieldreg
 *
 * This is a two-register instruction; it used to be routed through the
 * four-parameter sse_r2ri() with only three arguments, which cannot
 * expand.
 * NOTE(review): the arguments are emitted in the order given; confirm
 * that callers pass data and mask registers in the order the assembler
 * expects.
 */
#define	maskmovq(mmregs, fieldreg)	sse_r2r(maskmovq, mmregs, fieldreg)
00574
00575
00576
00577
00578
00579
/*
 * Arithmetic wrappers.  For every instruction:
 *	<op>_m2r(var, reg)	  op var, %reg
 *	<op>_r2r(regs, regd)	  op %regs, %regd
 *	<op>(vars, vard, reg)	  memory-to-memory form via sse_m2m()
 */

/* addps / addss */
#define	addps_m2r(var, reg)	sse_m2r(addps, var, reg)
#define	addps_r2r(regs, regd)	sse_r2r(addps, regs, regd)
#define	addps(vars, vard, xmmreg)	sse_m2m(addps, vars, vard, xmmreg)

#define	addss_m2r(var, reg)	sse_m2r(addss, var, reg)
#define	addss_r2r(regs, regd)	sse_r2r(addss, regs, regd)
#define	addss(vars, vard, xmmreg)	sse_m2m(addss, vars, vard, xmmreg)

/* subps / subss */
#define	subps_m2r(var, reg)	sse_m2r(subps, var, reg)
#define	subps_r2r(regs, regd)	sse_r2r(subps, regs, regd)
#define	subps(vars, vard, xmmreg)	sse_m2m(subps, vars, vard, xmmreg)

#define	subss_m2r(var, reg)	sse_m2r(subss, var, reg)
#define	subss_r2r(regs, regd)	sse_r2r(subss, regs, regd)
#define	subss(vars, vard, xmmreg)	sse_m2m(subss, vars, vard, xmmreg)

/* psadbw */
#define	psadbw_m2r(var, reg)	sse_m2r(psadbw, var, reg)
#define	psadbw_r2r(regs, regd)	sse_r2r(psadbw, regs, regd)
#define	psadbw(vars, vard, mmreg)	sse_m2m(psadbw, vars, vard, mmreg)

/* pmulhuw */
#define	pmulhuw_m2r(var, reg)	sse_m2r(pmulhuw, var, reg)
#define	pmulhuw_r2r(regs, regd)	sse_r2r(pmulhuw, regs, regd)
#define	pmulhuw(vars, vard, mmreg)	sse_m2m(pmulhuw, vars, vard, mmreg)

/* mulps / mulss */
#define	mulps_m2r(var, reg)	sse_m2r(mulps, var, reg)
#define	mulps_r2r(regs, regd)	sse_r2r(mulps, regs, regd)
#define	mulps(vars, vard, xmmreg)	sse_m2m(mulps, vars, vard, xmmreg)

#define	mulss_m2r(var, reg)	sse_m2r(mulss, var, reg)
#define	mulss_r2r(regs, regd)	sse_r2r(mulss, regs, regd)
#define	mulss(vars, vard, xmmreg)	sse_m2m(mulss, vars, vard, xmmreg)

/* divps / divss */
#define	divps_m2r(var, reg)	sse_m2r(divps, var, reg)
#define	divps_r2r(regs, regd)	sse_r2r(divps, regs, regd)
#define	divps(vars, vard, xmmreg)	sse_m2m(divps, vars, vard, xmmreg)

#define	divss_m2r(var, reg)	sse_m2r(divss, var, reg)
#define	divss_r2r(regs, regd)	sse_r2r(divss, regs, regd)
#define	divss(vars, vard, xmmreg)	sse_m2m(divss, vars, vard, xmmreg)

/* rcpps / rcpss */
#define	rcpps_m2r(var, reg)	sse_m2r(rcpps, var, reg)
#define	rcpps_r2r(regs, regd)	sse_r2r(rcpps, regs, regd)
#define	rcpps(vars, vard, xmmreg)	sse_m2m(rcpps, vars, vard, xmmreg)

#define	rcpss_m2r(var, reg)	sse_m2r(rcpss, var, reg)
#define	rcpss_r2r(regs, regd)	sse_r2r(rcpss, regs, regd)
#define	rcpss(vars, vard, xmmreg)	sse_m2m(rcpss, vars, vard, xmmreg)

/* rsqrtps / rsqrtss */
#define	rsqrtps_m2r(var, reg)	sse_m2r(rsqrtps, var, reg)
#define	rsqrtps_r2r(regs, regd)	sse_r2r(rsqrtps, regs, regd)
#define	rsqrtps(vars, vard, xmmreg)	sse_m2m(rsqrtps, vars, vard, xmmreg)

#define	rsqrtss_m2r(var, reg)	sse_m2r(rsqrtss, var, reg)
#define	rsqrtss_r2r(regs, regd)	sse_r2r(rsqrtss, regs, regd)
#define	rsqrtss(vars, vard, xmmreg)	sse_m2m(rsqrtss, vars, vard, xmmreg)

/* sqrtps / sqrtss */
#define	sqrtps_m2r(var, reg)	sse_m2r(sqrtps, var, reg)
#define	sqrtps_r2r(regs, regd)	sse_r2r(sqrtps, regs, regd)
#define	sqrtps(vars, vard, xmmreg)	sse_m2m(sqrtps, vars, vard, xmmreg)

#define	sqrtss_m2r(var, reg)	sse_m2r(sqrtss, var, reg)
#define	sqrtss_r2r(regs, regd)	sse_r2r(sqrtss, regs, regd)
#define	sqrtss(vars, vard, xmmreg)	sse_m2m(sqrtss, vars, vard, xmmreg)
00688
00689
00690
00691
/*
 * Average, logical and min/max wrappers.  For every instruction:
 *	<op>_m2r(var, reg)	  op var, %reg
 *	<op>_r2r(regs, regd)	  op %regs, %regd
 *	<op>(vars, vard, reg)	  memory-to-memory form via sse_m2m()
 */

/* pavgb / pavgw */
#define	pavgb_m2r(var, reg)	sse_m2r(pavgb, var, reg)
#define	pavgb_r2r(regs, regd)	sse_r2r(pavgb, regs, regd)
#define	pavgb(vars, vard, mmreg)	sse_m2m(pavgb, vars, vard, mmreg)

#define	pavgw_m2r(var, reg)	sse_m2r(pavgw, var, reg)
#define	pavgw_r2r(regs, regd)	sse_r2r(pavgw, regs, regd)
#define	pavgw(vars, vard, mmreg)	sse_m2m(pavgw, vars, vard, mmreg)

/* andps */
#define	andps_m2r(var, reg)	sse_m2r(andps, var, reg)
#define	andps_r2r(regs, regd)	sse_r2r(andps, regs, regd)
#define	andps(vars, vard, xmmreg)	sse_m2m(andps, vars, vard, xmmreg)

/* andnps */
#define	andnps_m2r(var, reg)	sse_m2r(andnps, var, reg)
#define	andnps_r2r(regs, regd)	sse_r2r(andnps, regs, regd)
#define	andnps(vars, vard, xmmreg)	sse_m2m(andnps, vars, vard, xmmreg)

/* orps */
#define	orps_m2r(var, reg)	sse_m2r(orps, var, reg)
#define	orps_r2r(regs, regd)	sse_r2r(orps, regs, regd)
#define	orps(vars, vard, xmmreg)	sse_m2m(orps, vars, vard, xmmreg)

/* xorps */
#define	xorps_m2r(var, reg)	sse_m2r(xorps, var, reg)
#define	xorps_r2r(regs, regd)	sse_r2r(xorps, regs, regd)
#define	xorps(vars, vard, xmmreg)	sse_m2m(xorps, vars, vard, xmmreg)

/* pmaxub / pmaxsw / maxps / maxss */
#define	pmaxub_m2r(var, reg)	sse_m2r(pmaxub, var, reg)
#define	pmaxub_r2r(regs, regd)	sse_r2r(pmaxub, regs, regd)
#define	pmaxub(vars, vard, mmreg)	sse_m2m(pmaxub, vars, vard, mmreg)

#define	pmaxsw_m2r(var, reg)	sse_m2r(pmaxsw, var, reg)
#define	pmaxsw_r2r(regs, regd)	sse_r2r(pmaxsw, regs, regd)
#define	pmaxsw(vars, vard, mmreg)	sse_m2m(pmaxsw, vars, vard, mmreg)

#define	maxps_m2r(var, reg)	sse_m2r(maxps, var, reg)
#define	maxps_r2r(regs, regd)	sse_r2r(maxps, regs, regd)
#define	maxps(vars, vard, xmmreg)	sse_m2m(maxps, vars, vard, xmmreg)

#define	maxss_m2r(var, reg)	sse_m2r(maxss, var, reg)
#define	maxss_r2r(regs, regd)	sse_r2r(maxss, regs, regd)
#define	maxss(vars, vard, xmmreg)	sse_m2m(maxss, vars, vard, xmmreg)

/* pminub / pminsw / minps / minss */
#define	pminub_m2r(var, reg)	sse_m2r(pminub, var, reg)
#define	pminub_r2r(regs, regd)	sse_r2r(pminub, regs, regd)
#define	pminub(vars, vard, mmreg)	sse_m2m(pminub, vars, vard, mmreg)

#define	pminsw_m2r(var, reg)	sse_m2r(pminsw, var, reg)
#define	pminsw_r2r(regs, regd)	sse_r2r(pminsw, regs, regd)
#define	pminsw(vars, vard, mmreg)	sse_m2m(pminsw, vars, vard, mmreg)

#define	minps_m2r(var, reg)	sse_m2r(minps, var, reg)
#define	minps_r2r(regs, regd)	sse_r2r(minps, regs, regd)
#define	minps(vars, vard, xmmreg)	sse_m2m(minps, vars, vard, xmmreg)

#define	minss_m2r(var, reg)	sse_m2r(minss, var, reg)
#define	minss_r2r(regs, regd)	sse_r2r(minss, regs, regd)
#define	minss(vars, vard, xmmreg)	sse_m2m(minss, vars, vard, xmmreg)
00771
00772
00773
00774
00775
/*
 * Compare wrappers.
 *
 * cmpps and cmpss take the comparison predicate as an immediate `op`.
 * The named forms below pass a fixed predicate:
 *	eq = 0, lt = 1, le = 2, unord = 3, neq = 4, nlt = 5, nle = 6, ord = 7
 */

/* cmpps: packed compare. */
#define	cmpps_m2r(var, reg, op)	sse_m2ri(cmpps, var, reg, op)
#define	cmpps_r2r(regs, regd, op)	sse_r2ri(cmpps, regs, regd, op)
#define	cmpps(vars, vard, op, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, op)

#define	cmpeqps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 0)
#define	cmpeqps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 0)
#define	cmpeqps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 0)

#define	cmpltps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 1)
#define	cmpltps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 1)
#define	cmpltps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 1)

#define	cmpleps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 2)
#define	cmpleps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 2)
#define	cmpleps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 2)

#define	cmpunordps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 3)
#define	cmpunordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 3)
#define	cmpunordps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 3)

#define	cmpneqps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 4)
#define	cmpneqps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 4)
#define	cmpneqps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 4)

#define	cmpnltps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 5)
#define	cmpnltps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 5)
#define	cmpnltps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 5)

#define	cmpnleps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 6)
#define	cmpnleps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 6)
#define	cmpnleps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 6)

#define	cmpordps_m2r(var, reg)	sse_m2ri(cmpps, var, reg, 7)
#define	cmpordps_r2r(regs, regd)	sse_r2ri(cmpps, regs, regd, 7)
#define	cmpordps(vars, vard, xmmreg)	sse_m2mi(cmpps, vars, vard, xmmreg, 7)

/* cmpss: scalar compare. */
#define	cmpss_m2r(var, reg, op)	sse_m2ri(cmpss, var, reg, op)
#define	cmpss_r2r(regs, regd, op)	sse_r2ri(cmpss, regs, regd, op)
#define	cmpss(vars, vard, op, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, op)

#define	cmpeqss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 0)
#define	cmpeqss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 0)
#define	cmpeqss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 0)

#define	cmpltss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 1)
#define	cmpltss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 1)
#define	cmpltss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 1)

#define	cmpless_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 2)
#define	cmpless_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 2)
#define	cmpless(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 2)

#define	cmpunordss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 3)
#define	cmpunordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 3)
#define	cmpunordss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 3)

#define	cmpneqss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 4)
#define	cmpneqss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 4)
#define	cmpneqss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 4)

#define	cmpnltss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 5)
#define	cmpnltss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 5)
#define	cmpnltss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 5)

#define	cmpnless_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 6)
#define	cmpnless_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 6)
#define	cmpnless(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 6)

#define	cmpordss_m2r(var, reg)	sse_m2ri(cmpss, var, reg, 7)
#define	cmpordss_r2r(regs, regd)	sse_r2ri(cmpss, regs, regd, 7)
#define	cmpordss(vars, vard, xmmreg)	sse_m2mi(cmpss, vars, vard, xmmreg, 7)
00851
00852
00853
00854
00855
/*
 * Scalar compare (comiss / ucomiss) and unpack wrappers, built on the
 * generic sse_m2r(), sse_r2r() and sse_m2m() helpers.
 */

/* comiss */
#define	comiss_m2r(var, reg)	sse_m2r(comiss, var, reg)
#define	comiss_r2r(regs, regd)	sse_r2r(comiss, regs, regd)
#define	comiss(vars, vard, xmmreg)	sse_m2m(comiss, vars, vard, xmmreg)

/* ucomiss */
#define	ucomiss_m2r(var, reg)	sse_m2r(ucomiss, var, reg)
#define	ucomiss_r2r(regs, regd)	sse_r2r(ucomiss, regs, regd)
#define	ucomiss(vars, vard, xmmreg)	sse_m2m(ucomiss, vars, vard, xmmreg)

/* unpcklps */
#define	unpcklps_m2r(var, reg)	sse_m2r(unpcklps, var, reg)
#define	unpcklps_r2r(regs, regd)	sse_r2r(unpcklps, regs, regd)

/* unpckhps */
#define	unpckhps_m2r(var, reg)	sse_m2r(unpckhps, var, reg)
#define	unpckhps_r2r(regs, regd)	sse_r2r(unpckhps, regs, regd)
00883
00884
00885
00886
00887
00888
/*
 * fxrstor mem
 *
 * `mem` must be an lvalue naming the save area; it is only read.  The
 * tracing variant logs the call first; it used to lack the `;` after the
 * asm statement and therefore did not compile.
 */
#ifdef	SSE_TRACE
#define	fxrstor(mem) \
	do { \
		fprintf(stderr, "fxrstor()\n"); \
		__asm__ __volatile__ ("fxrstor %0" \
				      : /* no outputs */ \
				      : "m" (mem)); \
	} while (0)
#else
#define	fxrstor(mem) \
	__asm__ __volatile__ ("fxrstor %0" \
			      : /* no outputs */ \
			      : "m" (mem))
#endif
00902
00903
00904
00905
00906
/*
 * fxsave mem
 *
 * The instruction writes `mem`, so it is declared as an output operand;
 * it used to be an input, which lets the compiler assume the object is
 * unchanged.  `mem` must be an lvalue naming the whole save area.
 * The tracing variant logs the call first; it used to lack the `;` after
 * the asm statement.
 */
#ifdef	SSE_TRACE
#define	fxsave(mem) \
	do { \
		fprintf(stderr, "fxsave()\n"); \
		__asm__ __volatile__ ("fxsave %0" \
				      : "=m" (mem) \
				      : /* no inputs */ ); \
	} while (0)
#else
#define	fxsave(mem) \
	__asm__ __volatile__ ("fxsave %0" \
			      : "=m" (mem) \
			      : /* no inputs */ )
#endif
00920
00921
00922
00923
00924
/*
 * stmxcsr mem
 *
 * The instruction writes `mem`, so it is declared as an output operand;
 * it used to be an input.  `mem` must be an lvalue.
 * The tracing variant logs the call first; it used to lack the `;` after
 * the asm statement.
 */
#ifdef	SSE_TRACE
#define	stmxcsr(mem) \
	do { \
		fprintf(stderr, "stmxcsr()\n"); \
		__asm__ __volatile__ ("stmxcsr %0" \
				      : "=m" (mem) \
				      : /* no inputs */ ); \
	} while (0)
#else
#define	stmxcsr(mem) \
	__asm__ __volatile__ ("stmxcsr %0" \
			      : "=m" (mem) \
			      : /* no inputs */ )
#endif
00938
00939
00940
00941
00942
/*
 * ldmxcsr mem
 *
 * `mem` must be an lvalue; it is only read.  The tracing variant logs
 * the call first; it used to lack the `;` after the asm statement and
 * therefore did not compile.
 */
#ifdef	SSE_TRACE
#define	ldmxcsr(mem) \
	do { \
		fprintf(stderr, "ldmxcsr()\n"); \
		__asm__ __volatile__ ("ldmxcsr %0" \
				      : /* no outputs */ \
				      : "m" (mem)); \
	} while (0)
#else
#define	ldmxcsr(mem) \
	__asm__ __volatile__ ("ldmxcsr %0" \
			      : /* no outputs */ \
			      : "m" (mem))
#endif
00956
00957
00958
00959
00960
00961
/*
 * sfence
 *
 * The tracing variant logs the call first; it used to lack the `;` after
 * the asm statement and therefore did not compile.
 */
#ifdef	SSE_TRACE
#define	sfence() \
	do { \
		fprintf(stderr, "sfence()\n"); \
		__asm__ __volatile__ ("sfence\n\t"); \
	} while (0)
#else
#define	sfence() \
	__asm__ __volatile__ ("sfence\n\t")
#endif
00971
00972
00973
00974
00975
00976
00977
00978
00979
/*
 * prefetch<hint> mem
 *
 * `hint` is pasted onto the mnemonic (t0, t1, t2 or nta) and `mem` must
 * be an lvalue.  The macros used to be defined only when SSE_TRACE was
 * not set, so a tracing build lost them entirely; the tracing variant
 * now logs the call and then issues the same instruction.
 */
#ifdef	SSE_TRACE
#define	prefetch(mem, hint) \
	do { \
		fprintf(stderr, "prefetch" #hint "()\n"); \
		__asm__ __volatile__ ("prefetch" #hint " %0" \
				      : /* no outputs */ \
				      : "m" (mem)); \
	} while (0)
#else
#define	prefetch(mem, hint) \
	__asm__ __volatile__ ("prefetch" #hint " %0" \
			      : /* no outputs */ \
			      : "m" (mem))
#endif

#define	prefetcht0(mem)		prefetch(mem, t0)
#define	prefetcht1(mem)		prefetch(mem, t1)
#define	prefetcht2(mem)		prefetch(mem, t2)
#define	prefetchnta(mem)	prefetch(mem, nta)
00991
00992
00993
00994
00995
00996
#endif
00997
Generated on Tue Aug 17 16:06:40 2004 for PLearn by
1.3.7