1/* Cell BEA specific SPU intrinsics to PPU/VMX intrinsics 2 Copyright (C) 2007-2020 Free Software Foundation, Inc. 3 4 This file is free software; you can redistribute it and/or modify it under 5 the terms of the GNU General Public License as published by the Free 6 Software Foundation; either version 3 of the License, or (at your option) 7 any later version. 8 9 This file is distributed in the hope that it will be useful, but WITHOUT 10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 for more details. 13 14 Under Section 7 of GPL version 3, you are granted additional 15 permissions described in the GCC Runtime Library Exception, version 16 3.1, as published by the Free Software Foundation. 17 18 You should have received a copy of the GNU General Public License and 19 a copy of the GCC Runtime Library Exception along with this program; 20 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 21 <http://www.gnu.org/licenses/>. */ 22 23#ifndef _SI2VMX_H_ 24#define _SI2VMX_H_ 1 25 26#ifndef __SPU__ 27 28#include <stdlib.h> 29#include <vec_types.h> 30 31 32/* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics. 33 * Users can override the action by defining it prior to including this 34 * header file. 35 */ 36#ifndef SPU_HALT_ACTION 37#define SPU_HALT_ACTION abort() 38#endif 39 40/* Specify a default stop action for the spu_stop intrinsic. 41 * Users can override the action by defining it prior to including this 42 * header file. 43 */ 44#ifndef SPU_STOP_ACTION 45#define SPU_STOP_ACTION abort() 46#endif 47 48 49/* Specify a default action for unsupported intrinsic. 50 * Users can override the action by defining it prior to including this 51 * header file. 52 */ 53#ifndef SPU_UNSUPPORTED_ACTION 54#define SPU_UNSUPPORTED_ACTION abort() 55#endif 56 57 58/* Casting intrinsics - from scalar to quadword 59 */ 60 61static __inline qword si_from_uchar(unsigned char c) { 62 union { 63 qword q; 64 unsigned char c[16]; 65 } x; 66 x.c[3] = c; 67 return (x.q); 68} 69 70static __inline qword si_from_char(signed char c) { 71 union { 72 qword q; 73 signed char c[16]; 74 } x; 75 x.c[3] = c; 76 return (x.q); 77} 78 79static __inline qword si_from_ushort(unsigned short s) { 80 union { 81 qword q; 82 unsigned short s[8]; 83 } x; 84 x.s[1] = s; 85 return (x.q); 86} 87 88static __inline qword si_from_short(short s) { 89 union { 90 qword q; 91 short s[8]; 92 } x; 93 x.s[1] = s; 94 return (x.q); 95} 96 97 98static __inline qword si_from_uint(unsigned int i) { 99 union { 100 qword q; 101 unsigned int i[4]; 102 } x; 103 x.i[0] = i; 104 return (x.q); 105} 106 107static __inline qword si_from_int(int i) { 108 union { 109 qword q; 110 int i[4]; 111 } x; 112 x.i[0] = i; 113 return (x.q); 114} 115 116static __inline qword si_from_ullong(unsigned long long l) { 117 union { 118 qword q; 119 unsigned long long l[2]; 120 } x; 121 x.l[0] = l; 122 return (x.q); 123} 124 125static __inline qword si_from_llong(long long l) { 126 union { 127 qword q; 128 long long l[2]; 129 } x; 130 x.l[0] = l; 131 return (x.q); 132} 133 134static __inline qword si_from_float(float f) { 135 union { 136 qword q; 137 float f[4]; 138 } x; 139 x.f[0] = f; 140 return (x.q); 141} 142 143static __inline qword si_from_double(double d) { 144 union { 145 qword q; 146 double d[2]; 147 } x; 148 x.d[0] = d; 149 return (x.q); 150} 151 152static __inline qword si_from_ptr(void *ptr) { 153 union { 154 qword q; 155 void *p; 156 } x; 157 x.p = ptr; 158 return (x.q); 159} 160 161 162/* Casting intrinsics - from quadword to scalar 163 */ 164static __inline unsigned char si_to_uchar(qword q) { 165 union { 166 qword q; 167 unsigned char c[16]; 168 } x; 169 x.q = q; 170 return (x.c[3]); 171} 172 173static __inline signed char si_to_char(qword q) { 174 union { 175 qword q; 176 signed char c[16]; 177 } x; 178 x.q = q; 179 return (x.c[3]); 180} 181 182static __inline unsigned short si_to_ushort(qword q) { 183 union { 184 qword q; 185 unsigned short s[8]; 186 } x; 187 x.q = q; 188 return (x.s[1]); 189} 190 191static __inline short si_to_short(qword q) { 192 union { 193 qword q; 194 short s[8]; 195 } x; 196 x.q = q; 197 return (x.s[1]); 198} 199 200static __inline unsigned int si_to_uint(qword q) { 201 union { 202 qword q; 203 unsigned int i[4]; 204 } x; 205 x.q = q; 206 return (x.i[0]); 207} 208 209static __inline int si_to_int(qword q) { 210 union { 211 qword q; 212 int i[4]; 213 } x; 214 x.q = q; 215 return (x.i[0]); 216} 217 218static __inline unsigned long long si_to_ullong(qword q) { 219 union { 220 qword q; 221 unsigned long long l[2]; 222 } x; 223 x.q = q; 224 return (x.l[0]); 225} 226 227static __inline long long si_to_llong(qword q) { 228 union { 229 qword q; 230 long long l[2]; 231 } x; 232 x.q = q; 233 return (x.l[0]); 234} 235 236static __inline float si_to_float(qword q) { 237 union { 238 qword q; 239 float f[4]; 240 } x; 241 x.q = q; 242 return (x.f[0]); 243} 244 245static __inline double si_to_double(qword q) { 246 union { 247 qword q; 248 double d[2]; 249 } x; 250 x.q = q; 251 return (x.d[0]); 252} 253 254static __inline void * si_to_ptr(qword q) { 255 union { 256 qword q; 257 void *p; 258 } x; 259 x.q = q; 260 return (x.p); 261} 262 263 264/* Absolute difference 265 */ 266static __inline qword si_absdb(qword a, qword b) 267{ 268 vec_uchar16 ac, bc, dc; 269 270 ac = (vec_uchar16)(a); 271 bc = (vec_uchar16)(b); 272 dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc)); 273 274 return ((qword)(dc)); 275} 276 277/* Add intrinsics 278 */ 279#define si_a(_a, _b) ((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b)))) 280 281#define si_ah(_a, _b) ((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b)))) 282 283static __inline qword si_ai(qword a, int b) 284{ 285 return ((qword)(vec_add((vec_int4)(a), 286 vec_splat((vec_int4)(si_from_int(b)), 0)))); 287} 288 289 290static __inline qword si_ahi(qword a, short b) 291{ 292 return ((qword)(vec_add((vec_short8)(a), 293 vec_splat((vec_short8)(si_from_short(b)), 1)))); 294} 295 296 297#define si_fa(_a, _b) ((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b)))) 298 299 300static __inline qword si_dfa(qword a, qword b) 301{ 302 union { 303 vec_double2 v; 304 double d[2]; 305 } ad, bd, dd; 306 307 ad.v = (vec_double2)(a); 308 bd.v = (vec_double2)(b); 309 dd.d[0] = ad.d[0] + bd.d[0]; 310 dd.d[1] = ad.d[1] + bd.d[1]; 311 312 return ((qword)(dd.v)); 313} 314 315/* Add word extended 316 */ 317#define si_addx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \ 318 vec_and((vec_uint4)(_c), vec_splat_u32(1))))) 319 320 321/* Bit-wise AND 322 */ 323#define si_and(_a, _b) ((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b)))) 324 325 326static __inline qword si_andbi(qword a, signed char b) 327{ 328 return ((qword)(vec_and((vec_char16)(a), 329 vec_splat((vec_char16)(si_from_char(b)), 3)))); 330} 331 332static __inline qword si_andhi(qword a, signed short b) 333{ 334 return ((qword)(vec_and((vec_short8)(a), 335 vec_splat((vec_short8)(si_from_short(b)), 1)))); 336} 337 338 339static __inline qword si_andi(qword a, signed int b) 340{ 341 return ((qword)(vec_and((vec_int4)(a), 342 vec_splat((vec_int4)(si_from_int(b)), 0)))); 343} 344 345 346/* Bit-wise AND with complement 347 */ 348#define si_andc(_a, _b) ((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b)))) 349 350 351/* Average byte vectors 352 */ 353#define si_avgb(_a, _b) ((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b)))) 354 355 356/* Branch indirect and set link on external data 357 */ 358#define si_bisled(_func) /* not mappable */ 359#define si_bisledd(_func) /* not mappable */ 360#define si_bislede(_func) /* not mappable */ 361 362 363/* Borrow generate 364 */ 365#define si_bg(_a, _b) ((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a)))) 366 367#define si_bgx(_a, _b, _c) ((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)), \ 368 vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)), \ 369 (vec_uint4)(_c))), vec_splat_u32(1)))) 370 371/* Compare absolute equal 372 */ 373static __inline qword si_fcmeq(qword a, qword b) 374{ 375 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000}); 376 377 return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb), 378 vec_andc((vec_float4)(b), msb)))); 379} 380 381static __inline qword si_dfcmeq(qword a, qword b) 382{ 383 vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; 384 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 }; 385 vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}; 386 387 vec_uint4 biteq; 388 vec_uint4 aabs; 389 vec_uint4 babs; 390 vec_uint4 a_gt; 391 vec_uint4 ahi_inf; 392 vec_uint4 anan; 393 vec_uint4 result; 394 395 union { 396 vec_uchar16 v; 397 int i[4]; 398 } x; 399 400 /* Shift 4 bytes */ 401 x.i[3] = 4 << 3; 402 403 /* Mask out sign bits */ 404 aabs = vec_and((vec_uint4)a,sign_mask); 405 babs = vec_and((vec_uint4)b,sign_mask); 406 407 /* A) Check for bit equality, store in high word */ 408 biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs); 409 biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v)); 410 411 /* 412 B) Check if a is NaN, store in high word 413 414 B1) If the high word is greater than max_exp (indicates a NaN) 415 B2) If the low word is greater than 0 416 */ 417 a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask); 418 419 /* B3) Check if the high word is equal to the inf exponent */ 420 ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask); 421 422 /* anan = B1[hi] or (B2[lo] and B3[hi]) */ 423 anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf)); 424 425 /* result = A and not B */ 426 result = vec_andc(biteq, anan); 427 428 /* Promote high words to 64 bits and return */ 429 return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote))); 430} 431 432 433/* Compare absolute greater than 434 */ 435static __inline qword si_fcmgt(qword a, qword b) 436{ 437 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000}); 438 439 return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb), 440 vec_andc((vec_float4)(b), msb)))); 441} 442 443static __inline qword si_dfcmgt(qword a, qword b) 444{ 445 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; 446 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; 447 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; 448 449 union { 450 vec_uchar16 v; 451 int i[4]; 452 } x; 453 454 /* Shift 4 bytes */ 455 x.i[3] = 4 << 3; 456 457 // absolute value of a,b 458 vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask); 459 vec_uint4 babs = vec_and((vec_uint4)b, sign_mask); 460 461 // check if a is nan 462 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); 463 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); 464 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); 465 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); 466 467 // check if b is nan 468 vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask); 469 vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask); 470 b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf)); 471 b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi); 472 473 // A) Check if the exponents are different 474 vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs); 475 476 // B) Check if high word equal, and low word greater 477 vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs); 478 vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs); 479 vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v)); 480 481 // If either A or B is true, return true (unless NaNs detected) 482 vec_uint4 r = vec_or(gt_hi, eqgt); 483 484 // splat the high words of the comparison step 485 r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi); 486 487 // correct for NaNs in input 488 return ((qword)vec_andc(r,vec_or(a_nan,b_nan))); 489} 490 491 492/* Compare equal 493 */ 494static __inline qword si_ceqb(qword a, qword b) 495{ 496 return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b)))); 497} 498 499static __inline qword si_ceqh(qword a, qword b) 500{ 501 return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b)))); 502} 503 504static __inline qword si_ceq(qword a, qword b) 505{ 506 return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b)))); 507} 508 509static __inline qword si_fceq(qword a, qword b) 510{ 511 return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b)))); 512} 513 514static __inline qword si_ceqbi(qword a, signed char b) 515{ 516 return ((qword)(vec_cmpeq((vec_char16)(a), 517 vec_splat((vec_char16)(si_from_char(b)), 3)))); 518} 519 520static __inline qword si_ceqhi(qword a, signed short b) 521{ 522 return ((qword)(vec_cmpeq((vec_short8)(a), 523 vec_splat((vec_short8)(si_from_short(b)), 1)))); 524} 525 526static __inline qword si_ceqi(qword a, signed int b) 527{ 528 return ((qword)(vec_cmpeq((vec_int4)(a), 529 vec_splat((vec_int4)(si_from_int(b)), 0)))); 530} 531 532static __inline qword si_dfceq(qword a, qword b) 533{ 534 vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; 535 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 }; 536 vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}; 537 538 vec_uint4 biteq; 539 vec_uint4 aabs; 540 vec_uint4 babs; 541 vec_uint4 a_gt; 542 vec_uint4 ahi_inf; 543 vec_uint4 anan; 544 vec_uint4 iszero; 545 vec_uint4 result; 546 547 union { 548 vec_uchar16 v; 549 int i[4]; 550 } x; 551 552 /* Shift 4 bytes */ 553 x.i[3] = 4 << 3; 554 555 /* A) Check for bit equality, store in high word */ 556 biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b); 557 biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v)); 558 559 /* Mask out sign bits */ 560 aabs = vec_and((vec_uint4)a,sign_mask); 561 babs = vec_and((vec_uint4)b,sign_mask); 562 563 /* 564 B) Check if a is NaN, store in high word 565 566 B1) If the high word is greater than max_exp (indicates a NaN) 567 B2) If the low word is greater than 0 568 */ 569 a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask); 570 571 /* B3) Check if the high word is equal to the inf exponent */ 572 ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask); 573 574 /* anan = B1[hi] or (B2[lo] and B3[hi]) */ 575 anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf)); 576 577 /* C) Check for 0 = -0 special case */ 578 iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0)); 579 iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v)); 580 581 /* result = (A or C) and not B */ 582 result = vec_or(biteq,iszero); 583 result = vec_andc(result, anan); 584 585 /* Promote high words to 64 bits and return */ 586 return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote))); 587} 588 589 590/* Compare greater than 591 */ 592static __inline qword si_cgtb(qword a, qword b) 593{ 594 return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b)))); 595} 596 597static __inline qword si_cgth(qword a, qword b) 598{ 599 return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b)))); 600} 601 602static __inline qword si_cgt(qword a, qword b) 603{ 604 return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b)))); 605} 606 607static __inline qword si_clgtb(qword a, qword b) 608{ 609 return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b)))); 610} 611 612static __inline qword si_clgth(qword a, qword b) 613{ 614 return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b)))); 615} 616 617static __inline qword si_clgt(qword a, qword b) 618{ 619 return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b)))); 620} 621 622static __inline qword si_fcgt(qword a, qword b) 623{ 624 return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b)))); 625} 626 627static __inline qword si_dfcgt(qword a, qword b) 628{ 629 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; 630 vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 }; 631 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; 632 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; 633 634 union { 635 vec_uchar16 v; 636 int i[4]; 637 } x; 638 639 /* Shift 4 bytes */ 640 x.i[3] = 4 << 3; 641 642 // absolute value of a,b 643 vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask); 644 vec_uint4 babs = vec_and((vec_uint4)b, sign_mask); 645 646 // check if a is nan 647 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); 648 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); 649 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); 650 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); 651 652 // check if b is nan 653 vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask); 654 vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask); 655 b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf)); 656 b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi); 657 658 // sign of a 659 vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); 660 asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi); 661 662 // sign of b 663 vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); 664 bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi); 665 666 // negative a 667 vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs); 668 vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7))); 669 abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat)); 670 vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1))); 671 672 // pick the one we want 673 vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel); 674 675 // negative b 676 vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs); 677 bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat)); 678 vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1))); 679 680 // pick the one we want 681 vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel); 682 683 // A) Check if the exponents are different 684 vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval); 685 686 // B) Check if high word equal, and low word greater 687 vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval); 688 vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval); 689 vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v)); 690 691 // If either A or B is true, return true (unless NaNs detected) 692 vec_uint4 r = vec_or(gt_hi, eqgt); 693 694 // splat the high words of the comparison step 695 r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi); 696 697 // correct for NaNs in input 698 return ((qword)vec_andc(r,vec_or(a_nan,b_nan))); 699} 700 701static __inline qword si_cgtbi(qword a, signed char b) 702{ 703 return ((qword)(vec_cmpgt((vec_char16)(a), 704 vec_splat((vec_char16)(si_from_char(b)), 3)))); 705} 706 707static __inline qword si_cgthi(qword a, signed short b) 708{ 709 return ((qword)(vec_cmpgt((vec_short8)(a), 710 vec_splat((vec_short8)(si_from_short(b)), 1)))); 711} 712 713static __inline qword si_cgti(qword a, signed int b) 714{ 715 return ((qword)(vec_cmpgt((vec_int4)(a), 716 vec_splat((vec_int4)(si_from_int(b)), 0)))); 717} 718 719static __inline qword si_clgtbi(qword a, unsigned char b) 720{ 721 return ((qword)(vec_cmpgt((vec_uchar16)(a), 722 vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); 723} 724 725static __inline qword si_clgthi(qword a, unsigned short b) 726{ 727 return ((qword)(vec_cmpgt((vec_ushort8)(a), 728 vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); 729} 730 731static __inline qword si_clgti(qword a, unsigned int b) 732{ 733 return ((qword)(vec_cmpgt((vec_uint4)(a), 734 vec_splat((vec_uint4)(si_from_uint(b)), 0)))); 735} 736 737static __inline qword si_dftsv(qword a, char b) 738{ 739 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; 740 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; 741 vec_uint4 result = (vec_uint4){0}; 742 vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); 743 sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi); 744 vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask); 745 746 union { 747 vec_uchar16 v; 748 int i[4]; 749 } x; 750 751 /* Shift 4 bytes */ 752 x.i[3] = 4 << 3; 753 754 /* Nan or +inf or -inf */ 755 if (b & 0x70) 756 { 757 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; 758 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); 759 /* NaN */ 760 if (b & 0x40) 761 { 762 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); 763 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); 764 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); 765 result = vec_or(result, a_nan); 766 } 767 /* inf */ 768 if (b & 0x30) 769 { 770 a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf); 771 a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi); 772 /* +inf */ 773 if (b & 0x20) 774 result = vec_or(vec_andc(a_inf, sign), result); 775 /* -inf */ 776 if (b & 0x10) 777 result = vec_or(vec_and(a_inf, sign), result); 778 } 779 } 780 /* 0 or denorm */ 781 if (b & 0xF) 782 { 783 vec_uint4 iszero =(vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0)); 784 iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v)); 785 /* denorm */ 786 if (b & 0x3) 787 { 788 vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF}; 789 vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero); 790 isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi); 791 /* +denorm */ 792 if (b & 0x2) 793 result = vec_or(vec_andc(isdenorm, sign), result); 794 /* -denorm */ 795 if (b & 0x1) 796 result = vec_or(vec_and(isdenorm, sign), result); 797 } 798 /* 0 */ 799 if (b & 0xC) 800 { 801 iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi); 802 /* +0 */ 803 if (b & 0x8) 804 result = vec_or(vec_andc(iszero, sign), result); 805 /* -0 */ 806 if (b & 0x4) 807 result = vec_or(vec_and(iszero, sign), result); 808 } 809 } 810 return ((qword)result); 811} 812 813 814/* Carry generate 815 */ 816#define si_cg(_a, _b) ((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)))) 817 818#define si_cgx(_a, _b, _c) ((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)), \ 819 vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \ 820 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))) 821 822 823/* Count ones for bytes 824 */ 825static __inline qword si_cntb(qword a) 826{ 827 vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; 828 vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; 829 vec_uchar16 av; 830 831 av = (vec_uchar16)(a); 832 833 return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av), 834 vec_perm(nib_cnt, nib_cnt, vec_sr (av, four))))); 835} 836 837/* Count ones for bytes 838 */ 839static __inline qword si_clz(qword a) 840{ 841 vec_uchar16 av; 842 vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3; 843 vec_uchar16 four = vec_splat_u8(4); 844 vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; 845 vec_uchar16 eight = vec_splat_u8(8); 846 vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}; 847 vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24}; 848 849 av = (vec_uchar16)(a); 850 851 cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four)); 852 cnt_lo = vec_perm(nib_cnt, nib_cnt, av); 853 854 cnt = vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four))); 855 856 tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight)); 857 tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen)); 858 tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour)); 859 860 cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight))); 861 cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen))); 862 cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour))); 863 864 return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour)))); 865} 866 867/* Convert to float 868 */ 869#define si_cuflt(_a, _b) ((qword)(vec_ctf((vec_uint4)(_a), _b))) 870#define si_csflt(_a, _b) ((qword)(vec_ctf((vec_int4)(_a), _b))) 871 872/* Convert to signed int 873 */ 874#define si_cflts(_a, _b) ((qword)(vec_cts((vec_float4)(_a), _b))) 875 876/* Convert to unsigned int 877 */ 878#define si_cfltu(_a, _b) ((qword)(vec_ctu((vec_float4)(_a), _b))) 879 880/* Synchronize 881 */ 882#define si_dsync() /* do nothing */ 883#define si_sync() /* do nothing */ 884#define si_syncc() /* do nothing */ 885 886 887/* Equivalence 888 */ 889static __inline qword si_eqv(qword a, qword b) 890{ 891 vec_uchar16 d; 892 893 d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b)); 894 return ((qword)(vec_nor(d, d))); 895} 896 897/* Extend 898 */ 899static __inline qword si_xsbh(qword a) 900{ 901 vec_char16 av; 902 903 av = (vec_char16)(a); 904 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15, 905 0, 0, 0, 0, 0, 0, 0, 0}))))); 906} 907 908static __inline qword si_xshw(qword a) 909{ 910 vec_short8 av; 911 912 av = (vec_short8)(a); 913 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7, 914 10,11,14,15, 915 0, 0, 0, 0, 916 0, 0, 0, 0}))))); 917} 918 919static __inline qword si_xswd(qword a) 920{ 921 vec_int4 av; 922 923 av = (vec_int4)(a); 924 return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})), 925 ((vec_uchar16){20, 21, 22, 23, 926 4, 5, 6, 7, 927 28, 29, 30, 31, 928 12, 13, 14, 15})))); 929} 930 931static __inline qword si_fesd(qword a) 932{ 933 union { 934 double d[2]; 935 vec_double2 vd; 936 } out; 937 union { 938 float f[4]; 939 vec_float4 vf; 940 } in; 941 942 in.vf = (vec_float4)(a); 943 out.d[0] = (double)(in.f[0]); 944 out.d[1] = (double)(in.f[2]); 945 return ((qword)(out.vd)); 946} 947 948/* Gather 949 */ 950static __inline qword si_gbb(qword a) 951{ 952 vec_uchar16 bits; 953 vec_uint4 bytes; 954 955 bits = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0, 956 7, 6, 5, 4, 3, 2, 1, 0})); 957 bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0})); 958 959 return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0, 960 0, 0, 0, 0, 0, 0, 0, 0})))); 961} 962 963 964static __inline qword si_gbh(qword a) 965{ 966 vec_ushort8 bits; 967 vec_uint4 bytes; 968 969 bits = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0})); 970 971 bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0}); 972 973 return ((qword)(vec_sld(bytes, bytes, 12))); 974} 975 976static __inline qword si_gb(qword a) 977{ 978 vec_uint4 bits; 979 vec_uint4 bytes; 980 981 bits = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0})); 982 bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0})); 983 return ((qword)(vec_sld(bytes, bytes, 12))); 984} 985 986 987/* Compare and halt 988 */ 989static __inline void si_heq(qword a, qword b) 990{ 991 union { 992 vector unsigned int v; 993 unsigned int i[4]; 994 } aa, bb; 995 996 aa.v = (vector unsigned int)(a); 997 bb.v = (vector unsigned int)(b); 998 999 if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; }; 1000} 1001 1002static __inline void si_heqi(qword a, unsigned int b) 1003{ 1004 union { 1005 vector unsigned int v; 1006 unsigned int i[4]; 1007 } aa; 1008 1009 aa.v = (vector unsigned int)(a); 1010 1011 if (aa.i[0] == b) { SPU_HALT_ACTION; }; 1012} 1013 1014static __inline void si_hgt(qword a, qword b) 1015{ 1016 union { 1017 vector signed int v; 1018 signed int i[4]; 1019 } aa, bb; 1020 1021 aa.v = (vector signed int)(a); 1022 bb.v = (vector signed int)(b); 1023 1024 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; }; 1025} 1026 1027static __inline void si_hgti(qword a, signed int b) 1028{ 1029 union { 1030 vector signed int v; 1031 signed int i[4]; 1032 } aa; 1033 1034 aa.v = (vector signed int)(a); 1035 1036 if (aa.i[0] > b) { SPU_HALT_ACTION; }; 1037} 1038 1039static __inline void si_hlgt(qword a, qword b) 1040{ 1041 union { 1042 vector unsigned int v; 1043 unsigned int i[4]; 1044 } aa, bb; 1045 1046 aa.v = (vector unsigned int)(a); 1047 bb.v = (vector unsigned int)(b); 1048 1049 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; }; 1050} 1051 1052static __inline void si_hlgti(qword a, unsigned int b) 1053{ 1054 union { 1055 vector unsigned int v; 1056 unsigned int i[4]; 1057 } aa; 1058 1059 aa.v = (vector unsigned int)(a); 1060 1061 if (aa.i[0] > b) { SPU_HALT_ACTION; }; 1062} 1063 1064 1065/* Multiply and Add 1066 */ 1067static __inline qword si_mpya(qword a, qword b, qword c) 1068{ 1069 return ((qword)(vec_msum(vec_and((vec_short8)(a), 1070 ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})), 1071 (vec_short8)(b), (vec_int4)(c)))); 1072} 1073 1074static __inline qword si_fma(qword a, qword b, qword c) 1075{ 1076 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c)))); 1077} 1078 1079static __inline qword si_dfma(qword a, qword b, qword c) 1080{ 1081 union { 1082 vec_double2 v; 1083 double d[2]; 1084 } aa, bb, cc, dd; 1085 1086 aa.v = (vec_double2)(a); 1087 bb.v = (vec_double2)(b); 1088 cc.v = (vec_double2)(c); 1089 dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0]; 1090 dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1]; 1091 return ((qword)(dd.v)); 1092} 1093 1094/* Form Mask 1095 */ 1096#define si_fsmbi(_a) si_fsmb(si_from_int(_a)) 1097 1098static __inline qword si_fsmb(qword a) 1099{ 1100 vec_char16 mask; 1101 vec_ushort8 in; 1102 1103 in = (vec_ushort8)(a); 1104 mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2, 1105 3, 3, 3, 3, 3, 3, 3, 3}))); 1106 return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 1107 0, 1, 2, 3, 4, 5, 6, 7})), 1108 vec_splat_u8(7)))); 1109} 1110 1111 1112static __inline qword si_fsmh(qword a) 1113{ 1114 vec_uchar16 in; 1115 vec_short8 mask; 1116 1117 in = (vec_uchar16)(a); 1118 mask = (vec_short8)(vec_splat(in, 3)); 1119 return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})), 1120 vec_splat_u16(15)))); 1121} 1122 1123static __inline qword si_fsm(qword a) 1124{ 1125 vec_uchar16 in; 1126 vec_int4 mask; 1127 1128 in = (vec_uchar16)(a); 1129 mask = (vec_int4)(vec_splat(in, 3)); 1130 return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})), 1131 ((vec_uint4){31,31,31,31})))); 1132} 1133 1134/* Move from/to registers 1135 */ 1136#define si_fscrrd() ((qword)((vec_uint4){0})) 1137#define si_fscrwr(_a) 1138 1139#define si_mfspr(_reg) ((qword)((vec_uint4){0})) 1140#define si_mtspr(_reg, _a) 1141 1142/* Multiply High High Add 1143 */ 1144static __inline qword si_mpyhha(qword a, qword b, qword c) 1145{ 1146 return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c)))); 1147} 1148 1149static __inline qword si_mpyhhau(qword a, qword b, qword c) 1150{ 1151 return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c)))); 1152} 1153 1154/* Multiply Subtract 1155 */ 1156static __inline qword si_fms(qword a, qword b, qword c) 1157{ 1158 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), 1159 vec_sub(((vec_float4){0.0f}), (vec_float4)(c))))); 1160} 1161 1162static __inline qword si_dfms(qword a, qword b, qword c) 1163{ 1164 union { 1165 vec_double2 v; 1166 double d[2]; 1167 } aa, bb, cc, dd; 1168 1169 aa.v = (vec_double2)(a); 1170 bb.v = (vec_double2)(b); 1171 cc.v = (vec_double2)(c); 1172 dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0]; 1173 dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1]; 1174 return ((qword)(dd.v)); 1175} 1176 1177/* Multiply 1178 */ 1179static __inline qword si_fm(qword a, qword b) 1180{ 1181 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f})))); 1182} 1183 1184static __inline qword si_dfm(qword a, qword b) 1185{ 1186 union { 1187 vec_double2 v; 1188 double d[2]; 1189 } aa, bb, dd; 1190 1191 aa.v = (vec_double2)(a); 1192 bb.v = (vec_double2)(b); 1193 dd.d[0] = aa.d[0] * bb.d[0]; 1194 dd.d[1] = aa.d[1] * bb.d[1]; 1195 return ((qword)(dd.v)); 1196} 1197 1198/* Multiply High 1199 */ 1200static __inline qword si_mpyh(qword a, qword b) 1201{ 1202 vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16}; 1203 1204 return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen))); 1205} 1206 1207 1208/* Multiply High High 1209 */ 1210static __inline qword si_mpyhh(qword a, qword b) 1211{ 1212 return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b)))); 1213} 1214 1215static __inline qword si_mpyhhu(qword a, qword b) 1216{ 1217 return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)))); 1218} 1219 1220/* Multiply Odd 1221 */ 1222static __inline qword si_mpy(qword a, qword b) 1223{ 1224 return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b)))); 1225} 1226 1227static __inline qword si_mpyu(qword a, qword b) 1228{ 1229 return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b)))); 1230} 1231 1232static __inline qword si_mpyi(qword a, short b) 1233{ 1234 return ((qword)(vec_mulo((vec_short8)(a), 1235 vec_splat((vec_short8)(si_from_short(b)), 1)))); 1236} 1237 1238static __inline qword si_mpyui(qword a, unsigned short b) 1239{ 1240 return ((qword)(vec_mulo((vec_ushort8)(a), 1241 vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); 1242} 1243 1244/* Multiply and Shift Right 1245 */ 1246static __inline qword si_mpys(qword a, qword b) 1247{ 1248 return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16})))); 1249} 1250 1251/* Nand 1252 */ 1253static __inline qword si_nand(qword a, qword b) 1254{ 1255 vec_uchar16 d; 1256 1257 d = vec_and((vec_uchar16)(a), (vec_uchar16)(b)); 1258 return ((qword)(vec_nor(d, d))); 1259} 1260 1261/* Negative Multiply Add 1262 */ 1263static __inline qword si_dfnma(qword a, qword b, qword c) 1264{ 1265 union { 1266 vec_double2 v; 1267 double d[2]; 1268 } aa, bb, cc, dd; 1269 1270 aa.v = (vec_double2)(a); 1271 bb.v = (vec_double2)(b); 1272 cc.v = (vec_double2)(c); 1273 dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0]; 1274 dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1]; 1275 return ((qword)(dd.v)); 1276} 1277 1278/* Negative Multiply and Subtract 1279 */ 1280static __inline qword si_fnms(qword a, qword b, qword c) 1281{ 1282 return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c)))); 1283} 1284 1285static __inline qword si_dfnms(qword a, qword b, qword c) 1286{ 1287 union { 1288 vec_double2 v; 1289 double d[2]; 1290 } aa, bb, cc, dd; 1291 1292 aa.v = (vec_double2)(a); 1293 bb.v = (vec_double2)(b); 1294 cc.v = (vec_double2)(c); 1295 dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0]; 1296 dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1]; 1297 return ((qword)(dd.v)); 1298} 1299 1300/* Nor 1301 */ 1302static __inline qword si_nor(qword a, qword b) 1303{ 1304 return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b)))); 1305} 1306 1307/* Or 1308 */ 1309static __inline qword si_or(qword a, qword b) 1310{ 1311 return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b)))); 1312} 1313 1314static __inline qword si_orbi(qword a, unsigned char b) 1315{ 1316 return ((qword)(vec_or((vec_uchar16)(a), 1317 vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); 1318} 1319 1320static __inline qword si_orhi(qword a, unsigned short b) 1321{ 1322 return ((qword)(vec_or((vec_ushort8)(a), 1323 vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); 1324} 1325 1326static __inline qword si_ori(qword a, unsigned int b) 1327{ 1328 return ((qword)(vec_or((vec_uint4)(a), 1329 vec_splat((vec_uint4)(si_from_uint(b)), 0)))); 1330} 1331 1332/* Or Complement 1333 */ 1334static __inline qword si_orc(qword a, qword b) 1335{ 1336 return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b))))); 1337} 1338 1339 1340/* Or Across 1341 */ 1342static __inline qword si_orx(qword a) 1343{ 1344 vec_uchar16 tmp; 1345 tmp = (vec_uchar16)(a); 1346 tmp = vec_or(tmp, vec_sld(tmp, tmp, 8)); 1347 tmp = vec_or(tmp, vec_sld(tmp, tmp, 4)); 1348 return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00, 1349 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00})))); 1350} 1351 1352 1353/* Estimates 1354 */ 1355static __inline qword si_frest(qword a) 1356{ 1357 return ((qword)(vec_re((vec_float4)(a)))); 1358} 1359 1360static __inline qword si_frsqest(qword a) 1361{ 1362 return ((qword)(vec_rsqrte((vec_float4)(a)))); 1363} 1364 1365#define si_fi(_a, _d) (_d) 1366 1367/* Channel Read and Write 1368 */ 1369#define si_rdch(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */ 1370#define si_rchcnt(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */ 1371#define si_wrch(_channel, _a) /* not mappable */ 1372 1373/* Rotate Left 1374 */ 1375static __inline qword si_roth(qword a, qword b) 1376{ 1377 return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b)))); 1378} 1379 1380static __inline qword si_rot(qword a, qword b) 1381{ 1382 return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b)))); 1383} 1384 1385static __inline qword si_rothi(qword a, int b) 1386{ 1387 return ((qword)(vec_rl((vec_ushort8)(a), 1388 vec_splat((vec_ushort8)(si_from_int(b)), 1)))); 1389} 1390 1391static __inline qword si_roti(qword a, int b) 1392{ 1393 return ((qword)(vec_rl((vec_uint4)(a), 1394 vec_splat((vec_uint4)(si_from_int(b)), 0)))); 1395} 1396 1397/* Rotate Left with Mask 1398 */ 1399static __inline qword si_rothm(qword a, qword b) 1400{ 1401 vec_ushort8 neg_b; 1402 vec_ushort8 mask; 1403 1404 neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b)); 1405 mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); 1406 return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask))); 1407} 1408 1409static __inline qword si_rotm(qword a, qword b) 1410{ 1411 vec_uint4 neg_b; 1412 vec_uint4 mask; 1413 1414 neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b)); 1415 mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1416 return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask))); 1417} 1418 1419static __inline qword si_rothmi(qword a, int b) 1420{ 1421 vec_ushort8 neg_b; 1422 vec_ushort8 mask; 1423 1424 neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1); 1425 mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); 1426 return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask))); 1427} 1428 1429static __inline qword si_rotmi(qword a, int b) 1430{ 1431 vec_uint4 neg_b; 1432 vec_uint4 mask; 1433 1434 neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0); 1435 mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1436 return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask))); 1437} 1438 1439 1440/* Rotate Left Algebraic with Mask 1441 */ 1442static __inline qword si_rotmah(qword a, qword b) 1443{ 1444 vec_ushort8 neg_b; 1445 vec_ushort8 mask; 1446 1447 neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b)); 1448 mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); 1449 return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask)))); 1450} 1451 1452static __inline qword si_rotma(qword a, qword b) 1453{ 1454 vec_uint4 neg_b; 1455 vec_uint4 mask; 1456 1457 neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b)); 1458 mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1459 return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask)))); 1460} 1461 1462 1463static __inline qword si_rotmahi(qword a, int b) 1464{ 1465 vec_ushort8 neg_b; 1466 vec_ushort8 mask; 1467 1468 neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1); 1469 mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); 1470 return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask)))); 1471} 1472 1473static __inline qword si_rotmai(qword a, int b) 1474{ 1475 vec_uint4 neg_b; 1476 vec_uint4 mask; 1477 1478 neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0); 1479 mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1480 return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask)))); 1481} 1482 1483 1484/* Rotate Left Quadword by Bytes with Mask 1485 */ 1486static __inline qword si_rotqmbyi(qword a, int count) 1487{ 1488 union { 1489 vec_uchar16 v; 1490 int i[4]; 1491 } x; 1492 vec_uchar16 mask; 1493 1494 count = 0 - count; 1495 x.i[3] = count << 3; 1496 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1); 1497 1498 return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); 1499} 1500 1501 1502static __inline qword si_rotqmby(qword a, qword count) 1503{ 1504 union { 1505 vec_uchar16 v; 1506 int i[4]; 1507 } x; 1508 int cnt; 1509 vec_uchar16 mask; 1510 1511 x.v = (vec_uchar16)(count); 1512 x.i[0] = cnt = (0 - x.i[0]) << 3; 1513 1514 x.v = vec_splat(x.v, 3); 1515 mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); 1516 1517 return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); 1518} 1519 1520 1521/* Rotate Left Quadword by Bytes 1522 */ 1523static __inline qword si_rotqbyi(qword a, int count) 1524{ 1525 union { 1526 vec_uchar16 v; 1527 int i[4]; 1528 } left, right; 1529 1530 count <<= 3; 1531 left.i[3] = count; 1532 right.i[3] = 0 - count; 1533 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v)))); 1534} 1535 1536static __inline qword si_rotqby(qword a, qword count) 1537{ 1538 vec_uchar16 left, right; 1539 1540 left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3)); 1541 right = vec_sub(vec_splat_u8(0), left); 1542 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right)))); 1543} 1544 1545/* Rotate Left Quadword by Bytes Bit Count 1546 */ 1547static __inline qword si_rotqbybi(qword a, qword count) 1548{ 1549 vec_uchar16 left, right; 1550 1551 left = vec_splat((vec_uchar16)(count), 3); 1552 right = vec_sub(vec_splat_u8(7), left); 1553 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right)))); 1554} 1555 1556 1557/* Rotate Left Quadword by Bytes Bit Count 1558 */ 1559static __inline qword si_rotqbii(qword a, int count) 1560{ 1561 vec_uchar16 x, y; 1562 vec_uchar16 result; 1563 1564 x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3); 1565 y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))), 1566 (vec_uint4)vec_sub(vec_splat_u8(8), x))); 1567 result = vec_or(vec_sll((qword)(a), x), y); 1568 return ((qword)(result)); 1569} 1570 1571static __inline qword si_rotqbi(qword a, qword count) 1572{ 1573 vec_uchar16 x, y; 1574 vec_uchar16 result; 1575 1576 x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7)); 1577 y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))), 1578 (vec_uint4)vec_sub(vec_splat_u8(8), x))); 1579 1580 result = vec_or(vec_sll((qword)(a), x), y); 1581 return ((qword)(result)); 1582} 1583 1584 1585/* Rotate Left Quadword and Mask by Bits 1586 */ 1587static __inline qword si_rotqmbii(qword a, int count) 1588{ 1589 return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3)))); 1590} 1591 1592static __inline qword si_rotqmbi(qword a, qword count) 1593{ 1594 return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), vec_splat((vec_uchar16)(count), 3))))); 1595} 1596 1597 1598/* Rotate Left Quadword and Mask by Bytes with Bit Count 1599 */ 1600static __inline qword si_rotqmbybi(qword a, qword count) 1601{ 1602 union { 1603 vec_uchar16 v; 1604 int i[4]; 1605 } x; 1606 int cnt; 1607 vec_uchar16 mask; 1608 1609 x.v = (vec_uchar16)(count); 1610 x.i[0] = cnt = 0 - (x.i[0] & ~7); 1611 x.v = vec_splat(x.v, 3); 1612 mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); 1613 1614 return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); 1615} 1616 1617 1618 1619 1620/* Round Double to Float 1621 */ 1622static __inline qword si_frds(qword a) 1623{ 1624 union { 1625 vec_float4 v; 1626 float f[4]; 1627 } d; 1628 union { 1629 vec_double2 v; 1630 double d[2]; 1631 } in; 1632 1633 in.v = (vec_double2)(a); 1634 d.v = (vec_float4){0.0f}; 1635 d.f[0] = (float)in.d[0]; 1636 d.f[2] = (float)in.d[1]; 1637 1638 return ((qword)(d.v)); 1639} 1640 1641/* Select Bits 1642 */ 1643static __inline qword si_selb(qword a, qword b, qword c) 1644{ 1645 return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c)))); 1646} 1647 1648 1649/* Shuffle Bytes 1650 */ 1651static __inline qword si_shufb(qword a, qword b, qword pattern) 1652{ 1653 vec_uchar16 pat; 1654 1655 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), 1656 vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)), 1657 vec_sra((vec_uchar16)(pattern), vec_splat_u8(7))); 1658 return ((qword)(vec_perm(vec_perm(a, b, pattern), 1659 ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 1660 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}), 1661 pat))); 1662} 1663 1664 1665/* Shift Left 1666 */ 1667static __inline qword si_shlh(qword a, qword b) 1668{ 1669 vec_ushort8 mask; 1670 1671 mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15)); 1672 return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask))); 1673} 1674 1675static __inline qword si_shl(qword a, qword b) 1676{ 1677 vec_uint4 mask; 1678 1679 mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1680 return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask))); 1681} 1682 1683 1684static __inline qword si_shlhi(qword a, unsigned int b) 1685{ 1686 vec_ushort8 mask; 1687 vec_ushort8 bv; 1688 1689 bv = vec_splat((vec_ushort8)(si_from_int(b)), 1); 1690 mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15)); 1691 return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask))); 1692} 1693 1694static __inline qword si_shli(qword a, unsigned int b) 1695{ 1696 vec_uint4 bv; 1697 vec_uint4 mask; 1698 1699 bv = vec_splat((vec_uint4)(si_from_uint(b)), 0); 1700 mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); 1701 return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask))); 1702} 1703 1704 1705/* Shift Left Quadword 1706 */ 1707static __inline qword si_shlqbii(qword a, unsigned int count) 1708{ 1709 vec_uchar16 x; 1710 1711 x = vec_splat((vec_uchar16)(si_from_uint(count)), 3); 1712 return ((qword)(vec_sll((vec_uchar16)(a), x))); 1713} 1714 1715static __inline qword si_shlqbi(qword a, qword count) 1716{ 1717 vec_uchar16 x; 1718 1719 x = vec_splat((vec_uchar16)(count), 3); 1720 return ((qword)(vec_sll((vec_uchar16)(a), x))); 1721} 1722 1723 1724/* Shift Left Quadword by Bytes 1725 */ 1726static __inline qword si_shlqbyi(qword a, unsigned int count) 1727{ 1728 union { 1729 vec_uchar16 v; 1730 int i[4]; 1731 } x; 1732 vec_uchar16 mask; 1733 1734 x.i[3] = count << 3; 1735 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1); 1736 return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); 1737} 1738 1739static __inline qword si_shlqby(qword a, qword count) 1740{ 1741 union { 1742 vec_uchar16 v; 1743 unsigned int i[4]; 1744 } x; 1745 unsigned int cnt; 1746 vec_uchar16 mask; 1747 1748 x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3)); 1749 cnt = x.i[0]; 1750 mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); 1751 return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); 1752} 1753 1754/* Shift Left Quadword by Bytes with Bit Count 1755 */ 1756static __inline qword si_shlqbybi(qword a, qword count) 1757{ 1758 union { 1759 vec_uchar16 v; 1760 int i[4]; 1761 } x; 1762 unsigned int cnt; 1763 vec_uchar16 mask; 1764 1765 x.v = vec_splat((vec_uchar16)(count), 3); 1766 cnt = x.i[0]; 1767 mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); 1768 return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); 1769} 1770 1771 1772/* Stop and Signal 1773 */ 1774#define si_stop(_type) SPU_STOP_ACTION 1775#define si_stopd(a, b, c) SPU_STOP_ACTION 1776 1777 1778/* Subtract 1779 */ 1780static __inline qword si_sfh(qword a, qword b) 1781{ 1782 return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a)))); 1783} 1784 1785static __inline qword si_sf(qword a, qword b) 1786{ 1787 return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a)))); 1788} 1789 1790static __inline qword si_fs(qword a, qword b) 1791{ 1792 return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b)))); 1793} 1794 1795static __inline qword si_dfs(qword a, qword b) 1796{ 1797 union { 1798 vec_double2 v; 1799 double d[2]; 1800 } aa, bb, dd; 1801 1802 aa.v = (vec_double2)(a); 1803 bb.v = (vec_double2)(b); 1804 dd.d[0] = aa.d[0] - bb.d[0]; 1805 dd.d[1] = aa.d[1] - bb.d[1]; 1806 return ((qword)(dd.v)); 1807} 1808 1809static __inline qword si_sfhi(qword a, short b) 1810{ 1811 return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1), 1812 (vec_short8)(a)))); 1813} 1814 1815static __inline qword si_sfi(qword a, int b) 1816{ 1817 return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0), 1818 (vec_int4)(a)))); 1819} 1820 1821/* Subtract word extended 1822 */ 1823#define si_sfx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_b), \ 1824 vec_nor((vec_uint4)(_a), (vec_uint4)(_a))), \ 1825 vec_and((vec_uint4)(_c), vec_splat_u32(1))))) 1826 1827 1828/* Sum Bytes into Shorts 1829 */ 1830static __inline qword si_sumb(qword a, qword b) 1831{ 1832 vec_uint4 zero = (vec_uint4){0}; 1833 vec_ushort8 sum_a, sum_b; 1834 1835 sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero); 1836 sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero); 1837 1838 return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19, 2, 3, 22, 23, 6, 7, 1839 26, 27, 10, 11, 30, 31, 14, 15})))); 1840} 1841 1842/* Exclusive OR 1843 */ 1844static __inline qword si_xor(qword a, qword b) 1845{ 1846 return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b)))); 1847} 1848 1849static __inline qword si_xorbi(qword a, unsigned char b) 1850{ 1851 return ((qword)(vec_xor((vec_uchar16)(a), 1852 vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); 1853} 1854 1855static __inline qword si_xorhi(qword a, unsigned short b) 1856{ 1857 return ((qword)(vec_xor((vec_ushort8)(a), 1858 vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); 1859} 1860 1861static __inline qword si_xori(qword a, unsigned int b) 1862{ 1863 return ((qword)(vec_xor((vec_uint4)(a), 1864 vec_splat((vec_uint4)(si_from_uint(b)), 0)))); 1865} 1866 1867 1868/* Generate Controls for Sub-Quadword Insertion 1869 */ 1870static __inline qword si_cbd(qword a, int imm) 1871{ 1872 union { 1873 vec_uint4 v; 1874 unsigned char c[16]; 1875 } shmask; 1876 1877 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1878 shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03; 1879 return ((qword)(shmask.v)); 1880} 1881 1882static __inline qword si_cdd(qword a, int imm) 1883{ 1884 union { 1885 vec_uint4 v; 1886 unsigned long long ll[2]; 1887 } shmask; 1888 1889 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1890 shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL; 1891 return ((qword)(shmask.v)); 1892} 1893 1894static __inline qword si_chd(qword a, int imm) 1895{ 1896 union { 1897 vec_uint4 v; 1898 unsigned short s[8]; 1899 } shmask; 1900 1901 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1902 shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203; 1903 return ((qword)(shmask.v)); 1904} 1905 1906static __inline qword si_cwd(qword a, int imm) 1907{ 1908 union { 1909 vec_uint4 v; 1910 unsigned int i[4]; 1911 } shmask; 1912 1913 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1914 shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203; 1915 return ((qword)(shmask.v)); 1916} 1917 1918static __inline qword si_cbx(qword a, qword b) 1919{ 1920 union { 1921 vec_uint4 v; 1922 unsigned char c[16]; 1923 } shmask; 1924 1925 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1926 shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03; 1927 return ((qword)(shmask.v)); 1928} 1929 1930 1931static __inline qword si_cdx(qword a, qword b) 1932{ 1933 union { 1934 vec_uint4 v; 1935 unsigned long long ll[2]; 1936 } shmask; 1937 1938 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1939 shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL; 1940 return ((qword)(shmask.v)); 1941} 1942 1943static __inline qword si_chx(qword a, qword b) 1944{ 1945 union { 1946 vec_uint4 v; 1947 unsigned short s[8]; 1948 } shmask; 1949 1950 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1951 shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203; 1952 return ((qword)(shmask.v)); 1953} 1954 1955static __inline qword si_cwx(qword a, qword b) 1956{ 1957 union { 1958 vec_uint4 v; 1959 unsigned int i[4]; 1960 } shmask; 1961 1962 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); 1963 shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203; 1964 return ((qword)(shmask.v)); 1965} 1966 1967 1968/* Constant Formation 1969 */ 1970static __inline qword si_il(signed short imm) 1971{ 1972 return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0))); 1973} 1974 1975 1976static __inline qword si_ila(unsigned int imm) 1977{ 1978 return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0))); 1979} 1980 1981static __inline qword si_ilh(signed short imm) 1982{ 1983 return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1))); 1984} 1985 1986static __inline qword si_ilhu(signed short imm) 1987{ 1988 return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0))); 1989} 1990 1991static __inline qword si_iohl(qword a, unsigned short imm) 1992{ 1993 return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0)))); 1994} 1995 1996/* No Operation 1997 */ 1998#define si_lnop() /* do nothing */ 1999#define si_nop() /* do nothing */ 2000 2001 2002/* Memory Load and Store 2003 */ 2004static __inline qword si_lqa(unsigned int imm) 2005{ 2006 return ((qword)(vec_ld(0, (vector unsigned char *)(imm)))); 2007} 2008 2009static __inline qword si_lqd(qword a, unsigned int imm) 2010{ 2011 return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm)))); 2012} 2013 2014static __inline qword si_lqr(unsigned int imm) 2015{ 2016 return ((qword)(vec_ld(0, (vector unsigned char *)(imm)))); 2017} 2018 2019static __inline qword si_lqx(qword a, qword b) 2020{ 2021 return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0)))); 2022} 2023 2024static __inline void si_stqa(qword a, unsigned int imm) 2025{ 2026 vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm)); 2027} 2028 2029static __inline void si_stqd(qword a, qword b, unsigned int imm) 2030{ 2031 vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm)); 2032} 2033 2034static __inline void si_stqr(qword a, unsigned int imm) 2035{ 2036 vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm)); 2037} 2038 2039static __inline void si_stqx(qword a, qword b, qword c) 2040{ 2041 vec_st((vec_uchar16)(a), 2042 si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))), 2043 (vector unsigned char *)(0)); 2044} 2045 2046#endif /* !__SPU__ */ 2047#endif /* !_SI2VMX_H_ */ 2048 2049