/* mmintrin.h revision 132718 */
1/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with GCC; see the file COPYING. If not, write to 17 the Free Software Foundation, 59 Temple Place - Suite 330, 18 Boston, MA 02111-1307, USA. */ 19 20/* As a special exception, if you include this header file into source 21 files compiled by GCC, this header file does not by itself cause 22 the resulting executable to be covered by the GNU General Public 23 License. This exception does not however invalidate any other 24 reasons why the executable file might be covered by the GNU General 25 Public License. */ 26 27/* Implemented from the specification included in the Intel C++ Compiler 28 User Guide and Reference, version 8.0. */ 29 30#ifndef _MMINTRIN_H_INCLUDED 31#define _MMINTRIN_H_INCLUDED 32 33#ifndef __MMX__ 34# error "MMX instruction set not enabled" 35#else 36/* The data type intended for user use. */ 37typedef int __m64 __attribute__ ((__mode__ (__V2SI__))); 38 39/* Internal data types for implementing the intrinsics. */ 40typedef int __v2si __attribute__ ((__mode__ (__V2SI__))); 41typedef int __v4hi __attribute__ ((__mode__ (__V4HI__))); 42typedef int __v8qi __attribute__ ((__mode__ (__V8QI__))); 43 44/* Empty the multimedia state. */ 45static __inline void 46_mm_empty (void) 47{ 48 __builtin_ia32_emms (); 49} 50 51static __inline void 52_m_empty (void) 53{ 54 _mm_empty (); 55} 56 57/* Convert I to a __m64 object. 
The integer is zero-extended to 64-bits. */ 58static __inline __m64 59_mm_cvtsi32_si64 (int __i) 60{ 61 long long __tmp = (unsigned int)__i; 62 return (__m64) __tmp; 63} 64 65static __inline __m64 66_m_from_int (int __i) 67{ 68 return _mm_cvtsi32_si64 (__i); 69} 70 71#ifdef __x86_64__ 72/* Convert I to a __m64 object. */ 73static __inline __m64 74_mm_cvtsi64x_si64 (long long __i) 75{ 76 return (__m64) __i; 77} 78 79/* Convert I to a __m64 object. */ 80static __inline __m64 81_mm_set_pi64x (long long __i) 82{ 83 return (__m64) __i; 84} 85#endif 86 87/* Convert the lower 32 bits of the __m64 object into an integer. */ 88static __inline int 89_mm_cvtsi64_si32 (__m64 __i) 90{ 91 long long __tmp = (long long)__i; 92 return __tmp; 93} 94 95static __inline int 96_m_to_int (__m64 __i) 97{ 98 return _mm_cvtsi64_si32 (__i); 99} 100 101#ifdef __x86_64__ 102/* Convert the lower 32 bits of the __m64 object into an integer. */ 103static __inline long long 104_mm_cvtsi64_si64x (__m64 __i) 105{ 106 return (long long)__i; 107} 108#endif 109 110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 111 the result, and the four 16-bit values from M2 into the upper four 8-bit 112 values of the result, all with signed saturation. */ 113static __inline __m64 114_mm_packs_pi16 (__m64 __m1, __m64 __m2) 115{ 116 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 117} 118 119static __inline __m64 120_m_packsswb (__m64 __m1, __m64 __m2) 121{ 122 return _mm_packs_pi16 (__m1, __m2); 123} 124 125/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of 126 the result, and the two 32-bit values from M2 into the upper two 16-bit 127 values of the result, all with signed saturation. 
*/ 128static __inline __m64 129_mm_packs_pi32 (__m64 __m1, __m64 __m2) 130{ 131 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 132} 133 134static __inline __m64 135_m_packssdw (__m64 __m1, __m64 __m2) 136{ 137 return _mm_packs_pi32 (__m1, __m2); 138} 139 140/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 141 the result, and the four 16-bit values from M2 into the upper four 8-bit 142 values of the result, all with unsigned saturation. */ 143static __inline __m64 144_mm_packs_pu16 (__m64 __m1, __m64 __m2) 145{ 146 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 147} 148 149static __inline __m64 150_m_packuswb (__m64 __m1, __m64 __m2) 151{ 152 return _mm_packs_pu16 (__m1, __m2); 153} 154 155/* Interleave the four 8-bit values from the high half of M1 with the four 156 8-bit values from the high half of M2. */ 157static __inline __m64 158_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 159{ 160 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 161} 162 163static __inline __m64 164_m_punpckhbw (__m64 __m1, __m64 __m2) 165{ 166 return _mm_unpackhi_pi8 (__m1, __m2); 167} 168 169/* Interleave the two 16-bit values from the high half of M1 with the two 170 16-bit values from the high half of M2. */ 171static __inline __m64 172_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 173{ 174 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 175} 176 177static __inline __m64 178_m_punpckhwd (__m64 __m1, __m64 __m2) 179{ 180 return _mm_unpackhi_pi16 (__m1, __m2); 181} 182 183/* Interleave the 32-bit value from the high half of M1 with the 32-bit 184 value from the high half of M2. 
*/ 185static __inline __m64 186_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 187{ 188 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 189} 190 191static __inline __m64 192_m_punpckhdq (__m64 __m1, __m64 __m2) 193{ 194 return _mm_unpackhi_pi32 (__m1, __m2); 195} 196 197/* Interleave the four 8-bit values from the low half of M1 with the four 198 8-bit values from the low half of M2. */ 199static __inline __m64 200_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 201{ 202 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 203} 204 205static __inline __m64 206_m_punpcklbw (__m64 __m1, __m64 __m2) 207{ 208 return _mm_unpacklo_pi8 (__m1, __m2); 209} 210 211/* Interleave the two 16-bit values from the low half of M1 with the two 212 16-bit values from the low half of M2. */ 213static __inline __m64 214_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 215{ 216 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 217} 218 219static __inline __m64 220_m_punpcklwd (__m64 __m1, __m64 __m2) 221{ 222 return _mm_unpacklo_pi16 (__m1, __m2); 223} 224 225/* Interleave the 32-bit value from the low half of M1 with the 32-bit 226 value from the low half of M2. */ 227static __inline __m64 228_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 229{ 230 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 231} 232 233static __inline __m64 234_m_punpckldq (__m64 __m1, __m64 __m2) 235{ 236 return _mm_unpacklo_pi32 (__m1, __m2); 237} 238 239/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 240static __inline __m64 241_mm_add_pi8 (__m64 __m1, __m64 __m2) 242{ 243 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 244} 245 246static __inline __m64 247_m_paddb (__m64 __m1, __m64 __m2) 248{ 249 return _mm_add_pi8 (__m1, __m2); 250} 251 252/* Add the 16-bit values in M1 to the 16-bit values in M2. 
*/ 253static __inline __m64 254_mm_add_pi16 (__m64 __m1, __m64 __m2) 255{ 256 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 257} 258 259static __inline __m64 260_m_paddw (__m64 __m1, __m64 __m2) 261{ 262 return _mm_add_pi16 (__m1, __m2); 263} 264 265/* Add the 32-bit values in M1 to the 32-bit values in M2. */ 266static __inline __m64 267_mm_add_pi32 (__m64 __m1, __m64 __m2) 268{ 269 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 270} 271 272static __inline __m64 273_m_paddd (__m64 __m1, __m64 __m2) 274{ 275 return _mm_add_pi32 (__m1, __m2); 276} 277 278/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 279static __inline __m64 280_mm_add_si64 (__m64 __m1, __m64 __m2) 281{ 282 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 283} 284 285/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 286 saturated arithmetic. */ 287static __inline __m64 288_mm_adds_pi8 (__m64 __m1, __m64 __m2) 289{ 290 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 291} 292 293static __inline __m64 294_m_paddsb (__m64 __m1, __m64 __m2) 295{ 296 return _mm_adds_pi8 (__m1, __m2); 297} 298 299/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 300 saturated arithmetic. */ 301static __inline __m64 302_mm_adds_pi16 (__m64 __m1, __m64 __m2) 303{ 304 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 305} 306 307static __inline __m64 308_m_paddsw (__m64 __m1, __m64 __m2) 309{ 310 return _mm_adds_pi16 (__m1, __m2); 311} 312 313/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 314 saturated arithmetic. 
*/ 315static __inline __m64 316_mm_adds_pu8 (__m64 __m1, __m64 __m2) 317{ 318 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 319} 320 321static __inline __m64 322_m_paddusb (__m64 __m1, __m64 __m2) 323{ 324 return _mm_adds_pu8 (__m1, __m2); 325} 326 327/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 328 saturated arithmetic. */ 329static __inline __m64 330_mm_adds_pu16 (__m64 __m1, __m64 __m2) 331{ 332 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 333} 334 335static __inline __m64 336_m_paddusw (__m64 __m1, __m64 __m2) 337{ 338 return _mm_adds_pu16 (__m1, __m2); 339} 340 341/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 342static __inline __m64 343_mm_sub_pi8 (__m64 __m1, __m64 __m2) 344{ 345 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 346} 347 348static __inline __m64 349_m_psubb (__m64 __m1, __m64 __m2) 350{ 351 return _mm_sub_pi8 (__m1, __m2); 352} 353 354/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 355static __inline __m64 356_mm_sub_pi16 (__m64 __m1, __m64 __m2) 357{ 358 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 359} 360 361static __inline __m64 362_m_psubw (__m64 __m1, __m64 __m2) 363{ 364 return _mm_sub_pi16 (__m1, __m2); 365} 366 367/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ 368static __inline __m64 369_mm_sub_pi32 (__m64 __m1, __m64 __m2) 370{ 371 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 372} 373 374static __inline __m64 375_m_psubd (__m64 __m1, __m64 __m2) 376{ 377 return _mm_sub_pi32 (__m1, __m2); 378} 379 380/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 381static __inline __m64 382_mm_sub_si64 (__m64 __m1, __m64 __m2) 383{ 384 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 385} 386 387/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 388 saturating arithmetic. 
*/ 389static __inline __m64 390_mm_subs_pi8 (__m64 __m1, __m64 __m2) 391{ 392 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 393} 394 395static __inline __m64 396_m_psubsb (__m64 __m1, __m64 __m2) 397{ 398 return _mm_subs_pi8 (__m1, __m2); 399} 400 401/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 402 signed saturating arithmetic. */ 403static __inline __m64 404_mm_subs_pi16 (__m64 __m1, __m64 __m2) 405{ 406 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 407} 408 409static __inline __m64 410_m_psubsw (__m64 __m1, __m64 __m2) 411{ 412 return _mm_subs_pi16 (__m1, __m2); 413} 414 415/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 416 unsigned saturating arithmetic. */ 417static __inline __m64 418_mm_subs_pu8 (__m64 __m1, __m64 __m2) 419{ 420 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 421} 422 423static __inline __m64 424_m_psubusb (__m64 __m1, __m64 __m2) 425{ 426 return _mm_subs_pu8 (__m1, __m2); 427} 428 429/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 430 unsigned saturating arithmetic. */ 431static __inline __m64 432_mm_subs_pu16 (__m64 __m1, __m64 __m2) 433{ 434 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 435} 436 437static __inline __m64 438_m_psubusw (__m64 __m1, __m64 __m2) 439{ 440 return _mm_subs_pu16 (__m1, __m2); 441} 442 443/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 444 four 32-bit intermediate results, which are then summed by pairs to 445 produce two 32-bit results. 
*/ 446static __inline __m64 447_mm_madd_pi16 (__m64 __m1, __m64 __m2) 448{ 449 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 450} 451 452static __inline __m64 453_m_pmaddwd (__m64 __m1, __m64 __m2) 454{ 455 return _mm_madd_pi16 (__m1, __m2); 456} 457 458/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 459 M2 and produce the high 16 bits of the 32-bit results. */ 460static __inline __m64 461_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 462{ 463 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 464} 465 466static __inline __m64 467_m_pmulhw (__m64 __m1, __m64 __m2) 468{ 469 return _mm_mulhi_pi16 (__m1, __m2); 470} 471 472/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 473 the low 16 bits of the results. */ 474static __inline __m64 475_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 476{ 477 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 478} 479 480static __inline __m64 481_m_pmullw (__m64 __m1, __m64 __m2) 482{ 483 return _mm_mullo_pi16 (__m1, __m2); 484} 485 486/* Shift four 16-bit values in M left by COUNT. */ 487static __inline __m64 488_mm_sll_pi16 (__m64 __m, __m64 __count) 489{ 490 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 491} 492 493static __inline __m64 494_m_psllw (__m64 __m, __m64 __count) 495{ 496 return _mm_sll_pi16 (__m, __count); 497} 498 499static __inline __m64 500_mm_slli_pi16 (__m64 __m, int __count) 501{ 502 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 503} 504 505static __inline __m64 506_m_psllwi (__m64 __m, int __count) 507{ 508 return _mm_slli_pi16 (__m, __count); 509} 510 511/* Shift two 32-bit values in M left by COUNT. 
*/ 512static __inline __m64 513_mm_sll_pi32 (__m64 __m, __m64 __count) 514{ 515 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 516} 517 518static __inline __m64 519_m_pslld (__m64 __m, __m64 __count) 520{ 521 return _mm_sll_pi32 (__m, __count); 522} 523 524static __inline __m64 525_mm_slli_pi32 (__m64 __m, int __count) 526{ 527 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 528} 529 530static __inline __m64 531_m_pslldi (__m64 __m, int __count) 532{ 533 return _mm_slli_pi32 (__m, __count); 534} 535 536/* Shift the 64-bit value in M left by COUNT. */ 537static __inline __m64 538_mm_sll_si64 (__m64 __m, __m64 __count) 539{ 540 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 541} 542 543static __inline __m64 544_m_psllq (__m64 __m, __m64 __count) 545{ 546 return _mm_sll_si64 (__m, __count); 547} 548 549static __inline __m64 550_mm_slli_si64 (__m64 __m, int __count) 551{ 552 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 553} 554 555static __inline __m64 556_m_psllqi (__m64 __m, int __count) 557{ 558 return _mm_slli_si64 (__m, __count); 559} 560 561/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 562static __inline __m64 563_mm_sra_pi16 (__m64 __m, __m64 __count) 564{ 565 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 566} 567 568static __inline __m64 569_m_psraw (__m64 __m, __m64 __count) 570{ 571 return _mm_sra_pi16 (__m, __count); 572} 573 574static __inline __m64 575_mm_srai_pi16 (__m64 __m, int __count) 576{ 577 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 578} 579 580static __inline __m64 581_m_psrawi (__m64 __m, int __count) 582{ 583 return _mm_srai_pi16 (__m, __count); 584} 585 586/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. 
*/ 587static __inline __m64 588_mm_sra_pi32 (__m64 __m, __m64 __count) 589{ 590 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 591} 592 593static __inline __m64 594_m_psrad (__m64 __m, __m64 __count) 595{ 596 return _mm_sra_pi32 (__m, __count); 597} 598 599static __inline __m64 600_mm_srai_pi32 (__m64 __m, int __count) 601{ 602 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 603} 604 605static __inline __m64 606_m_psradi (__m64 __m, int __count) 607{ 608 return _mm_srai_pi32 (__m, __count); 609} 610 611/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 612static __inline __m64 613_mm_srl_pi16 (__m64 __m, __m64 __count) 614{ 615 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 616} 617 618static __inline __m64 619_m_psrlw (__m64 __m, __m64 __count) 620{ 621 return _mm_srl_pi16 (__m, __count); 622} 623 624static __inline __m64 625_mm_srli_pi16 (__m64 __m, int __count) 626{ 627 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 628} 629 630static __inline __m64 631_m_psrlwi (__m64 __m, int __count) 632{ 633 return _mm_srli_pi16 (__m, __count); 634} 635 636/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ 637static __inline __m64 638_mm_srl_pi32 (__m64 __m, __m64 __count) 639{ 640 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 641} 642 643static __inline __m64 644_m_psrld (__m64 __m, __m64 __count) 645{ 646 return _mm_srl_pi32 (__m, __count); 647} 648 649static __inline __m64 650_mm_srli_pi32 (__m64 __m, int __count) 651{ 652 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 653} 654 655static __inline __m64 656_m_psrldi (__m64 __m, int __count) 657{ 658 return _mm_srli_pi32 (__m, __count); 659} 660 661/* Shift the 64-bit value in M left by COUNT; shift in zeros. 
*/ 662static __inline __m64 663_mm_srl_si64 (__m64 __m, __m64 __count) 664{ 665 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 666} 667 668static __inline __m64 669_m_psrlq (__m64 __m, __m64 __count) 670{ 671 return _mm_srl_si64 (__m, __count); 672} 673 674static __inline __m64 675_mm_srli_si64 (__m64 __m, int __count) 676{ 677 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 678} 679 680static __inline __m64 681_m_psrlqi (__m64 __m, int __count) 682{ 683 return _mm_srli_si64 (__m, __count); 684} 685 686/* Bit-wise AND the 64-bit values in M1 and M2. */ 687static __inline __m64 688_mm_and_si64 (__m64 __m1, __m64 __m2) 689{ 690 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); 691} 692 693static __inline __m64 694_m_pand (__m64 __m1, __m64 __m2) 695{ 696 return _mm_and_si64 (__m1, __m2); 697} 698 699/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 700 64-bit value in M2. */ 701static __inline __m64 702_mm_andnot_si64 (__m64 __m1, __m64 __m2) 703{ 704 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); 705} 706 707static __inline __m64 708_m_pandn (__m64 __m1, __m64 __m2) 709{ 710 return _mm_andnot_si64 (__m1, __m2); 711} 712 713/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 714static __inline __m64 715_mm_or_si64 (__m64 __m1, __m64 __m2) 716{ 717 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); 718} 719 720static __inline __m64 721_m_por (__m64 __m1, __m64 __m2) 722{ 723 return _mm_or_si64 (__m1, __m2); 724} 725 726/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 727static __inline __m64 728_mm_xor_si64 (__m64 __m1, __m64 __m2) 729{ 730 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); 731} 732 733static __inline __m64 734_m_pxor (__m64 __m1, __m64 __m2) 735{ 736 return _mm_xor_si64 (__m1, __m2); 737} 738 739/* Compare eight 8-bit values. 
The result of the comparison is 0xFF if the 740 test is true and zero if false. */ 741static __inline __m64 742_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 743{ 744 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 745} 746 747static __inline __m64 748_m_pcmpeqb (__m64 __m1, __m64 __m2) 749{ 750 return _mm_cmpeq_pi8 (__m1, __m2); 751} 752 753static __inline __m64 754_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 755{ 756 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 757} 758 759static __inline __m64 760_m_pcmpgtb (__m64 __m1, __m64 __m2) 761{ 762 return _mm_cmpgt_pi8 (__m1, __m2); 763} 764 765/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 766 the test is true and zero if false. */ 767static __inline __m64 768_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 769{ 770 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 771} 772 773static __inline __m64 774_m_pcmpeqw (__m64 __m1, __m64 __m2) 775{ 776 return _mm_cmpeq_pi16 (__m1, __m2); 777} 778 779static __inline __m64 780_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 781{ 782 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 783} 784 785static __inline __m64 786_m_pcmpgtw (__m64 __m1, __m64 __m2) 787{ 788 return _mm_cmpgt_pi16 (__m1, __m2); 789} 790 791/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 792 the test is true and zero if false. */ 793static __inline __m64 794_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 795{ 796 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 797} 798 799static __inline __m64 800_m_pcmpeqd (__m64 __m1, __m64 __m2) 801{ 802 return _mm_cmpeq_pi32 (__m1, __m2); 803} 804 805static __inline __m64 806_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 807{ 808 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 809} 810 811static __inline __m64 812_m_pcmpgtd (__m64 __m1, __m64 __m2) 813{ 814 return _mm_cmpgt_pi32 (__m1, __m2); 815} 816 817/* Creates a 64-bit zero. 
*/ 818static __inline __m64 819_mm_setzero_si64 (void) 820{ 821 return (__m64)__builtin_ia32_mmx_zero (); 822} 823 824/* Creates a vector of two 32-bit values; I0 is least significant. */ 825static __inline __m64 826_mm_set_pi32 (int __i1, int __i0) 827{ 828 union { 829 __m64 __q; 830 struct { 831 unsigned int __i0; 832 unsigned int __i1; 833 } __s; 834 } __u; 835 836 __u.__s.__i0 = __i0; 837 __u.__s.__i1 = __i1; 838 839 return __u.__q; 840} 841 842/* Creates a vector of four 16-bit values; W0 is least significant. */ 843static __inline __m64 844_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) 845{ 846 unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2; 847 unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0; 848 return _mm_set_pi32 (__i1, __i0); 849 850} 851 852/* Creates a vector of eight 8-bit values; B0 is least significant. */ 853static __inline __m64 854_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, 855 char __b3, char __b2, char __b1, char __b0) 856{ 857 unsigned int __i1, __i0; 858 859 __i1 = (unsigned char)__b7; 860 __i1 = __i1 << 8 | (unsigned char)__b6; 861 __i1 = __i1 << 8 | (unsigned char)__b5; 862 __i1 = __i1 << 8 | (unsigned char)__b4; 863 864 __i0 = (unsigned char)__b3; 865 __i0 = __i0 << 8 | (unsigned char)__b2; 866 __i0 = __i0 << 8 | (unsigned char)__b1; 867 __i0 = __i0 << 8 | (unsigned char)__b0; 868 869 return _mm_set_pi32 (__i1, __i0); 870} 871 872/* Similar, but with the arguments in reverse order. 
*/ 873static __inline __m64 874_mm_setr_pi32 (int __i0, int __i1) 875{ 876 return _mm_set_pi32 (__i1, __i0); 877} 878 879static __inline __m64 880_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) 881{ 882 return _mm_set_pi16 (__w3, __w2, __w1, __w0); 883} 884 885static __inline __m64 886_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, 887 char __b4, char __b5, char __b6, char __b7) 888{ 889 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 890} 891 892/* Creates a vector of two 32-bit values, both elements containing I. */ 893static __inline __m64 894_mm_set1_pi32 (int __i) 895{ 896 return _mm_set_pi32 (__i, __i); 897} 898 899/* Creates a vector of four 16-bit values, all elements containing W. */ 900static __inline __m64 901_mm_set1_pi16 (short __w) 902{ 903 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; 904 return _mm_set1_pi32 (__i); 905} 906 907/* Creates a vector of eight 8-bit values, all elements containing B. */ 908static __inline __m64 909_mm_set1_pi8 (char __b) 910{ 911 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; 912 unsigned int __i = __w << 16 | __w; 913 return _mm_set1_pi32 (__i); 914} 915 916#endif /* __MMX__ */ 917#endif /* _MMINTRIN_H_INCLUDED */ 918