/* mmintrin.h revision 169689 */
1/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 2 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GCC is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING. If not, write to 18 the Free Software Foundation, 51 Franklin Street, Fifth Floor, 19 Boston, MA 02110-1301, USA. */ 20 21/* As a special exception, if you include this header file into source 22 files compiled by GCC, this header file does not by itself cause 23 the resulting executable to be covered by the GNU General Public 24 License. This exception does not however invalidate any other 25 reasons why the executable file might be covered by the GNU General 26 Public License. */ 27 28/* Implemented from the specification included in the Intel C++ Compiler 29 User Guide and Reference, version 9.0. */ 30 31#ifndef _MMINTRIN_H_INCLUDED 32#define _MMINTRIN_H_INCLUDED 33 34#ifndef __MMX__ 35# error "MMX instruction set not enabled" 36#else 37/* The Intel API is flexible enough that we must allow aliasing with other 38 vector types, and their scalar components. */ 39typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); 40 41/* Internal data types for implementing the intrinsics. */ 42typedef int __v2si __attribute__ ((__vector_size__ (8))); 43typedef short __v4hi __attribute__ ((__vector_size__ (8))); 44typedef char __v8qi __attribute__ ((__vector_size__ (8))); 45 46/* Empty the multimedia state. 
*/ 47static __inline void __attribute__((__always_inline__)) 48_mm_empty (void) 49{ 50 __builtin_ia32_emms (); 51} 52 53static __inline void __attribute__((__always_inline__)) 54_m_empty (void) 55{ 56 _mm_empty (); 57} 58 59/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ 60static __inline __m64 __attribute__((__always_inline__)) 61_mm_cvtsi32_si64 (int __i) 62{ 63 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); 64} 65 66static __inline __m64 __attribute__((__always_inline__)) 67_m_from_int (int __i) 68{ 69 return _mm_cvtsi32_si64 (__i); 70} 71 72#ifdef __x86_64__ 73/* Convert I to a __m64 object. */ 74 75/* Intel intrinsic. */ 76static __inline __m64 __attribute__((__always_inline__)) 77_m_from_int64 (long long __i) 78{ 79 return (__m64) __i; 80} 81 82static __inline __m64 __attribute__((__always_inline__)) 83_mm_cvtsi64_m64 (long long __i) 84{ 85 return (__m64) __i; 86} 87 88/* Microsoft intrinsic. */ 89static __inline __m64 __attribute__((__always_inline__)) 90_mm_cvtsi64x_si64 (long long __i) 91{ 92 return (__m64) __i; 93} 94 95static __inline __m64 __attribute__((__always_inline__)) 96_mm_set_pi64x (long long __i) 97{ 98 return (__m64) __i; 99} 100#endif 101 102/* Convert the lower 32 bits of the __m64 object into an integer. */ 103static __inline int __attribute__((__always_inline__)) 104_mm_cvtsi64_si32 (__m64 __i) 105{ 106 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); 107} 108 109static __inline int __attribute__((__always_inline__)) 110_m_to_int (__m64 __i) 111{ 112 return _mm_cvtsi64_si32 (__i); 113} 114 115#ifdef __x86_64__ 116/* Convert the __m64 object to a 64bit integer. */ 117 118/* Intel intrinsic. */ 119static __inline long long __attribute__((__always_inline__)) 120_m_to_int64 (__m64 __i) 121{ 122 return (long long)__i; 123} 124 125static __inline long long __attribute__((__always_inline__)) 126_mm_cvtm64_si64 (__m64 __i) 127{ 128 return (long long)__i; 129} 130 131/* Microsoft intrinsic. 
*/ 132static __inline long long __attribute__((__always_inline__)) 133_mm_cvtsi64_si64x (__m64 __i) 134{ 135 return (long long)__i; 136} 137#endif 138 139/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 140 the result, and the four 16-bit values from M2 into the upper four 8-bit 141 values of the result, all with signed saturation. */ 142static __inline __m64 __attribute__((__always_inline__)) 143_mm_packs_pi16 (__m64 __m1, __m64 __m2) 144{ 145 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 146} 147 148static __inline __m64 __attribute__((__always_inline__)) 149_m_packsswb (__m64 __m1, __m64 __m2) 150{ 151 return _mm_packs_pi16 (__m1, __m2); 152} 153 154/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of 155 the result, and the two 32-bit values from M2 into the upper two 16-bit 156 values of the result, all with signed saturation. */ 157static __inline __m64 __attribute__((__always_inline__)) 158_mm_packs_pi32 (__m64 __m1, __m64 __m2) 159{ 160 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 161} 162 163static __inline __m64 __attribute__((__always_inline__)) 164_m_packssdw (__m64 __m1, __m64 __m2) 165{ 166 return _mm_packs_pi32 (__m1, __m2); 167} 168 169/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 170 the result, and the four 16-bit values from M2 into the upper four 8-bit 171 values of the result, all with unsigned saturation. */ 172static __inline __m64 __attribute__((__always_inline__)) 173_mm_packs_pu16 (__m64 __m1, __m64 __m2) 174{ 175 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 176} 177 178static __inline __m64 __attribute__((__always_inline__)) 179_m_packuswb (__m64 __m1, __m64 __m2) 180{ 181 return _mm_packs_pu16 (__m1, __m2); 182} 183 184/* Interleave the four 8-bit values from the high half of M1 with the four 185 8-bit values from the high half of M2. 
*/ 186static __inline __m64 __attribute__((__always_inline__)) 187_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 188{ 189 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 190} 191 192static __inline __m64 __attribute__((__always_inline__)) 193_m_punpckhbw (__m64 __m1, __m64 __m2) 194{ 195 return _mm_unpackhi_pi8 (__m1, __m2); 196} 197 198/* Interleave the two 16-bit values from the high half of M1 with the two 199 16-bit values from the high half of M2. */ 200static __inline __m64 __attribute__((__always_inline__)) 201_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 202{ 203 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 204} 205 206static __inline __m64 __attribute__((__always_inline__)) 207_m_punpckhwd (__m64 __m1, __m64 __m2) 208{ 209 return _mm_unpackhi_pi16 (__m1, __m2); 210} 211 212/* Interleave the 32-bit value from the high half of M1 with the 32-bit 213 value from the high half of M2. */ 214static __inline __m64 __attribute__((__always_inline__)) 215_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 216{ 217 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 218} 219 220static __inline __m64 __attribute__((__always_inline__)) 221_m_punpckhdq (__m64 __m1, __m64 __m2) 222{ 223 return _mm_unpackhi_pi32 (__m1, __m2); 224} 225 226/* Interleave the four 8-bit values from the low half of M1 with the four 227 8-bit values from the low half of M2. */ 228static __inline __m64 __attribute__((__always_inline__)) 229_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 230{ 231 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 232} 233 234static __inline __m64 __attribute__((__always_inline__)) 235_m_punpcklbw (__m64 __m1, __m64 __m2) 236{ 237 return _mm_unpacklo_pi8 (__m1, __m2); 238} 239 240/* Interleave the two 16-bit values from the low half of M1 with the two 241 16-bit values from the low half of M2. 
*/ 242static __inline __m64 __attribute__((__always_inline__)) 243_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 244{ 245 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 246} 247 248static __inline __m64 __attribute__((__always_inline__)) 249_m_punpcklwd (__m64 __m1, __m64 __m2) 250{ 251 return _mm_unpacklo_pi16 (__m1, __m2); 252} 253 254/* Interleave the 32-bit value from the low half of M1 with the 32-bit 255 value from the low half of M2. */ 256static __inline __m64 __attribute__((__always_inline__)) 257_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 258{ 259 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 260} 261 262static __inline __m64 __attribute__((__always_inline__)) 263_m_punpckldq (__m64 __m1, __m64 __m2) 264{ 265 return _mm_unpacklo_pi32 (__m1, __m2); 266} 267 268/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 269static __inline __m64 __attribute__((__always_inline__)) 270_mm_add_pi8 (__m64 __m1, __m64 __m2) 271{ 272 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 273} 274 275static __inline __m64 __attribute__((__always_inline__)) 276_m_paddb (__m64 __m1, __m64 __m2) 277{ 278 return _mm_add_pi8 (__m1, __m2); 279} 280 281/* Add the 16-bit values in M1 to the 16-bit values in M2. */ 282static __inline __m64 __attribute__((__always_inline__)) 283_mm_add_pi16 (__m64 __m1, __m64 __m2) 284{ 285 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 286} 287 288static __inline __m64 __attribute__((__always_inline__)) 289_m_paddw (__m64 __m1, __m64 __m2) 290{ 291 return _mm_add_pi16 (__m1, __m2); 292} 293 294/* Add the 32-bit values in M1 to the 32-bit values in M2. 
*/ 295static __inline __m64 __attribute__((__always_inline__)) 296_mm_add_pi32 (__m64 __m1, __m64 __m2) 297{ 298 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 299} 300 301static __inline __m64 __attribute__((__always_inline__)) 302_m_paddd (__m64 __m1, __m64 __m2) 303{ 304 return _mm_add_pi32 (__m1, __m2); 305} 306 307/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 308#ifdef __SSE2__ 309static __inline __m64 __attribute__((__always_inline__)) 310_mm_add_si64 (__m64 __m1, __m64 __m2) 311{ 312 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 313} 314#endif 315 316/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 317 saturated arithmetic. */ 318static __inline __m64 __attribute__((__always_inline__)) 319_mm_adds_pi8 (__m64 __m1, __m64 __m2) 320{ 321 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 322} 323 324static __inline __m64 __attribute__((__always_inline__)) 325_m_paddsb (__m64 __m1, __m64 __m2) 326{ 327 return _mm_adds_pi8 (__m1, __m2); 328} 329 330/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 331 saturated arithmetic. */ 332static __inline __m64 __attribute__((__always_inline__)) 333_mm_adds_pi16 (__m64 __m1, __m64 __m2) 334{ 335 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 336} 337 338static __inline __m64 __attribute__((__always_inline__)) 339_m_paddsw (__m64 __m1, __m64 __m2) 340{ 341 return _mm_adds_pi16 (__m1, __m2); 342} 343 344/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 345 saturated arithmetic. 
*/ 346static __inline __m64 __attribute__((__always_inline__)) 347_mm_adds_pu8 (__m64 __m1, __m64 __m2) 348{ 349 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 350} 351 352static __inline __m64 __attribute__((__always_inline__)) 353_m_paddusb (__m64 __m1, __m64 __m2) 354{ 355 return _mm_adds_pu8 (__m1, __m2); 356} 357 358/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 359 saturated arithmetic. */ 360static __inline __m64 __attribute__((__always_inline__)) 361_mm_adds_pu16 (__m64 __m1, __m64 __m2) 362{ 363 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 364} 365 366static __inline __m64 __attribute__((__always_inline__)) 367_m_paddusw (__m64 __m1, __m64 __m2) 368{ 369 return _mm_adds_pu16 (__m1, __m2); 370} 371 372/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 373static __inline __m64 __attribute__((__always_inline__)) 374_mm_sub_pi8 (__m64 __m1, __m64 __m2) 375{ 376 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 377} 378 379static __inline __m64 __attribute__((__always_inline__)) 380_m_psubb (__m64 __m1, __m64 __m2) 381{ 382 return _mm_sub_pi8 (__m1, __m2); 383} 384 385/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 386static __inline __m64 __attribute__((__always_inline__)) 387_mm_sub_pi16 (__m64 __m1, __m64 __m2) 388{ 389 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 390} 391 392static __inline __m64 __attribute__((__always_inline__)) 393_m_psubw (__m64 __m1, __m64 __m2) 394{ 395 return _mm_sub_pi16 (__m1, __m2); 396} 397 398/* Subtract the 32-bit values in M2 from the 32-bit values in M1. 
*/ 399static __inline __m64 __attribute__((__always_inline__)) 400_mm_sub_pi32 (__m64 __m1, __m64 __m2) 401{ 402 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 403} 404 405static __inline __m64 __attribute__((__always_inline__)) 406_m_psubd (__m64 __m1, __m64 __m2) 407{ 408 return _mm_sub_pi32 (__m1, __m2); 409} 410 411/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 412#ifdef __SSE2__ 413static __inline __m64 __attribute__((__always_inline__)) 414_mm_sub_si64 (__m64 __m1, __m64 __m2) 415{ 416 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 417} 418#endif 419 420/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 421 saturating arithmetic. */ 422static __inline __m64 __attribute__((__always_inline__)) 423_mm_subs_pi8 (__m64 __m1, __m64 __m2) 424{ 425 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 426} 427 428static __inline __m64 __attribute__((__always_inline__)) 429_m_psubsb (__m64 __m1, __m64 __m2) 430{ 431 return _mm_subs_pi8 (__m1, __m2); 432} 433 434/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 435 signed saturating arithmetic. */ 436static __inline __m64 __attribute__((__always_inline__)) 437_mm_subs_pi16 (__m64 __m1, __m64 __m2) 438{ 439 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 440} 441 442static __inline __m64 __attribute__((__always_inline__)) 443_m_psubsw (__m64 __m1, __m64 __m2) 444{ 445 return _mm_subs_pi16 (__m1, __m2); 446} 447 448/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 449 unsigned saturating arithmetic. 
*/ 450static __inline __m64 __attribute__((__always_inline__)) 451_mm_subs_pu8 (__m64 __m1, __m64 __m2) 452{ 453 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 454} 455 456static __inline __m64 __attribute__((__always_inline__)) 457_m_psubusb (__m64 __m1, __m64 __m2) 458{ 459 return _mm_subs_pu8 (__m1, __m2); 460} 461 462/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 463 unsigned saturating arithmetic. */ 464static __inline __m64 __attribute__((__always_inline__)) 465_mm_subs_pu16 (__m64 __m1, __m64 __m2) 466{ 467 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 468} 469 470static __inline __m64 __attribute__((__always_inline__)) 471_m_psubusw (__m64 __m1, __m64 __m2) 472{ 473 return _mm_subs_pu16 (__m1, __m2); 474} 475 476/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 477 four 32-bit intermediate results, which are then summed by pairs to 478 produce two 32-bit results. */ 479static __inline __m64 __attribute__((__always_inline__)) 480_mm_madd_pi16 (__m64 __m1, __m64 __m2) 481{ 482 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 483} 484 485static __inline __m64 __attribute__((__always_inline__)) 486_m_pmaddwd (__m64 __m1, __m64 __m2) 487{ 488 return _mm_madd_pi16 (__m1, __m2); 489} 490 491/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 492 M2 and produce the high 16 bits of the 32-bit results. */ 493static __inline __m64 __attribute__((__always_inline__)) 494_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 495{ 496 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 497} 498 499static __inline __m64 __attribute__((__always_inline__)) 500_m_pmulhw (__m64 __m1, __m64 __m2) 501{ 502 return _mm_mulhi_pi16 (__m1, __m2); 503} 504 505/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 506 the low 16 bits of the results. 
*/ 507static __inline __m64 __attribute__((__always_inline__)) 508_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 509{ 510 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 511} 512 513static __inline __m64 __attribute__((__always_inline__)) 514_m_pmullw (__m64 __m1, __m64 __m2) 515{ 516 return _mm_mullo_pi16 (__m1, __m2); 517} 518 519/* Shift four 16-bit values in M left by COUNT. */ 520static __inline __m64 __attribute__((__always_inline__)) 521_mm_sll_pi16 (__m64 __m, __m64 __count) 522{ 523 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 524} 525 526static __inline __m64 __attribute__((__always_inline__)) 527_m_psllw (__m64 __m, __m64 __count) 528{ 529 return _mm_sll_pi16 (__m, __count); 530} 531 532static __inline __m64 __attribute__((__always_inline__)) 533_mm_slli_pi16 (__m64 __m, int __count) 534{ 535 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 536} 537 538static __inline __m64 __attribute__((__always_inline__)) 539_m_psllwi (__m64 __m, int __count) 540{ 541 return _mm_slli_pi16 (__m, __count); 542} 543 544/* Shift two 32-bit values in M left by COUNT. */ 545static __inline __m64 __attribute__((__always_inline__)) 546_mm_sll_pi32 (__m64 __m, __m64 __count) 547{ 548 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 549} 550 551static __inline __m64 __attribute__((__always_inline__)) 552_m_pslld (__m64 __m, __m64 __count) 553{ 554 return _mm_sll_pi32 (__m, __count); 555} 556 557static __inline __m64 __attribute__((__always_inline__)) 558_mm_slli_pi32 (__m64 __m, int __count) 559{ 560 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 561} 562 563static __inline __m64 __attribute__((__always_inline__)) 564_m_pslldi (__m64 __m, int __count) 565{ 566 return _mm_slli_pi32 (__m, __count); 567} 568 569/* Shift the 64-bit value in M left by COUNT. 
*/ 570static __inline __m64 __attribute__((__always_inline__)) 571_mm_sll_si64 (__m64 __m, __m64 __count) 572{ 573 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 574} 575 576static __inline __m64 __attribute__((__always_inline__)) 577_m_psllq (__m64 __m, __m64 __count) 578{ 579 return _mm_sll_si64 (__m, __count); 580} 581 582static __inline __m64 __attribute__((__always_inline__)) 583_mm_slli_si64 (__m64 __m, int __count) 584{ 585 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 586} 587 588static __inline __m64 __attribute__((__always_inline__)) 589_m_psllqi (__m64 __m, int __count) 590{ 591 return _mm_slli_si64 (__m, __count); 592} 593 594/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 595static __inline __m64 __attribute__((__always_inline__)) 596_mm_sra_pi16 (__m64 __m, __m64 __count) 597{ 598 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 599} 600 601static __inline __m64 __attribute__((__always_inline__)) 602_m_psraw (__m64 __m, __m64 __count) 603{ 604 return _mm_sra_pi16 (__m, __count); 605} 606 607static __inline __m64 __attribute__((__always_inline__)) 608_mm_srai_pi16 (__m64 __m, int __count) 609{ 610 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 611} 612 613static __inline __m64 __attribute__((__always_inline__)) 614_m_psrawi (__m64 __m, int __count) 615{ 616 return _mm_srai_pi16 (__m, __count); 617} 618 619/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. 
*/ 620static __inline __m64 __attribute__((__always_inline__)) 621_mm_sra_pi32 (__m64 __m, __m64 __count) 622{ 623 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 624} 625 626static __inline __m64 __attribute__((__always_inline__)) 627_m_psrad (__m64 __m, __m64 __count) 628{ 629 return _mm_sra_pi32 (__m, __count); 630} 631 632static __inline __m64 __attribute__((__always_inline__)) 633_mm_srai_pi32 (__m64 __m, int __count) 634{ 635 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 636} 637 638static __inline __m64 __attribute__((__always_inline__)) 639_m_psradi (__m64 __m, int __count) 640{ 641 return _mm_srai_pi32 (__m, __count); 642} 643 644/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 645static __inline __m64 __attribute__((__always_inline__)) 646_mm_srl_pi16 (__m64 __m, __m64 __count) 647{ 648 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 649} 650 651static __inline __m64 __attribute__((__always_inline__)) 652_m_psrlw (__m64 __m, __m64 __count) 653{ 654 return _mm_srl_pi16 (__m, __count); 655} 656 657static __inline __m64 __attribute__((__always_inline__)) 658_mm_srli_pi16 (__m64 __m, int __count) 659{ 660 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 661} 662 663static __inline __m64 __attribute__((__always_inline__)) 664_m_psrlwi (__m64 __m, int __count) 665{ 666 return _mm_srli_pi16 (__m, __count); 667} 668 669/* Shift two 32-bit values in M right by COUNT; shift in zeros. 
*/ 670static __inline __m64 __attribute__((__always_inline__)) 671_mm_srl_pi32 (__m64 __m, __m64 __count) 672{ 673 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 674} 675 676static __inline __m64 __attribute__((__always_inline__)) 677_m_psrld (__m64 __m, __m64 __count) 678{ 679 return _mm_srl_pi32 (__m, __count); 680} 681 682static __inline __m64 __attribute__((__always_inline__)) 683_mm_srli_pi32 (__m64 __m, int __count) 684{ 685 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 686} 687 688static __inline __m64 __attribute__((__always_inline__)) 689_m_psrldi (__m64 __m, int __count) 690{ 691 return _mm_srli_pi32 (__m, __count); 692} 693 694/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ 695static __inline __m64 __attribute__((__always_inline__)) 696_mm_srl_si64 (__m64 __m, __m64 __count) 697{ 698 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 699} 700 701static __inline __m64 __attribute__((__always_inline__)) 702_m_psrlq (__m64 __m, __m64 __count) 703{ 704 return _mm_srl_si64 (__m, __count); 705} 706 707static __inline __m64 __attribute__((__always_inline__)) 708_mm_srli_si64 (__m64 __m, int __count) 709{ 710 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 711} 712 713static __inline __m64 __attribute__((__always_inline__)) 714_m_psrlqi (__m64 __m, int __count) 715{ 716 return _mm_srli_si64 (__m, __count); 717} 718 719/* Bit-wise AND the 64-bit values in M1 and M2. */ 720static __inline __m64 __attribute__((__always_inline__)) 721_mm_and_si64 (__m64 __m1, __m64 __m2) 722{ 723 return __builtin_ia32_pand (__m1, __m2); 724} 725 726static __inline __m64 __attribute__((__always_inline__)) 727_m_pand (__m64 __m1, __m64 __m2) 728{ 729 return _mm_and_si64 (__m1, __m2); 730} 731 732/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 733 64-bit value in M2. 
*/ 734static __inline __m64 __attribute__((__always_inline__)) 735_mm_andnot_si64 (__m64 __m1, __m64 __m2) 736{ 737 return __builtin_ia32_pandn (__m1, __m2); 738} 739 740static __inline __m64 __attribute__((__always_inline__)) 741_m_pandn (__m64 __m1, __m64 __m2) 742{ 743 return _mm_andnot_si64 (__m1, __m2); 744} 745 746/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 747static __inline __m64 __attribute__((__always_inline__)) 748_mm_or_si64 (__m64 __m1, __m64 __m2) 749{ 750 return __builtin_ia32_por (__m1, __m2); 751} 752 753static __inline __m64 __attribute__((__always_inline__)) 754_m_por (__m64 __m1, __m64 __m2) 755{ 756 return _mm_or_si64 (__m1, __m2); 757} 758 759/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 760static __inline __m64 __attribute__((__always_inline__)) 761_mm_xor_si64 (__m64 __m1, __m64 __m2) 762{ 763 return __builtin_ia32_pxor (__m1, __m2); 764} 765 766static __inline __m64 __attribute__((__always_inline__)) 767_m_pxor (__m64 __m1, __m64 __m2) 768{ 769 return _mm_xor_si64 (__m1, __m2); 770} 771 772/* Compare eight 8-bit values. The result of the comparison is 0xFF if the 773 test is true and zero if false. */ 774static __inline __m64 __attribute__((__always_inline__)) 775_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 776{ 777 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 778} 779 780static __inline __m64 __attribute__((__always_inline__)) 781_m_pcmpeqb (__m64 __m1, __m64 __m2) 782{ 783 return _mm_cmpeq_pi8 (__m1, __m2); 784} 785 786static __inline __m64 __attribute__((__always_inline__)) 787_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 788{ 789 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 790} 791 792static __inline __m64 __attribute__((__always_inline__)) 793_m_pcmpgtb (__m64 __m1, __m64 __m2) 794{ 795 return _mm_cmpgt_pi8 (__m1, __m2); 796} 797 798/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 799 the test is true and zero if false. 
*/ 800static __inline __m64 __attribute__((__always_inline__)) 801_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 802{ 803 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 804} 805 806static __inline __m64 __attribute__((__always_inline__)) 807_m_pcmpeqw (__m64 __m1, __m64 __m2) 808{ 809 return _mm_cmpeq_pi16 (__m1, __m2); 810} 811 812static __inline __m64 __attribute__((__always_inline__)) 813_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 814{ 815 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 816} 817 818static __inline __m64 __attribute__((__always_inline__)) 819_m_pcmpgtw (__m64 __m1, __m64 __m2) 820{ 821 return _mm_cmpgt_pi16 (__m1, __m2); 822} 823 824/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 825 the test is true and zero if false. */ 826static __inline __m64 __attribute__((__always_inline__)) 827_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 828{ 829 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 830} 831 832static __inline __m64 __attribute__((__always_inline__)) 833_m_pcmpeqd (__m64 __m1, __m64 __m2) 834{ 835 return _mm_cmpeq_pi32 (__m1, __m2); 836} 837 838static __inline __m64 __attribute__((__always_inline__)) 839_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 840{ 841 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 842} 843 844static __inline __m64 __attribute__((__always_inline__)) 845_m_pcmpgtd (__m64 __m1, __m64 __m2) 846{ 847 return _mm_cmpgt_pi32 (__m1, __m2); 848} 849 850/* Creates a 64-bit zero. */ 851static __inline __m64 __attribute__((__always_inline__)) 852_mm_setzero_si64 (void) 853{ 854 return (__m64)0LL; 855} 856 857/* Creates a vector of two 32-bit values; I0 is least significant. */ 858static __inline __m64 __attribute__((__always_inline__)) 859_mm_set_pi32 (int __i1, int __i0) 860{ 861 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1); 862} 863 864/* Creates a vector of four 16-bit values; W0 is least significant. 
*/ 865static __inline __m64 __attribute__((__always_inline__)) 866_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) 867{ 868 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3); 869} 870 871/* Creates a vector of eight 8-bit values; B0 is least significant. */ 872static __inline __m64 __attribute__((__always_inline__)) 873_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, 874 char __b3, char __b2, char __b1, char __b0) 875{ 876 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3, 877 __b4, __b5, __b6, __b7); 878} 879 880/* Similar, but with the arguments in reverse order. */ 881static __inline __m64 __attribute__((__always_inline__)) 882_mm_setr_pi32 (int __i0, int __i1) 883{ 884 return _mm_set_pi32 (__i1, __i0); 885} 886 887static __inline __m64 __attribute__((__always_inline__)) 888_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) 889{ 890 return _mm_set_pi16 (__w3, __w2, __w1, __w0); 891} 892 893static __inline __m64 __attribute__((__always_inline__)) 894_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, 895 char __b4, char __b5, char __b6, char __b7) 896{ 897 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 898} 899 900/* Creates a vector of two 32-bit values, both elements containing I. */ 901static __inline __m64 __attribute__((__always_inline__)) 902_mm_set1_pi32 (int __i) 903{ 904 return _mm_set_pi32 (__i, __i); 905} 906 907/* Creates a vector of four 16-bit values, all elements containing W. */ 908static __inline __m64 __attribute__((__always_inline__)) 909_mm_set1_pi16 (short __w) 910{ 911 return _mm_set_pi16 (__w, __w, __w, __w); 912} 913 914/* Creates a vector of eight 8-bit values, all elements containing B. */ 915static __inline __m64 __attribute__((__always_inline__)) 916_mm_set1_pi8 (char __b) 917{ 918 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); 919} 920 921#endif /* __MMX__ */ 922#endif /* _MMINTRIN_H_INCLUDED */ 923