/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED

#ifndef __MMX__
# error "MMX instruction set not enabled"
#else
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

/* Internal data types for implementing the intrinsics.  */
typedef int __v2si __attribute__ ((__vector_size__ (8)));        /* 2 x 32-bit int  */
typedef short __v4hi __attribute__ ((__vector_size__ (8)));      /* 4 x 16-bit int  */
typedef char __v8qi __attribute__ ((__vector_size__ (8)));       /* 8 x 8-bit int  */
typedef long long __v1di __attribute__ ((__vector_size__ (8)));  /* 1 x 64-bit int  */
typedef float __v2sf __attribute__ ((__vector_size__ (8)));      /* 2 x 32-bit float  */

/* Empty the multimedia state.
*/
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}

/* Alias for _mm_empty (the original MMX-era spelling).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}

/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{
  return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
}

/* Alias for _mm_cvtsi32_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i)
{
  return _mm_cvtsi32_si64 (__i);
}

#ifdef __x86_64__
/* Convert I to a __m64 object.  */

/* Intel intrinsic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i)
{
  return (__m64) __i;
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{
  return (__m64) __i;
}

/* Microsoft intrinsic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{
  return (__m64) __i;
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{
  return (__m64) __i;
}
#endif

/* Convert the lower 32 bits of the __m64 object into an integer.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{
  return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
}

/* Alias for _mm_cvtsi64_si32.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
  return _mm_cvtsi64_si32 (__i);
}

#ifdef __x86_64__
/* Convert the __m64 object to a 64bit integer.
*/

/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{
  return (long long)__i;
}

extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{
  return (long long)__i;
}

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{
  return (long long)__i;
}
#endif

/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
   the result, and the four 16-bit values from M2 into the upper four 8-bit
   values of the result, all with signed saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_packs_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi16 (__m1, __m2);
}

/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
   the result, and the two 32-bit values from M2 into the upper two 16-bit
   values of the result, all with signed saturation.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_packs_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi32 (__m1, __m2);
}

/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
   the result, and the four 16-bit values from M2 into the upper four 8-bit
   values of the result, all with unsigned saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_packs_pu16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pu16 (__m1, __m2);
}

/* Interleave the four 8-bit values from the high half of M1 with the four
   8-bit values from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_unpackhi_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi8 (__m1, __m2);
}

/* Interleave the two 16-bit values from the high half of M1 with the two
   16-bit values from the high half of M2.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_unpackhi_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi16 (__m1, __m2);
}

/* Interleave the 32-bit value from the high half of M1 with the 32-bit
   value from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_unpackhi_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi32 (__m1, __m2);
}

/* Interleave the four 8-bit values from the low half of M1 with the four
   8-bit values from the low half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_unpacklo_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi8 (__m1, __m2);
}

/* Interleave the two 16-bit values from the low half of M1 with the two
   16-bit values from the low half of M2.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_unpacklo_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi16 (__m1, __m2);
}

/* Interleave the 32-bit value from the low half of M1 with the 32-bit
   value from the low half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_unpacklo_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi32 (__m1, __m2);
}

/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_add_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_add_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi16 (__m1, __m2);
}

/* Add the 32-bit values in M1 to the 32-bit values in M2.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_add_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi32 (__m1, __m2);
}

/* Add the 64-bit values in M1 to the 64-bit values in M2.  Only available
   when SSE2 is enabled.  */
#ifdef __SSE2__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
}
#endif

/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_adds_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pi8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_adds_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pi16 (__m1, __m2);
}

/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
   saturated arithmetic.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_adds_pu8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_adds_pu16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu16 (__m1, __m2);
}

/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_sub_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_sub_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi16 (__m1, __m2);
}

/* Subtract the 32-bit values in M2 from the 32-bit values in M1.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_sub_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi32 (__m1, __m2);
}

/* Subtract the 64-bit values in M2 from the 64-bit values in M1 (PSUBQ).
   Only available when SSE2 is enabled.  */
#ifdef __SSE2__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
}
#endif

/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
   saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_subs_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
   signed saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_subs_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pi16 (__m1, __m2);
}

/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
   unsigned saturating arithmetic.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_subs_pu8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pu8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
   unsigned saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_subs_pu16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pu16 (__m1, __m2);
}

/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
   four 32-bit intermediate results, which are then summed by pairs to
   produce two 32-bit results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_madd_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd (__m64 __m1, __m64 __m2)
{
  return _mm_madd_pi16 (__m1, __m2);
}

/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
   M2 and produce the high 16 bits of the 32-bit results.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_mulhi_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw (__m64 __m1, __m64 __m2)
{
  return _mm_mulhi_pi16 (__m1, __m2);
}

/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
   the low 16 bits of the results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_mullo_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw (__m64 __m1, __m64 __m2)
{
  return _mm_mullo_pi16 (__m1, __m2);
}

/* Shift four 16-bit values in M left by COUNT (COUNT held in a __m64).  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
}

/* Alias for _mm_sll_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw (__m64 __m, __m64 __count)
{
  return _mm_sll_pi16 (__m, __count);
}

/* As _mm_sll_pi16, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
}

/* Alias for _mm_slli_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi (__m64 __m, int __count)
{
  return _mm_slli_pi16 (__m, __count);
}

/* Shift two 32-bit values in M left by COUNT.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
}

/* Alias for _mm_sll_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld (__m64 __m, __m64 __count)
{
  return _mm_sll_pi32 (__m, __count);
}

/* As _mm_sll_pi32, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
}

/* Alias for _mm_slli_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi (__m64 __m, int __count)
{
  return _mm_slli_pi32 (__m, __count);
}

/* Shift the 64-bit value in M left by COUNT.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
}

/* Alias for _mm_sll_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq (__m64 __m, __m64 __count)
{
  return _mm_sll_si64 (__m, __count);
}

/* As _mm_sll_si64, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
}

/* Alias for _mm_slli_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi (__m64 __m, int __count)
{
  return _mm_slli_si64 (__m, __count);
}

/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
}

/* Alias for _mm_sra_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw (__m64 __m, __m64 __count)
{
  return _mm_sra_pi16 (__m, __count);
}

/* As _mm_sra_pi16, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
}

/* Alias for _mm_srai_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi (__m64 __m, int __count)
{
  return _mm_srai_pi16 (__m, __count);
}

/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}

/* Alias for _mm_sra_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
  return _mm_sra_pi32 (__m, __count);
}

/* As _mm_sra_pi32, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}

/* Alias for _mm_srai_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
  return _mm_srai_pi32 (__m, __count);
}

/* Shift four 16-bit values in M right by COUNT; shift in zeros.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}

/* Alias for _mm_srl_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
  return _mm_srl_pi16 (__m, __count);
}

/* As _mm_srl_pi16, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}

/* Alias for _mm_srli_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
  return _mm_srli_pi16 (__m, __count);
}

/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}

/* Alias for _mm_srl_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
  return _mm_srl_pi32 (__m, __count);
}

/* As _mm_srl_pi32, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}

/* Alias for _mm_srli_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
  return _mm_srli_pi32 (__m, __count);
}

/* Shift the 64-bit value in M right by COUNT; shift in zeros.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}

/* Alias for _mm_srl_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
  return _mm_srl_si64 (__m, __count);
}

/* As _mm_srl_si64, but COUNT is a plain int.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}

/* Alias for _mm_srli_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
  return _mm_srli_si64 (__m, __count);
}

/* Bit-wise AND the 64-bit values in M1 and M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pand (__m1, __m2);
}

/* Alias for _mm_and_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand (__m64 __m1, __m64 __m2)
{
  return _mm_and_si64 (__m1, __m2);
}

/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
   64-bit value in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pandn (__m1, __m2);
}

/* Alias for _mm_andnot_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn (__m64 __m1, __m64 __m2)
{
  return _mm_andnot_si64 (__m1, __m2);
}

/* Bit-wise inclusive OR the 64-bit values in M1 and M2.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_por (__m1, __m2);
}

/* Alias for _mm_or_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por (__m64 __m1, __m64 __m2)
{
  return _mm_or_si64 (__m1, __m2);
}

/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pxor (__m1, __m2);
}

/* Alias for _mm_xor_si64.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor (__m64 __m1, __m64 __m2)
{
  return _mm_xor_si64 (__m1, __m2);
}

/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
   test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_cmpeq_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi8 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
}

/* Alias for _mm_cmpgt_pi8.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi8 (__m1, __m2);
}

/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
   the test is true and zero if false.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_cmpeq_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi16 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}

/* Alias for _mm_cmpgt_pi16.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi16 (__m1, __m2);
}

/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
   the test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_cmpeq_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi32 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}

/* Alias for _mm_cmpgt_pi32.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi32 (__m1, __m2);
}

/* Creates a 64-bit zero.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
  return (__m64)0LL;
}

/* Creates a vector of two 32-bit values; I0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}

/* Creates a vector of four 16-bit values; W0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}

/* Creates a vector of eight 8-bit values; B0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
             char __b3, char __b2, char __b1, char __b0)
{
  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
                                               __b4, __b5, __b6, __b7);
}

/* Similar, but with the arguments in reverse order: the first argument
   becomes the least-significant element.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32 (int __i0, int __i1)
{
  return _mm_set_pi32 (__i1, __i0);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
              char __b4, char __b5, char __b6, char __b7)
{
  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}

/* Creates a vector of two 32-bit values, both elements containing I.
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32 (int __i)
{
  return _mm_set_pi32 (__i, __i);
}

/* Creates a vector of four 16-bit values, all elements containing W.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16 (short __w)
{
  return _mm_set_pi16 (__w, __w, __w, __w);
}

/* Creates a vector of eight 8-bit values, all elements containing B.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8 (char __b)
{
  return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
}

#endif /* __MMX__ */
#endif /* _MMINTRIN_H_INCLUDED */