1/* Copyright (C) 2003-2015 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24/* Implemented from the specification included in the Intel C++ Compiler 25 User Guide and Reference, version 9.0. 
*/ 26 27#ifndef _EMMINTRIN_H_INCLUDED 28#define _EMMINTRIN_H_INCLUDED 29 30/* We need definitions from the SSE header files*/ 31#include <xmmintrin.h> 32 33#ifndef __SSE2__ 34#pragma GCC push_options 35#pragma GCC target("sse2") 36#define __DISABLE_SSE2__ 37#endif /* __SSE2__ */ 38 39/* SSE2 */ 40typedef double __v2df __attribute__ ((__vector_size__ (16))); 41typedef long long __v2di __attribute__ ((__vector_size__ (16))); 42typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); 43typedef int __v4si __attribute__ ((__vector_size__ (16))); 44typedef unsigned int __v4su __attribute__ ((__vector_size__ (16))); 45typedef short __v8hi __attribute__ ((__vector_size__ (16))); 46typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16))); 47typedef char __v16qi __attribute__ ((__vector_size__ (16))); 48typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16))); 49 50/* The Intel API is flexible enough that we must allow aliasing with other 51 vector types, and their scalar components. */ 52typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); 53typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); 54 55/* Create a selector for use with the SHUFPD instruction. */ 56#define _MM_SHUFFLE2(fp1,fp0) \ 57 (((fp1) << 1) | (fp0)) 58 59/* Create a vector with element 0 as F and the rest zero. */ 60extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 61_mm_set_sd (double __F) 62{ 63 return __extension__ (__m128d){ __F, 0.0 }; 64} 65 66/* Create a vector with both elements equal to F. 
*/ 67extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 68_mm_set1_pd (double __F) 69{ 70 return __extension__ (__m128d){ __F, __F }; 71} 72 73extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 74_mm_set_pd1 (double __F) 75{ 76 return _mm_set1_pd (__F); 77} 78 79/* Create a vector with the lower value X and upper value W. */ 80extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 81_mm_set_pd (double __W, double __X) 82{ 83 return __extension__ (__m128d){ __X, __W }; 84} 85 86/* Create a vector with the lower value W and upper value X. */ 87extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 88_mm_setr_pd (double __W, double __X) 89{ 90 return __extension__ (__m128d){ __W, __X }; 91} 92 93/* Create an undefined vector. */ 94extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 95_mm_undefined_pd (void) 96{ 97 __m128d __Y = __Y; 98 return __Y; 99} 100 101/* Create a vector of zeros. */ 102extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 103_mm_setzero_pd (void) 104{ 105 return __extension__ (__m128d){ 0.0, 0.0 }; 106} 107 108/* Sets the low DPFP value of A from the low value of B. */ 109extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 110_mm_move_sd (__m128d __A, __m128d __B) 111{ 112 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 113} 114 115/* Load two DPFP values from P. The address must be 16-byte aligned. */ 116extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 117_mm_load_pd (double const *__P) 118{ 119 return *(__m128d *)__P; 120} 121 122/* Load two DPFP values from P. The address need not be 16-byte aligned. 
*/ 123extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 124_mm_loadu_pd (double const *__P) 125{ 126 return __builtin_ia32_loadupd (__P); 127} 128 129/* Create a vector with all two elements equal to *P. */ 130extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 131_mm_load1_pd (double const *__P) 132{ 133 return _mm_set1_pd (*__P); 134} 135 136/* Create a vector with element 0 as *P and the rest zero. */ 137extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 138_mm_load_sd (double const *__P) 139{ 140 return _mm_set_sd (*__P); 141} 142 143extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 144_mm_load_pd1 (double const *__P) 145{ 146 return _mm_load1_pd (__P); 147} 148 149/* Load two DPFP values in reverse order. The address must be aligned. */ 150extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 151_mm_loadr_pd (double const *__P) 152{ 153 __m128d __tmp = _mm_load_pd (__P); 154 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); 155} 156 157/* Store two DPFP values. The address must be 16-byte aligned. */ 158extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 159_mm_store_pd (double *__P, __m128d __A) 160{ 161 *(__m128d *)__P = __A; 162} 163 164/* Store two DPFP values. The address need not be 16-byte aligned. */ 165extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 166_mm_storeu_pd (double *__P, __m128d __A) 167{ 168 __builtin_ia32_storeupd (__P, __A); 169} 170 171/* Stores the lower DPFP value. 
*/ 172extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 173_mm_store_sd (double *__P, __m128d __A) 174{ 175 *__P = ((__v2df)__A)[0]; 176} 177 178extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 179_mm_cvtsd_f64 (__m128d __A) 180{ 181 return ((__v2df)__A)[0]; 182} 183 184extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 185_mm_storel_pd (double *__P, __m128d __A) 186{ 187 _mm_store_sd (__P, __A); 188} 189 190/* Stores the upper DPFP value. */ 191extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 192_mm_storeh_pd (double *__P, __m128d __A) 193{ 194 *__P = ((__v2df)__A)[1]; 195} 196 197/* Store the lower DPFP value across two words. 198 The address must be 16-byte aligned. */ 199extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 200_mm_store1_pd (double *__P, __m128d __A) 201{ 202 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); 203} 204 205extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 206_mm_store_pd1 (double *__P, __m128d __A) 207{ 208 _mm_store1_pd (__P, __A); 209} 210 211/* Store two DPFP values in reverse order. The address must be aligned. */ 212extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 213_mm_storer_pd (double *__P, __m128d __A) 214{ 215 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); 216} 217 218extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 219_mm_cvtsi128_si32 (__m128i __A) 220{ 221 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); 222} 223 224#ifdef __x86_64__ 225/* Intel intrinsic. */ 226extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 227_mm_cvtsi128_si64 (__m128i __A) 228{ 229 return ((__v2di)__A)[0]; 230} 231 232/* Microsoft intrinsic. 
*/ 233extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 234_mm_cvtsi128_si64x (__m128i __A) 235{ 236 return ((__v2di)__A)[0]; 237} 238#endif 239 240extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 241_mm_add_pd (__m128d __A, __m128d __B) 242{ 243 return (__m128d) ((__v2df)__A + (__v2df)__B); 244} 245 246extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 247_mm_add_sd (__m128d __A, __m128d __B) 248{ 249 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); 250} 251 252extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 253_mm_sub_pd (__m128d __A, __m128d __B) 254{ 255 return (__m128d) ((__v2df)__A - (__v2df)__B); 256} 257 258extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 259_mm_sub_sd (__m128d __A, __m128d __B) 260{ 261 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); 262} 263 264extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 265_mm_mul_pd (__m128d __A, __m128d __B) 266{ 267 return (__m128d) ((__v2df)__A * (__v2df)__B); 268} 269 270extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 271_mm_mul_sd (__m128d __A, __m128d __B) 272{ 273 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); 274} 275 276extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 277_mm_div_pd (__m128d __A, __m128d __B) 278{ 279 return (__m128d) ((__v2df)__A / (__v2df)__B); 280} 281 282extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 283_mm_div_sd (__m128d __A, __m128d __B) 284{ 285 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); 286} 287 288extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 289_mm_sqrt_pd (__m128d __A) 290{ 291 return 
(__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); 292} 293 294/* Return pair {sqrt (B[0]), A[1]}. */ 295extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 296_mm_sqrt_sd (__m128d __A, __m128d __B) 297{ 298 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 299 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); 300} 301 302extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 303_mm_min_pd (__m128d __A, __m128d __B) 304{ 305 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); 306} 307 308extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 309_mm_min_sd (__m128d __A, __m128d __B) 310{ 311 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); 312} 313 314extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 315_mm_max_pd (__m128d __A, __m128d __B) 316{ 317 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); 318} 319 320extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 321_mm_max_sd (__m128d __A, __m128d __B) 322{ 323 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); 324} 325 326extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 327_mm_and_pd (__m128d __A, __m128d __B) 328{ 329 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); 330} 331 332extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 333_mm_andnot_pd (__m128d __A, __m128d __B) 334{ 335 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); 336} 337 338extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 339_mm_or_pd (__m128d __A, __m128d __B) 340{ 341 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); 342} 343 344extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
345_mm_xor_pd (__m128d __A, __m128d __B) 346{ 347 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); 348} 349 350extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 351_mm_cmpeq_pd (__m128d __A, __m128d __B) 352{ 353 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); 354} 355 356extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 357_mm_cmplt_pd (__m128d __A, __m128d __B) 358{ 359 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); 360} 361 362extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 363_mm_cmple_pd (__m128d __A, __m128d __B) 364{ 365 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); 366} 367 368extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 369_mm_cmpgt_pd (__m128d __A, __m128d __B) 370{ 371 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); 372} 373 374extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 375_mm_cmpge_pd (__m128d __A, __m128d __B) 376{ 377 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); 378} 379 380extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 381_mm_cmpneq_pd (__m128d __A, __m128d __B) 382{ 383 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); 384} 385 386extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 387_mm_cmpnlt_pd (__m128d __A, __m128d __B) 388{ 389 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); 390} 391 392extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 393_mm_cmpnle_pd (__m128d __A, __m128d __B) 394{ 395 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); 396} 397 398extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
399_mm_cmpngt_pd (__m128d __A, __m128d __B) 400{ 401 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); 402} 403 404extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 405_mm_cmpnge_pd (__m128d __A, __m128d __B) 406{ 407 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); 408} 409 410extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 411_mm_cmpord_pd (__m128d __A, __m128d __B) 412{ 413 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); 414} 415 416extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 417_mm_cmpunord_pd (__m128d __A, __m128d __B) 418{ 419 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); 420} 421 422extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 423_mm_cmpeq_sd (__m128d __A, __m128d __B) 424{ 425 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); 426} 427 428extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 429_mm_cmplt_sd (__m128d __A, __m128d __B) 430{ 431 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); 432} 433 434extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 435_mm_cmple_sd (__m128d __A, __m128d __B) 436{ 437 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); 438} 439 440extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 441_mm_cmpgt_sd (__m128d __A, __m128d __B) 442{ 443 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 444 (__v2df) 445 __builtin_ia32_cmpltsd ((__v2df) __B, 446 (__v2df) 447 __A)); 448} 449 450extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 451_mm_cmpge_sd (__m128d __A, __m128d __B) 452{ 453 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 454 (__v2df) 455 __builtin_ia32_cmplesd 
((__v2df) __B, 456 (__v2df) 457 __A)); 458} 459 460extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 461_mm_cmpneq_sd (__m128d __A, __m128d __B) 462{ 463 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); 464} 465 466extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 467_mm_cmpnlt_sd (__m128d __A, __m128d __B) 468{ 469 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); 470} 471 472extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 473_mm_cmpnle_sd (__m128d __A, __m128d __B) 474{ 475 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); 476} 477 478extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 479_mm_cmpngt_sd (__m128d __A, __m128d __B) 480{ 481 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 482 (__v2df) 483 __builtin_ia32_cmpnltsd ((__v2df) __B, 484 (__v2df) 485 __A)); 486} 487 488extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 489_mm_cmpnge_sd (__m128d __A, __m128d __B) 490{ 491 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 492 (__v2df) 493 __builtin_ia32_cmpnlesd ((__v2df) __B, 494 (__v2df) 495 __A)); 496} 497 498extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 499_mm_cmpord_sd (__m128d __A, __m128d __B) 500{ 501 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); 502} 503 504extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 505_mm_cmpunord_sd (__m128d __A, __m128d __B) 506{ 507 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); 508} 509 510extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 511_mm_comieq_sd (__m128d __A, __m128d __B) 512{ 513 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); 514} 515 516extern __inline int 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 517_mm_comilt_sd (__m128d __A, __m128d __B) 518{ 519 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); 520} 521 522extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 523_mm_comile_sd (__m128d __A, __m128d __B) 524{ 525 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); 526} 527 528extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 529_mm_comigt_sd (__m128d __A, __m128d __B) 530{ 531 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); 532} 533 534extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 535_mm_comige_sd (__m128d __A, __m128d __B) 536{ 537 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); 538} 539 540extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 541_mm_comineq_sd (__m128d __A, __m128d __B) 542{ 543 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); 544} 545 546extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 547_mm_ucomieq_sd (__m128d __A, __m128d __B) 548{ 549 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); 550} 551 552extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 553_mm_ucomilt_sd (__m128d __A, __m128d __B) 554{ 555 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); 556} 557 558extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 559_mm_ucomile_sd (__m128d __A, __m128d __B) 560{ 561 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); 562} 563 564extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 565_mm_ucomigt_sd (__m128d __A, __m128d __B) 566{ 567 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); 568} 569 570extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 571_mm_ucomige_sd 
(__m128d __A, __m128d __B) 572{ 573 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); 574} 575 576extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 577_mm_ucomineq_sd (__m128d __A, __m128d __B) 578{ 579 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); 580} 581 582/* Create a vector of Qi, where i is the element number. */ 583 584extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 585_mm_set_epi64x (long long __q1, long long __q0) 586{ 587 return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 588} 589 590extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 591_mm_set_epi64 (__m64 __q1, __m64 __q0) 592{ 593 return _mm_set_epi64x ((long long)__q1, (long long)__q0); 594} 595 596extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 597_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) 598{ 599 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; 600} 601 602extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 603_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, 604 short __q3, short __q2, short __q1, short __q0) 605{ 606 return __extension__ (__m128i)(__v8hi){ 607 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; 608} 609 610extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 611_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, 612 char __q11, char __q10, char __q09, char __q08, 613 char __q07, char __q06, char __q05, char __q04, 614 char __q03, char __q02, char __q01, char __q00) 615{ 616 return __extension__ (__m128i)(__v16qi){ 617 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 618 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 619 }; 620} 621 622/* Set all of the elements of the vector to A. 
*/ 623 624extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 625_mm_set1_epi64x (long long __A) 626{ 627 return _mm_set_epi64x (__A, __A); 628} 629 630extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 631_mm_set1_epi64 (__m64 __A) 632{ 633 return _mm_set_epi64 (__A, __A); 634} 635 636extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 637_mm_set1_epi32 (int __A) 638{ 639 return _mm_set_epi32 (__A, __A, __A, __A); 640} 641 642extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 643_mm_set1_epi16 (short __A) 644{ 645 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); 646} 647 648extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 649_mm_set1_epi8 (char __A) 650{ 651 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, 652 __A, __A, __A, __A, __A, __A, __A, __A); 653} 654 655/* Create a vector of Qi, where i is the element number. 656 The parameter order is reversed from the _mm_set_epi* functions. 
*/ 657 658extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 659_mm_setr_epi64 (__m64 __q0, __m64 __q1) 660{ 661 return _mm_set_epi64 (__q1, __q0); 662} 663 664extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 665_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) 666{ 667 return _mm_set_epi32 (__q3, __q2, __q1, __q0); 668} 669 670extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 671_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, 672 short __q4, short __q5, short __q6, short __q7) 673{ 674 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); 675} 676 677extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 678_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, 679 char __q04, char __q05, char __q06, char __q07, 680 char __q08, char __q09, char __q10, char __q11, 681 char __q12, char __q13, char __q14, char __q15) 682{ 683 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, 684 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); 685} 686 687/* Create a vector with element 0 as *P and the rest zero. 
*/ 688 689extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 690_mm_load_si128 (__m128i const *__P) 691{ 692 return *__P; 693} 694 695extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 696_mm_loadu_si128 (__m128i const *__P) 697{ 698 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); 699} 700 701extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 702_mm_loadl_epi64 (__m128i const *__P) 703{ 704 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); 705} 706 707extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 708_mm_store_si128 (__m128i *__P, __m128i __B) 709{ 710 *__P = __B; 711} 712 713extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 714_mm_storeu_si128 (__m128i *__P, __m128i __B) 715{ 716 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); 717} 718 719extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 720_mm_storel_epi64 (__m128i *__P, __m128i __B) 721{ 722 *(long long *)__P = ((__v2di)__B)[0]; 723} 724 725extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 726_mm_movepi64_pi64 (__m128i __B) 727{ 728 return (__m64) ((__v2di)__B)[0]; 729} 730 731extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 732_mm_movpi64_epi64 (__m64 __A) 733{ 734 return _mm_set_epi64 ((__m64)0LL, __A); 735} 736 737extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 738_mm_move_epi64 (__m128i __A) 739{ 740 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); 741} 742 743/* Create an undefined vector. */ 744extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 745_mm_undefined_si128 (void) 746{ 747 __m128i __Y = __Y; 748 return __Y; 749} 750 751/* Create a vector of zeros. 
*/ 752extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 753_mm_setzero_si128 (void) 754{ 755 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; 756} 757 758extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 759_mm_cvtepi32_pd (__m128i __A) 760{ 761 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); 762} 763 764extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 765_mm_cvtepi32_ps (__m128i __A) 766{ 767 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); 768} 769 770extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 771_mm_cvtpd_epi32 (__m128d __A) 772{ 773 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); 774} 775 776extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 777_mm_cvtpd_pi32 (__m128d __A) 778{ 779 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); 780} 781 782extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 783_mm_cvtpd_ps (__m128d __A) 784{ 785 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); 786} 787 788extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 789_mm_cvttpd_epi32 (__m128d __A) 790{ 791 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); 792} 793 794extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 795_mm_cvttpd_pi32 (__m128d __A) 796{ 797 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); 798} 799 800extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 801_mm_cvtpi32_pd (__m64 __A) 802{ 803 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); 804} 805 806extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 807_mm_cvtps_epi32 (__m128 __A) 808{ 809 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); 810} 811 812extern 
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}

/* Hand A, viewed as four floats, to the cvtps2pd builtin and return the
   result as a vector of doubles.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd (__m128 __A)
{
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}

/* Hand A, viewed as two doubles, to the cvtsd2si builtin and return the
   int it produces.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si ((__v2df) __A);
}

/* The long long variants are only declared when __x86_64__ is defined.  */
#ifdef __x86_64__
/* Intel intrinsic.  Same as _mm_cvtsd_si32 but uses the cvtsd2si64
   builtin and returns long long.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  Identical body to _mm_cvtsd_si64; only the
   spelling of the name differs.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif

/* Same shape as _mm_cvtsd_si32, but routed through the cvttsd2si
   builtin.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  long long counterpart of _mm_cvttsd_si32, using the
   cvttsd2si64 builtin.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  Identical body to _mm_cvttsd_si64.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif

/* Two-operand conversion: A (as four floats) and B (as two doubles) are
   both passed to the cvtsd2ss builtin; the result is returned as
   __m128.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}

/* Pass the vector A and the scalar int B to the cvtsi2sd builtin and
   return the resulting vector of doubles.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}

#ifdef __x86_64__
/* Intel intrinsic.  long long counterpart of _mm_cvtsi32_sd, using the
   cvtsi642sd builtin.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}

/* Microsoft intrinsic.  Identical body to _mm_cvtsi64_sd.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif

/* Two-operand conversion: A (as two doubles) and B (as four floats) are
   both passed to the cvtss2sd builtin; the result is returned as
   __m128d.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}

/* _mm_shuffle_pd is an inline function when __OPTIMIZE__ is defined and a
   macro otherwise.  Both forms pass A, B and the selector to the shufpd
   builtin; a selector can be built with _MM_SHUFFLE2.
   NOTE(review): the macro fallback presumably exists because the builtin
   needs the selector as a compile-time constant -- confirm.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B), (int)(N)))
#endif

/* Pass A and B, as vectors of two doubles, to the unpckhpd builtin.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}

/* Pass A and B, as vectors of two doubles, to the unpcklpd builtin.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}

/* Combine the vector A with the double that B points to, via the loadhpd
   builtin.  B is passed through unchanged as a pointer to const
   double.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}

/* As _mm_loadh_pd, but through the loadlpd builtin.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}

/* Return the int produced by the movmskpd builtin for A.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
  return __builtin_ia32_movmskpd ((__v2df)__A);
}

/* Pack wrappers.  The operands are viewed with the element type named in
   the cast (__v8hi = 8 shorts, __v4si = 4 ints) and handed to the
   packsswb128 / packssdw128 / packuswb128 builtins respectively.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}

/* Integer unpack wrappers, "hi" family.  The suffix of each intrinsic
   selects the element view used in the casts: epi8 -> __v16qi,
   epi16 -> __v8hi, epi32 -> __v4si, epi64 -> __v2di.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}

/* Integer unpack wrappers, "lo" family; same element views as the "hi"
   family above, routed through the punpckl* builtins.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
}

/* Element-wise addition written with the generic vector `+' operator
   instead of a builtin.  The operands are viewed as vectors of UNSIGNED
   elements (__v16qu, __v8hu, __v4su, __v2du), so a sum that does not fit
   wraps around instead of being signed-overflow undefined behavior.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v16qu)__A + (__v16qu)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v8hu)__A + (__v8hu)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v4su)__A + (__v4su)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A + (__v2du)__B);
}

/* The "adds" family goes through builtins (paddsb128, paddsw128,
   paddusb128, paddusw128) rather than the `+' operator.  Note that the
   epu8/epu16 variants still cast to the __v16qi/__v8hi views; the
   signed-versus-unsigned distinction is carried by the builtin that is
   called, not by the cast.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise subtraction (A - B) with the generic vector `-' operator
   on unsigned element views, mirroring the _mm_add_epi* family: a
   difference that does not fit wraps around.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v16qu)__A - (__v16qu)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v8hu)__A - (__v8hu)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v4su)__A - (__v4su)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A - (__v2du)__B);
}

/* The "subs" family goes through the psubsb128, psubsw128, psubusb128 and
   psubusw128 builtins; as with "adds", the cast type is the same for the
   epi and epu variants and the builtin carries the distinction.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Pass A and B, as vectors of eight shorts, to the pmaddwd128 builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}

/* Pass A and B, as vectors of eight shorts, to the pmulhw128 builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise product of eight 16-bit elements using the generic vector
   `*' operator on the unsigned view, so each result element is the
   product reduced to the element width.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v8hu)__A * (__v8hu)__B);
}

/* 64-bit (__m64) operand form: A and B are viewed as __v2si and passed to
   the pmuludq builtin.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32 (__m64 __A, __m64 __B)
{
  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}

/* 128-bit operand form: A and B are viewed as __v4si and passed to the
   pmuludq128 builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}

/* Shifts by a scalar int count B.  The intrinsic suffix picks the element
   view of A (__v8hi, __v4si or __v2di) and the builtin (psllwi128,
   pslldi128, psllqi128 for "slli"; psrawi128, psradi128 for "srai").
   B is forwarded unchanged.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}

/* Whole-register shifts.  The caller's count N is multiplied by 8 before
   it reaches the psrldqi128 / pslldqi128 builtins (presumably converting
   a byte count into the bit count the builtin expects -- confirm against
   the builtin's definition).  _mm_bsrli_si128 and _mm_srli_si128 have
   identical bodies, as do _mm_bslli_si128 and _mm_slli_si128.  Inline
   functions are used when __OPTIMIZE__ is defined, equivalent macros
   otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_bsrli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_bslli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif

/* "srli" shifts by a scalar int count B, through the psrlwi128,
   psrldi128 and psrlqi128 builtins.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
}

/* Shifts whose count is supplied in a vector register: B is a __m128i
   that is cast to the same element view as A and passed as the second
   builtin operand ("sll" -> psllw128/pslld128/psllq128, "sra" ->
   psraw128/psrad128, "srl" -> psrlw128/psrld128/psrlq128).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise AND of the full 128 bits, written with the generic vector `&'
   operator on the two-element unsigned view.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A & (__v2du)__B);
}

/* Unlike its and/or/xor siblings this one is not spelled with an
   operator; it forwards A and B, in that order, to the pandn128
   builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise OR of the full 128 bits via the vector `|' operator.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A | (__v2du)__B);
}

/* Bitwise XOR of the full 128 bits via the vector `^' operator.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
}

1291extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1292_mm_cmpeq_epi8 (__m128i __A, __m128i __B) 1293{ 1294 return (__m128i) ((__v16qi)__A == (__v16qi)__B); 1295} 1296 1297extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1298_mm_cmpeq_epi16 (__m128i __A, __m128i __B) 1299{ 1300 return (__m128i) ((__v8hi)__A == (__v8hi)__B); 1301} 1302 1303extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1304_mm_cmpeq_epi32 (__m128i __A, __m128i __B) 1305{ 1306 return (__m128i) ((__v4si)__A == (__v4si)__B); 1307} 1308 1309extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1310_mm_cmplt_epi8 (__m128i __A, __m128i __B) 1311{ 1312 return (__m128i) ((__v16qi)__A < (__v16qi)__B); 1313} 1314 1315extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1316_mm_cmplt_epi16 (__m128i __A, __m128i __B) 1317{ 1318 return (__m128i) ((__v8hi)__A < (__v8hi)__B); 1319} 1320 1321extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1322_mm_cmplt_epi32 (__m128i __A, __m128i __B) 1323{ 1324 return (__m128i) ((__v4si)__A < (__v4si)__B); 1325} 1326 1327extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1328_mm_cmpgt_epi8 (__m128i __A, __m128i __B) 1329{ 1330 return (__m128i) ((__v16qi)__A > (__v16qi)__B); 1331} 1332 1333extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1334_mm_cmpgt_epi16 (__m128i __A, __m128i __B) 1335{ 1336 return (__m128i) ((__v8hi)__A > (__v8hi)__B); 1337} 1338 1339extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1340_mm_cmpgt_epi32 (__m128i __A, __m128i __B) 1341{ 1342 return (__m128i) ((__v4si)__A > (__v4si)__B); 1343} 1344 1345#ifdef __OPTIMIZE__ 1346extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) 1347_mm_extract_epi16 (__m128i const __A, int const __N) 1348{ 1349 return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); 1350} 1351 1352extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1353_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) 1354{ 1355 return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); 1356} 1357#else 1358#define _mm_extract_epi16(A, N) \ 1359 ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) 1360#define _mm_insert_epi16(A, D, N) \ 1361 ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ 1362 (int)(D), (int)(N))) 1363#endif 1364 1365extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1366_mm_max_epi16 (__m128i __A, __m128i __B) 1367{ 1368 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); 1369} 1370 1371extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1372_mm_max_epu8 (__m128i __A, __m128i __B) 1373{ 1374 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); 1375} 1376 1377extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1378_mm_min_epi16 (__m128i __A, __m128i __B) 1379{ 1380 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); 1381} 1382 1383extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1384_mm_min_epu8 (__m128i __A, __m128i __B) 1385{ 1386 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); 1387} 1388 1389extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1390_mm_movemask_epi8 (__m128i __A) 1391{ 1392 return __builtin_ia32_pmovmskb128 ((__v16qi)__A); 1393} 1394 1395extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1396_mm_mulhi_epu16 (__m128i __A, __m128i __B) 1397{ 1398 return 
(__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); 1399} 1400 1401#ifdef __OPTIMIZE__ 1402extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1403_mm_shufflehi_epi16 (__m128i __A, const int __mask) 1404{ 1405 return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask); 1406} 1407 1408extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1409_mm_shufflelo_epi16 (__m128i __A, const int __mask) 1410{ 1411 return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask); 1412} 1413 1414extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1415_mm_shuffle_epi32 (__m128i __A, const int __mask) 1416{ 1417 return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); 1418} 1419#else 1420#define _mm_shufflehi_epi16(A, N) \ 1421 ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N))) 1422#define _mm_shufflelo_epi16(A, N) \ 1423 ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N))) 1424#define _mm_shuffle_epi32(A, N) \ 1425 ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N))) 1426#endif 1427 1428extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1429_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) 1430{ 1431 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); 1432} 1433 1434extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1435_mm_avg_epu8 (__m128i __A, __m128i __B) 1436{ 1437 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); 1438} 1439 1440extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1441_mm_avg_epu16 (__m128i __A, __m128i __B) 1442{ 1443 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); 1444} 1445 1446extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1447_mm_sad_epu8 (__m128i __A, __m128i __B) 1448{ 
1449 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); 1450} 1451 1452extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1453_mm_stream_si32 (int *__A, int __B) 1454{ 1455 __builtin_ia32_movnti (__A, __B); 1456} 1457 1458#ifdef __x86_64__ 1459extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1460_mm_stream_si64 (long long int *__A, long long int __B) 1461{ 1462 __builtin_ia32_movnti64 (__A, __B); 1463} 1464#endif 1465 1466extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1467_mm_stream_si128 (__m128i *__A, __m128i __B) 1468{ 1469 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); 1470} 1471 1472extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1473_mm_stream_pd (double *__A, __m128d __B) 1474{ 1475 __builtin_ia32_movntpd (__A, (__v2df)__B); 1476} 1477 1478extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1479_mm_clflush (void const *__A) 1480{ 1481 __builtin_ia32_clflush (__A); 1482} 1483 1484extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1485_mm_lfence (void) 1486{ 1487 __builtin_ia32_lfence (); 1488} 1489 1490extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1491_mm_mfence (void) 1492{ 1493 __builtin_ia32_mfence (); 1494} 1495 1496extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1497_mm_cvtsi32_si128 (int __A) 1498{ 1499 return _mm_set_epi32 (0, 0, 0, __A); 1500} 1501 1502#ifdef __x86_64__ 1503/* Intel intrinsic. */ 1504extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1505_mm_cvtsi64_si128 (long long __A) 1506{ 1507 return _mm_set_epi64x (0, __A); 1508} 1509 1510/* Microsoft intrinsic. 
*/ 1511extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1512_mm_cvtsi64x_si128 (long long __A) 1513{ 1514 return _mm_set_epi64x (0, __A); 1515} 1516#endif 1517 1518/* Casts between various SP, DP, INT vector types. Note that these do no 1519 conversion of values, they just change the type. */ 1520extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1521_mm_castpd_ps(__m128d __A) 1522{ 1523 return (__m128) __A; 1524} 1525 1526extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1527_mm_castpd_si128(__m128d __A) 1528{ 1529 return (__m128i) __A; 1530} 1531 1532extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1533_mm_castps_pd(__m128 __A) 1534{ 1535 return (__m128d) __A; 1536} 1537 1538extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1539_mm_castps_si128(__m128 __A) 1540{ 1541 return (__m128i) __A; 1542} 1543 1544extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1545_mm_castsi128_ps(__m128i __A) 1546{ 1547 return (__m128) __A; 1548} 1549 1550extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1551_mm_castsi128_pd(__m128i __A) 1552{ 1553 return (__m128d) __A; 1554} 1555 1556#ifdef __DISABLE_SSE2__ 1557#undef __DISABLE_SSE2__ 1558#pragma GCC pop_options 1559#endif /* __DISABLE_SSE2__ */ 1560 1561#endif /* _EMMINTRIN_H_INCLUDED */ 1562