/* emmintrin.h revision 1.1.1.2 */
1/* Copyright (C) 2003-2013 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24/* Implemented from the specification included in the Intel C++ Compiler 25 User Guide and Reference, version 9.0. */ 26 27#ifndef _EMMINTRIN_H_INCLUDED 28#define _EMMINTRIN_H_INCLUDED 29 30#ifndef __SSE2__ 31# error "SSE2 instruction set not enabled" 32#else 33 34/* We need definitions from the SSE header files*/ 35#include <xmmintrin.h> 36 37/* SSE2 */ 38typedef double __v2df __attribute__ ((__vector_size__ (16))); 39typedef long long __v2di __attribute__ ((__vector_size__ (16))); 40typedef int __v4si __attribute__ ((__vector_size__ (16))); 41typedef short __v8hi __attribute__ ((__vector_size__ (16))); 42typedef char __v16qi __attribute__ ((__vector_size__ (16))); 43 44/* The Intel API is flexible enough that we must allow aliasing with other 45 vector types, and their scalar components. 
*/ 46typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); 47typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); 48 49/* Create a selector for use with the SHUFPD instruction. */ 50#define _MM_SHUFFLE2(fp1,fp0) \ 51 (((fp1) << 1) | (fp0)) 52 53/* Create a vector with element 0 as F and the rest zero. */ 54extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 55_mm_set_sd (double __F) 56{ 57 return __extension__ (__m128d){ __F, 0.0 }; 58} 59 60/* Create a vector with both elements equal to F. */ 61extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 62_mm_set1_pd (double __F) 63{ 64 return __extension__ (__m128d){ __F, __F }; 65} 66 67extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 68_mm_set_pd1 (double __F) 69{ 70 return _mm_set1_pd (__F); 71} 72 73/* Create a vector with the lower value X and upper value W. */ 74extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 75_mm_set_pd (double __W, double __X) 76{ 77 return __extension__ (__m128d){ __X, __W }; 78} 79 80/* Create a vector with the lower value W and upper value X. */ 81extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 82_mm_setr_pd (double __W, double __X) 83{ 84 return __extension__ (__m128d){ __W, __X }; 85} 86 87/* Create a vector of zeros. */ 88extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 89_mm_setzero_pd (void) 90{ 91 return __extension__ (__m128d){ 0.0, 0.0 }; 92} 93 94/* Sets the low DPFP value of A from the low value of B. */ 95extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 96_mm_move_sd (__m128d __A, __m128d __B) 97{ 98 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 99} 100 101/* Load two DPFP values from P. The address must be 16-byte aligned. 
*/ 102extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 103_mm_load_pd (double const *__P) 104{ 105 return *(__m128d *)__P; 106} 107 108/* Load two DPFP values from P. The address need not be 16-byte aligned. */ 109extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 110_mm_loadu_pd (double const *__P) 111{ 112 return __builtin_ia32_loadupd (__P); 113} 114 115/* Create a vector with all two elements equal to *P. */ 116extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 117_mm_load1_pd (double const *__P) 118{ 119 return _mm_set1_pd (*__P); 120} 121 122/* Create a vector with element 0 as *P and the rest zero. */ 123extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 124_mm_load_sd (double const *__P) 125{ 126 return _mm_set_sd (*__P); 127} 128 129extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 130_mm_load_pd1 (double const *__P) 131{ 132 return _mm_load1_pd (__P); 133} 134 135/* Load two DPFP values in reverse order. The address must be aligned. */ 136extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 137_mm_loadr_pd (double const *__P) 138{ 139 __m128d __tmp = _mm_load_pd (__P); 140 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); 141} 142 143/* Store two DPFP values. The address must be 16-byte aligned. */ 144extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 145_mm_store_pd (double *__P, __m128d __A) 146{ 147 *(__m128d *)__P = __A; 148} 149 150/* Store two DPFP values. The address need not be 16-byte aligned. */ 151extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 152_mm_storeu_pd (double *__P, __m128d __A) 153{ 154 __builtin_ia32_storeupd (__P, __A); 155} 156 157/* Stores the lower DPFP value. 
*/ 158extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 159_mm_store_sd (double *__P, __m128d __A) 160{ 161 *__P = __builtin_ia32_vec_ext_v2df (__A, 0); 162} 163 164extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 165_mm_cvtsd_f64 (__m128d __A) 166{ 167 return __builtin_ia32_vec_ext_v2df (__A, 0); 168} 169 170extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 171_mm_storel_pd (double *__P, __m128d __A) 172{ 173 _mm_store_sd (__P, __A); 174} 175 176/* Stores the upper DPFP value. */ 177extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 178_mm_storeh_pd (double *__P, __m128d __A) 179{ 180 *__P = __builtin_ia32_vec_ext_v2df (__A, 1); 181} 182 183/* Store the lower DPFP value across two words. 184 The address must be 16-byte aligned. */ 185extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 186_mm_store1_pd (double *__P, __m128d __A) 187{ 188 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); 189} 190 191extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 192_mm_store_pd1 (double *__P, __m128d __A) 193{ 194 _mm_store1_pd (__P, __A); 195} 196 197/* Store two DPFP values in reverse order. The address must be aligned. */ 198extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 199_mm_storer_pd (double *__P, __m128d __A) 200{ 201 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); 202} 203 204extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 205_mm_cvtsi128_si32 (__m128i __A) 206{ 207 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); 208} 209 210#ifdef __x86_64__ 211/* Intel intrinsic. 
*/ 212extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 213_mm_cvtsi128_si64 (__m128i __A) 214{ 215 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 216} 217 218/* Microsoft intrinsic. */ 219extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 220_mm_cvtsi128_si64x (__m128i __A) 221{ 222 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 223} 224#endif 225 226extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 227_mm_add_pd (__m128d __A, __m128d __B) 228{ 229 return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); 230} 231 232extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 233_mm_add_sd (__m128d __A, __m128d __B) 234{ 235 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); 236} 237 238extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 239_mm_sub_pd (__m128d __A, __m128d __B) 240{ 241 return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); 242} 243 244extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 245_mm_sub_sd (__m128d __A, __m128d __B) 246{ 247 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); 248} 249 250extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 251_mm_mul_pd (__m128d __A, __m128d __B) 252{ 253 return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); 254} 255 256extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 257_mm_mul_sd (__m128d __A, __m128d __B) 258{ 259 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); 260} 261 262extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 263_mm_div_pd (__m128d __A, __m128d __B) 264{ 265 return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); 266} 267 268extern __inline 
__m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 269_mm_div_sd (__m128d __A, __m128d __B) 270{ 271 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); 272} 273 274extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 275_mm_sqrt_pd (__m128d __A) 276{ 277 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); 278} 279 280/* Return pair {sqrt (A[0), B[1]}. */ 281extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 282_mm_sqrt_sd (__m128d __A, __m128d __B) 283{ 284 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 285 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); 286} 287 288extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 289_mm_min_pd (__m128d __A, __m128d __B) 290{ 291 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); 292} 293 294extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 295_mm_min_sd (__m128d __A, __m128d __B) 296{ 297 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); 298} 299 300extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 301_mm_max_pd (__m128d __A, __m128d __B) 302{ 303 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); 304} 305 306extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 307_mm_max_sd (__m128d __A, __m128d __B) 308{ 309 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); 310} 311 312extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 313_mm_and_pd (__m128d __A, __m128d __B) 314{ 315 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); 316} 317 318extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 319_mm_andnot_pd (__m128d __A, __m128d __B) 320{ 321 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, 
(__v2df)__B); 322} 323 324extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 325_mm_or_pd (__m128d __A, __m128d __B) 326{ 327 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); 328} 329 330extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 331_mm_xor_pd (__m128d __A, __m128d __B) 332{ 333 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); 334} 335 336extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 337_mm_cmpeq_pd (__m128d __A, __m128d __B) 338{ 339 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); 340} 341 342extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 343_mm_cmplt_pd (__m128d __A, __m128d __B) 344{ 345 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); 346} 347 348extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 349_mm_cmple_pd (__m128d __A, __m128d __B) 350{ 351 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); 352} 353 354extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 355_mm_cmpgt_pd (__m128d __A, __m128d __B) 356{ 357 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); 358} 359 360extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 361_mm_cmpge_pd (__m128d __A, __m128d __B) 362{ 363 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); 364} 365 366extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 367_mm_cmpneq_pd (__m128d __A, __m128d __B) 368{ 369 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); 370} 371 372extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 373_mm_cmpnlt_pd (__m128d __A, __m128d __B) 374{ 375 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, 
(__v2df)__B); 376} 377 378extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 379_mm_cmpnle_pd (__m128d __A, __m128d __B) 380{ 381 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); 382} 383 384extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 385_mm_cmpngt_pd (__m128d __A, __m128d __B) 386{ 387 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); 388} 389 390extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 391_mm_cmpnge_pd (__m128d __A, __m128d __B) 392{ 393 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); 394} 395 396extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 397_mm_cmpord_pd (__m128d __A, __m128d __B) 398{ 399 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); 400} 401 402extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 403_mm_cmpunord_pd (__m128d __A, __m128d __B) 404{ 405 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); 406} 407 408extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 409_mm_cmpeq_sd (__m128d __A, __m128d __B) 410{ 411 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); 412} 413 414extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 415_mm_cmplt_sd (__m128d __A, __m128d __B) 416{ 417 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); 418} 419 420extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 421_mm_cmple_sd (__m128d __A, __m128d __B) 422{ 423 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); 424} 425 426extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 427_mm_cmpgt_sd (__m128d __A, __m128d __B) 428{ 429 return (__m128d) __builtin_ia32_movsd 
((__v2df) __A, 430 (__v2df) 431 __builtin_ia32_cmpltsd ((__v2df) __B, 432 (__v2df) 433 __A)); 434} 435 436extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 437_mm_cmpge_sd (__m128d __A, __m128d __B) 438{ 439 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 440 (__v2df) 441 __builtin_ia32_cmplesd ((__v2df) __B, 442 (__v2df) 443 __A)); 444} 445 446extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 447_mm_cmpneq_sd (__m128d __A, __m128d __B) 448{ 449 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); 450} 451 452extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 453_mm_cmpnlt_sd (__m128d __A, __m128d __B) 454{ 455 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); 456} 457 458extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 459_mm_cmpnle_sd (__m128d __A, __m128d __B) 460{ 461 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); 462} 463 464extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 465_mm_cmpngt_sd (__m128d __A, __m128d __B) 466{ 467 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 468 (__v2df) 469 __builtin_ia32_cmpnltsd ((__v2df) __B, 470 (__v2df) 471 __A)); 472} 473 474extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 475_mm_cmpnge_sd (__m128d __A, __m128d __B) 476{ 477 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 478 (__v2df) 479 __builtin_ia32_cmpnlesd ((__v2df) __B, 480 (__v2df) 481 __A)); 482} 483 484extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 485_mm_cmpord_sd (__m128d __A, __m128d __B) 486{ 487 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); 488} 489 490extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 491_mm_cmpunord_sd (__m128d __A, __m128d 
__B) 492{ 493 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); 494} 495 496extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 497_mm_comieq_sd (__m128d __A, __m128d __B) 498{ 499 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); 500} 501 502extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 503_mm_comilt_sd (__m128d __A, __m128d __B) 504{ 505 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); 506} 507 508extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 509_mm_comile_sd (__m128d __A, __m128d __B) 510{ 511 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); 512} 513 514extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 515_mm_comigt_sd (__m128d __A, __m128d __B) 516{ 517 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); 518} 519 520extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 521_mm_comige_sd (__m128d __A, __m128d __B) 522{ 523 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); 524} 525 526extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 527_mm_comineq_sd (__m128d __A, __m128d __B) 528{ 529 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); 530} 531 532extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 533_mm_ucomieq_sd (__m128d __A, __m128d __B) 534{ 535 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); 536} 537 538extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 539_mm_ucomilt_sd (__m128d __A, __m128d __B) 540{ 541 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); 542} 543 544extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 545_mm_ucomile_sd (__m128d __A, __m128d __B) 546{ 547 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); 548} 549 
550extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 551_mm_ucomigt_sd (__m128d __A, __m128d __B) 552{ 553 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); 554} 555 556extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 557_mm_ucomige_sd (__m128d __A, __m128d __B) 558{ 559 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); 560} 561 562extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 563_mm_ucomineq_sd (__m128d __A, __m128d __B) 564{ 565 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); 566} 567 568/* Create a vector of Qi, where i is the element number. */ 569 570extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 571_mm_set_epi64x (long long __q1, long long __q0) 572{ 573 return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 574} 575 576extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 577_mm_set_epi64 (__m64 __q1, __m64 __q0) 578{ 579 return _mm_set_epi64x ((long long)__q1, (long long)__q0); 580} 581 582extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 583_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) 584{ 585 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; 586} 587 588extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 589_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, 590 short __q3, short __q2, short __q1, short __q0) 591{ 592 return __extension__ (__m128i)(__v8hi){ 593 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; 594} 595 596extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 597_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, 598 char __q11, char __q10, char __q09, char __q08, 599 char __q07, char __q06, char __q05, char __q04, 600 char __q03, char __q02, char __q01, 
char __q00) 601{ 602 return __extension__ (__m128i)(__v16qi){ 603 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 604 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 605 }; 606} 607 608/* Set all of the elements of the vector to A. */ 609 610extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 611_mm_set1_epi64x (long long __A) 612{ 613 return _mm_set_epi64x (__A, __A); 614} 615 616extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 617_mm_set1_epi64 (__m64 __A) 618{ 619 return _mm_set_epi64 (__A, __A); 620} 621 622extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 623_mm_set1_epi32 (int __A) 624{ 625 return _mm_set_epi32 (__A, __A, __A, __A); 626} 627 628extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 629_mm_set1_epi16 (short __A) 630{ 631 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); 632} 633 634extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 635_mm_set1_epi8 (char __A) 636{ 637 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, 638 __A, __A, __A, __A, __A, __A, __A, __A); 639} 640 641/* Create a vector of Qi, where i is the element number. 642 The parameter order is reversed from the _mm_set_epi* functions. 
*/ 643 644extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 645_mm_setr_epi64 (__m64 __q0, __m64 __q1) 646{ 647 return _mm_set_epi64 (__q1, __q0); 648} 649 650extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 651_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) 652{ 653 return _mm_set_epi32 (__q3, __q2, __q1, __q0); 654} 655 656extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 657_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, 658 short __q4, short __q5, short __q6, short __q7) 659{ 660 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); 661} 662 663extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 664_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, 665 char __q04, char __q05, char __q06, char __q07, 666 char __q08, char __q09, char __q10, char __q11, 667 char __q12, char __q13, char __q14, char __q15) 668{ 669 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, 670 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); 671} 672 673/* Create a vector with element 0 as *P and the rest zero. 
*/ 674 675extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 676_mm_load_si128 (__m128i const *__P) 677{ 678 return *__P; 679} 680 681extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 682_mm_loadu_si128 (__m128i const *__P) 683{ 684 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); 685} 686 687extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 688_mm_loadl_epi64 (__m128i const *__P) 689{ 690 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); 691} 692 693extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 694_mm_store_si128 (__m128i *__P, __m128i __B) 695{ 696 *__P = __B; 697} 698 699extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 700_mm_storeu_si128 (__m128i *__P, __m128i __B) 701{ 702 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); 703} 704 705extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 706_mm_storel_epi64 (__m128i *__P, __m128i __B) 707{ 708 *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 709} 710 711extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 712_mm_movepi64_pi64 (__m128i __B) 713{ 714 return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 715} 716 717extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 718_mm_movpi64_epi64 (__m64 __A) 719{ 720 return _mm_set_epi64 ((__m64)0LL, __A); 721} 722 723extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 724_mm_move_epi64 (__m128i __A) 725{ 726 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); 727} 728 729/* Create a vector of zeros. 
*/ 730extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 731_mm_setzero_si128 (void) 732{ 733 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; 734} 735 736extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 737_mm_cvtepi32_pd (__m128i __A) 738{ 739 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); 740} 741 742extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 743_mm_cvtepi32_ps (__m128i __A) 744{ 745 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); 746} 747 748extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 749_mm_cvtpd_epi32 (__m128d __A) 750{ 751 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); 752} 753 754extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 755_mm_cvtpd_pi32 (__m128d __A) 756{ 757 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); 758} 759 760extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 761_mm_cvtpd_ps (__m128d __A) 762{ 763 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); 764} 765 766extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 767_mm_cvttpd_epi32 (__m128d __A) 768{ 769 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); 770} 771 772extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 773_mm_cvttpd_pi32 (__m128d __A) 774{ 775 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); 776} 777 778extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 779_mm_cvtpi32_pd (__m64 __A) 780{ 781 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); 782} 783 784extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 785_mm_cvtps_epi32 (__m128 __A) 786{ 787 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); 788} 789 790extern 
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 791_mm_cvttps_epi32 (__m128 __A) 792{ 793 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); 794} 795 796extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 797_mm_cvtps_pd (__m128 __A) 798{ 799 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); 800} 801 802extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 803_mm_cvtsd_si32 (__m128d __A) 804{ 805 return __builtin_ia32_cvtsd2si ((__v2df) __A); 806} 807 808#ifdef __x86_64__ 809/* Intel intrinsic. */ 810extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 811_mm_cvtsd_si64 (__m128d __A) 812{ 813 return __builtin_ia32_cvtsd2si64 ((__v2df) __A); 814} 815 816/* Microsoft intrinsic. */ 817extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 818_mm_cvtsd_si64x (__m128d __A) 819{ 820 return __builtin_ia32_cvtsd2si64 ((__v2df) __A); 821} 822#endif 823 824extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 825_mm_cvttsd_si32 (__m128d __A) 826{ 827 return __builtin_ia32_cvttsd2si ((__v2df) __A); 828} 829 830#ifdef __x86_64__ 831/* Intel intrinsic. */ 832extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 833_mm_cvttsd_si64 (__m128d __A) 834{ 835 return __builtin_ia32_cvttsd2si64 ((__v2df) __A); 836} 837 838/* Microsoft intrinsic. 
*/ 839extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 840_mm_cvttsd_si64x (__m128d __A) 841{ 842 return __builtin_ia32_cvttsd2si64 ((__v2df) __A); 843} 844#endif 845 846extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 847_mm_cvtsd_ss (__m128 __A, __m128d __B) 848{ 849 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); 850} 851 852extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 853_mm_cvtsi32_sd (__m128d __A, int __B) 854{ 855 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); 856} 857 858#ifdef __x86_64__ 859/* Intel intrinsic. */ 860extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 861_mm_cvtsi64_sd (__m128d __A, long long __B) 862{ 863 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); 864} 865 866/* Microsoft intrinsic. */ 867extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 868_mm_cvtsi64x_sd (__m128d __A, long long __B) 869{ 870 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); 871} 872#endif 873 874extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 875_mm_cvtss_sd (__m128d __A, __m128 __B) 876{ 877 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); 878} 879 880#ifdef __OPTIMIZE__ 881extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 882_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) 883{ 884 return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); 885} 886#else 887#define _mm_shuffle_pd(A, B, N) \ 888 ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \ 889 (__v2df)(__m128d)(B), (int)(N))) 890#endif 891 892extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 893_mm_unpackhi_pd (__m128d __A, __m128d __B) 894{ 895 return 
(__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); 896} 897 898extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 899_mm_unpacklo_pd (__m128d __A, __m128d __B) 900{ 901 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); 902} 903 904extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 905_mm_loadh_pd (__m128d __A, double const *__B) 906{ 907 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); 908} 909 910extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 911_mm_loadl_pd (__m128d __A, double const *__B) 912{ 913 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); 914} 915 916extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 917_mm_movemask_pd (__m128d __A) 918{ 919 return __builtin_ia32_movmskpd ((__v2df)__A); 920} 921 922extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 923_mm_packs_epi16 (__m128i __A, __m128i __B) 924{ 925 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); 926} 927 928extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 929_mm_packs_epi32 (__m128i __A, __m128i __B) 930{ 931 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); 932} 933 934extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 935_mm_packus_epi16 (__m128i __A, __m128i __B) 936{ 937 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); 938} 939 940extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 941_mm_unpackhi_epi8 (__m128i __A, __m128i __B) 942{ 943 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); 944} 945 946extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 947_mm_unpackhi_epi16 (__m128i __A, __m128i __B) 948{ 949 
return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); 950} 951 952extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 953_mm_unpackhi_epi32 (__m128i __A, __m128i __B) 954{ 955 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); 956} 957 958extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 959_mm_unpackhi_epi64 (__m128i __A, __m128i __B) 960{ 961 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); 962} 963 964extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 965_mm_unpacklo_epi8 (__m128i __A, __m128i __B) 966{ 967 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); 968} 969 970extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 971_mm_unpacklo_epi16 (__m128i __A, __m128i __B) 972{ 973 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); 974} 975 976extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 977_mm_unpacklo_epi32 (__m128i __A, __m128i __B) 978{ 979 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); 980} 981 982extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 983_mm_unpacklo_epi64 (__m128i __A, __m128i __B) 984{ 985 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); 986} 987 988extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 989_mm_add_epi8 (__m128i __A, __m128i __B) 990{ 991 return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); 992} 993 994extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 995_mm_add_epi16 (__m128i __A, __m128i __B) 996{ 997 return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); 998} 999 1000extern __inline __m128i __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) 1001_mm_add_epi32 (__m128i __A, __m128i __B) 1002{ 1003 return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); 1004} 1005 1006extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1007_mm_add_epi64 (__m128i __A, __m128i __B) 1008{ 1009 return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); 1010} 1011 1012extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1013_mm_adds_epi8 (__m128i __A, __m128i __B) 1014{ 1015 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); 1016} 1017 1018extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1019_mm_adds_epi16 (__m128i __A, __m128i __B) 1020{ 1021 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); 1022} 1023 1024extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1025_mm_adds_epu8 (__m128i __A, __m128i __B) 1026{ 1027 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); 1028} 1029 1030extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1031_mm_adds_epu16 (__m128i __A, __m128i __B) 1032{ 1033 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); 1034} 1035 1036extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1037_mm_sub_epi8 (__m128i __A, __m128i __B) 1038{ 1039 return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); 1040} 1041 1042extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1043_mm_sub_epi16 (__m128i __A, __m128i __B) 1044{ 1045 return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); 1046} 1047 1048extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1049_mm_sub_epi32 (__m128i __A, __m128i __B) 1050{ 1051 return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, 
(__v4si)__B); 1052} 1053 1054extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1055_mm_sub_epi64 (__m128i __A, __m128i __B) 1056{ 1057 return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); 1058} 1059 1060extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1061_mm_subs_epi8 (__m128i __A, __m128i __B) 1062{ 1063 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); 1064} 1065 1066extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1067_mm_subs_epi16 (__m128i __A, __m128i __B) 1068{ 1069 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); 1070} 1071 1072extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1073_mm_subs_epu8 (__m128i __A, __m128i __B) 1074{ 1075 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); 1076} 1077 1078extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1079_mm_subs_epu16 (__m128i __A, __m128i __B) 1080{ 1081 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); 1082} 1083 1084extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1085_mm_madd_epi16 (__m128i __A, __m128i __B) 1086{ 1087 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); 1088} 1089 1090extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1091_mm_mulhi_epi16 (__m128i __A, __m128i __B) 1092{ 1093 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); 1094} 1095 1096extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1097_mm_mullo_epi16 (__m128i __A, __m128i __B) 1098{ 1099 return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); 1100} 1101 1102extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1103_mm_mul_su32 
(__m64 __A, __m64 __B) 1104{ 1105 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); 1106} 1107 1108extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1109_mm_mul_epu32 (__m128i __A, __m128i __B) 1110{ 1111 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); 1112} 1113 1114extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1115_mm_slli_epi16 (__m128i __A, int __B) 1116{ 1117 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); 1118} 1119 1120extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1121_mm_slli_epi32 (__m128i __A, int __B) 1122{ 1123 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); 1124} 1125 1126extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1127_mm_slli_epi64 (__m128i __A, int __B) 1128{ 1129 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); 1130} 1131 1132extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1133_mm_srai_epi16 (__m128i __A, int __B) 1134{ 1135 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); 1136} 1137 1138extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1139_mm_srai_epi32 (__m128i __A, int __B) 1140{ 1141 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); 1142} 1143 1144#ifdef __OPTIMIZE__ 1145extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1146_mm_srli_si128 (__m128i __A, const int __N) 1147{ 1148 return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); 1149} 1150 1151extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1152_mm_slli_si128 (__m128i __A, const int __N) 1153{ 1154 return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); 1155} 1156#else 1157#define _mm_srli_si128(A, N) \ 1158 ((__m128i)__builtin_ia32_psrldqi128 
((__m128i)(A), (int)(N) * 8)) 1159#define _mm_slli_si128(A, N) \ 1160 ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) 1161#endif 1162 1163extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1164_mm_srli_epi16 (__m128i __A, int __B) 1165{ 1166 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); 1167} 1168 1169extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1170_mm_srli_epi32 (__m128i __A, int __B) 1171{ 1172 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); 1173} 1174 1175extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1176_mm_srli_epi64 (__m128i __A, int __B) 1177{ 1178 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); 1179} 1180 1181extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1182_mm_sll_epi16 (__m128i __A, __m128i __B) 1183{ 1184 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); 1185} 1186 1187extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1188_mm_sll_epi32 (__m128i __A, __m128i __B) 1189{ 1190 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); 1191} 1192 1193extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1194_mm_sll_epi64 (__m128i __A, __m128i __B) 1195{ 1196 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); 1197} 1198 1199extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1200_mm_sra_epi16 (__m128i __A, __m128i __B) 1201{ 1202 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); 1203} 1204 1205extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1206_mm_sra_epi32 (__m128i __A, __m128i __B) 1207{ 1208 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); 1209} 1210 1211extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1212_mm_srl_epi16 (__m128i __A, __m128i __B) 1213{ 1214 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); 1215} 1216 1217extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1218_mm_srl_epi32 (__m128i __A, __m128i __B) 1219{ 1220 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); 1221} 1222 1223extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1224_mm_srl_epi64 (__m128i __A, __m128i __B) 1225{ 1226 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); 1227} 1228 1229extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1230_mm_and_si128 (__m128i __A, __m128i __B) 1231{ 1232 return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); 1233} 1234 1235extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1236_mm_andnot_si128 (__m128i __A, __m128i __B) 1237{ 1238 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); 1239} 1240 1241extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1242_mm_or_si128 (__m128i __A, __m128i __B) 1243{ 1244 return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); 1245} 1246 1247extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1248_mm_xor_si128 (__m128i __A, __m128i __B) 1249{ 1250 return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); 1251} 1252 1253extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1254_mm_cmpeq_epi8 (__m128i __A, __m128i __B) 1255{ 1256 return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); 1257} 1258 1259extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1260_mm_cmpeq_epi16 (__m128i __A, __m128i __B) 1261{ 1262 return 
(__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); 1263} 1264 1265extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1266_mm_cmpeq_epi32 (__m128i __A, __m128i __B) 1267{ 1268 return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); 1269} 1270 1271extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1272_mm_cmplt_epi8 (__m128i __A, __m128i __B) 1273{ 1274 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); 1275} 1276 1277extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1278_mm_cmplt_epi16 (__m128i __A, __m128i __B) 1279{ 1280 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); 1281} 1282 1283extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1284_mm_cmplt_epi32 (__m128i __A, __m128i __B) 1285{ 1286 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); 1287} 1288 1289extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1290_mm_cmpgt_epi8 (__m128i __A, __m128i __B) 1291{ 1292 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); 1293} 1294 1295extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1296_mm_cmpgt_epi16 (__m128i __A, __m128i __B) 1297{ 1298 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); 1299} 1300 1301extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1302_mm_cmpgt_epi32 (__m128i __A, __m128i __B) 1303{ 1304 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); 1305} 1306 1307#ifdef __OPTIMIZE__ 1308extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1309_mm_extract_epi16 (__m128i const __A, int const __N) 1310{ 1311 return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); 1312} 1313 1314extern __inline 
__m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1315_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) 1316{ 1317 return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); 1318} 1319#else 1320#define _mm_extract_epi16(A, N) \ 1321 ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) 1322#define _mm_insert_epi16(A, D, N) \ 1323 ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ 1324 (int)(D), (int)(N))) 1325#endif 1326 1327extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1328_mm_max_epi16 (__m128i __A, __m128i __B) 1329{ 1330 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); 1331} 1332 1333extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1334_mm_max_epu8 (__m128i __A, __m128i __B) 1335{ 1336 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); 1337} 1338 1339extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1340_mm_min_epi16 (__m128i __A, __m128i __B) 1341{ 1342 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); 1343} 1344 1345extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1346_mm_min_epu8 (__m128i __A, __m128i __B) 1347{ 1348 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); 1349} 1350 1351extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1352_mm_movemask_epi8 (__m128i __A) 1353{ 1354 return __builtin_ia32_pmovmskb128 ((__v16qi)__A); 1355} 1356 1357extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1358_mm_mulhi_epu16 (__m128i __A, __m128i __B) 1359{ 1360 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); 1361} 1362 1363#ifdef __OPTIMIZE__ 1364extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) 1365_mm_shufflehi_epi16 (__m128i __A, const int __mask) 1366{ 1367 return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask); 1368} 1369 1370extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1371_mm_shufflelo_epi16 (__m128i __A, const int __mask) 1372{ 1373 return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask); 1374} 1375 1376extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1377_mm_shuffle_epi32 (__m128i __A, const int __mask) 1378{ 1379 return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); 1380} 1381#else 1382#define _mm_shufflehi_epi16(A, N) \ 1383 ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N))) 1384#define _mm_shufflelo_epi16(A, N) \ 1385 ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N))) 1386#define _mm_shuffle_epi32(A, N) \ 1387 ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N))) 1388#endif 1389 1390extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1391_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) 1392{ 1393 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); 1394} 1395 1396extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1397_mm_avg_epu8 (__m128i __A, __m128i __B) 1398{ 1399 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); 1400} 1401 1402extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1403_mm_avg_epu16 (__m128i __A, __m128i __B) 1404{ 1405 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); 1406} 1407 1408extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1409_mm_sad_epu8 (__m128i __A, __m128i __B) 1410{ 1411 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); 1412} 1413 1414extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
1415_mm_stream_si32 (int *__A, int __B) 1416{ 1417 __builtin_ia32_movnti (__A, __B); 1418} 1419 1420#ifdef __x86_64__ 1421extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1422_mm_stream_si64 (long long int *__A, long long int __B) 1423{ 1424 __builtin_ia32_movnti64 (__A, __B); 1425} 1426#endif 1427 1428extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1429_mm_stream_si128 (__m128i *__A, __m128i __B) 1430{ 1431 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); 1432} 1433 1434extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1435_mm_stream_pd (double *__A, __m128d __B) 1436{ 1437 __builtin_ia32_movntpd (__A, (__v2df)__B); 1438} 1439 1440extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1441_mm_clflush (void const *__A) 1442{ 1443 __builtin_ia32_clflush (__A); 1444} 1445 1446extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1447_mm_lfence (void) 1448{ 1449 __builtin_ia32_lfence (); 1450} 1451 1452extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1453_mm_mfence (void) 1454{ 1455 __builtin_ia32_mfence (); 1456} 1457 1458extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1459_mm_cvtsi32_si128 (int __A) 1460{ 1461 return _mm_set_epi32 (0, 0, 0, __A); 1462} 1463 1464#ifdef __x86_64__ 1465/* Intel intrinsic. */ 1466extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1467_mm_cvtsi64_si128 (long long __A) 1468{ 1469 return _mm_set_epi64x (0, __A); 1470} 1471 1472/* Microsoft intrinsic. */ 1473extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1474_mm_cvtsi64x_si128 (long long __A) 1475{ 1476 return _mm_set_epi64x (0, __A); 1477} 1478#endif 1479 1480/* Casts between various SP, DP, INT vector types. 
Note that these do no 1481 conversion of values, they just change the type. */ 1482extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1483_mm_castpd_ps(__m128d __A) 1484{ 1485 return (__m128) __A; 1486} 1487 1488extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1489_mm_castpd_si128(__m128d __A) 1490{ 1491 return (__m128i) __A; 1492} 1493 1494extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1495_mm_castps_pd(__m128 __A) 1496{ 1497 return (__m128d) __A; 1498} 1499 1500extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1501_mm_castps_si128(__m128 __A) 1502{ 1503 return (__m128i) __A; 1504} 1505 1506extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1507_mm_castsi128_ps(__m128i __A) 1508{ 1509 return (__m128) __A; 1510} 1511 1512extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1513_mm_castsi128_pd(__m128i __A) 1514{ 1515 return (__m128d) __A; 1516} 1517 1518#endif /* __SSE2__ */ 1519 1520#endif /* _EMMINTRIN_H_INCLUDED */ 1521