1169689Skan/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 2122180Skan 3132718Skan This file is part of GCC. 4122180Skan 5132718Skan GCC is free software; you can redistribute it and/or modify 6122180Skan it under the terms of the GNU General Public License as published by 7122180Skan the Free Software Foundation; either version 2, or (at your option) 8122180Skan any later version. 9122180Skan 10132718Skan GCC is distributed in the hope that it will be useful, 11122180Skan but WITHOUT ANY WARRANTY; without even the implied warranty of 12122180Skan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13122180Skan GNU General Public License for more details. 14122180Skan 15122180Skan You should have received a copy of the GNU General Public License 16132718Skan along with GCC; see the file COPYING. If not, write to 17169689Skan the Free Software Foundation, 51 Franklin Street, Fifth Floor, 18169689Skan Boston, MA 02110-1301, USA. */ 19122180Skan 20122180Skan/* As a special exception, if you include this header file into source 21122180Skan files compiled by GCC, this header file does not by itself cause 22122180Skan the resulting executable to be covered by the GNU General Public 23122180Skan License. This exception does not however invalidate any other 24122180Skan reasons why the executable file might be covered by the GNU General 25122180Skan Public License. */ 26122180Skan 27122180Skan/* Implemented from the specification included in the Intel C++ Compiler 28169689Skan User Guide and Reference, version 9.0. */ 29122180Skan 30122180Skan#ifndef _EMMINTRIN_H_INCLUDED 31122180Skan#define _EMMINTRIN_H_INCLUDED 32122180Skan 33251212Spfg#ifndef __SSE2__ 34251212Spfg# error "SSE2 instruction set not enabled" 35251212Spfg#else 36251212Spfg 37251212Spfg/* We need definitions from the SSE header files*/ 38122180Skan#include <xmmintrin.h> 39122180Skan 40122180Skan/* SSE2 */ 41169689Skantypedef double __v2df __attribute__ ((__vector_size__ (16))); 42169689Skantypedef long long __v2di __attribute__ ((__vector_size__ (16))); 43169689Skantypedef int __v4si __attribute__ ((__vector_size__ (16))); 44169689Skantypedef short __v8hi __attribute__ ((__vector_size__ (16))); 45169689Skantypedef char __v16qi __attribute__ ((__vector_size__ (16))); 46122180Skan 47169689Skan/* The Intel API is flexible enough that we must allow aliasing with other 48169689Skan vector types, and their scalar components. */ 49169689Skantypedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); 50169689Skantypedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); 51169689Skan 52122180Skan/* Create a selector for use with the SHUFPD instruction. */ 53122180Skan#define _MM_SHUFFLE2(fp1,fp0) \ 54122180Skan (((fp1) << 1) | (fp0)) 55122180Skan 56169689Skan/* Create a vector with element 0 as F and the rest zero. */ 57169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 58169689Skan_mm_set_sd (double __F) 59169689Skan{ 60169689Skan return __extension__ (__m128d){ __F, 0 }; 61169689Skan} 62122180Skan 63169689Skan/* Create a vector with both elements equal to F. */ 64169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 65169689Skan_mm_set1_pd (double __F) 66122180Skan{ 67169689Skan return __extension__ (__m128d){ __F, __F }; 68122180Skan} 69122180Skan 70169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 71169689Skan_mm_set_pd1 (double __F) 72122180Skan{ 73169689Skan return _mm_set1_pd (__F); 74122180Skan} 75122180Skan 76169689Skan/* Create a vector with the lower value X and upper value W. */ 77169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 78169689Skan_mm_set_pd (double __W, double __X) 79122180Skan{ 80169689Skan return __extension__ (__m128d){ __X, __W }; 81122180Skan} 82122180Skan 83169689Skan/* Create a vector with the lower value W and upper value X. */ 84169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 85169689Skan_mm_setr_pd (double __W, double __X) 86169689Skan{ 87169689Skan return __extension__ (__m128d){ __W, __X }; 88169689Skan} 89169689Skan 90169689Skan/* Create a vector of zeros. */ 91169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 92169689Skan_mm_setzero_pd (void) 93169689Skan{ 94169689Skan return __extension__ (__m128d){ 0.0, 0.0 }; 95169689Skan} 96169689Skan 97169689Skan/* Sets the low DPFP value of A from the low value of B. */ 98169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 99169689Skan_mm_move_sd (__m128d __A, __m128d __B) 100169689Skan{ 101169689Skan return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 102169689Skan} 103169689Skan 104122180Skan/* Load two DPFP values from P. The address must be 16-byte aligned. */ 105169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 106122180Skan_mm_load_pd (double const *__P) 107122180Skan{ 108169689Skan return *(__m128d *)__P; 109122180Skan} 110122180Skan 111122180Skan/* Load two DPFP values from P. The address need not be 16-byte aligned. */ 112169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 113122180Skan_mm_loadu_pd (double const *__P) 114122180Skan{ 115169689Skan return __builtin_ia32_loadupd (__P); 116122180Skan} 117122180Skan 118169689Skan/* Create a vector with all two elements equal to *P. */ 119169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 120169689Skan_mm_load1_pd (double const *__P) 121122180Skan{ 122169689Skan return _mm_set1_pd (*__P); 123122180Skan} 124122180Skan 125169689Skan/* Create a vector with element 0 as *P and the rest zero. */ 126169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 127169689Skan_mm_load_sd (double const *__P) 128122180Skan{ 129169689Skan return _mm_set_sd (*__P); 130122180Skan} 131122180Skan 132169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 133169689Skan_mm_load_pd1 (double const *__P) 134122180Skan{ 135169689Skan return _mm_load1_pd (__P); 136122180Skan} 137122180Skan 138169689Skan/* Load two DPFP values in reverse order. The address must be aligned. */ 139169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 140169689Skan_mm_loadr_pd (double const *__P) 141122180Skan{ 142169689Skan __m128d __tmp = _mm_load_pd (__P); 143169689Skan return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); 144122180Skan} 145122180Skan 146169689Skan/* Store two DPFP values. The address must be 16-byte aligned. */ 147169689Skanstatic __inline void __attribute__((__always_inline__)) 148169689Skan_mm_store_pd (double *__P, __m128d __A) 149122180Skan{ 150169689Skan *(__m128d *)__P = __A; 151122180Skan} 152122180Skan 153169689Skan/* Store two DPFP values. The address need not be 16-byte aligned. */ 154169689Skanstatic __inline void __attribute__((__always_inline__)) 155169689Skan_mm_storeu_pd (double *__P, __m128d __A) 156122180Skan{ 157169689Skan __builtin_ia32_storeupd (__P, __A); 158122180Skan} 159122180Skan 160169689Skan/* Stores the lower DPFP value. */ 161169689Skanstatic __inline void __attribute__((__always_inline__)) 162169689Skan_mm_store_sd (double *__P, __m128d __A) 163122180Skan{ 164169689Skan *__P = __builtin_ia32_vec_ext_v2df (__A, 0); 165122180Skan} 166122180Skan 167169689Skanstatic __inline double __attribute__((__always_inline__)) 168169689Skan_mm_cvtsd_f64 (__m128d __A) 169122180Skan{ 170169689Skan return __builtin_ia32_vec_ext_v2df (__A, 0); 171122180Skan} 172122180Skan 173169689Skanstatic __inline void __attribute__((__always_inline__)) 174169689Skan_mm_storel_pd (double *__P, __m128d __A) 175169689Skan{ 176169689Skan _mm_store_sd (__P, __A); 177169689Skan} 178169689Skan 179169689Skan/* Stores the upper DPFP value. */ 180169689Skanstatic __inline void __attribute__((__always_inline__)) 181169689Skan_mm_storeh_pd (double *__P, __m128d __A) 182169689Skan{ 183169689Skan *__P = __builtin_ia32_vec_ext_v2df (__A, 1); 184169689Skan} 185169689Skan 186169689Skan/* Store the lower DPFP value across two words. 187169689Skan The address must be 16-byte aligned. */ 188169689Skanstatic __inline void __attribute__((__always_inline__)) 189122180Skan_mm_store1_pd (double *__P, __m128d __A) 190122180Skan{ 191169689Skan _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); 192122180Skan} 193122180Skan 194169689Skanstatic __inline void __attribute__((__always_inline__)) 195122180Skan_mm_store_pd1 (double *__P, __m128d __A) 196122180Skan{ 197122180Skan _mm_store1_pd (__P, __A); 198122180Skan} 199122180Skan 200169689Skan/* Store two DPFP values in reverse order. The address must be aligned. */ 201169689Skanstatic __inline void __attribute__((__always_inline__)) 202169689Skan_mm_storer_pd (double *__P, __m128d __A) 203122180Skan{ 204169689Skan _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); 205122180Skan} 206122180Skan 207169689Skanstatic __inline int __attribute__((__always_inline__)) 208169689Skan_mm_cvtsi128_si32 (__m128i __A) 209122180Skan{ 210169689Skan return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); 211122180Skan} 212122180Skan 213169689Skan#ifdef __x86_64__ 214169689Skan/* Intel intrinsic. */ 215169689Skanstatic __inline long long __attribute__((__always_inline__)) 216169689Skan_mm_cvtsi128_si64 (__m128i __A) 217122180Skan{ 218169689Skan return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 219122180Skan} 220122180Skan 221169689Skan/* Microsoft intrinsic. */ 222169689Skanstatic __inline long long __attribute__((__always_inline__)) 223169689Skan_mm_cvtsi128_si64x (__m128i __A) 224122180Skan{ 225169689Skan return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 226122180Skan} 227169689Skan#endif 228122180Skan 229169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 230122180Skan_mm_add_pd (__m128d __A, __m128d __B) 231122180Skan{ 232122180Skan return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); 233122180Skan} 234122180Skan 235169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 236122180Skan_mm_add_sd (__m128d __A, __m128d __B) 237122180Skan{ 238122180Skan return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); 239122180Skan} 240122180Skan 241169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 242122180Skan_mm_sub_pd (__m128d __A, __m128d __B) 243122180Skan{ 244122180Skan return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); 245122180Skan} 246122180Skan 247169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 248122180Skan_mm_sub_sd (__m128d __A, __m128d __B) 249122180Skan{ 250122180Skan return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); 251122180Skan} 252122180Skan 253169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 254122180Skan_mm_mul_pd (__m128d __A, __m128d __B) 255122180Skan{ 256122180Skan return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); 257122180Skan} 258122180Skan 259169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 260122180Skan_mm_mul_sd (__m128d __A, __m128d __B) 261122180Skan{ 262122180Skan return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); 263122180Skan} 264122180Skan 265169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 266122180Skan_mm_div_pd (__m128d __A, __m128d __B) 267122180Skan{ 268122180Skan return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); 269122180Skan} 270122180Skan 271169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 272122180Skan_mm_div_sd (__m128d __A, __m128d __B) 273122180Skan{ 274122180Skan return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); 275122180Skan} 276122180Skan 277169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 278122180Skan_mm_sqrt_pd (__m128d __A) 279122180Skan{ 280122180Skan return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); 281122180Skan} 282122180Skan 283122180Skan/* Return pair {sqrt (A[0), B[1]}. */ 284169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 285122180Skan_mm_sqrt_sd (__m128d __A, __m128d __B) 286122180Skan{ 287122180Skan __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 288122180Skan return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); 289122180Skan} 290122180Skan 291169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 292122180Skan_mm_min_pd (__m128d __A, __m128d __B) 293122180Skan{ 294122180Skan return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); 295122180Skan} 296122180Skan 297169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 298122180Skan_mm_min_sd (__m128d __A, __m128d __B) 299122180Skan{ 300122180Skan return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); 301122180Skan} 302122180Skan 303169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 304122180Skan_mm_max_pd (__m128d __A, __m128d __B) 305122180Skan{ 306122180Skan return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); 307122180Skan} 308122180Skan 309169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 310122180Skan_mm_max_sd (__m128d __A, __m128d __B) 311122180Skan{ 312122180Skan return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); 313122180Skan} 314122180Skan 315169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 316122180Skan_mm_and_pd (__m128d __A, __m128d __B) 317122180Skan{ 318122180Skan return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); 319122180Skan} 320122180Skan 321169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 322122180Skan_mm_andnot_pd (__m128d __A, __m128d __B) 323122180Skan{ 324122180Skan return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); 325122180Skan} 326122180Skan 327169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 328122180Skan_mm_or_pd (__m128d __A, __m128d __B) 329122180Skan{ 330122180Skan return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); 331122180Skan} 332122180Skan 333169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 334122180Skan_mm_xor_pd (__m128d __A, __m128d __B) 335122180Skan{ 336122180Skan return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); 337122180Skan} 338122180Skan 339169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 340122180Skan_mm_cmpeq_pd (__m128d __A, __m128d __B) 341122180Skan{ 342122180Skan return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); 343122180Skan} 344122180Skan 345169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 346122180Skan_mm_cmplt_pd (__m128d __A, __m128d __B) 347122180Skan{ 348122180Skan return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); 349122180Skan} 350122180Skan 351169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 352122180Skan_mm_cmple_pd (__m128d __A, __m128d __B) 353122180Skan{ 354122180Skan return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); 355122180Skan} 356122180Skan 357169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 358122180Skan_mm_cmpgt_pd (__m128d __A, __m128d __B) 359122180Skan{ 360122180Skan return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); 361122180Skan} 362122180Skan 363169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 364122180Skan_mm_cmpge_pd (__m128d __A, __m128d __B) 365122180Skan{ 366122180Skan return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); 367122180Skan} 368122180Skan 369169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 370122180Skan_mm_cmpneq_pd (__m128d __A, __m128d __B) 371122180Skan{ 372122180Skan return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); 373122180Skan} 374122180Skan 375169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 376122180Skan_mm_cmpnlt_pd (__m128d __A, __m128d __B) 377122180Skan{ 378122180Skan return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); 379122180Skan} 380122180Skan 381169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 382122180Skan_mm_cmpnle_pd (__m128d __A, __m128d __B) 383122180Skan{ 384122180Skan return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); 385122180Skan} 386122180Skan 387169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 388122180Skan_mm_cmpngt_pd (__m128d __A, __m128d __B) 389122180Skan{ 390122180Skan return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); 391122180Skan} 392122180Skan 393169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 394122180Skan_mm_cmpnge_pd (__m128d __A, __m128d __B) 395122180Skan{ 396122180Skan return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); 397122180Skan} 398122180Skan 399169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 400122180Skan_mm_cmpord_pd (__m128d __A, __m128d __B) 401122180Skan{ 402122180Skan return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); 403122180Skan} 404122180Skan 405169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 406122180Skan_mm_cmpunord_pd (__m128d __A, __m128d __B) 407122180Skan{ 408122180Skan return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); 409122180Skan} 410122180Skan 411169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 412122180Skan_mm_cmpeq_sd (__m128d __A, __m128d __B) 413122180Skan{ 414122180Skan return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); 415122180Skan} 416122180Skan 417169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 418122180Skan_mm_cmplt_sd (__m128d __A, __m128d __B) 419122180Skan{ 420122180Skan return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); 421122180Skan} 422122180Skan 423169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 424122180Skan_mm_cmple_sd (__m128d __A, __m128d __B) 425122180Skan{ 426122180Skan return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); 427122180Skan} 428122180Skan 429169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 430122180Skan_mm_cmpgt_sd (__m128d __A, __m128d __B) 431122180Skan{ 432122180Skan return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 433122180Skan (__v2df) 434122180Skan __builtin_ia32_cmpltsd ((__v2df) __B, 435122180Skan (__v2df) 436122180Skan __A)); 437122180Skan} 438122180Skan 439169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 440122180Skan_mm_cmpge_sd (__m128d __A, __m128d __B) 441122180Skan{ 442122180Skan return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 443122180Skan (__v2df) 444122180Skan __builtin_ia32_cmplesd ((__v2df) __B, 445122180Skan (__v2df) 446122180Skan __A)); 447122180Skan} 448122180Skan 449169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 450122180Skan_mm_cmpneq_sd (__m128d __A, __m128d __B) 451122180Skan{ 452122180Skan return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); 453122180Skan} 454122180Skan 455169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 456122180Skan_mm_cmpnlt_sd (__m128d __A, __m128d __B) 457122180Skan{ 458122180Skan return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); 459122180Skan} 460122180Skan 461169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 462122180Skan_mm_cmpnle_sd (__m128d __A, __m128d __B) 463122180Skan{ 464122180Skan return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); 465122180Skan} 466122180Skan 467169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 468122180Skan_mm_cmpngt_sd (__m128d __A, __m128d __B) 469122180Skan{ 470122180Skan return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 471122180Skan (__v2df) 472122180Skan __builtin_ia32_cmpnltsd ((__v2df) __B, 473122180Skan (__v2df) 474122180Skan __A)); 475122180Skan} 476122180Skan 477169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 478122180Skan_mm_cmpnge_sd (__m128d __A, __m128d __B) 479122180Skan{ 480122180Skan return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 481122180Skan (__v2df) 482122180Skan __builtin_ia32_cmpnlesd ((__v2df) __B, 483122180Skan (__v2df) 484122180Skan __A)); 485122180Skan} 486122180Skan 487169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 488122180Skan_mm_cmpord_sd (__m128d __A, __m128d __B) 489122180Skan{ 490122180Skan return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); 491122180Skan} 492122180Skan 493169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 494122180Skan_mm_cmpunord_sd (__m128d __A, __m128d __B) 495122180Skan{ 496122180Skan return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); 497122180Skan} 498122180Skan 499169689Skanstatic __inline int __attribute__((__always_inline__)) 500122180Skan_mm_comieq_sd (__m128d __A, __m128d __B) 501122180Skan{ 502122180Skan return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); 503122180Skan} 504122180Skan 505169689Skanstatic __inline int __attribute__((__always_inline__)) 506122180Skan_mm_comilt_sd (__m128d __A, __m128d __B) 507122180Skan{ 508122180Skan return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); 509122180Skan} 510122180Skan 511169689Skanstatic __inline int __attribute__((__always_inline__)) 512122180Skan_mm_comile_sd (__m128d __A, __m128d __B) 513122180Skan{ 514122180Skan return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); 515122180Skan} 516122180Skan 517169689Skanstatic __inline int __attribute__((__always_inline__)) 518122180Skan_mm_comigt_sd (__m128d __A, __m128d __B) 519122180Skan{ 520122180Skan return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); 521122180Skan} 522122180Skan 523169689Skanstatic __inline int __attribute__((__always_inline__)) 524122180Skan_mm_comige_sd (__m128d __A, __m128d __B) 525122180Skan{ 526122180Skan return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); 527122180Skan} 528122180Skan 529169689Skanstatic __inline int __attribute__((__always_inline__)) 530122180Skan_mm_comineq_sd (__m128d __A, __m128d __B) 531122180Skan{ 532122180Skan return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); 533122180Skan} 534122180Skan 535169689Skanstatic __inline int __attribute__((__always_inline__)) 536122180Skan_mm_ucomieq_sd (__m128d __A, __m128d __B) 537122180Skan{ 538122180Skan return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); 539122180Skan} 540122180Skan 541169689Skanstatic __inline int __attribute__((__always_inline__)) 542122180Skan_mm_ucomilt_sd (__m128d __A, __m128d __B) 543122180Skan{ 544122180Skan return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); 545122180Skan} 546122180Skan 547169689Skanstatic __inline int __attribute__((__always_inline__)) 548122180Skan_mm_ucomile_sd (__m128d __A, __m128d __B) 549122180Skan{ 550122180Skan return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); 551122180Skan} 552122180Skan 553169689Skanstatic __inline int __attribute__((__always_inline__)) 554122180Skan_mm_ucomigt_sd (__m128d __A, __m128d __B) 555122180Skan{ 556122180Skan return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); 557122180Skan} 558122180Skan 559169689Skanstatic __inline int __attribute__((__always_inline__)) 560122180Skan_mm_ucomige_sd (__m128d __A, __m128d __B) 561122180Skan{ 562122180Skan return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); 563122180Skan} 564122180Skan 565169689Skanstatic __inline int __attribute__((__always_inline__)) 566122180Skan_mm_ucomineq_sd (__m128d __A, __m128d __B) 567122180Skan{ 568122180Skan return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); 569122180Skan} 570122180Skan 571169689Skan/* Create a vector of Qi, where i is the element number. */ 572122180Skan 573169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 574169689Skan_mm_set_epi64x (long long __q1, long long __q0) 575122180Skan{ 576169689Skan return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 577122180Skan} 578122180Skan 579169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 580169689Skan_mm_set_epi64 (__m64 __q1, __m64 __q0) 581122180Skan{ 582169689Skan return _mm_set_epi64x ((long long)__q1, (long long)__q0); 583122180Skan} 584122180Skan 585169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 586169689Skan_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) 587122180Skan{ 588169689Skan return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; 589122180Skan} 590122180Skan 591169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 592169689Skan_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, 593169689Skan short __q3, short __q2, short __q1, short __q0) 594122180Skan{ 595169689Skan return __extension__ (__m128i)(__v8hi){ 596169689Skan __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; 597122180Skan} 598122180Skan 599169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 600169689Skan_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, 601169689Skan char __q11, char __q10, char __q09, char __q08, 602169689Skan char __q07, char __q06, char __q05, char __q04, 603169689Skan char __q03, char __q02, char __q01, char __q00) 604122180Skan{ 605169689Skan return __extension__ (__m128i)(__v16qi){ 606169689Skan __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 607169689Skan __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 608169689Skan }; 609122180Skan} 610122180Skan 611169689Skan/* Set all of the elements of the vector to A. */ 612169689Skan 613169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 614169689Skan_mm_set1_epi64x (long long __A) 615122180Skan{ 616169689Skan return _mm_set_epi64x (__A, __A); 617122180Skan} 618122180Skan 619169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 620169689Skan_mm_set1_epi64 (__m64 __A) 621122180Skan{ 622169689Skan return _mm_set_epi64 (__A, __A); 623122180Skan} 624122180Skan 625169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 626169689Skan_mm_set1_epi32 (int __A) 627122180Skan{ 628169689Skan return _mm_set_epi32 (__A, __A, __A, __A); 629122180Skan} 630122180Skan 631169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 632169689Skan_mm_set1_epi16 (short __A) 633122180Skan{ 634169689Skan return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); 635122180Skan} 636122180Skan 637169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 638169689Skan_mm_set1_epi8 (char __A) 639122180Skan{ 640169689Skan return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, 641169689Skan __A, __A, __A, __A, __A, __A, __A, __A); 642122180Skan} 643122180Skan 644169689Skan/* Create a vector of Qi, where i is the element number. 645169689Skan The parameter order is reversed from the _mm_set_epi* functions. */ 646169689Skan 647169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 648169689Skan_mm_setr_epi64 (__m64 __q0, __m64 __q1) 649122180Skan{ 650169689Skan return _mm_set_epi64 (__q1, __q0); 651122180Skan} 652122180Skan 653169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 654169689Skan_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) 655122180Skan{ 656169689Skan return _mm_set_epi32 (__q3, __q2, __q1, __q0); 657122180Skan} 658122180Skan 659169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 660169689Skan_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, 661169689Skan short __q4, short __q5, short __q6, short __q7) 662122180Skan{ 663169689Skan return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); 664122180Skan} 665122180Skan 666169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 667169689Skan_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, 668169689Skan char __q04, char __q05, char __q06, char __q07, 669169689Skan char __q08, char __q09, char __q10, char __q11, 670169689Skan char __q12, char __q13, char __q14, char __q15) 671122180Skan{ 672169689Skan return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, 673169689Skan __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); 674169689Skan} 675122180Skan 676169689Skan/* Create a vector with element 0 as *P and the rest zero. */ 677122180Skan 678169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 679169689Skan_mm_load_si128 (__m128i const *__P) 680169689Skan{ 681169689Skan return *__P; 682122180Skan} 683122180Skan 684169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 685169689Skan_mm_loadu_si128 (__m128i const *__P) 686122180Skan{ 687169689Skan return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); 688122180Skan} 689122180Skan 690169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 691169689Skan_mm_loadl_epi64 (__m128i const *__P) 692122180Skan{ 693169689Skan return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); 694122180Skan} 695122180Skan 696169689Skanstatic __inline void __attribute__((__always_inline__)) 697169689Skan_mm_store_si128 (__m128i *__P, __m128i __B) 698122180Skan{ 699169689Skan *__P = __B; 700122180Skan} 701122180Skan 702169689Skanstatic __inline void __attribute__((__always_inline__)) 703169689Skan_mm_storeu_si128 (__m128i *__P, __m128i __B) 704122180Skan{ 705169689Skan __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); 706122180Skan} 707122180Skan 708169689Skanstatic __inline void __attribute__((__always_inline__)) 709169689Skan_mm_storel_epi64 (__m128i *__P, __m128i __B) 710122180Skan{ 711169689Skan *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 712122180Skan} 713122180Skan 714169689Skanstatic __inline __m64 __attribute__((__always_inline__)) 715169689Skan_mm_movepi64_pi64 (__m128i __B) 716122180Skan{ 717169689Skan return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 718122180Skan} 719122180Skan 720169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 721169689Skan_mm_movpi64_epi64 (__m64 __A) 722122180Skan{ 723169689Skan return _mm_set_epi64 ((__m64)0LL, __A); 724169689Skan} 725122180Skan 726169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 727169689Skan_mm_move_epi64 (__m128i __A) 728122180Skan{ 729169689Skan return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A)); 730122180Skan} 731122180Skan 732169689Skan/* Create a vector of zeros. */ 733169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 734169689Skan_mm_setzero_si128 (void) 735122180Skan{ 736169689Skan return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; 737122180Skan} 738122180Skan 739169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 740122180Skan_mm_cvtepi32_pd (__m128i __A) 741122180Skan{ 742122180Skan return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); 743122180Skan} 744122180Skan 745169689Skanstatic __inline __m128 __attribute__((__always_inline__)) 746122180Skan_mm_cvtepi32_ps (__m128i __A) 747122180Skan{ 748122180Skan return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); 749122180Skan} 750122180Skan 751169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 752122180Skan_mm_cvtpd_epi32 (__m128d __A) 753122180Skan{ 754122180Skan return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); 755122180Skan} 756122180Skan 757169689Skanstatic __inline __m64 __attribute__((__always_inline__)) 758122180Skan_mm_cvtpd_pi32 (__m128d __A) 759122180Skan{ 760122180Skan return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); 761122180Skan} 762122180Skan 763169689Skanstatic __inline __m128 __attribute__((__always_inline__)) 764122180Skan_mm_cvtpd_ps (__m128d __A) 765122180Skan{ 766122180Skan return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); 767122180Skan} 768122180Skan 769169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 770122180Skan_mm_cvttpd_epi32 (__m128d __A) 771122180Skan{ 772122180Skan return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); 773122180Skan} 774122180Skan 775169689Skanstatic __inline __m64 __attribute__((__always_inline__)) 776122180Skan_mm_cvttpd_pi32 (__m128d __A) 777122180Skan{ 778122180Skan return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); 779122180Skan} 780122180Skan 781169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 782122180Skan_mm_cvtpi32_pd (__m64 __A) 783122180Skan{ 784122180Skan return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); 785122180Skan} 786122180Skan 787169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 788122180Skan_mm_cvtps_epi32 (__m128 __A) 789122180Skan{ 790122180Skan return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); 791122180Skan} 792122180Skan 793169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 794122180Skan_mm_cvttps_epi32 (__m128 __A) 795122180Skan{ 796122180Skan return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); 797122180Skan} 798122180Skan 799169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 800122180Skan_mm_cvtps_pd (__m128 __A) 801122180Skan{ 802122180Skan return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); 803122180Skan} 804122180Skan 805169689Skanstatic __inline int __attribute__((__always_inline__)) 806122180Skan_mm_cvtsd_si32 (__m128d __A) 807122180Skan{ 808122180Skan return __builtin_ia32_cvtsd2si ((__v2df) __A); 809122180Skan} 810122180Skan 811122180Skan#ifdef __x86_64__ 812169689Skan/* Intel intrinsic. */ 813169689Skanstatic __inline long long __attribute__((__always_inline__)) 814169689Skan_mm_cvtsd_si64 (__m128d __A) 815169689Skan{ 816169689Skan return __builtin_ia32_cvtsd2si64 ((__v2df) __A); 817169689Skan} 818169689Skan 819169689Skan/* Microsoft intrinsic. */ 820169689Skanstatic __inline long long __attribute__((__always_inline__)) 821122180Skan_mm_cvtsd_si64x (__m128d __A) 822122180Skan{ 823122180Skan return __builtin_ia32_cvtsd2si64 ((__v2df) __A); 824122180Skan} 825122180Skan#endif 826122180Skan 827169689Skanstatic __inline int __attribute__((__always_inline__)) 828122180Skan_mm_cvttsd_si32 (__m128d __A) 829122180Skan{ 830122180Skan return __builtin_ia32_cvttsd2si ((__v2df) __A); 831122180Skan} 832122180Skan 833122180Skan#ifdef __x86_64__ 834169689Skan/* Intel intrinsic. */ 835169689Skanstatic __inline long long __attribute__((__always_inline__)) 836169689Skan_mm_cvttsd_si64 (__m128d __A) 837169689Skan{ 838169689Skan return __builtin_ia32_cvttsd2si64 ((__v2df) __A); 839169689Skan} 840169689Skan 841169689Skan/* Microsoft intrinsic. */ 842169689Skanstatic __inline long long __attribute__((__always_inline__)) 843122180Skan_mm_cvttsd_si64x (__m128d __A) 844122180Skan{ 845122180Skan return __builtin_ia32_cvttsd2si64 ((__v2df) __A); 846122180Skan} 847122180Skan#endif 848122180Skan 849169689Skanstatic __inline __m128 __attribute__((__always_inline__)) 850122180Skan_mm_cvtsd_ss (__m128 __A, __m128d __B) 851122180Skan{ 852122180Skan return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); 853122180Skan} 854122180Skan 855169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 856122180Skan_mm_cvtsi32_sd (__m128d __A, int __B) 857122180Skan{ 858122180Skan return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); 859122180Skan} 860122180Skan 861122180Skan#ifdef __x86_64__ 862169689Skan/* Intel intrinsic. */ 863169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 864169689Skan_mm_cvtsi64_sd (__m128d __A, long long __B) 865169689Skan{ 866169689Skan return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); 867169689Skan} 868169689Skan 869169689Skan/* Microsoft intrinsic. */ 870169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 871122180Skan_mm_cvtsi64x_sd (__m128d __A, long long __B) 872122180Skan{ 873122180Skan return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); 874122180Skan} 875122180Skan#endif 876122180Skan 877169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 878122180Skan_mm_cvtss_sd (__m128d __A, __m128 __B) 879122180Skan{ 880122180Skan return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); 881122180Skan} 882122180Skan 883122180Skan#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) 884122180Skan 885169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 886122180Skan_mm_unpackhi_pd (__m128d __A, __m128d __B) 887122180Skan{ 888122180Skan return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); 889122180Skan} 890122180Skan 891169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 892122180Skan_mm_unpacklo_pd (__m128d __A, __m128d __B) 893122180Skan{ 894122180Skan return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); 895122180Skan} 896122180Skan 897169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 898122180Skan_mm_loadh_pd (__m128d __A, double const *__B) 899122180Skan{ 900169689Skan return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); 901122180Skan} 902122180Skan 903169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 904122180Skan_mm_loadl_pd (__m128d __A, double const *__B) 905122180Skan{ 906169689Skan return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); 907122180Skan} 908122180Skan 909169689Skanstatic __inline int __attribute__((__always_inline__)) 910122180Skan_mm_movemask_pd (__m128d __A) 911122180Skan{ 912122180Skan return __builtin_ia32_movmskpd ((__v2df)__A); 913122180Skan} 914122180Skan 915169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 916122180Skan_mm_packs_epi16 (__m128i __A, __m128i __B) 917122180Skan{ 918122180Skan return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); 919122180Skan} 920122180Skan 921169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 922122180Skan_mm_packs_epi32 (__m128i __A, __m128i __B) 923122180Skan{ 924122180Skan return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); 925122180Skan} 926122180Skan 927169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 928122180Skan_mm_packus_epi16 (__m128i __A, __m128i __B) 929122180Skan{ 930122180Skan return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); 931122180Skan} 932122180Skan 933169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 934122180Skan_mm_unpackhi_epi8 (__m128i __A, __m128i __B) 935122180Skan{ 936122180Skan return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); 937122180Skan} 938122180Skan 939169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 940122180Skan_mm_unpackhi_epi16 (__m128i __A, __m128i __B) 941122180Skan{ 942122180Skan return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); 943122180Skan} 944122180Skan 945169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 946122180Skan_mm_unpackhi_epi32 (__m128i __A, __m128i __B) 947122180Skan{ 948122180Skan return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); 949122180Skan} 950122180Skan 951169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 952122180Skan_mm_unpackhi_epi64 (__m128i __A, __m128i __B) 953122180Skan{ 954122180Skan return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); 955122180Skan} 956122180Skan 957169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 958122180Skan_mm_unpacklo_epi8 (__m128i __A, __m128i __B) 959122180Skan{ 960122180Skan return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); 961122180Skan} 962122180Skan 963169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 964122180Skan_mm_unpacklo_epi16 (__m128i __A, __m128i __B) 965122180Skan{ 966122180Skan return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); 967122180Skan} 968122180Skan 969169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 970122180Skan_mm_unpacklo_epi32 (__m128i __A, __m128i __B) 971122180Skan{ 972122180Skan return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); 973122180Skan} 974122180Skan 975169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 976122180Skan_mm_unpacklo_epi64 (__m128i __A, __m128i __B) 977122180Skan{ 978122180Skan return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); 979122180Skan} 980122180Skan 981169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 982122180Skan_mm_add_epi8 (__m128i __A, __m128i __B) 983122180Skan{ 984122180Skan return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); 985122180Skan} 986122180Skan 987169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 988122180Skan_mm_add_epi16 (__m128i __A, __m128i __B) 989122180Skan{ 990122180Skan return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); 991122180Skan} 992122180Skan 993169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 994122180Skan_mm_add_epi32 (__m128i __A, __m128i __B) 995122180Skan{ 996122180Skan return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); 997122180Skan} 998122180Skan 999169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1000122180Skan_mm_add_epi64 (__m128i __A, __m128i __B) 1001122180Skan{ 1002122180Skan return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); 1003122180Skan} 1004122180Skan 1005169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1006122180Skan_mm_adds_epi8 (__m128i __A, __m128i __B) 1007122180Skan{ 1008122180Skan return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); 1009122180Skan} 1010122180Skan 1011169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1012122180Skan_mm_adds_epi16 (__m128i __A, __m128i __B) 1013122180Skan{ 1014122180Skan return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); 1015122180Skan} 1016122180Skan 1017169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1018122180Skan_mm_adds_epu8 (__m128i __A, __m128i __B) 1019122180Skan{ 1020122180Skan return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); 1021122180Skan} 1022122180Skan 1023169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1024122180Skan_mm_adds_epu16 (__m128i __A, __m128i __B) 1025122180Skan{ 1026122180Skan return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); 1027122180Skan} 1028122180Skan 1029169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1030122180Skan_mm_sub_epi8 (__m128i __A, __m128i __B) 1031122180Skan{ 1032122180Skan return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); 1033122180Skan} 1034122180Skan 1035169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1036122180Skan_mm_sub_epi16 (__m128i __A, __m128i __B) 1037122180Skan{ 1038122180Skan return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); 1039122180Skan} 1040122180Skan 1041169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1042122180Skan_mm_sub_epi32 (__m128i __A, __m128i __B) 1043122180Skan{ 1044122180Skan return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); 1045122180Skan} 1046122180Skan 1047169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1048122180Skan_mm_sub_epi64 (__m128i __A, __m128i __B) 1049122180Skan{ 1050122180Skan return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); 1051122180Skan} 1052122180Skan 1053169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1054122180Skan_mm_subs_epi8 (__m128i __A, __m128i __B) 1055122180Skan{ 1056122180Skan return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); 1057122180Skan} 1058122180Skan 1059169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1060122180Skan_mm_subs_epi16 (__m128i __A, __m128i __B) 1061122180Skan{ 1062122180Skan return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); 1063122180Skan} 1064122180Skan 1065169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1066122180Skan_mm_subs_epu8 (__m128i __A, __m128i __B) 1067122180Skan{ 1068122180Skan return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); 1069122180Skan} 1070122180Skan 1071169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1072122180Skan_mm_subs_epu16 (__m128i __A, __m128i __B) 1073122180Skan{ 1074122180Skan return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); 1075122180Skan} 1076122180Skan 1077169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1078122180Skan_mm_madd_epi16 (__m128i __A, __m128i __B) 1079122180Skan{ 1080122180Skan return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); 1081122180Skan} 1082122180Skan 1083169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1084122180Skan_mm_mulhi_epi16 (__m128i __A, __m128i __B) 1085122180Skan{ 1086122180Skan return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); 1087122180Skan} 1088122180Skan 1089169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1090122180Skan_mm_mullo_epi16 (__m128i __A, __m128i __B) 1091122180Skan{ 1092122180Skan return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); 1093122180Skan} 1094122180Skan 1095169689Skanstatic __inline __m64 __attribute__((__always_inline__)) 1096122180Skan_mm_mul_su32 (__m64 __A, __m64 __B) 1097122180Skan{ 1098122180Skan return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); 1099122180Skan} 1100122180Skan 1101169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1102122180Skan_mm_mul_epu32 (__m128i __A, __m128i __B) 1103122180Skan{ 1104122180Skan return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); 1105122180Skan} 1106122180Skan 1107169689Skan#if 0 1108169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1109169689Skan_mm_slli_epi16 (__m128i __A, int __B) 1110122180Skan{ 1111169689Skan return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); 1112122180Skan} 1113122180Skan 1114169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1115169689Skan_mm_slli_epi32 (__m128i __A, int __B) 1116122180Skan{ 1117169689Skan return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); 1118122180Skan} 1119122180Skan 1120169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1121169689Skan_mm_slli_epi64 (__m128i __A, int __B) 1122122180Skan{ 1123169689Skan return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); 1124122180Skan} 1125169689Skan#else 1126169689Skan#define _mm_slli_epi16(__A, __B) \ 1127169689Skan ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), __B)) 1128169689Skan#define _mm_slli_epi32(__A, __B) \ 1129259216Sdim ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), __B)) 1130169689Skan#define _mm_slli_epi64(__A, __B) \ 1131259216Sdim ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), __B)) 1132169689Skan#endif 1133122180Skan 1134169689Skan#if 0 1135169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1136169689Skan_mm_srai_epi16 (__m128i __A, int __B) 1137122180Skan{ 1138169689Skan return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); 1139122180Skan} 1140122180Skan 1141169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1142169689Skan_mm_srai_epi32 (__m128i __A, int __B) 1143122180Skan{ 1144169689Skan return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); 1145122180Skan} 1146169689Skan#else 1147169689Skan#define _mm_srai_epi16(__A, __B) \ 1148169689Skan ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), __B)) 1149169689Skan#define _mm_srai_epi32(__A, __B) \ 1150259216Sdim ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), __B)) 1151169689Skan#endif 1152122180Skan 1153169689Skan#if 0 1154169689Skanstatic __m128i __attribute__((__always_inline__)) 1155169689Skan_mm_srli_si128 (__m128i __A, int __B) 1156122180Skan{ 1157169689Skan return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8)); 1158122180Skan} 1159122180Skan 1160169689Skanstatic __m128i __attribute__((__always_inline__)) 1161169689Skan_mm_srli_si128 (__m128i __A, int __B) 1162122180Skan{ 1163169689Skan return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8)); 1164122180Skan} 1165169689Skan#else 1166169689Skan#define _mm_srli_si128(__A, __B) \ 1167169689Skan ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8)) 1168169689Skan#define _mm_slli_si128(__A, __B) \ 1169169689Skan ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8)) 1170169689Skan#endif 1171122180Skan 1172169689Skan#if 0 1173169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1174169689Skan_mm_srli_epi16 (__m128i __A, int __B) 1175122180Skan{ 1176169689Skan return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); 1177122180Skan} 1178122180Skan 1179169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1180169689Skan_mm_srli_epi32 (__m128i __A, int __B) 1181122180Skan{ 1182169689Skan return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); 1183122180Skan} 1184122180Skan 1185169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1186169689Skan_mm_srli_epi64 (__m128i __A, int __B) 1187122180Skan{ 1188169689Skan return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); 1189122180Skan} 1190169689Skan#else 1191169689Skan#define _mm_srli_epi16(__A, __B) \ 1192169689Skan ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), __B)) 1193169689Skan#define _mm_srli_epi32(__A, __B) \ 1194169689Skan ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), __B)) 1195169689Skan#define _mm_srli_epi64(__A, __B) \ 1196270099Sdim ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B)) 1197169689Skan#endif 1198122180Skan 1199169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1200169689Skan_mm_sll_epi16 (__m128i __A, __m128i __B) 1201122180Skan{ 1202169689Skan return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); 1203122180Skan} 1204122180Skan 1205169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1206169689Skan_mm_sll_epi32 (__m128i __A, __m128i __B) 1207122180Skan{ 1208169689Skan return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); 1209122180Skan} 1210122180Skan 1211169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1212169689Skan_mm_sll_epi64 (__m128i __A, __m128i __B) 1213122180Skan{ 1214169689Skan return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); 1215122180Skan} 1216122180Skan 1217169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1218169689Skan_mm_sra_epi16 (__m128i __A, __m128i __B) 1219122180Skan{ 1220169689Skan return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); 1221122180Skan} 1222122180Skan 1223169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1224169689Skan_mm_sra_epi32 (__m128i __A, __m128i __B) 1225122180Skan{ 1226169689Skan return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); 1227122180Skan} 1228122180Skan 1229169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1230169689Skan_mm_srl_epi16 (__m128i __A, __m128i __B) 1231122180Skan{ 1232169689Skan return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); 1233122180Skan} 1234122180Skan 1235169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1236169689Skan_mm_srl_epi32 (__m128i __A, __m128i __B) 1237122180Skan{ 1238169689Skan return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); 1239122180Skan} 1240122180Skan 1241169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1242169689Skan_mm_srl_epi64 (__m128i __A, __m128i __B) 1243122180Skan{ 1244169689Skan return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); 1245122180Skan} 1246122180Skan 1247169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1248122180Skan_mm_and_si128 (__m128i __A, __m128i __B) 1249122180Skan{ 1250122180Skan return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); 1251122180Skan} 1252122180Skan 1253169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1254122180Skan_mm_andnot_si128 (__m128i __A, __m128i __B) 1255122180Skan{ 1256122180Skan return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); 1257122180Skan} 1258122180Skan 1259169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1260122180Skan_mm_or_si128 (__m128i __A, __m128i __B) 1261122180Skan{ 1262122180Skan return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); 1263122180Skan} 1264122180Skan 1265169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1266122180Skan_mm_xor_si128 (__m128i __A, __m128i __B) 1267122180Skan{ 1268122180Skan return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); 1269122180Skan} 1270122180Skan 1271169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1272122180Skan_mm_cmpeq_epi8 (__m128i __A, __m128i __B) 1273122180Skan{ 1274122180Skan return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); 1275122180Skan} 1276122180Skan 1277169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1278122180Skan_mm_cmpeq_epi16 (__m128i __A, __m128i __B) 1279122180Skan{ 1280122180Skan return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); 1281122180Skan} 1282122180Skan 1283169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1284122180Skan_mm_cmpeq_epi32 (__m128i __A, __m128i __B) 1285122180Skan{ 1286122180Skan return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); 1287122180Skan} 1288122180Skan 1289169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1290122180Skan_mm_cmplt_epi8 (__m128i __A, __m128i __B) 1291122180Skan{ 1292122180Skan return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); 1293122180Skan} 1294122180Skan 1295169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1296122180Skan_mm_cmplt_epi16 (__m128i __A, __m128i __B) 1297122180Skan{ 1298122180Skan return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); 1299122180Skan} 1300122180Skan 1301169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1302122180Skan_mm_cmplt_epi32 (__m128i __A, __m128i __B) 1303122180Skan{ 1304122180Skan return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); 1305122180Skan} 1306122180Skan 1307169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1308122180Skan_mm_cmpgt_epi8 (__m128i __A, __m128i __B) 1309122180Skan{ 1310122180Skan return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); 1311122180Skan} 1312122180Skan 1313169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1314122180Skan_mm_cmpgt_epi16 (__m128i __A, __m128i __B) 1315122180Skan{ 1316122180Skan return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); 1317122180Skan} 1318122180Skan 1319169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1320122180Skan_mm_cmpgt_epi32 (__m128i __A, __m128i __B) 1321122180Skan{ 1322122180Skan return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); 1323122180Skan} 1324122180Skan 1325169689Skan#if 0 1326169689Skanstatic __inline int __attribute__((__always_inline__)) 1327169689Skan_mm_extract_epi16 (__m128i const __A, int const __N) 1328169689Skan{ 1329169689Skan return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); 1330169689Skan} 1331122180Skan 1332169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1333169689Skan_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) 1334169689Skan{ 1335169689Skan return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); 1336169689Skan} 1337169689Skan#else 1338169689Skan#define _mm_extract_epi16(A, N) \ 1339169689Skan ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N))) 1340169689Skan#define _mm_insert_epi16(A, D, N) \ 1341169689Skan ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N))) 1342169689Skan#endif 1343122180Skan 1344169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1345122180Skan_mm_max_epi16 (__m128i __A, __m128i __B) 1346122180Skan{ 1347122180Skan return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); 1348122180Skan} 1349122180Skan 1350169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1351122180Skan_mm_max_epu8 (__m128i __A, __m128i __B) 1352122180Skan{ 1353122180Skan return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); 1354122180Skan} 1355122180Skan 1356169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1357122180Skan_mm_min_epi16 (__m128i __A, __m128i __B) 1358122180Skan{ 1359122180Skan return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); 1360122180Skan} 1361122180Skan 1362169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1363122180Skan_mm_min_epu8 (__m128i __A, __m128i __B) 1364122180Skan{ 1365122180Skan return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); 1366122180Skan} 1367122180Skan 1368169689Skanstatic __inline int __attribute__((__always_inline__)) 1369122180Skan_mm_movemask_epi8 (__m128i __A) 1370122180Skan{ 1371122180Skan return __builtin_ia32_pmovmskb128 ((__v16qi)__A); 1372122180Skan} 1373122180Skan 1374169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1375122180Skan_mm_mulhi_epu16 (__m128i __A, __m128i __B) 1376122180Skan{ 1377122180Skan return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); 1378122180Skan} 1379122180Skan 1380122180Skan#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) 1381122180Skan#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) 1382122180Skan#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) 1383122180Skan 1384169689Skanstatic __inline void __attribute__((__always_inline__)) 1385122180Skan_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) 1386122180Skan{ 1387122180Skan __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); 1388122180Skan} 1389122180Skan 1390169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1391122180Skan_mm_avg_epu8 (__m128i __A, __m128i __B) 1392122180Skan{ 1393122180Skan return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); 1394122180Skan} 1395122180Skan 1396169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1397122180Skan_mm_avg_epu16 (__m128i __A, __m128i __B) 1398122180Skan{ 1399122180Skan return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); 1400122180Skan} 1401122180Skan 1402169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1403122180Skan_mm_sad_epu8 (__m128i __A, __m128i __B) 1404122180Skan{ 1405122180Skan return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); 1406122180Skan} 1407122180Skan 1408169689Skanstatic __inline void __attribute__((__always_inline__)) 1409122180Skan_mm_stream_si32 (int *__A, int __B) 1410122180Skan{ 1411122180Skan __builtin_ia32_movnti (__A, __B); 1412122180Skan} 1413122180Skan 1414169689Skanstatic __inline void __attribute__((__always_inline__)) 1415122180Skan_mm_stream_si128 (__m128i *__A, __m128i __B) 1416122180Skan{ 1417122180Skan __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); 1418122180Skan} 1419122180Skan 1420169689Skanstatic __inline void __attribute__((__always_inline__)) 1421122180Skan_mm_stream_pd (double *__A, __m128d __B) 1422122180Skan{ 1423122180Skan __builtin_ia32_movntpd (__A, (__v2df)__B); 1424122180Skan} 1425122180Skan 1426169689Skanstatic __inline void __attribute__((__always_inline__)) 1427122180Skan_mm_clflush (void const *__A) 1428122180Skan{ 1429169689Skan __builtin_ia32_clflush (__A); 1430122180Skan} 1431122180Skan 1432169689Skanstatic __inline void __attribute__((__always_inline__)) 1433122180Skan_mm_lfence (void) 1434122180Skan{ 1435122180Skan __builtin_ia32_lfence (); 1436122180Skan} 1437122180Skan 1438169689Skanstatic __inline void __attribute__((__always_inline__)) 1439122180Skan_mm_mfence (void) 1440122180Skan{ 1441122180Skan __builtin_ia32_mfence (); 1442122180Skan} 1443122180Skan 1444169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1445122180Skan_mm_cvtsi32_si128 (int __A) 1446122180Skan{ 1447169689Skan return _mm_set_epi32 (0, 0, 0, __A); 1448122180Skan} 1449122180Skan 1450122180Skan#ifdef __x86_64__ 1451169689Skan/* Intel intrinsic. */ 1452169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1453169689Skan_mm_cvtsi64_si128 (long long __A) 1454169689Skan{ 1455169689Skan return _mm_set_epi64x (0, __A); 1456169689Skan} 1457169689Skan 1458169689Skan/* Microsoft intrinsic. */ 1459169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1460122180Skan_mm_cvtsi64x_si128 (long long __A) 1461122180Skan{ 1462169689Skan return _mm_set_epi64x (0, __A); 1463122180Skan} 1464122180Skan#endif 1465122180Skan 1466169689Skan/* Casts between various SP, DP, INT vector types. Note that these do no 1467169689Skan conversion of values, they just change the type. */ 1468169689Skanstatic __inline __m128 __attribute__((__always_inline__)) 1469169689Skan_mm_castpd_ps(__m128d __A) 1470122180Skan{ 1471169689Skan return (__m128) __A; 1472122180Skan} 1473122180Skan 1474169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1475169689Skan_mm_castpd_si128(__m128d __A) 1476122180Skan{ 1477169689Skan return (__m128i) __A; 1478122180Skan} 1479122180Skan 1480169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 1481169689Skan_mm_castps_pd(__m128 __A) 1482169689Skan{ 1483169689Skan return (__m128d) __A; 1484169689Skan} 1485169689Skan 1486169689Skanstatic __inline __m128i __attribute__((__always_inline__)) 1487169689Skan_mm_castps_si128(__m128 __A) 1488169689Skan{ 1489169689Skan return (__m128i) __A; 1490169689Skan} 1491169689Skan 1492169689Skanstatic __inline __m128 __attribute__((__always_inline__)) 1493169689Skan_mm_castsi128_ps(__m128i __A) 1494169689Skan{ 1495169689Skan return (__m128) __A; 1496169689Skan} 1497169689Skan 1498169689Skanstatic __inline __m128d __attribute__((__always_inline__)) 1499169689Skan_mm_castsi128_pd(__m128i __A) 1500169689Skan{ 1501169689Skan return (__m128d) __A; 1502169689Skan} 1503169689Skan 1504122180Skan#endif /* __SSE2__ */ 1505122180Skan 1506122180Skan#endif /* _EMMINTRIN_H_INCLUDED */ 1507