/* xopintrin.h revision 341825 */
1251662Sdim/*===---- xopintrin.h - XOP intrinsics -------------------------------------=== 2239313Sdim * 3239313Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4239313Sdim * of this software and associated documentation files (the "Software"), to deal 5239313Sdim * in the Software without restriction, including without limitation the rights 6239313Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7239313Sdim * copies of the Software, and to permit persons to whom the Software is 8239313Sdim * furnished to do so, subject to the following conditions: 9239313Sdim * 10239313Sdim * The above copyright notice and this permission notice shall be included in 11239313Sdim * all copies or substantial portions of the Software. 12239313Sdim * 13239313Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14239313Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15239313Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16239313Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17239313Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18239313Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19239313Sdim * THE SOFTWARE. 20239313Sdim * 21239313Sdim *===-----------------------------------------------------------------------=== 22239313Sdim */ 23239313Sdim 24239313Sdim#ifndef __X86INTRIN_H 25251662Sdim#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26239313Sdim#endif 27239313Sdim 28239313Sdim#ifndef __XOPINTRIN_H 29239313Sdim#define __XOPINTRIN_H 30239313Sdim 31239313Sdim#include <fma4intrin.h> 32239313Sdim 33288943Sdim/* Define the default attributes for the functions in this file. 
*/ 34341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) 35341825Sdim#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) 36288943Sdim 37288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 38239313Sdim_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 39239313Sdim{ 40239313Sdim return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 41239313Sdim} 42239313Sdim 43288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 44239313Sdim_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 45239313Sdim{ 46239313Sdim return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 47239313Sdim} 48239313Sdim 49288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 50239313Sdim_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 51239313Sdim{ 52239313Sdim return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 53239313Sdim} 54239313Sdim 55288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 56239313Sdim_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 57239313Sdim{ 58239313Sdim return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 59239313Sdim} 60239313Sdim 61288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 62239313Sdim_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 63239313Sdim{ 64239313Sdim return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 65239313Sdim} 66239313Sdim 67288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 68239313Sdim_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 69239313Sdim{ 70239313Sdim return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 71239313Sdim} 72239313Sdim 73288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 74239313Sdim_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 75239313Sdim{ 76239313Sdim return 
(__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 77239313Sdim} 78239313Sdim 79288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 80239313Sdim_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 81239313Sdim{ 82239313Sdim return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 83239313Sdim} 84239313Sdim 85288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 86239313Sdim_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 87239313Sdim{ 88239313Sdim return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 89239313Sdim} 90239313Sdim 91288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 92239313Sdim_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 93239313Sdim{ 94239313Sdim return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 95239313Sdim} 96239313Sdim 97288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 98239313Sdim_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 99239313Sdim{ 100239313Sdim return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 101239313Sdim} 102239313Sdim 103288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 104239313Sdim_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 105239313Sdim{ 106239313Sdim return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 107239313Sdim} 108239313Sdim 109288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 110239313Sdim_mm_haddw_epi8(__m128i __A) 111239313Sdim{ 112239313Sdim return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); 113239313Sdim} 114239313Sdim 115288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 116239313Sdim_mm_haddd_epi8(__m128i __A) 117239313Sdim{ 118239313Sdim return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); 119239313Sdim} 120239313Sdim 121288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 122239313Sdim_mm_haddq_epi8(__m128i __A) 123239313Sdim{ 124239313Sdim return 
(__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); 125239313Sdim} 126239313Sdim 127288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 128239313Sdim_mm_haddd_epi16(__m128i __A) 129239313Sdim{ 130239313Sdim return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); 131239313Sdim} 132239313Sdim 133288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 134239313Sdim_mm_haddq_epi16(__m128i __A) 135239313Sdim{ 136239313Sdim return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); 137239313Sdim} 138239313Sdim 139288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 140239313Sdim_mm_haddq_epi32(__m128i __A) 141239313Sdim{ 142239313Sdim return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); 143239313Sdim} 144239313Sdim 145288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 146239313Sdim_mm_haddw_epu8(__m128i __A) 147239313Sdim{ 148239313Sdim return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); 149239313Sdim} 150239313Sdim 151288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 152239313Sdim_mm_haddd_epu8(__m128i __A) 153239313Sdim{ 154239313Sdim return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); 155239313Sdim} 156239313Sdim 157288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 158239313Sdim_mm_haddq_epu8(__m128i __A) 159239313Sdim{ 160239313Sdim return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); 161239313Sdim} 162239313Sdim 163288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 164239313Sdim_mm_haddd_epu16(__m128i __A) 165239313Sdim{ 166239313Sdim return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); 167239313Sdim} 168239313Sdim 169288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 170239313Sdim_mm_haddq_epu16(__m128i __A) 171239313Sdim{ 172239313Sdim return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); 173239313Sdim} 174239313Sdim 175288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 176239313Sdim_mm_haddq_epu32(__m128i __A) 177239313Sdim{ 178239313Sdim return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); 179239313Sdim} 180239313Sdim 
181288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 182239313Sdim_mm_hsubw_epi8(__m128i __A) 183239313Sdim{ 184239313Sdim return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); 185239313Sdim} 186239313Sdim 187288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 188239313Sdim_mm_hsubd_epi16(__m128i __A) 189239313Sdim{ 190239313Sdim return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); 191239313Sdim} 192239313Sdim 193288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 194239313Sdim_mm_hsubq_epi32(__m128i __A) 195239313Sdim{ 196239313Sdim return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); 197239313Sdim} 198239313Sdim 199288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 200239313Sdim_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 201239313Sdim{ 202321369Sdim return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); 203239313Sdim} 204239313Sdim 205341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 206239313Sdim_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 207239313Sdim{ 208321369Sdim return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); 209239313Sdim} 210239313Sdim 211288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 212239313Sdim_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 213239313Sdim{ 214239313Sdim return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 215239313Sdim} 216239313Sdim 217288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 218239313Sdim_mm_rot_epi8(__m128i __A, __m128i __B) 219239313Sdim{ 220239313Sdim return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); 221239313Sdim} 222239313Sdim 223288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 224239313Sdim_mm_rot_epi16(__m128i __A, __m128i __B) 225239313Sdim{ 226239313Sdim return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); 227239313Sdim} 228239313Sdim 229288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 230239313Sdim_mm_rot_epi32(__m128i __A, 
__m128i __B) 231239313Sdim{ 232239313Sdim return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); 233239313Sdim} 234239313Sdim 235288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 236239313Sdim_mm_rot_epi64(__m128i __A, __m128i __B) 237239313Sdim{ 238239313Sdim return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); 239239313Sdim} 240239313Sdim 241341825Sdim#define _mm_roti_epi8(A, N) \ 242341825Sdim (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)) 243239313Sdim 244341825Sdim#define _mm_roti_epi16(A, N) \ 245341825Sdim (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)) 246239313Sdim 247341825Sdim#define _mm_roti_epi32(A, N) \ 248341825Sdim (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)) 249239313Sdim 250341825Sdim#define _mm_roti_epi64(A, N) \ 251341825Sdim (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)) 252239313Sdim 253288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 254239313Sdim_mm_shl_epi8(__m128i __A, __m128i __B) 255239313Sdim{ 256239313Sdim return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); 257239313Sdim} 258239313Sdim 259288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 260239313Sdim_mm_shl_epi16(__m128i __A, __m128i __B) 261239313Sdim{ 262239313Sdim return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); 263239313Sdim} 264239313Sdim 265288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 266239313Sdim_mm_shl_epi32(__m128i __A, __m128i __B) 267239313Sdim{ 268239313Sdim return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); 269239313Sdim} 270239313Sdim 271288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 272239313Sdim_mm_shl_epi64(__m128i __A, __m128i __B) 273239313Sdim{ 274239313Sdim return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); 275239313Sdim} 276239313Sdim 277288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 278239313Sdim_mm_sha_epi8(__m128i __A, __m128i __B) 279239313Sdim{ 280239313Sdim return 
(__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); 281239313Sdim} 282239313Sdim 283288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 284239313Sdim_mm_sha_epi16(__m128i __A, __m128i __B) 285239313Sdim{ 286239313Sdim return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); 287239313Sdim} 288239313Sdim 289288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 290239313Sdim_mm_sha_epi32(__m128i __A, __m128i __B) 291239313Sdim{ 292239313Sdim return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); 293239313Sdim} 294239313Sdim 295288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 296239313Sdim_mm_sha_epi64(__m128i __A, __m128i __B) 297239313Sdim{ 298239313Sdim return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); 299239313Sdim} 300239313Sdim 301341825Sdim#define _mm_com_epu8(A, B, N) \ 302296417Sdim (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ 303341825Sdim (__v16qi)(__m128i)(B), (N)) 304239313Sdim 305341825Sdim#define _mm_com_epu16(A, B, N) \ 306296417Sdim (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ 307341825Sdim (__v8hi)(__m128i)(B), (N)) 308239313Sdim 309341825Sdim#define _mm_com_epu32(A, B, N) \ 310296417Sdim (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ 311341825Sdim (__v4si)(__m128i)(B), (N)) 312239313Sdim 313341825Sdim#define _mm_com_epu64(A, B, N) \ 314296417Sdim (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ 315341825Sdim (__v2di)(__m128i)(B), (N)) 316239313Sdim 317341825Sdim#define _mm_com_epi8(A, B, N) \ 318296417Sdim (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ 319341825Sdim (__v16qi)(__m128i)(B), (N)) 320239313Sdim 321341825Sdim#define _mm_com_epi16(A, B, N) \ 322296417Sdim (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ 323341825Sdim (__v8hi)(__m128i)(B), (N)) 324239313Sdim 325341825Sdim#define _mm_com_epi32(A, B, N) \ 326296417Sdim (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ 327341825Sdim (__v4si)(__m128i)(B), (N)) 328239313Sdim 
329341825Sdim#define _mm_com_epi64(A, B, N) \ 330296417Sdim (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ 331341825Sdim (__v2di)(__m128i)(B), (N)) 332239313Sdim 333261991Sdim#define _MM_PCOMCTRL_LT 0 334261991Sdim#define _MM_PCOMCTRL_LE 1 335261991Sdim#define _MM_PCOMCTRL_GT 2 336261991Sdim#define _MM_PCOMCTRL_GE 3 337261991Sdim#define _MM_PCOMCTRL_EQ 4 338261991Sdim#define _MM_PCOMCTRL_NEQ 5 339261991Sdim#define _MM_PCOMCTRL_FALSE 6 340261991Sdim#define _MM_PCOMCTRL_TRUE 7 341261991Sdim 342288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 343261991Sdim_mm_comlt_epu8(__m128i __A, __m128i __B) 344261991Sdim{ 345261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); 346261991Sdim} 347261991Sdim 348288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 349261991Sdim_mm_comle_epu8(__m128i __A, __m128i __B) 350261991Sdim{ 351261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); 352261991Sdim} 353261991Sdim 354288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 355261991Sdim_mm_comgt_epu8(__m128i __A, __m128i __B) 356261991Sdim{ 357261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); 358261991Sdim} 359261991Sdim 360288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 361261991Sdim_mm_comge_epu8(__m128i __A, __m128i __B) 362261991Sdim{ 363261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); 364261991Sdim} 365261991Sdim 366288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 367261991Sdim_mm_comeq_epu8(__m128i __A, __m128i __B) 368261991Sdim{ 369261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); 370261991Sdim} 371261991Sdim 372288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 373261991Sdim_mm_comneq_epu8(__m128i __A, __m128i __B) 374261991Sdim{ 375261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); 376261991Sdim} 377261991Sdim 378288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 379261991Sdim_mm_comfalse_epu8(__m128i __A, __m128i __B) 380261991Sdim{ 381261991Sdim return _mm_com_epu8(__A, __B, 
_MM_PCOMCTRL_FALSE); 382261991Sdim} 383261991Sdim 384288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 385261991Sdim_mm_comtrue_epu8(__m128i __A, __m128i __B) 386261991Sdim{ 387261991Sdim return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); 388261991Sdim} 389261991Sdim 390288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 391261991Sdim_mm_comlt_epu16(__m128i __A, __m128i __B) 392261991Sdim{ 393261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); 394261991Sdim} 395261991Sdim 396288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 397261991Sdim_mm_comle_epu16(__m128i __A, __m128i __B) 398261991Sdim{ 399261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); 400261991Sdim} 401261991Sdim 402288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 403261991Sdim_mm_comgt_epu16(__m128i __A, __m128i __B) 404261991Sdim{ 405261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); 406261991Sdim} 407261991Sdim 408288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 409261991Sdim_mm_comge_epu16(__m128i __A, __m128i __B) 410261991Sdim{ 411261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); 412261991Sdim} 413261991Sdim 414288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 415261991Sdim_mm_comeq_epu16(__m128i __A, __m128i __B) 416261991Sdim{ 417261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); 418261991Sdim} 419261991Sdim 420288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 421261991Sdim_mm_comneq_epu16(__m128i __A, __m128i __B) 422261991Sdim{ 423261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); 424261991Sdim} 425261991Sdim 426288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 427261991Sdim_mm_comfalse_epu16(__m128i __A, __m128i __B) 428261991Sdim{ 429261991Sdim return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); 430261991Sdim} 431261991Sdim 432288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 433261991Sdim_mm_comtrue_epu16(__m128i __A, __m128i __B) 434261991Sdim{ 435261991Sdim return _mm_com_epu16(__A, __B, 
_MM_PCOMCTRL_TRUE); 436261991Sdim} 437261991Sdim 438288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 439261991Sdim_mm_comlt_epu32(__m128i __A, __m128i __B) 440261991Sdim{ 441261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); 442261991Sdim} 443261991Sdim 444288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 445261991Sdim_mm_comle_epu32(__m128i __A, __m128i __B) 446261991Sdim{ 447261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); 448261991Sdim} 449261991Sdim 450288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 451261991Sdim_mm_comgt_epu32(__m128i __A, __m128i __B) 452261991Sdim{ 453261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); 454261991Sdim} 455261991Sdim 456288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 457261991Sdim_mm_comge_epu32(__m128i __A, __m128i __B) 458261991Sdim{ 459261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); 460261991Sdim} 461261991Sdim 462288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 463261991Sdim_mm_comeq_epu32(__m128i __A, __m128i __B) 464261991Sdim{ 465261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); 466261991Sdim} 467261991Sdim 468288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 469261991Sdim_mm_comneq_epu32(__m128i __A, __m128i __B) 470261991Sdim{ 471261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); 472261991Sdim} 473261991Sdim 474288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 475261991Sdim_mm_comfalse_epu32(__m128i __A, __m128i __B) 476261991Sdim{ 477261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); 478261991Sdim} 479261991Sdim 480288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 481261991Sdim_mm_comtrue_epu32(__m128i __A, __m128i __B) 482261991Sdim{ 483261991Sdim return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); 484261991Sdim} 485261991Sdim 486288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 487261991Sdim_mm_comlt_epu64(__m128i __A, __m128i __B) 488261991Sdim{ 489261991Sdim return _mm_com_epu64(__A, __B, 
_MM_PCOMCTRL_LT); 490261991Sdim} 491261991Sdim 492288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 493261991Sdim_mm_comle_epu64(__m128i __A, __m128i __B) 494261991Sdim{ 495261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); 496261991Sdim} 497261991Sdim 498288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 499261991Sdim_mm_comgt_epu64(__m128i __A, __m128i __B) 500261991Sdim{ 501261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); 502261991Sdim} 503261991Sdim 504288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 505261991Sdim_mm_comge_epu64(__m128i __A, __m128i __B) 506261991Sdim{ 507261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); 508261991Sdim} 509261991Sdim 510288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 511261991Sdim_mm_comeq_epu64(__m128i __A, __m128i __B) 512261991Sdim{ 513261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); 514261991Sdim} 515261991Sdim 516288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 517261991Sdim_mm_comneq_epu64(__m128i __A, __m128i __B) 518261991Sdim{ 519261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); 520261991Sdim} 521261991Sdim 522288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 523261991Sdim_mm_comfalse_epu64(__m128i __A, __m128i __B) 524261991Sdim{ 525261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); 526261991Sdim} 527261991Sdim 528288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 529261991Sdim_mm_comtrue_epu64(__m128i __A, __m128i __B) 530261991Sdim{ 531261991Sdim return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); 532261991Sdim} 533261991Sdim 534288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 535261991Sdim_mm_comlt_epi8(__m128i __A, __m128i __B) 536261991Sdim{ 537261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); 538261991Sdim} 539261991Sdim 540288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 541261991Sdim_mm_comle_epi8(__m128i __A, __m128i __B) 542261991Sdim{ 543261991Sdim return _mm_com_epi8(__A, __B, 
_MM_PCOMCTRL_LE); 544261991Sdim} 545261991Sdim 546288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 547261991Sdim_mm_comgt_epi8(__m128i __A, __m128i __B) 548261991Sdim{ 549261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); 550261991Sdim} 551261991Sdim 552288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 553261991Sdim_mm_comge_epi8(__m128i __A, __m128i __B) 554261991Sdim{ 555261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); 556261991Sdim} 557261991Sdim 558288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 559261991Sdim_mm_comeq_epi8(__m128i __A, __m128i __B) 560261991Sdim{ 561261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); 562261991Sdim} 563261991Sdim 564288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 565261991Sdim_mm_comneq_epi8(__m128i __A, __m128i __B) 566261991Sdim{ 567261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); 568261991Sdim} 569261991Sdim 570288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 571261991Sdim_mm_comfalse_epi8(__m128i __A, __m128i __B) 572261991Sdim{ 573261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); 574261991Sdim} 575261991Sdim 576288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 577261991Sdim_mm_comtrue_epi8(__m128i __A, __m128i __B) 578261991Sdim{ 579261991Sdim return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); 580261991Sdim} 581261991Sdim 582288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 583261991Sdim_mm_comlt_epi16(__m128i __A, __m128i __B) 584261991Sdim{ 585261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); 586261991Sdim} 587261991Sdim 588288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 589261991Sdim_mm_comle_epi16(__m128i __A, __m128i __B) 590261991Sdim{ 591261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); 592261991Sdim} 593261991Sdim 594288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 595261991Sdim_mm_comgt_epi16(__m128i __A, __m128i __B) 596261991Sdim{ 597261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); 
598261991Sdim} 599261991Sdim 600288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 601261991Sdim_mm_comge_epi16(__m128i __A, __m128i __B) 602261991Sdim{ 603261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); 604261991Sdim} 605261991Sdim 606288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 607261991Sdim_mm_comeq_epi16(__m128i __A, __m128i __B) 608261991Sdim{ 609261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); 610261991Sdim} 611261991Sdim 612288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 613261991Sdim_mm_comneq_epi16(__m128i __A, __m128i __B) 614261991Sdim{ 615261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); 616261991Sdim} 617261991Sdim 618288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 619261991Sdim_mm_comfalse_epi16(__m128i __A, __m128i __B) 620261991Sdim{ 621261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); 622261991Sdim} 623261991Sdim 624288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 625261991Sdim_mm_comtrue_epi16(__m128i __A, __m128i __B) 626261991Sdim{ 627261991Sdim return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); 628261991Sdim} 629261991Sdim 630288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 631261991Sdim_mm_comlt_epi32(__m128i __A, __m128i __B) 632261991Sdim{ 633261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); 634261991Sdim} 635261991Sdim 636288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 637261991Sdim_mm_comle_epi32(__m128i __A, __m128i __B) 638261991Sdim{ 639261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); 640261991Sdim} 641261991Sdim 642288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 643261991Sdim_mm_comgt_epi32(__m128i __A, __m128i __B) 644261991Sdim{ 645261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); 646261991Sdim} 647261991Sdim 648288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 649261991Sdim_mm_comge_epi32(__m128i __A, __m128i __B) 650261991Sdim{ 651261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); 
652261991Sdim} 653261991Sdim 654288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 655261991Sdim_mm_comeq_epi32(__m128i __A, __m128i __B) 656261991Sdim{ 657261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); 658261991Sdim} 659261991Sdim 660288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 661261991Sdim_mm_comneq_epi32(__m128i __A, __m128i __B) 662261991Sdim{ 663261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); 664261991Sdim} 665261991Sdim 666288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 667261991Sdim_mm_comfalse_epi32(__m128i __A, __m128i __B) 668261991Sdim{ 669261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); 670261991Sdim} 671261991Sdim 672288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 673261991Sdim_mm_comtrue_epi32(__m128i __A, __m128i __B) 674261991Sdim{ 675261991Sdim return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); 676261991Sdim} 677261991Sdim 678288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 679261991Sdim_mm_comlt_epi64(__m128i __A, __m128i __B) 680261991Sdim{ 681261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); 682261991Sdim} 683261991Sdim 684288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 685261991Sdim_mm_comle_epi64(__m128i __A, __m128i __B) 686261991Sdim{ 687261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); 688261991Sdim} 689261991Sdim 690288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 691261991Sdim_mm_comgt_epi64(__m128i __A, __m128i __B) 692261991Sdim{ 693261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); 694261991Sdim} 695261991Sdim 696288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 697261991Sdim_mm_comge_epi64(__m128i __A, __m128i __B) 698261991Sdim{ 699261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); 700261991Sdim} 701261991Sdim 702288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 703261991Sdim_mm_comeq_epi64(__m128i __A, __m128i __B) 704261991Sdim{ 705261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); 
706261991Sdim} 707261991Sdim 708288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 709261991Sdim_mm_comneq_epi64(__m128i __A, __m128i __B) 710261991Sdim{ 711261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); 712261991Sdim} 713261991Sdim 714288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 715261991Sdim_mm_comfalse_epi64(__m128i __A, __m128i __B) 716261991Sdim{ 717261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); 718261991Sdim} 719261991Sdim 720288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 721261991Sdim_mm_comtrue_epi64(__m128i __A, __m128i __B) 722261991Sdim{ 723261991Sdim return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); 724261991Sdim} 725261991Sdim 726341825Sdim#define _mm_permute2_pd(X, Y, C, I) \ 727296417Sdim (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ 728296417Sdim (__v2df)(__m128d)(Y), \ 729341825Sdim (__v2di)(__m128i)(C), (I)) 730239313Sdim 731341825Sdim#define _mm256_permute2_pd(X, Y, C, I) \ 732296417Sdim (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ 733296417Sdim (__v4df)(__m256d)(Y), \ 734341825Sdim (__v4di)(__m256i)(C), (I)) 735239313Sdim 736341825Sdim#define _mm_permute2_ps(X, Y, C, I) \ 737296417Sdim (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ 738341825Sdim (__v4si)(__m128i)(C), (I)) 739239313Sdim 740341825Sdim#define _mm256_permute2_ps(X, Y, C, I) \ 741296417Sdim (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ 742296417Sdim (__v8sf)(__m256)(Y), \ 743341825Sdim (__v8si)(__m256i)(C), (I)) 744239313Sdim 745288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 746239313Sdim_mm_frcz_ss(__m128 __A) 747239313Sdim{ 748239313Sdim return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); 749239313Sdim} 750239313Sdim 751288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 752239313Sdim_mm_frcz_sd(__m128d __A) 753239313Sdim{ 754239313Sdim return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); 755239313Sdim} 756239313Sdim 757288943Sdimstatic 
__inline__ __m128 __DEFAULT_FN_ATTRS 758239313Sdim_mm_frcz_ps(__m128 __A) 759239313Sdim{ 760239313Sdim return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); 761239313Sdim} 762239313Sdim 763288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 764239313Sdim_mm_frcz_pd(__m128d __A) 765239313Sdim{ 766239313Sdim return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); 767239313Sdim} 768239313Sdim 769341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 770239313Sdim_mm256_frcz_ps(__m256 __A) 771239313Sdim{ 772239313Sdim return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); 773239313Sdim} 774239313Sdim 775341825Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 776239313Sdim_mm256_frcz_pd(__m256d __A) 777239313Sdim{ 778239313Sdim return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); 779239313Sdim} 780239313Sdim 781288943Sdim#undef __DEFAULT_FN_ATTRS 782341825Sdim#undef __DEFAULT_FN_ATTRS256 783288943Sdim 784239313Sdim#endif /* __XOPINTRIN_H */ 785