xopintrin.h revision 341825
1/*===---- xopintrin.h - XOP intrinsics -------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __X86INTRIN_H 25#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26#endif 27 28#ifndef __XOPINTRIN_H 29#define __XOPINTRIN_H 30 31#include <fma4intrin.h> 32 33/* Define the default attributes for the functions in this file. */ 34#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) 35#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) 36 37static __inline__ __m128i __DEFAULT_FN_ATTRS 38_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 39{ 40 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 41} 42 43static __inline__ __m128i __DEFAULT_FN_ATTRS 44_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 45{ 46 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 47} 48 49static __inline__ __m128i __DEFAULT_FN_ATTRS 50_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 51{ 52 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 53} 54 55static __inline__ __m128i __DEFAULT_FN_ATTRS 56_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 57{ 58 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 59} 60 61static __inline__ __m128i __DEFAULT_FN_ATTRS 62_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 63{ 64 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 65} 66 67static __inline__ __m128i __DEFAULT_FN_ATTRS 68_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 69{ 70 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 71} 72 73static __inline__ __m128i __DEFAULT_FN_ATTRS 74_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 75{ 76 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 77} 78 79static __inline__ __m128i __DEFAULT_FN_ATTRS 80_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 81{ 82 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 83} 84 85static __inline__ __m128i __DEFAULT_FN_ATTRS 86_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 87{ 88 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 89} 90 91static __inline__ __m128i __DEFAULT_FN_ATTRS 92_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 93{ 94 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 95} 96 97static __inline__ __m128i __DEFAULT_FN_ATTRS 98_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 99{ 100 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 101} 102 103static __inline__ __m128i __DEFAULT_FN_ATTRS 104_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 105{ 106 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 107} 108 109static __inline__ __m128i __DEFAULT_FN_ATTRS 110_mm_haddw_epi8(__m128i __A) 111{ 112 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); 113} 114 115static __inline__ __m128i __DEFAULT_FN_ATTRS 116_mm_haddd_epi8(__m128i __A) 117{ 118 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); 119} 120 121static __inline__ __m128i __DEFAULT_FN_ATTRS 122_mm_haddq_epi8(__m128i __A) 123{ 124 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); 125} 126 127static __inline__ __m128i __DEFAULT_FN_ATTRS 128_mm_haddd_epi16(__m128i __A) 129{ 130 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); 131} 132 133static __inline__ __m128i __DEFAULT_FN_ATTRS 134_mm_haddq_epi16(__m128i __A) 135{ 136 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); 137} 138 139static __inline__ __m128i __DEFAULT_FN_ATTRS 140_mm_haddq_epi32(__m128i __A) 141{ 142 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); 143} 144 145static __inline__ __m128i __DEFAULT_FN_ATTRS 146_mm_haddw_epu8(__m128i __A) 147{ 148 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); 149} 150 151static __inline__ __m128i __DEFAULT_FN_ATTRS 152_mm_haddd_epu8(__m128i __A) 153{ 154 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); 155} 156 157static __inline__ __m128i __DEFAULT_FN_ATTRS 158_mm_haddq_epu8(__m128i __A) 159{ 160 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); 161} 162 163static __inline__ __m128i __DEFAULT_FN_ATTRS 164_mm_haddd_epu16(__m128i __A) 165{ 166 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); 167} 168 169static __inline__ __m128i __DEFAULT_FN_ATTRS 170_mm_haddq_epu16(__m128i __A) 171{ 172 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); 173} 174 175static __inline__ __m128i __DEFAULT_FN_ATTRS 176_mm_haddq_epu32(__m128i __A) 177{ 178 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); 179} 180 181static __inline__ __m128i __DEFAULT_FN_ATTRS 182_mm_hsubw_epi8(__m128i __A) 183{ 184 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); 185} 186 187static __inline__ __m128i __DEFAULT_FN_ATTRS 188_mm_hsubd_epi16(__m128i __A) 189{ 190 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); 191} 192 193static __inline__ __m128i __DEFAULT_FN_ATTRS 194_mm_hsubq_epi32(__m128i __A) 195{ 196 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); 197} 198 199static __inline__ __m128i __DEFAULT_FN_ATTRS 200_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 201{ 202 return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); 203} 204 205static __inline__ __m256i __DEFAULT_FN_ATTRS256 206_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 207{ 208 return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); 209} 210 211static __inline__ __m128i __DEFAULT_FN_ATTRS 212_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 213{ 214 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 215} 216 217static __inline__ __m128i __DEFAULT_FN_ATTRS 218_mm_rot_epi8(__m128i __A, __m128i __B) 219{ 220 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); 221} 222 223static __inline__ __m128i __DEFAULT_FN_ATTRS 224_mm_rot_epi16(__m128i __A, __m128i __B) 225{ 226 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); 227} 228 229static __inline__ __m128i __DEFAULT_FN_ATTRS 230_mm_rot_epi32(__m128i __A, __m128i __B) 231{ 232 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); 233} 234 235static __inline__ __m128i __DEFAULT_FN_ATTRS 236_mm_rot_epi64(__m128i __A, __m128i __B) 237{ 238 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); 239} 240 241#define _mm_roti_epi8(A, N) \ 242 (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)) 243 244#define _mm_roti_epi16(A, N) \ 245 (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)) 246 247#define _mm_roti_epi32(A, N) \ 248 (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)) 249 250#define _mm_roti_epi64(A, N) \ 251 (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)) 252 253static __inline__ __m128i __DEFAULT_FN_ATTRS 254_mm_shl_epi8(__m128i __A, __m128i __B) 255{ 256 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); 257} 258 259static __inline__ __m128i __DEFAULT_FN_ATTRS 260_mm_shl_epi16(__m128i __A, __m128i __B) 261{ 262 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); 263} 264 265static __inline__ __m128i __DEFAULT_FN_ATTRS 266_mm_shl_epi32(__m128i __A, __m128i __B) 267{ 268 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); 269} 270 271static __inline__ __m128i __DEFAULT_FN_ATTRS 272_mm_shl_epi64(__m128i __A, __m128i __B) 273{ 274 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); 275} 276 277static __inline__ __m128i __DEFAULT_FN_ATTRS 278_mm_sha_epi8(__m128i __A, __m128i __B) 279{ 280 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); 281} 282 283static __inline__ __m128i __DEFAULT_FN_ATTRS 284_mm_sha_epi16(__m128i __A, __m128i __B) 285{ 286 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); 287} 288 289static __inline__ __m128i __DEFAULT_FN_ATTRS 290_mm_sha_epi32(__m128i __A, __m128i __B) 291{ 292 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); 293} 294 295static __inline__ __m128i __DEFAULT_FN_ATTRS 296_mm_sha_epi64(__m128i __A, __m128i __B) 297{ 298 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); 299} 300 301#define _mm_com_epu8(A, B, N) \ 302 (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ 303 (__v16qi)(__m128i)(B), (N)) 304 305#define _mm_com_epu16(A, B, N) \ 306 (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ 307 (__v8hi)(__m128i)(B), (N)) 308 309#define _mm_com_epu32(A, B, N) \ 310 (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ 311 (__v4si)(__m128i)(B), (N)) 312 313#define _mm_com_epu64(A, B, N) \ 314 (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ 315 (__v2di)(__m128i)(B), (N)) 316 317#define _mm_com_epi8(A, B, N) \ 318 (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ 319 (__v16qi)(__m128i)(B), (N)) 320 321#define _mm_com_epi16(A, B, N) \ 322 (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ 323 (__v8hi)(__m128i)(B), (N)) 324 325#define _mm_com_epi32(A, B, N) \ 326 (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ 327 (__v4si)(__m128i)(B), (N)) 328 329#define _mm_com_epi64(A, B, N) \ 330 (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ 331 (__v2di)(__m128i)(B), (N)) 332 333#define _MM_PCOMCTRL_LT 0 334#define _MM_PCOMCTRL_LE 1 335#define _MM_PCOMCTRL_GT 2 336#define _MM_PCOMCTRL_GE 3 337#define _MM_PCOMCTRL_EQ 4 338#define _MM_PCOMCTRL_NEQ 5 339#define _MM_PCOMCTRL_FALSE 6 340#define _MM_PCOMCTRL_TRUE 7 341 342static __inline__ __m128i __DEFAULT_FN_ATTRS 343_mm_comlt_epu8(__m128i __A, __m128i __B) 344{ 345 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); 346} 347 348static __inline__ __m128i __DEFAULT_FN_ATTRS 349_mm_comle_epu8(__m128i __A, __m128i __B) 350{ 351 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); 352} 353 354static __inline__ __m128i __DEFAULT_FN_ATTRS 355_mm_comgt_epu8(__m128i __A, __m128i __B) 356{ 357 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); 358} 359 360static __inline__ __m128i __DEFAULT_FN_ATTRS 361_mm_comge_epu8(__m128i __A, __m128i __B) 362{ 363 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); 364} 365 366static __inline__ __m128i __DEFAULT_FN_ATTRS 367_mm_comeq_epu8(__m128i __A, __m128i __B) 368{ 369 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); 370} 371 372static __inline__ __m128i __DEFAULT_FN_ATTRS 373_mm_comneq_epu8(__m128i __A, __m128i __B) 374{ 375 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); 376} 377 378static __inline__ __m128i __DEFAULT_FN_ATTRS 379_mm_comfalse_epu8(__m128i __A, __m128i __B) 380{ 381 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); 382} 383 384static __inline__ __m128i __DEFAULT_FN_ATTRS 385_mm_comtrue_epu8(__m128i __A, __m128i __B) 386{ 387 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); 388} 389 390static __inline__ __m128i __DEFAULT_FN_ATTRS 391_mm_comlt_epu16(__m128i __A, __m128i __B) 392{ 393 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); 394} 395 396static __inline__ __m128i __DEFAULT_FN_ATTRS 397_mm_comle_epu16(__m128i __A, __m128i __B) 398{ 399 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); 400} 401 402static __inline__ __m128i __DEFAULT_FN_ATTRS 403_mm_comgt_epu16(__m128i __A, __m128i __B) 404{ 405 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); 406} 407 408static __inline__ __m128i __DEFAULT_FN_ATTRS 409_mm_comge_epu16(__m128i __A, __m128i __B) 410{ 411 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); 412} 413 414static __inline__ __m128i __DEFAULT_FN_ATTRS 415_mm_comeq_epu16(__m128i __A, __m128i __B) 416{ 417 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); 418} 419 420static __inline__ __m128i __DEFAULT_FN_ATTRS 421_mm_comneq_epu16(__m128i __A, __m128i __B) 422{ 423 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); 424} 425 426static __inline__ __m128i __DEFAULT_FN_ATTRS 427_mm_comfalse_epu16(__m128i __A, __m128i __B) 428{ 429 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); 430} 431 432static __inline__ __m128i __DEFAULT_FN_ATTRS 433_mm_comtrue_epu16(__m128i __A, __m128i __B) 434{ 435 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); 436} 437 438static __inline__ __m128i __DEFAULT_FN_ATTRS 439_mm_comlt_epu32(__m128i __A, __m128i __B) 440{ 441 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); 442} 443 444static __inline__ __m128i __DEFAULT_FN_ATTRS 445_mm_comle_epu32(__m128i __A, __m128i __B) 446{ 447 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); 448} 449 450static __inline__ __m128i __DEFAULT_FN_ATTRS 451_mm_comgt_epu32(__m128i __A, __m128i __B) 452{ 453 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); 454} 455 456static __inline__ __m128i __DEFAULT_FN_ATTRS 457_mm_comge_epu32(__m128i __A, __m128i __B) 458{ 459 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); 460} 461 462static __inline__ __m128i __DEFAULT_FN_ATTRS 463_mm_comeq_epu32(__m128i __A, __m128i __B) 464{ 465 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); 466} 467 468static __inline__ __m128i __DEFAULT_FN_ATTRS 469_mm_comneq_epu32(__m128i __A, __m128i __B) 470{ 471 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); 472} 473 474static __inline__ __m128i __DEFAULT_FN_ATTRS 475_mm_comfalse_epu32(__m128i __A, __m128i __B) 476{ 477 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); 478} 479 480static __inline__ __m128i __DEFAULT_FN_ATTRS 481_mm_comtrue_epu32(__m128i __A, __m128i __B) 482{ 483 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); 484} 485 486static __inline__ __m128i __DEFAULT_FN_ATTRS 487_mm_comlt_epu64(__m128i __A, __m128i __B) 488{ 489 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); 490} 491 492static __inline__ __m128i __DEFAULT_FN_ATTRS 493_mm_comle_epu64(__m128i __A, __m128i __B) 494{ 495 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); 496} 497 498static __inline__ __m128i __DEFAULT_FN_ATTRS 499_mm_comgt_epu64(__m128i __A, __m128i __B) 500{ 501 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); 502} 503 504static __inline__ __m128i __DEFAULT_FN_ATTRS 505_mm_comge_epu64(__m128i __A, __m128i __B) 506{ 507 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); 508} 509 510static __inline__ __m128i __DEFAULT_FN_ATTRS 511_mm_comeq_epu64(__m128i __A, __m128i __B) 512{ 513 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); 514} 515 516static __inline__ __m128i __DEFAULT_FN_ATTRS 517_mm_comneq_epu64(__m128i __A, __m128i __B) 518{ 519 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); 520} 521 522static __inline__ __m128i __DEFAULT_FN_ATTRS 523_mm_comfalse_epu64(__m128i __A, __m128i __B) 524{ 525 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); 526} 527 528static __inline__ __m128i __DEFAULT_FN_ATTRS 529_mm_comtrue_epu64(__m128i __A, __m128i __B) 530{ 531 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); 532} 533 534static __inline__ __m128i __DEFAULT_FN_ATTRS 535_mm_comlt_epi8(__m128i __A, __m128i __B) 536{ 537 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); 538} 539 540static __inline__ __m128i __DEFAULT_FN_ATTRS 541_mm_comle_epi8(__m128i __A, __m128i __B) 542{ 543 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); 544} 545 546static __inline__ __m128i __DEFAULT_FN_ATTRS 547_mm_comgt_epi8(__m128i __A, __m128i __B) 548{ 549 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); 550} 551 552static __inline__ __m128i __DEFAULT_FN_ATTRS 553_mm_comge_epi8(__m128i __A, __m128i __B) 554{ 555 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); 556} 557 558static __inline__ __m128i __DEFAULT_FN_ATTRS 559_mm_comeq_epi8(__m128i __A, __m128i __B) 560{ 561 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); 562} 563 564static __inline__ __m128i __DEFAULT_FN_ATTRS 565_mm_comneq_epi8(__m128i __A, __m128i __B) 566{ 567 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); 568} 569 570static __inline__ __m128i __DEFAULT_FN_ATTRS 571_mm_comfalse_epi8(__m128i __A, __m128i __B) 572{ 573 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); 574} 575 576static __inline__ __m128i __DEFAULT_FN_ATTRS 577_mm_comtrue_epi8(__m128i __A, __m128i __B) 578{ 579 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); 580} 581 582static __inline__ __m128i __DEFAULT_FN_ATTRS 583_mm_comlt_epi16(__m128i __A, __m128i __B) 584{ 585 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); 586} 587 588static __inline__ __m128i __DEFAULT_FN_ATTRS 589_mm_comle_epi16(__m128i __A, __m128i __B) 590{ 591 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); 592} 593 594static __inline__ __m128i __DEFAULT_FN_ATTRS 595_mm_comgt_epi16(__m128i __A, __m128i __B) 596{ 597 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); 598} 599 600static __inline__ __m128i __DEFAULT_FN_ATTRS 601_mm_comge_epi16(__m128i __A, __m128i __B) 602{ 603 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); 604} 605 606static __inline__ __m128i __DEFAULT_FN_ATTRS 607_mm_comeq_epi16(__m128i __A, __m128i __B) 608{ 609 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); 610} 611 612static __inline__ __m128i __DEFAULT_FN_ATTRS 613_mm_comneq_epi16(__m128i __A, __m128i __B) 614{ 615 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); 616} 617 618static __inline__ __m128i __DEFAULT_FN_ATTRS 619_mm_comfalse_epi16(__m128i __A, __m128i __B) 620{ 621 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); 622} 623 624static __inline__ __m128i __DEFAULT_FN_ATTRS 625_mm_comtrue_epi16(__m128i __A, __m128i __B) 626{ 627 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); 628} 629 630static __inline__ __m128i __DEFAULT_FN_ATTRS 631_mm_comlt_epi32(__m128i __A, __m128i __B) 632{ 633 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); 634} 635 636static __inline__ __m128i __DEFAULT_FN_ATTRS 637_mm_comle_epi32(__m128i __A, __m128i __B) 638{ 639 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); 640} 641 642static __inline__ __m128i __DEFAULT_FN_ATTRS 643_mm_comgt_epi32(__m128i __A, __m128i __B) 644{ 645 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); 646} 647 648static __inline__ __m128i __DEFAULT_FN_ATTRS 649_mm_comge_epi32(__m128i __A, __m128i __B) 650{ 651 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); 652} 653 654static __inline__ __m128i __DEFAULT_FN_ATTRS 655_mm_comeq_epi32(__m128i __A, __m128i __B) 656{ 657 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); 658} 659 660static __inline__ __m128i __DEFAULT_FN_ATTRS 661_mm_comneq_epi32(__m128i __A, __m128i __B) 662{ 663 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); 664} 665 666static __inline__ __m128i __DEFAULT_FN_ATTRS 667_mm_comfalse_epi32(__m128i __A, __m128i __B) 668{ 669 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); 670} 671 672static __inline__ __m128i __DEFAULT_FN_ATTRS 673_mm_comtrue_epi32(__m128i __A, __m128i __B) 674{ 675 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); 676} 677 678static __inline__ __m128i __DEFAULT_FN_ATTRS 679_mm_comlt_epi64(__m128i __A, __m128i __B) 680{ 681 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); 682} 683 684static __inline__ __m128i __DEFAULT_FN_ATTRS 685_mm_comle_epi64(__m128i __A, __m128i __B) 686{ 687 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); 688} 689 690static __inline__ __m128i __DEFAULT_FN_ATTRS 691_mm_comgt_epi64(__m128i __A, __m128i __B) 692{ 693 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); 694} 695 696static __inline__ __m128i __DEFAULT_FN_ATTRS 697_mm_comge_epi64(__m128i __A, __m128i __B) 698{ 699 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); 700} 701 702static __inline__ __m128i __DEFAULT_FN_ATTRS 703_mm_comeq_epi64(__m128i __A, __m128i __B) 704{ 705 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); 706} 707 708static __inline__ __m128i __DEFAULT_FN_ATTRS 709_mm_comneq_epi64(__m128i __A, __m128i __B) 710{ 711 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); 712} 713 714static __inline__ __m128i __DEFAULT_FN_ATTRS 715_mm_comfalse_epi64(__m128i __A, __m128i __B) 716{ 717 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); 718} 719 720static __inline__ __m128i __DEFAULT_FN_ATTRS 721_mm_comtrue_epi64(__m128i __A, __m128i __B) 722{ 723 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); 724} 725 726#define _mm_permute2_pd(X, Y, C, I) \ 727 (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ 728 (__v2df)(__m128d)(Y), \ 729 (__v2di)(__m128i)(C), (I)) 730 731#define _mm256_permute2_pd(X, Y, C, I) \ 732 (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ 733 (__v4df)(__m256d)(Y), \ 734 (__v4di)(__m256i)(C), (I)) 735 736#define _mm_permute2_ps(X, Y, C, I) \ 737 (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ 738 (__v4si)(__m128i)(C), (I)) 739 740#define _mm256_permute2_ps(X, Y, C, I) \ 741 (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ 742 (__v8sf)(__m256)(Y), \ 743 (__v8si)(__m256i)(C), (I)) 744 745static __inline__ __m128 __DEFAULT_FN_ATTRS 746_mm_frcz_ss(__m128 __A) 747{ 748 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); 749} 750 751static __inline__ __m128d __DEFAULT_FN_ATTRS 752_mm_frcz_sd(__m128d __A) 753{ 754 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); 755} 756 757static __inline__ __m128 __DEFAULT_FN_ATTRS 758_mm_frcz_ps(__m128 __A) 759{ 760 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); 761} 762 763static __inline__ __m128d __DEFAULT_FN_ATTRS 764_mm_frcz_pd(__m128d __A) 765{ 766 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); 767} 768 769static __inline__ __m256 __DEFAULT_FN_ATTRS256 770_mm256_frcz_ps(__m256 __A) 771{ 772 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); 773} 774 775static __inline__ __m256d __DEFAULT_FN_ATTRS256 776_mm256_frcz_pd(__m256d __A) 777{ 778 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); 779} 780 781#undef __DEFAULT_FN_ATTRS 782#undef __DEFAULT_FN_ATTRS256 783 784#endif /* __XOPINTRIN_H */ 785