/* Copyright (C) 2017-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _GFNIINTRIN_H_INCLUDED
#define _GFNIINTRIN_H_INCLUDED

/* 128-bit GFNI intrinsics (GFNI + SSE2).  The affine intrinsics take a
   constant immediate, so they are real functions only under __OPTIMIZE__
   and macros otherwise (the builtin requires a literal immediate).  */
#if !defined(__GFNI__) || !defined(__SSE2__)
#pragma GCC push_options
#pragma GCC target("gfni,sse2")
#define __DISABLE_GFNI__
#endif /* __GFNI__ */

/* Multiply packed bytes in GF(2^8).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi ((__v16qi) __A,
						    (__v16qi) __B);
}

#ifdef __OPTIMIZE__
/* Affine transform of the inverse of packed bytes in GF(2^8).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
							   (__v16qi) __B,
							   __C);
}

/* Affine transform of packed bytes in GF(2^8).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
							(__v16qi) __B, __C);
}
#else
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C)				\
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
						    (__v16qi)(__m128i)(B), \
						    (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C)				\
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), \
						  (__v16qi)(__m128i)(B), \
						  (int)(C)))
#endif

#ifdef __DISABLE_GFNI__
#undef __DISABLE_GFNI__
#pragma GCC pop_options
#endif /* __DISABLE_GFNI__ */

/* 256-bit GFNI intrinsics (GFNI + AVX).  */
#if !defined(__GFNI__) || !defined(__AVX__)
#pragma GCC push_options
#pragma GCC target("gfni,avx")
#define __DISABLE_GFNIAVX__
#endif /* __GFNIAVX__ */

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
						    (__v32qi) __B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
							   (__v32qi) __B,
							   __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
							(__v32qi) __B, __C);
}
#else
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C)			\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
						    (__v32qi)(__m256i)(B), \
						    (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C)				\
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), \
						  (__v32qi)(__m256i)(B), \
						  (int)(C)))
#endif

#ifdef __DISABLE_GFNIAVX__
#undef __DISABLE_GFNIAVX__
#pragma GCC pop_options
#endif /* __GFNIAVX__ */

/* Masked 128-bit GFNI intrinsics (GFNI + AVX512VL).  Mask variants merge
   into __A under write-mask __B; maskz variants zero masked-off lanes.  */
#if !defined(__GFNI__) || !defined(__AVX512VL__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512vl")
#define __DISABLE_GFNIAVX512VL__
#endif /* __GFNIAVX512VL__ */

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
							 (__v16qi) __D,
							 (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
			(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
}

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				    __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
								(__v16qi) __D,
								__E,
								(__v16qi)__A,
								__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				     const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
						(__v16qi) __C, __D,
						(__v16qi) _mm_setzero_si128 (),
						__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				 __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
					(__v16qi) __D, __E, (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				  const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
		(__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
}
#else
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		\
	(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D),			\
	(int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)			\
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		\
	(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C),			\
	(int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (),		\
	(__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)			\
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
	(__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			\
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
	(__v16qi)(__m128i)(C), (int)(D),				\
	(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#endif

#ifdef __DISABLE_GFNIAVX512VL__
#undef __DISABLE_GFNIAVX512VL__
#pragma GCC pop_options
#endif /* __GFNIAVX512VL__ */

/* Masked 256-bit GFNI intrinsics (GFNI + AVX512VL + AVX512BW).  */
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512vl,avx512bw")
#define __DISABLE_GFNIAVX512VLBW__
#endif /* __GFNIAVX512VLBW__ */

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
			   __m256i __D)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
							 (__v32qi) __D,
							 (__v32qi)__A, __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
		(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
				       __m256i __C, __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
								(__v32qi) __D,
								__E,
								(__v32qi)__A,
								__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
					__m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
			(__v32qi) __C, __D,
			(__v32qi) _mm256_setzero_si256 (), __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
				    __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
							     (__v32qi) __D,
							     __E,
							     (__v32qi)__A,
							     __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
				     __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
		(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
}
#else
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
	(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),		\
	(__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		\
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
	(__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			\
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
	(__v32qi)(__m256i)(C), (int)(D),				\
	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#endif

#ifdef __DISABLE_GFNIAVX512VLBW__
#undef __DISABLE_GFNIAVX512VLBW__
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */

/* 512-bit GFNI intrinsics, plain and masked (GFNI + AVX512F + AVX512BW).  */
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512f,avx512bw")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
			   __m512i __D)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
				(__v64qi) __D, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
		(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
						    (__v64qi) __B);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				       __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
								(__v64qi) __D,
								__E,
								(__v64qi)__A,
								__B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
					__m512i __C, const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
			(__v64qi) __C, __D,
			(__v64qi) _mm512_setzero_si512 (), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
							   (__v64qi) __B, __C);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				    __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
					(__v64qi) __D, __E, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
				     const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
		(__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
							(__v64qi) __B, __C);
}
#else
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
	(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E),		\
	(__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		\
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
	(__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			\
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
	(__v64qi)(__m512i)(C), (int)(D),				\
	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#endif

#ifdef __DISABLE_GFNIAVX512FBW__
#undef __DISABLE_GFNIAVX512FBW__
#pragma GCC pop_options
#endif /* __GFNIAVX512FBW__ */

#endif /* _GFNIINTRIN_H_INCLUDED */