1239313Sdim/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 2239313Sdim * 3239313Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4239313Sdim * of this software and associated documentation files (the "Software"), to deal 5239313Sdim * in the Software without restriction, including without limitation the rights 6239313Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7239313Sdim * copies of the Software, and to permit persons to whom the Software is 8239313Sdim * furnished to do so, subject to the following conditions: 9239313Sdim * 10239313Sdim * The above copyright notice and this permission notice shall be included in 11239313Sdim * all copies or substantial portions of the Software. 12239313Sdim * 13239313Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14239313Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15239313Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16239313Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17239313Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18239313Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19239313Sdim * THE SOFTWARE. 20239313Sdim * 21239313Sdim *===-----------------------------------------------------------------------=== 22239313Sdim */ 23239313Sdim 24239313Sdim#ifndef __IMMINTRIN_H 25239313Sdim#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 26239313Sdim#endif 27239313Sdim 28239313Sdim#ifndef __FMAINTRIN_H 29239313Sdim#define __FMAINTRIN_H 30239313Sdim 31239313Sdim#ifndef __FMA__ 32239313Sdim# error "FMA instruction set is not enabled" 33239313Sdim#else 34239313Sdim 35239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 36239313Sdim_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 37239313Sdim{ 38239313Sdim return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); 39239313Sdim} 40239313Sdim 41239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 42239313Sdim_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 43239313Sdim{ 44239313Sdim return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); 45239313Sdim} 46239313Sdim 47239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 48239313Sdim_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 49239313Sdim{ 50239313Sdim return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); 51239313Sdim} 52239313Sdim 53239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 54239313Sdim_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 55239313Sdim{ 56239313Sdim return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); 57239313Sdim} 58239313Sdim 59239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 60239313Sdim_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 61239313Sdim{ 62239313Sdim return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); 63239313Sdim} 64239313Sdim 65239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 66239313Sdim_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 67239313Sdim{ 68239313Sdim return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); 69239313Sdim} 70239313Sdim 71239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 72239313Sdim_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 73239313Sdim{ 74239313Sdim return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); 75239313Sdim} 76239313Sdim 77239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 78239313Sdim_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 79239313Sdim{ 80239313Sdim return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); 81239313Sdim} 82239313Sdim 83239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 84239313Sdim_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 85239313Sdim{ 86239313Sdim return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); 87239313Sdim} 88239313Sdim 89239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 90239313Sdim_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 91239313Sdim{ 92239313Sdim return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); 93239313Sdim} 94239313Sdim 95239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 96239313Sdim_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 97239313Sdim{ 98239313Sdim return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); 99239313Sdim} 100239313Sdim 101239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 102239313Sdim_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 103239313Sdim{ 104239313Sdim return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); 105239313Sdim} 106239313Sdim 107239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 108239313Sdim_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 109239313Sdim{ 110239313Sdim return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); 111239313Sdim} 112239313Sdim 113239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 114239313Sdim_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 115239313Sdim{ 116239313Sdim return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); 117239313Sdim} 118239313Sdim 119239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 120239313Sdim_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 121239313Sdim{ 122239313Sdim return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); 123239313Sdim} 124239313Sdim 125239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 126239313Sdim_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 127239313Sdim{ 128239313Sdim return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); 129239313Sdim} 130239313Sdim 131239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 132239313Sdim_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 133239313Sdim{ 134239313Sdim return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); 135239313Sdim} 136239313Sdim 137239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 138239313Sdim_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 139239313Sdim{ 140239313Sdim return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); 141239313Sdim} 142239313Sdim 143239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 144239313Sdim_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 145239313Sdim{ 146239313Sdim return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); 147239313Sdim} 148239313Sdim 149239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 150239313Sdim_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 151239313Sdim{ 152239313Sdim return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); 153239313Sdim} 154239313Sdim 155239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 156239313Sdim_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 157239313Sdim{ 158239313Sdim return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); 159239313Sdim} 160239313Sdim 161239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 162239313Sdim_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 163239313Sdim{ 164239313Sdim return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); 165239313Sdim} 166239313Sdim 167239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 168239313Sdim_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 169239313Sdim{ 170239313Sdim return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); 171239313Sdim} 172239313Sdim 173239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 174239313Sdim_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 175239313Sdim{ 176239313Sdim return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); 177239313Sdim} 178239313Sdim 179239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 180239313Sdim_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 181239313Sdim{ 182239313Sdim return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); 183239313Sdim} 184239313Sdim 185239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 186239313Sdim_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 187239313Sdim{ 188239313Sdim return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); 189239313Sdim} 190239313Sdim 191239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 192239313Sdim_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 193239313Sdim{ 194239313Sdim return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); 195239313Sdim} 196239313Sdim 197239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 198239313Sdim_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 199239313Sdim{ 200239313Sdim return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); 201239313Sdim} 202239313Sdim 203239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 204239313Sdim_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 205239313Sdim{ 206239313Sdim return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); 207239313Sdim} 208239313Sdim 209239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 210239313Sdim_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 211239313Sdim{ 212239313Sdim return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); 213239313Sdim} 214239313Sdim 215239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 216239313Sdim_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 217239313Sdim{ 218239313Sdim return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); 219239313Sdim} 220239313Sdim 221239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 222239313Sdim_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 223239313Sdim{ 224239313Sdim return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); 225239313Sdim} 226239313Sdim 227239313Sdim#endif /* __FMA__ */ 228239313Sdim 229239313Sdim#endif /* __FMAINTRIN_H */ 230