1234287Sdim/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 2234287Sdim * 3234287Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4234287Sdim * of this software and associated documentation files (the "Software"), to deal 5234287Sdim * in the Software without restriction, including without limitation the rights 6234287Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7234287Sdim * copies of the Software, and to permit persons to whom the Software is 8234287Sdim * furnished to do so, subject to the following conditions: 9234287Sdim * 10234287Sdim * The above copyright notice and this permission notice shall be included in 11234287Sdim * all copies or substantial portions of the Software. 12234287Sdim * 13234287Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14234287Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15234287Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16234287Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17234287Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18234287Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19234287Sdim * THE SOFTWARE. 20234287Sdim * 21234287Sdim *===-----------------------------------------------------------------------=== 22234287Sdim */ 23234287Sdim 24234287Sdim#ifndef __X86INTRIN_H 25234287Sdim#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." 26234287Sdim#endif 27234287Sdim 28234287Sdim#ifndef __FMA4INTRIN_H 29234287Sdim#define __FMA4INTRIN_H 30234287Sdim 31234287Sdim#include <pmmintrin.h> 32234287Sdim 33288943Sdim/* Define the default attributes for the functions in this file. */ 34296417Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) 35288943Sdim 36288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 37234287Sdim_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) 38234287Sdim{ 39234287Sdim return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); 40234287Sdim} 41234287Sdim 42288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 43234287Sdim_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) 44234287Sdim{ 45234287Sdim return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); 46234287Sdim} 47234287Sdim 48288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 49234287Sdim_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) 50234287Sdim{ 51234287Sdim return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); 52234287Sdim} 53234287Sdim 54288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 55234287Sdim_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) 56234287Sdim{ 57234287Sdim return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); 58234287Sdim} 59234287Sdim 60288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 61234287Sdim_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) 62234287Sdim{ 63234287Sdim return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); 64234287Sdim} 65234287Sdim 66288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 67234287Sdim_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) 68234287Sdim{ 69234287Sdim return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); 70234287Sdim} 71234287Sdim 72288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 73234287Sdim_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) 74234287Sdim{ 75234287Sdim return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); 76234287Sdim} 77234287Sdim 78288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 79234287Sdim_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) 80234287Sdim{ 81234287Sdim return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); 82234287Sdim} 83234287Sdim 84288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 85234287Sdim_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) 86234287Sdim{ 87234287Sdim return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); 88234287Sdim} 89234287Sdim 90288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 91234287Sdim_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) 92234287Sdim{ 93234287Sdim return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); 94234287Sdim} 95234287Sdim 96288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 97234287Sdim_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) 98234287Sdim{ 99234287Sdim return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); 100234287Sdim} 101234287Sdim 102288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 103234287Sdim_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) 104234287Sdim{ 105234287Sdim return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); 106234287Sdim} 107234287Sdim 108288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 109234287Sdim_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) 110234287Sdim{ 111234287Sdim return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); 112234287Sdim} 113234287Sdim 114288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 115234287Sdim_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) 116234287Sdim{ 117234287Sdim return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); 118234287Sdim} 119234287Sdim 120288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 121234287Sdim_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) 122234287Sdim{ 123234287Sdim return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); 124234287Sdim} 125234287Sdim 126288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 127234287Sdim_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) 128234287Sdim{ 129234287Sdim return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); 130234287Sdim} 131234287Sdim 132288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 133234287Sdim_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) 134234287Sdim{ 135234287Sdim return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); 136234287Sdim} 137234287Sdim 138288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 139234287Sdim_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) 140234287Sdim{ 141234287Sdim return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); 142234287Sdim} 143234287Sdim 144288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS 145234287Sdim_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) 146234287Sdim{ 147234287Sdim return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); 148234287Sdim} 149234287Sdim 150288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS 151234287Sdim_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) 152234287Sdim{ 153234287Sdim return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); 154234287Sdim} 155234287Sdim 156288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 157234287Sdim_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) 158234287Sdim{ 159234287Sdim return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); 160234287Sdim} 161234287Sdim 162288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 163234287Sdim_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) 164234287Sdim{ 165234287Sdim return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); 166234287Sdim} 167234287Sdim 168288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 169234287Sdim_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) 170234287Sdim{ 171234287Sdim return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); 172234287Sdim} 173234287Sdim 174288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 175234287Sdim_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) 176234287Sdim{ 177234287Sdim return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); 178234287Sdim} 179234287Sdim 180288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 181234287Sdim_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) 182234287Sdim{ 183234287Sdim return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); 184234287Sdim} 185234287Sdim 186288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 187234287Sdim_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) 188234287Sdim{ 189234287Sdim return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); 190234287Sdim} 191234287Sdim 192288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 193234287Sdim_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) 194234287Sdim{ 195234287Sdim return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); 196234287Sdim} 197234287Sdim 198288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 199234287Sdim_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) 200234287Sdim{ 201234287Sdim return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); 202234287Sdim} 203234287Sdim 204288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 205234287Sdim_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) 206234287Sdim{ 207234287Sdim return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); 208234287Sdim} 209234287Sdim 210288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 211234287Sdim_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) 212234287Sdim{ 213234287Sdim return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); 214234287Sdim} 215234287Sdim 216288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS 217234287Sdim_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) 218234287Sdim{ 219234287Sdim return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); 220234287Sdim} 221234287Sdim 222288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS 223234287Sdim_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) 224234287Sdim{ 225234287Sdim return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); 226234287Sdim} 227234287Sdim 228288943Sdim#undef __DEFAULT_FN_ATTRS 229288943Sdim 230234287Sdim#endif /* __FMA4INTRIN_H */ 231