/* fmaintrin.h revision 341825 */
176259Sgreen/*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== 292555Sdes * 376259Sgreen * Permission is hereby granted, free of charge, to any person obtaining a copy 476259Sgreen * of this software and associated documentation files (the "Software"), to deal 576259Sgreen * in the Software without restriction, including without limitation the rights 676259Sgreen * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 776259Sgreen * copies of the Software, and to permit persons to whom the Software is 876259Sgreen * furnished to do so, subject to the following conditions: 976259Sgreen * 1076259Sgreen * The above copyright notice and this permission notice shall be included in 1176259Sgreen * all copies or substantial portions of the Software. 1276259Sgreen * 1376259Sgreen * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1476259Sgreen * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1576259Sgreen * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1676259Sgreen * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1776259Sgreen * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1876259Sgreen * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 1976259Sgreen * THE SOFTWARE. 2076259Sgreen * 2176259Sgreen *===-----------------------------------------------------------------------=== 2276259Sgreen */ 2376259Sgreen 2476259Sgreen#ifndef __IMMINTRIN_H 2576259Sgreen#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 2676259Sgreen#endif 2799060Sdes 2876259Sgreen#ifndef __FMAINTRIN_H 2976259Sgreen#define __FMAINTRIN_H 3076259Sgreen 3176259Sgreen/* Define the default attributes for the functions in this file. 
*/ 3276259Sgreen#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) 3376259Sgreen#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) 3476259Sgreen 3592555Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 3676259Sgreen_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 3776259Sgreen{ 3876259Sgreen return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 3976259Sgreen} 4076259Sgreen 4176259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 4298937Sdes_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 4398937Sdes{ 4498937Sdes return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 4598937Sdes} 4698937Sdes 4798937Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 4876259Sgreen_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 4992555Sdes{ 5092555Sdes return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 5176259Sgreen} 5292555Sdes 5392555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 5476259Sgreen_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 5576259Sgreen{ 5699060Sdes return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); 5776259Sgreen} 5876259Sgreen 5999060Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 6076259Sgreen_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 6176259Sgreen{ 6276259Sgreen return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 6376259Sgreen} 6476259Sgreen 6576259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 6676259Sgreen_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 6776259Sgreen{ 6899060Sdes return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 6976259Sgreen} 7076259Sgreen 7176259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 7276259Sgreen_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 7376259Sgreen{ 7476259Sgreen return 
(__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 7576259Sgreen} 7676259Sgreen 7776259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 7876259Sgreen_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 7976259Sgreen{ 8076259Sgreen return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); 8176259Sgreen} 8276259Sgreen 8376259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 8476259Sgreen_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 8576259Sgreen{ 8676259Sgreen return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 8792555Sdes} 8892555Sdes 8976259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 9076259Sgreen_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 9176259Sgreen{ 9276259Sgreen return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); 9376259Sgreen} 9476259Sgreen 9576259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 9692555Sdes_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 9776259Sgreen{ 9876259Sgreen return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); 9992555Sdes} 10098675Sdes 10192555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 10292555Sdes_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 10392555Sdes{ 10492555Sdes return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); 10576259Sgreen} 10676259Sgreen 10776259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 10876259Sgreen_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 10976259Sgreen{ 11076259Sgreen return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 11192555Sdes} 11276259Sgreen 11376259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 11492555Sdes_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 11592555Sdes{ 11692555Sdes return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); 11792555Sdes} 11892555Sdes 11976259Sgreenstatic __inline__ __m128 
__DEFAULT_FN_ATTRS128 12076259Sgreen_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 12176259Sgreen{ 12298937Sdes return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); 12392555Sdes} 12492555Sdes 12592555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 12692555Sdes_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 12792555Sdes{ 12892555Sdes return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); 12992555Sdes} 13076259Sgreen 13176259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 13292555Sdes_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 13376259Sgreen{ 13476259Sgreen return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 13592555Sdes} 13676259Sgreen 13776259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 13892555Sdes_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 13992555Sdes{ 14092555Sdes return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 14192555Sdes} 14292555Sdes 14376259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128 14492555Sdes_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 14576259Sgreen{ 14692555Sdes return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 14776259Sgreen} 14876259Sgreen 14992555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128 15076259Sgreen_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 15176259Sgreen{ 15276259Sgreen return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 15376259Sgreen} 15476259Sgreen 15576259Sgreenstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 15676259Sgreen_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 15776259Sgreen{ 15876259Sgreen return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 15976259Sgreen} 16076259Sgreen 16176259Sgreenstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 16276259Sgreen_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 16376259Sgreen{ 16476259Sgreen 
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 16576259Sgreen} 16676259Sgreen 16792555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 16892555Sdes_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 16992555Sdes{ 17092555Sdes return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 17192555Sdes} 17292555Sdes 17392555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 17492555Sdes_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 17592555Sdes{ 17692555Sdes return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 17792555Sdes} 17898675Sdes 17992555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 18092555Sdes_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 18176259Sgreen{ 18276259Sgreen return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 18376259Sgreen} 18476259Sgreen 18576259Sgreenstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 18676259Sgreen_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 18798675Sdes{ 18898675Sdes return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); 18992555Sdes} 19092555Sdes 19192555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 19276259Sgreen_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 19392555Sdes{ 19492555Sdes return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 19576259Sgreen} 19692555Sdes 19792555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 19892555Sdes_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 19992555Sdes{ 20076259Sgreen return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); 20192555Sdes} 20292555Sdes 20392555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 20492555Sdes_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 20592555Sdes{ 20692555Sdes return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 20792555Sdes} 20892555Sdes 20992555Sdesstatic 
__inline__ __m256d __DEFAULT_FN_ATTRS256 21092555Sdes_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 21192555Sdes{ 21292555Sdes return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 21392555Sdes} 21492555Sdes 21576259Sgreenstatic __inline__ __m256 __DEFAULT_FN_ATTRS256 21676259Sgreen_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 21776259Sgreen{ 21892555Sdes return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 21976259Sgreen} 22092555Sdes 22192555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256 22292555Sdes_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 22376259Sgreen{ 22492555Sdes return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 22598675Sdes} 22692555Sdes 22792555Sdes#undef __DEFAULT_FN_ATTRS128 22876259Sgreen#undef __DEFAULT_FN_ATTRS256 22992555Sdes 23098675Sdes#endif /* __FMAINTRIN_H */ 23192555Sdes