/* fmaintrin.h revision 341825 */
/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
2376259Sgreen
2476259Sgreen#ifndef __IMMINTRIN_H
2576259Sgreen#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
2676259Sgreen#endif
2799060Sdes
2876259Sgreen#ifndef __FMAINTRIN_H
2976259Sgreen#define __FMAINTRIN_H
3076259Sgreen
/* Attribute bundles applied to every function defined in this file.  Both
 * carry __always_inline__, __nodebug__ and __target__("fma"); they differ only
 * in the __min_vector_width__ argument (128 for the __m128/__m128d wrappers,
 * 256 for the __m256/__m256d wrappers).
 */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"), \
                 __min_vector_width__(256)))
3476259Sgreen
3592555Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
3676259Sgreen_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
3776259Sgreen{
3876259Sgreen  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
3976259Sgreen}
4076259Sgreen
4176259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
4298937Sdes_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
4398937Sdes{
4498937Sdes  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
4598937Sdes}
4698937Sdes
4798937Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
4876259Sgreen_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
4992555Sdes{
5092555Sdes  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
5176259Sgreen}
5292555Sdes
5392555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
5476259Sgreen_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
5576259Sgreen{
5699060Sdes  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
5776259Sgreen}
5876259Sgreen
5999060Sdesstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
6076259Sgreen_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
6176259Sgreen{
6276259Sgreen  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
6376259Sgreen}
6476259Sgreen
6576259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
6676259Sgreen_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
6776259Sgreen{
6899060Sdes  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
6976259Sgreen}
7076259Sgreen
7176259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
7276259Sgreen_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
7376259Sgreen{
7476259Sgreen  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
7576259Sgreen}
7676259Sgreen
7776259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
7876259Sgreen_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
7976259Sgreen{
8076259Sgreen  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
8176259Sgreen}
8276259Sgreen
8376259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
8476259Sgreen_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
8576259Sgreen{
8676259Sgreen  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
8792555Sdes}
8892555Sdes
8976259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
9076259Sgreen_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
9176259Sgreen{
9276259Sgreen  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
9376259Sgreen}
9476259Sgreen
9576259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
9692555Sdes_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
9776259Sgreen{
9876259Sgreen  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
9992555Sdes}
10098675Sdes
10192555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
10292555Sdes_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
10392555Sdes{
10492555Sdes  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
10576259Sgreen}
10676259Sgreen
10776259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
10876259Sgreen_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
10976259Sgreen{
11076259Sgreen  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
11192555Sdes}
11276259Sgreen
11376259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
11492555Sdes_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
11592555Sdes{
11692555Sdes  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
11792555Sdes}
11892555Sdes
11976259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
12076259Sgreen_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
12176259Sgreen{
12298937Sdes  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
12392555Sdes}
12492555Sdes
12592555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
12692555Sdes_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
12792555Sdes{
12892555Sdes  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
12992555Sdes}
13076259Sgreen
13176259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
13292555Sdes_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
13376259Sgreen{
13476259Sgreen  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
13592555Sdes}
13676259Sgreen
13776259Sgreenstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
13892555Sdes_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
13992555Sdes{
14092555Sdes  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
14192555Sdes}
14292555Sdes
14376259Sgreenstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
14492555Sdes_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
14576259Sgreen{
14692555Sdes  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
14776259Sgreen}
14876259Sgreen
14992555Sdesstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
15076259Sgreen_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
15176259Sgreen{
15276259Sgreen  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
15376259Sgreen}
15476259Sgreen
15576259Sgreenstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
15676259Sgreen_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
15776259Sgreen{
15876259Sgreen  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
15976259Sgreen}
16076259Sgreen
16176259Sgreenstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
16276259Sgreen_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
16376259Sgreen{
16476259Sgreen  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
16576259Sgreen}
16676259Sgreen
16792555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
16892555Sdes_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
16992555Sdes{
17092555Sdes  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
17192555Sdes}
17292555Sdes
17392555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
17492555Sdes_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
17592555Sdes{
17692555Sdes  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
17792555Sdes}
17898675Sdes
17992555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
18092555Sdes_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
18176259Sgreen{
18276259Sgreen  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
18376259Sgreen}
18476259Sgreen
18576259Sgreenstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
18676259Sgreen_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
18798675Sdes{
18898675Sdes  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
18992555Sdes}
19092555Sdes
19192555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
19276259Sgreen_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
19392555Sdes{
19492555Sdes  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
19576259Sgreen}
19692555Sdes
19792555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
19892555Sdes_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
19992555Sdes{
20076259Sgreen  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
20192555Sdes}
20292555Sdes
20392555Sdesstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
20492555Sdes_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
20592555Sdes{
20692555Sdes  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
20792555Sdes}
20892555Sdes
20992555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
21092555Sdes_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
21192555Sdes{
21292555Sdes  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
21392555Sdes}
21492555Sdes
21576259Sgreenstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
21676259Sgreen_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
21776259Sgreen{
21892555Sdes  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
21976259Sgreen}
22092555Sdes
22192555Sdesstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
22292555Sdes_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
22376259Sgreen{
22492555Sdes  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
22598675Sdes}
22692555Sdes
/* The attribute helper macros are only needed by the definitions in this
 * file; remove them so they are not visible to code after the include. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
22992555Sdes
23098675Sdes#endif /* __FMAINTRIN_H */
23192555Sdes