1234287Sdim/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
2234287Sdim *
3234287Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy
4234287Sdim * of this software and associated documentation files (the "Software"), to deal
5234287Sdim * in the Software without restriction, including without limitation the rights
6234287Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7234287Sdim * copies of the Software, and to permit persons to whom the Software is
8234287Sdim * furnished to do so, subject to the following conditions:
9234287Sdim *
10234287Sdim * The above copyright notice and this permission notice shall be included in
11234287Sdim * all copies or substantial portions of the Software.
12234287Sdim *
13234287Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14234287Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15234287Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16234287Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17234287Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18234287Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19234287Sdim * THE SOFTWARE.
20234287Sdim *
21234287Sdim *===-----------------------------------------------------------------------===
22234287Sdim */
23234287Sdim
24234287Sdim#ifndef __X86INTRIN_H
25234287Sdim#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
26234287Sdim#endif
27234287Sdim
28234287Sdim#ifndef __FMA4INTRIN_H
29234287Sdim#define __FMA4INTRIN_H
30234287Sdim
31234287Sdim#include <pmmintrin.h>
32234287Sdim
/* Attribute set shared by every intrinsic wrapper declared below: each one
 * carries __always_inline__, __nodebug__ and __target__("fma4"). */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
35288943Sdim
36288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
37234287Sdim_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
38234287Sdim{
39234287Sdim  return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
40234287Sdim}
41234287Sdim
42288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
43234287Sdim_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
44234287Sdim{
45234287Sdim  return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
46234287Sdim}
47234287Sdim
48288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
49234287Sdim_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
50234287Sdim{
51234287Sdim  return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
52234287Sdim}
53234287Sdim
54288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
55234287Sdim_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
56234287Sdim{
57234287Sdim  return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
58234287Sdim}
59234287Sdim
60288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
61234287Sdim_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
62234287Sdim{
63234287Sdim  return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
64234287Sdim}
65234287Sdim
66288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
67234287Sdim_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
68234287Sdim{
69234287Sdim  return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
70234287Sdim}
71234287Sdim
72288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
73234287Sdim_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
74234287Sdim{
75234287Sdim  return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
76234287Sdim}
77234287Sdim
78288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
79234287Sdim_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
80234287Sdim{
81234287Sdim  return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
82234287Sdim}
83234287Sdim
84288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
85234287Sdim_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
86234287Sdim{
87234287Sdim  return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
88234287Sdim}
89234287Sdim
90288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
91234287Sdim_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
92234287Sdim{
93234287Sdim  return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
94234287Sdim}
95234287Sdim
96288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
97234287Sdim_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
98234287Sdim{
99234287Sdim  return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
100234287Sdim}
101234287Sdim
102288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
103234287Sdim_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
104234287Sdim{
105234287Sdim  return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
106234287Sdim}
107234287Sdim
108288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
109234287Sdim_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
110234287Sdim{
111234287Sdim  return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
112234287Sdim}
113234287Sdim
114288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
115234287Sdim_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
116234287Sdim{
117234287Sdim  return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
118234287Sdim}
119234287Sdim
120288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
121234287Sdim_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
122234287Sdim{
123234287Sdim  return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
124234287Sdim}
125234287Sdim
126288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
127234287Sdim_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
128234287Sdim{
129234287Sdim  return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
130234287Sdim}
131234287Sdim
132288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
133234287Sdim_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
134234287Sdim{
135234287Sdim  return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
136234287Sdim}
137234287Sdim
138288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
139234287Sdim_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
140234287Sdim{
141234287Sdim  return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
142234287Sdim}
143234287Sdim
144288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
145234287Sdim_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
146234287Sdim{
147234287Sdim  return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
148234287Sdim}
149234287Sdim
150288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
151234287Sdim_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
152234287Sdim{
153234287Sdim  return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
154234287Sdim}
155234287Sdim
156288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
157234287Sdim_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
158234287Sdim{
159234287Sdim  return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
160234287Sdim}
161234287Sdim
162288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
163234287Sdim_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
164234287Sdim{
165234287Sdim  return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
166234287Sdim}
167234287Sdim
168288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
169234287Sdim_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
170234287Sdim{
171234287Sdim  return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
172234287Sdim}
173234287Sdim
174288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
175234287Sdim_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
176234287Sdim{
177234287Sdim  return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
178234287Sdim}
179234287Sdim
180288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
181234287Sdim_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
182234287Sdim{
183234287Sdim  return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
184234287Sdim}
185234287Sdim
186288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
187234287Sdim_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
188234287Sdim{
189234287Sdim  return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
190234287Sdim}
191234287Sdim
192288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
193234287Sdim_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
194234287Sdim{
195234287Sdim  return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
196234287Sdim}
197234287Sdim
198288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
199234287Sdim_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
200234287Sdim{
201234287Sdim  return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
202234287Sdim}
203234287Sdim
204288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
205234287Sdim_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
206234287Sdim{
207234287Sdim  return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
208234287Sdim}
209234287Sdim
210288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
211234287Sdim_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
212234287Sdim{
213234287Sdim  return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
214234287Sdim}
215234287Sdim
216288943Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS
217234287Sdim_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
218234287Sdim{
219234287Sdim  return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
220234287Sdim}
221234287Sdim
222288943Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS
223234287Sdim_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
224234287Sdim{
225234287Sdim  return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
226234287Sdim}
227234287Sdim
228288943Sdim#undef __DEFAULT_FN_ATTRS
229288943Sdim
230234287Sdim#endif /* __FMA4INTRIN_H */
231