1//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes FMA (Fused Multiply-Add) instructions.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// FMA3 - Intel 3 operand Fused Multiply-Add instructions
16//===----------------------------------------------------------------------===//
17
// fma3p_rm - Packed FMA3 instruction definitions for a single opcode.
//
// Produces four records: 'r' and 'm' operate on VR128, 'rY' and 'mY' operate
// on VR256 and are additionally tagged VEX_L. The 'm'/'mY' records read $src3
// through MemFrag128/MemFrag256. $src1 is tied to $dst by the enclosing
// Constraints. Op defaults to null_frag (fma3p_forms supplies a real operator
// only for its 213 form).
let Constraints = "$src1 = $dst" in {
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
                    PatFrag MemFrag128, PatFrag MemFrag256,
                    ValueType OpVT128, ValueType OpVT256,
                    SDPatternOperator Op = null_frag> {
  // 128-bit, register source.
  let isCommutable = 1 in {
    def r : FMA3<opc, MRMSrcReg,
                 (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, VR128:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst,
                   (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>;
  }

  // 128-bit, $src3 read from memory.
  let mayLoad = 1 in {
    def m : FMA3<opc, MRMSrcMem,
                 (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst,
                   (OpVT128 (Op VR128:$src2, VR128:$src1,
                                (MemFrag128 addr:$src3))))]>;
  }

  // 256-bit, register source.
  let isCommutable = 1 in {
    def rY : FMA3<opc, MRMSrcReg,
                  (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, VR256:$src3),
                  !strconcat(OpcodeStr,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst,
                    (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>,
             VEX_L;
  }

  // 256-bit, $src3 read from memory.
  let mayLoad = 1 in {
    def mY : FMA3<opc, MRMSrcMem,
                  (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                  !strconcat(OpcodeStr,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst,
                    (OpVT256 (Op VR256:$src2, VR256:$src1,
                                 (MemFrag256 addr:$src3))))]>,
             VEX_L;
  }
}
} // Constraints = "$src1 = $dst"
57
// fma3p_forms - Instantiate the 213, 132 and 231 variants of one packed FMA3
// instruction.
//
// opc132/opc213/opc231 are the opcodes of the respective variants. The
// mnemonic of each variant is OpcodeStr, followed by the variant number,
// followed by PackTy. MemFrag128/MemFrag256 and OpTy128/OpTy256 are forwarded
// unchanged to fma3p_rm.
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpcodeStr, string PackTy,
                       PatFrag MemFrag128, PatFrag MemFrag256,
                       SDNode Op, ValueType OpTy128, ValueType OpTy256> {
  // Only the 213 variant is handed Op.
  defm r213 : fma3p_rm<opc213,
                       !strconcat(OpcodeStr, "213", PackTy),
                       MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;

  // The 132 and 231 variants fall back to fma3p_rm's default operator, so the
  // side-effect flag is given explicitly. hasSideEffects = 0 replaces the
  // obsolete neverHasSideEffects = 1 spelling and matches the *_REV
  // definitions in the FMA4 section of this file.
  let hasSideEffects = 0 in {
    defm r132 : fma3p_rm<opc132,
                         !strconcat(OpcodeStr, "132", PackTy),
                         MemFrag128, MemFrag256, OpTy128, OpTy256>;
    defm r231 : fma3p_rm<opc231,
                         !strconcat(OpcodeStr, "231", PackTy),
                         MemFrag128, MemFrag256, OpTy128, OpTy256>;
  } // hasSideEffects = 0
}
74
75// Fused Multiply-Add
// Packed single-precision instantiations ("ps" suffix, v4f32/v8f32).
// The three opcodes are passed in 132, 213, 231 order.
let ExeDomain = SSEPackedSingle in {
  defm VFMADDPS    : fma3p_forms<0x98, 0xA8, 0xB8,
                                 "vfmadd", "ps",
                                 memopv4f32, memopv8f32,
                                 X86Fmadd, v4f32, v8f32>;
  defm VFMSUBPS    : fma3p_forms<0x9A, 0xAA, 0xBA,
                                 "vfmsub", "ps",
                                 memopv4f32, memopv8f32,
                                 X86Fmsub, v4f32, v8f32>;
  defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6,
                                 "vfmaddsub", "ps",
                                 memopv4f32, memopv8f32,
                                 X86Fmaddsub, v4f32, v8f32>;
  defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7,
                                 "vfmsubadd", "ps",
                                 memopv4f32, memopv8f32,
                                 X86Fmsubadd, v4f32, v8f32>;
} // ExeDomain = SSEPackedSingle
88
// Packed double-precision instantiations ("pd" suffix, v2f64/v4f64).
// Same opcodes as the "ps" definitions; each defm additionally carries VEX_W.
let ExeDomain = SSEPackedDouble in {
  defm VFMADDPD    : fma3p_forms<0x98, 0xA8, 0xB8,
                                 "vfmadd", "pd",
                                 memopv2f64, memopv4f64,
                                 X86Fmadd, v2f64, v4f64>, VEX_W;
  defm VFMSUBPD    : fma3p_forms<0x9A, 0xAA, 0xBA,
                                 "vfmsub", "pd",
                                 memopv2f64, memopv4f64,
                                 X86Fmsub, v2f64, v4f64>, VEX_W;
  defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6,
                                 "vfmaddsub", "pd",
                                 memopv2f64, memopv4f64,
                                 X86Fmaddsub, v2f64, v4f64>, VEX_W;
  defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7,
                                 "vfmsubadd", "pd",
                                 memopv2f64, memopv4f64,
                                 X86Fmsubadd, v2f64, v4f64>, VEX_W;
} // ExeDomain = SSEPackedDouble
101
102// Fused Negative Multiply-Add
// Packed single-precision instantiations of vfnmadd / vfnmsub.
let ExeDomain = SSEPackedSingle in {
  defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC,
                               "vfnmadd", "ps",
                               memopv4f32, memopv8f32,
                               X86Fnmadd, v4f32, v8f32>;
  defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE,
                               "vfnmsub", "ps",
                               memopv4f32, memopv8f32,
                               X86Fnmsub, v4f32, v8f32>;
} // ExeDomain = SSEPackedSingle
// Packed double-precision instantiations of vfnmadd / vfnmsub (with VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC,
                               "vfnmadd", "pd",
                               memopv2f64, memopv4f64,
                               X86Fnmadd, v2f64, v4f64>, VEX_W;
  defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE,
                               "vfnmsub", "pd",
                               memopv2f64, memopv4f64,
                               X86Fnmsub, v2f64, v4f64>, VEX_W;
} // ExeDomain = SSEPackedDouble
116
// Scalar FMA3 building blocks. Both multiclasses below tie $src1 to $dst.
let Constraints = "$src1 = $dst" in {

// fma3s_rm - Scalar FMA3 on register class RC for a single opcode.
//
// 'r' takes all three sources from RC; 'm' reads $src3 from an x86memop
// operand through mem_frag. OpNode defaults to null_frag.
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
                    RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
                    SDPatternOperator OpNode = null_frag> {
  let isCommutable = 1 in {
    def r : FMA3<opc, MRMSrcReg,
                 (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, RC:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set RC:$dst,
                   (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  }

  let mayLoad = 1 in {
    def m : FMA3<opc, MRMSrcMem,
                 (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, x86memop:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set RC:$dst,
                   (OpVT (OpNode RC:$src2, RC:$src1,
                                 (mem_frag addr:$src3))))]>;
  }
}

// fma3s_rm_int - Intrinsic-selected scalar FMA3 for a single opcode.
//
// Both records operate on VR128 and are matched through IntId. 'm_Int' takes
// $src3 as a memop operand matched by mem_cpat. The RC parameter is accepted
// but not referenced by either record.
// NOTE(review): r_Int is marked commutable while working on whole VR128
// values - confirm that swapping source operands is safe for the elements
// above the low one.
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
                        ComplexPattern mem_cpat, Intrinsic IntId,
                        RegisterClass RC> {
  let isCommutable = 1 in {
    def r_Int : FMA3<opc, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2, VR128:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                       (IntId VR128:$src2, VR128:$src1, VR128:$src3))]>;
  }

  def m_Int : FMA3<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, memop:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR128:$dst,
                     (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
}

} // Constraints = "$src1 = $dst"
156
// fma3s_forms - Instantiate the 132, 231 and 213 variants of one scalar FMA3
// instruction.
//
// The mnemonic of each variant is OpStr, followed by the variant number,
// followed by PackTy. RC/OpVT/x86memop/mem_frag are forwarded to fma3s_rm;
// memop/mem_cpat/Int are forwarded to fma3s_rm_int.
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpStr, string PackTy, Intrinsic Int,
                       SDNode OpNode, RegisterClass RC, ValueType OpVT,
                       X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
                       ComplexPattern mem_cpat> {
  // The 132 and 231 variants use fma3s_rm's default operator, so the
  // side-effect flag is given explicitly. hasSideEffects = 0 replaces the
  // obsolete neverHasSideEffects = 1 spelling and matches the *_REV
  // definitions in the FMA4 section of this file.
  let hasSideEffects = 0 in {
    defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
                         x86memop, RC, OpVT, mem_frag>;
    defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
                         x86memop, RC, OpVT, mem_frag>;
  } // hasSideEffects = 0

  // The 213 variant gets both the OpNode-selected records and the
  // intrinsic-selected (_Int) records.
  defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
                       x86memop, RC, OpVT, mem_frag, OpNode>,
              fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
                           memop, mem_cpat, Int, RC>;
}
174
// fma3s - Instantiate the "ss" (FR32/f32) and "sd" (FR64/f64) flavours of a
// scalar FMA3 instruction. IntF32 and IntF64 are the intrinsics used for the
// respective flavour; the "sd" flavour additionally carries VEX_W.
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                 string OpStr, Intrinsic IntF32, Intrinsic IntF64,
                 SDNode OpNode> {
  defm SS : fma3s_forms<opc132, opc213, opc231,
                        OpStr, "ss", IntF32, OpNode,
                        FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;

  defm SD : fma3s_forms<opc132, opc213, opc231,
                        OpStr, "sd", IntF64, OpNode,
                        FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>,
            VEX_W;
}
183
// Scalar FMA3 instantiations. Opcodes are passed in 132, 213, 231 order and
// every defm carries VEX_LIG.
defm VFMADD  : fma3s<0x99, 0xA9, 0xB9, "vfmadd",
                     int_x86_fma_vfmadd_ss, int_x86_fma_vfmadd_sd,
                     X86Fmadd>, VEX_LIG;
defm VFMSUB  : fma3s<0x9B, 0xAB, 0xBB, "vfmsub",
                     int_x86_fma_vfmsub_ss, int_x86_fma_vfmsub_sd,
                     X86Fmsub>, VEX_LIG;

defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd",
                     int_x86_fma_vfnmadd_ss, int_x86_fma_vfnmadd_sd,
                     X86Fnmadd>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub",
                     int_x86_fma_vfnmsub_ss, int_x86_fma_vfnmsub_sd,
                     X86Fnmsub>, VEX_LIG;
193
194
195//===----------------------------------------------------------------------===//
196// FMA4 - AMD 4 operand Fused Multiply-Add instructions
197//===----------------------------------------------------------------------===//
198
199
// fma4s - Scalar FMA4 on register class RC for a single opcode.
//
//   rr     - all three sources in registers.
//   rm     - $src3 read from memory through mem_frag.
//   mr     - $src2 read from memory through mem_frag.
//   rr_REV - pattern-less, codegen-only register form (for the disassembler).
//
// rr and rm carry VEX_W and MemOp4; mr and rr_REV carry neither.
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
                 PatFrag mem_frag> {
  let isCommutable = 1 in {
    def rr : FMA4<opc, MRMSrcReg,
                  (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                    (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
             VEX_W, MemOp4;
  }

  def rm : FMA4<opc, MRMSrcMem,
                (outs RC:$dst),
                (ins RC:$src1, RC:$src2, x86memop:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set RC:$dst,
                  (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>,
           VEX_W, MemOp4;

  def mr : FMA4<opc, MRMSrcMem,
                (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set RC:$dst,
                  (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;

  // For disassembler
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr_REV : FMA4<opc, MRMSrcReg,
                      (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, RC:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      []>;
  }
}
229
// fma4s_int - Intrinsic-selected scalar FMA4 on VR128 for a single opcode.
//
//   rr_Int - all three sources in registers.
//   rm_Int - $src3 is a memop operand matched by mem_cpat.
//   mr_Int - $src2 is a memop operand matched by mem_cpat.
//
// rr_Int and rm_Int carry VEX_W and MemOp4; mr_Int carries neither.
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                     ComplexPattern mem_cpat, Intrinsic Int> {
  let isCommutable = 1 in {
    def rr_Int : FMA4<opc, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, VR128:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128:$dst,
                        (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
                 VEX_W, MemOp4;
  }

  def rm_Int : FMA4<opc, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, memop:$src3),
                    !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    [(set VR128:$dst,
                      (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>,
               VEX_W, MemOp4;

  def mr_Int : FMA4<opc, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, memop:$src2, VR128:$src3),
                    !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    [(set VR128:$dst,
                      (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
}
252
// fma4p - Packed FMA4 for a single opcode, in 128-bit (VR128) and 256-bit
// (VR256, suffix 'Y', tagged VEX_L) widths.
//
//   rr / rrY         - all three sources in registers.
//   rm / rmY         - $src3 read from memory through ld_frag128/ld_frag256.
//   mr / mrY         - $src2 read from memory through ld_frag128/ld_frag256.
//   rr_REV / rrY_REV - pattern-less, codegen-only register forms (for the
//                      disassembler).
//
// The rr* and rm* records carry VEX_W and MemOp4; the mr* and *_REV records
// carry neither.
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 ValueType OpVT128, ValueType OpVT256,
                 PatFrag ld_frag128, PatFrag ld_frag256> {
  // ---- 128-bit records ----
  let isCommutable = 1 in {
    def rr : FMA4<opc, MRMSrcReg,
                  (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2, VR128:$src3),
                  !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set VR128:$dst,
                    (OpVT128 (OpNode VR128:$src1, VR128:$src2,
                                     VR128:$src3)))]>,
             VEX_W, MemOp4;
  }

  def rm : FMA4<opc, MRMSrcMem,
                (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst,
                  (OpNode VR128:$src1, VR128:$src2,
                          (ld_frag128 addr:$src3)))]>,
           VEX_W, MemOp4;

  def mr : FMA4<opc, MRMSrcMem,
                (outs VR128:$dst),
                (ins VR128:$src1, f128mem:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst,
                  (OpNode VR128:$src1, (ld_frag128 addr:$src2),
                          VR128:$src3))]>;

  // ---- 256-bit records ----
  let isCommutable = 1 in {
    def rrY : FMA4<opc, MRMSrcReg,
                   (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2, VR256:$src3),
                   !strconcat(OpcodeStr,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                   [(set VR256:$dst,
                     (OpVT256 (OpNode VR256:$src1, VR256:$src2,
                                      VR256:$src3)))]>,
              VEX_W, MemOp4, VEX_L;
  }

  def rmY : FMA4<opc, MRMSrcMem,
                 (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                 !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set VR256:$dst,
                   (OpNode VR256:$src1, VR256:$src2,
                           (ld_frag256 addr:$src3)))]>,
            VEX_W, MemOp4, VEX_L;

  def mrY : FMA4<opc, MRMSrcMem,
                 (outs VR256:$dst),
                 (ins VR256:$src1, f256mem:$src2, VR256:$src3),
                 !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set VR256:$dst,
                   (OpNode VR256:$src1, (ld_frag256 addr:$src2),
                           VR256:$src3))]>,
            VEX_L;

  // For disassembler
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr_REV  : FMA4<opc, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2, VR128:$src3),
                       !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>;

    def rrY_REV : FMA4<opc, MRMSrcReg,
                       (outs VR256:$dst),
                       (ins VR256:$src1, VR256:$src2, VR256:$src3),
                       !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
                  VEX_L;
  } // isCodeGenOnly = 1
}
309
// Scalar FMA4 instantiations. Each defm combines the OpNode-selected records
// from fma4s with the intrinsic-selected records from fma4s_int, using the
// same opcode and mnemonic for both.
defm VFMADDSS4  : fma4s<0x6A, "vfmaddss",
                        FR32, f32mem, f32, X86Fmadd, loadf32>,
                  fma4s_int<0x6A, "vfmaddss",
                            ssmem, sse_load_f32, int_x86_fma_vfmadd_ss>;
defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd",
                        FR64, f64mem, f64, X86Fmadd, loadf64>,
                  fma4s_int<0x6B, "vfmaddsd",
                            sdmem, sse_load_f64, int_x86_fma_vfmadd_sd>;

defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss",
                        FR32, f32mem, f32, X86Fmsub, loadf32>,
                  fma4s_int<0x6E, "vfmsubss",
                            ssmem, sse_load_f32, int_x86_fma_vfmsub_ss>;
defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd",
                        FR64, f64mem, f64, X86Fmsub, loadf64>,
                  fma4s_int<0x6F, "vfmsubsd",
                            sdmem, sse_load_f64, int_x86_fma_vfmsub_sd>;

defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss",
                        FR32, f32mem, f32, X86Fnmadd, loadf32>,
                  fma4s_int<0x7A, "vfnmaddss",
                            ssmem, sse_load_f32, int_x86_fma_vfnmadd_ss>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd",
                        FR64, f64mem, f64, X86Fnmadd, loadf64>,
                  fma4s_int<0x7B, "vfnmaddsd",
                            sdmem, sse_load_f64, int_x86_fma_vfnmadd_sd>;

defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss",
                        FR32, f32mem, f32, X86Fnmsub, loadf32>,
                  fma4s_int<0x7E, "vfnmsubss",
                            ssmem, sse_load_f32, int_x86_fma_vfnmsub_ss>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd",
                        FR64, f64mem, f64, X86Fnmsub, loadf64>,
                  fma4s_int<0x7F, "vfnmsubsd",
                            sdmem, sse_load_f64, int_x86_fma_vfnmsub_sd>;
338
// Packed single-precision FMA4 instantiations (v4f32 / v8f32).
let ExeDomain = SSEPackedSingle in {
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps",
                            X86Fmadd, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps",
                            X86Fmsub, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps",
                            X86Fnmadd, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps",
                            X86Fnmsub, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps",
                            X86Fmaddsub, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps",
                            X86Fmsubadd, v4f32, v8f32,
                            memopv4f32, memopv8f32>;
} // ExeDomain = SSEPackedSingle
353
// Packed double-precision FMA4 instantiations (v2f64 / v4f64).
let ExeDomain = SSEPackedDouble in {
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd",
                            X86Fmadd, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd",
                            X86Fmsub, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd",
                            X86Fnmadd, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd",
                            X86Fnmsub, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd",
                            X86Fmaddsub, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd",
                            X86Fmsubadd, v2f64, v4f64,
                            memopv2f64, memopv4f64>;
} // ExeDomain = SSEPackedDouble
368
369