1/* Test that the compiler properly optimizes floating point multiply and add
2   instructions vector into vfmaddps on FMA4 systems.  */
3
4/* { dg-do compile } */
5/* { dg-require-effective-target lp64 } */
6/* { dg-options "-O2 -mfma4 -ftree-vectorize" } */
7
8extern void exit (int);
9
10typedef float     __m256  __attribute__ ((__vector_size__ (32), __may_alias__));
11typedef double    __m256d __attribute__ ((__vector_size__ (32), __may_alias__));
12
13#define SIZE 10240
14
15union {
16  __m256 f_align;
17  __m256d d_align;
18  float f[SIZE];
19  double d[SIZE];
20} a, b, c, d;
21
22void
23flt_mul_add (void)
24{
25  int i;
26
27  for (i = 0; i < SIZE; i++)
28    a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
29}
30
31void
32dbl_mul_add (void)
33{
34  int i;
35
36  for (i = 0; i < SIZE; i++)
37    a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
38}
39
40void
41flt_mul_sub (void)
42{
43  int i;
44
45  for (i = 0; i < SIZE; i++)
46    a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
47}
48
49void
50dbl_mul_sub (void)
51{
52  int i;
53
54  for (i = 0; i < SIZE; i++)
55    a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
56}
57
58void
59flt_neg_mul_add (void)
60{
61  int i;
62
63  for (i = 0; i < SIZE; i++)
64    a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
65}
66
67void
68dbl_neg_mul_add (void)
69{
70  int i;
71
72  for (i = 0; i < SIZE; i++)
73    a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
74}
75
76int main ()
77{
78  flt_mul_add ();
79  flt_mul_sub ();
80  flt_neg_mul_add ();
81
82  dbl_mul_add ();
83  dbl_mul_sub ();
84  dbl_neg_mul_add ();
85  exit (0);
86}
87
88/* { dg-final { scan-assembler "vfmaddps" } } */
89/* { dg-final { scan-assembler "vfmaddpd" } } */
90/* { dg-final { scan-assembler "vfmsubps" } } */
91/* { dg-final { scan-assembler "vfmsubpd" } } */
92/* { dg-final { scan-assembler "vfnmaddps" } } */
93/* { dg-final { scan-assembler "vfnmaddpd" } } */
94