/* { dg-do run } */
/* { dg-require-effective-target sse3 } */
/* { dg-options "-O2 -msse3 -mfpmath=sse" } */

5228753Smm#ifndef CHECK_H
6228753Smm#define CHECK_H "sse3-check.h"
7228753Smm#endif
8228753Smm
9228753Smm#ifndef TEST
10228753Smm#define TEST sse3_test
11228753Smm#endif
12228753Smm
13228753Smm#include CHECK_H
14228753Smm
15228753Smm#include <pmmintrin.h>
16228753Smm
17228753Smmstatic void
18228753Smmsse3_test_movddup_mem (double *i1, double *r)
19228753Smm{
20228753Smm  __m128d t1 = _mm_loaddup_pd (i1);
21228753Smm
22228753Smm  _mm_storeu_pd (r, t1);
23228753Smm}
24228753Smm
/* Vector of ones; multiplying by it leaves each lane's value unchanged.  */
static double cnst1 [2] = {1.0, 1.0};

27228753Smmstatic void
28228753Smmsse3_test_movddup_reg (double *i1, double *r)
29228753Smm{
30228753Smm  __m128d t1 = _mm_loadu_pd (i1);
31228753Smm  __m128d t2 = _mm_loadu_pd (&cnst1[0]);
32228753Smm
33228753Smm  t1  = _mm_mul_pd (t1, t2);
34228753Smm  t2  = _mm_movedup_pd (t1);
35232153Smm
36228753Smm  _mm_storeu_pd (r, t2);
37228753Smm}
38228753Smm
39228753Smmstatic void
40228753Smmsse3_test_movddup_reg_subsume_unaligned (double *i1, double *r)
41228753Smm{
42228753Smm  __m128d t1 = _mm_loadu_pd (i1);
43228753Smm  __m128d t2 = _mm_movedup_pd (t1);
44228753Smm
45228753Smm  _mm_storeu_pd (r, t2);
46232153Smm}
47232153Smm
48232153Smmstatic void
49232153Smmsse3_test_movddup_reg_subsume_ldsd (double *i1, double *r)
50232153Smm{
51232153Smm  __m128d t1 = _mm_load_sd (i1);
52232153Smm  __m128d t2 = _mm_movedup_pd (t1);
53228753Smm
54228753Smm  _mm_storeu_pd (r, t2);
55228753Smm}
56228753Smm
57228753Smmstatic void
58228753Smmsse3_test_movddup_reg_subsume (double *i1, double *r)
59228753Smm{
60228753Smm  __m128d t1 = _mm_load_pd (i1);
61228753Smm  __m128d t2 = _mm_movedup_pd (t1);
62228753Smm
63228753Smm  _mm_storeu_pd (r, t2);
64228753Smm}
65228753Smm
/* Compare the two-element double vectors V1 and V2 lane by lane using exact
   equality.  Return the number of lanes that differ (0, 1 or 2).  */
static int
chk_pd (double *v1, double *v2)
{
  int i;
  int n_fails = 0;

  for (i = 0; i < 2; i++)
    if (v1[i] != v2[i])
      n_fails += 1;

  return n_fails;
}

/* Working buffers for the test driver: p1 is the input vector (16-byte
   aligned so it can also be read with an aligned vector load), p2 receives
   each routine's result, and ck holds the expected result.  */
static double p1[2] __attribute__ ((aligned(16)));
static double p2[2];
static double ck[2];

/* Input values fed one at a time to the routines under test: 80 doubles
   mixing positive, negative, fractional and repeated values.  */
static double vals[80] =
  {
    100.0,  200.0, 300.0, 400.0, 5.0, -1.0, .345, -21.5,
    1100.0, 0.235, 321.3, 53.40, 0.3, 10.0, 42.0, 32.52,
    32.6,   123.3, 1.234, 2.156, 0.1, 3.25, 4.75, 32.44,
    12.16,  52.34, 64.12, 71.13, -.1, 2.30, 5.12, 3.785,
    541.3,  321.4, 231.4, 531.4, 71., 321., 231., -531.,
    23.45,  23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 23.45,
    23.45,  -1.43, -6.74, 6.345, -20.1, -20.1, -40.1, -40.1,
    1.234,  2.345, 3.456, 4.567, 5.678, 6.789, 7.891, 8.912,
    -9.32,  -8.41, -7.50, -6.59, -5.68, -4.77, -3.86, -2.95,
    9.32,  8.41, 7.50, 6.59, -5.68, -4.77, -3.86, -2.95
  };

97228753Smmstatic void
98228753SmmTEST (void)
99228753Smm{
100228753Smm  int i;
101228753Smm  int fail = 0;
102228753Smm
103228753Smm  for (i = 0; i < 80; i += 1)
104228753Smm    {
105228753Smm      p1[0] = vals[i+0];
106228753Smm
107228753Smm      ck[0] = p1[0];
108232153Smm      ck[1] = p1[0];
109228753Smm
110228753Smm      sse3_test_movddup_mem (p1, p2);
111228753Smm
112232153Smm      fail += chk_pd (ck, p2);
113228753Smm
114228753Smm      sse3_test_movddup_reg (p1, p2);
115228753Smm
116228753Smm      fail += chk_pd (ck, p2);
117228753Smm
118228753Smm      sse3_test_movddup_reg_subsume (p1, p2);
119228753Smm
120228753Smm      fail += chk_pd (ck, p2);
121228753Smm
122228753Smm      sse3_test_movddup_reg_subsume_unaligned (p1, p2);
123228753Smm
124228753Smm      fail += chk_pd (ck, p2);
125228753Smm
126228753Smm      sse3_test_movddup_reg_subsume_ldsd (p1, p2);
127228753Smm
128228753Smm      fail += chk_pd (ck, p2);
129228753Smm    }
130228753Smm
131228753Smm  if (fail != 0)
132228753Smm    abort ();
133228753Smm}
134228753Smm