1/* PR rtl-optimization/21239 */
2/* { dg-do run { target i?86-*-* x86_64-*-* } } */
3/* { dg-options "-O2 -msse2" } */
4#include <emmintrin.h>
5#include "../../gcc.dg/i386-cpuid.h"
6
7extern void abort (void);
8
/* Copy X doubles from Z to Y, one element at a time.  Each value is
   loaded with _mm_load_sd and written back with _mm_store_sd, so the
   load/store intrinsic pair is what the compiler gets to optimize.  */
void
foo (unsigned int x, double *y, const double *z)
{
  unsigned int k;

  for (k = 0; k < x; ++k)
    {
      /* Scalar-double load immediately followed by a scalar-double store.  */
      __m128d lane = _mm_load_sd (z + k);
      _mm_store_sd (y + k, lane);
    }
}
20
/* Copy 4 * X floats from Z to Y in four consecutive passes of X elements.
   Pass N (N = 0..3) places each source float in lane N of an otherwise
   zero __m128 vector and then extracts that same lane into the
   destination.  The lane index given to the extract builtin is a literal
   constant, hence four separate loops instead of one loop over lanes.  */
void
bar (unsigned int x, float *y, const float *z)
{
  __m128 vec;
  unsigned int left;

  /* Pass 0: value travels through lane 0.  */
  for (left = x; left != 0; --left, ++z, ++y)
    {
      vec = (__m128) { *z, 0, 0, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (vec, 0);
    }

  /* Pass 1: value travels through lane 1.  */
  for (left = x; left != 0; --left, ++z, ++y)
    {
      vec = (__m128) { 0, *z, 0, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (vec, 1);
    }

  /* Pass 2: value travels through lane 2.  */
  for (left = x; left != 0; --left, ++z, ++y)
    {
      vec = (__m128) { 0, 0, *z, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (vec, 2);
    }

  /* Pass 3: value travels through lane 3.  */
  for (left = x; left != 0; --left, ++z, ++y)
    {
      vec = (__m128) { 0, 0, 0, *z };
      *y = __builtin_ia32_vec_ext_v4sf (vec, 3);
    }
}
51
/* Exercise foo and bar on 16-element arrays and abort on any mismatch.
   The destinations (a, c) start out different from the sources (b, d),
   so a copy that did not happen is detected.  Kept noinline; presumably
   so the SSE code stays out of main, which must first check CPU
   support -- confirm before changing.  */
void __attribute__((noinline))
run_tests (void)
{
  double a[16], b[16];
  float c[16], d[16];
  unsigned int k;

  /* Destinations: values the copies must overwrite.  */
  for (k = 0; k < 16; ++k)
    {
      a[k] = 1;
      c[k] = 3;
    }

  /* Sources: values expected in the destinations afterwards.  */
  for (k = 0; k < 16; ++k)
    {
      b[k] = 2;
      d[k] = 4;
    }

  foo (16, a, b);
  /* bar copies 4 * x floats, so x = 4 covers all 16 elements.  */
  bar (4, c, d);

  for (k = 0; k < 16; ++k)
    if (a[k] != 2 || c[k] != 4)
      abort ();
}
75
76int
77main ()
78{
79  unsigned long cpu_facilities;
80  unsigned int i;
81  double a[19], b[19];
82
83  cpu_facilities = i386_cpuid ();
84
85  if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
86      != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
87    /* If host has no vector support, pass.  */
88    return 0;
89
90  run_tests ();
91  return 0;
92}
93