1228753Smm/* { dg-do run } */ 2228753Smm/* { dg-require-effective-target sse3 } */ 3228753Smm/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ 4228753Smm 5228753Smm#ifndef CHECK_H 6228753Smm#define CHECK_H "sse3-check.h" 7228753Smm#endif 8228753Smm 9228753Smm#ifndef TEST 10228753Smm#define TEST sse3_test 11228753Smm#endif 12228753Smm 13228753Smm#include CHECK_H 14228753Smm 15228753Smm#include <pmmintrin.h> 16228753Smm 17228753Smmstatic void 18228753Smmsse3_test_movddup_mem (double *i1, double *r) 19228753Smm{ 20228753Smm __m128d t1 = _mm_loaddup_pd (i1); 21228753Smm 22228753Smm _mm_storeu_pd (r, t1); 23228753Smm} 24228753Smm 25228763Smmstatic double cnst1 [2] = {1.0, 1.0}; 26228753Smm 27228753Smmstatic void 28228753Smmsse3_test_movddup_reg (double *i1, double *r) 29228753Smm{ 30228753Smm __m128d t1 = _mm_loadu_pd (i1); 31228753Smm __m128d t2 = _mm_loadu_pd (&cnst1[0]); 32228753Smm 33228753Smm t1 = _mm_mul_pd (t1, t2); 34228753Smm t2 = _mm_movedup_pd (t1); 35232153Smm 36228753Smm _mm_storeu_pd (r, t2); 37228753Smm} 38228753Smm 39228753Smmstatic void 40228753Smmsse3_test_movddup_reg_subsume_unaligned (double *i1, double *r) 41228753Smm{ 42228753Smm __m128d t1 = _mm_loadu_pd (i1); 43228753Smm __m128d t2 = _mm_movedup_pd (t1); 44228753Smm 45228753Smm _mm_storeu_pd (r, t2); 46232153Smm} 47232153Smm 48232153Smmstatic void 49232153Smmsse3_test_movddup_reg_subsume_ldsd (double *i1, double *r) 50232153Smm{ 51232153Smm __m128d t1 = _mm_load_sd (i1); 52232153Smm __m128d t2 = _mm_movedup_pd (t1); 53228753Smm 54228753Smm _mm_storeu_pd (r, t2); 55228753Smm} 56228753Smm 57228753Smmstatic void 58228753Smmsse3_test_movddup_reg_subsume (double *i1, double *r) 59228753Smm{ 60228753Smm __m128d t1 = _mm_load_pd (i1); 61228753Smm __m128d t2 = _mm_movedup_pd (t1); 62228753Smm 63228753Smm _mm_storeu_pd (r, t2); 64228753Smm} 65228753Smm 66228753Smmstatic int 67228753Smmchk_pd (double *v1, double *v2) 68228753Smm{ 69228753Smm int i; 70228753Smm int n_fails = 0; 71228753Smm 72228753Smm for (i = 0; i < 2; i++) 73228753Smm if (v1[i] != v2[i]) 74232153Smm n_fails += 1; 75232153Smm 76228753Smm return n_fails; 77228753Smm} 78228753Smm 79228753Smmstatic double p1[2] __attribute__ ((aligned(16))); 80228753Smmstatic double p2[2]; 81228753Smmstatic double ck[2]; 82228753Smm 83228753Smmstatic double vals[80] = 84228753Smm { 85228753Smm 100.0, 200.0, 300.0, 400.0, 5.0, -1.0, .345, -21.5, 86232153Smm 1100.0, 0.235, 321.3, 53.40, 0.3, 10.0, 42.0, 32.52, 87232153Smm 32.6, 123.3, 1.234, 2.156, 0.1, 3.25, 4.75, 32.44, 88232153Smm 12.16, 52.34, 64.12, 71.13, -.1, 2.30, 5.12, 3.785, 89232153Smm 541.3, 321.4, 231.4, 531.4, 71., 321., 231., -531., 90232153Smm 23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 91232153Smm 23.45, -1.43, -6.74, 6.345, -20.1, -20.1, -40.1, -40.1, 92232153Smm 1.234, 2.345, 3.456, 4.567, 5.678, 6.789, 7.891, 8.912, 93232153Smm -9.32, -8.41, -7.50, -6.59, -5.68, -4.77, -3.86, -2.95, 94232153Smm 9.32, 8.41, 7.50, 6.59, -5.68, -4.77, -3.86, -2.95 95228753Smm }; 96228753Smm 97228753Smmstatic void 98228753SmmTEST (void) 99228753Smm{ 100228753Smm int i; 101228753Smm int fail = 0; 102228753Smm 103228753Smm for (i = 0; i < 80; i += 1) 104228753Smm { 105228753Smm p1[0] = vals[i+0]; 106228753Smm 107228753Smm ck[0] = p1[0]; 108232153Smm ck[1] = p1[0]; 109228753Smm 110228753Smm sse3_test_movddup_mem (p1, p2); 111228753Smm 112232153Smm fail += chk_pd (ck, p2); 113228753Smm 114228753Smm sse3_test_movddup_reg (p1, p2); 115228753Smm 116228753Smm fail += chk_pd (ck, p2); 117228753Smm 118228753Smm sse3_test_movddup_reg_subsume (p1, p2); 119228753Smm 120228753Smm fail += chk_pd (ck, p2); 121228753Smm 122228753Smm sse3_test_movddup_reg_subsume_unaligned (p1, p2); 123228753Smm 124228753Smm fail += chk_pd (ck, p2); 125228753Smm 126228753Smm sse3_test_movddup_reg_subsume_ldsd (p1, p2); 127228753Smm 128228753Smm fail += chk_pd (ck, p2); 129228753Smm } 130228753Smm 131228753Smm if (fail != 0) 132228753Smm abort (); 133228753Smm} 134228753Smm