1169689Skan/*
2169689Skan * Copyright (C) 2005 Free Software Foundation, Inc.
3169689Skan *
4169689Skan * This file is free software; you can redistribute it and/or modify it
5169689Skan * under the terms of the GNU General Public License as published by the
6169689Skan * Free Software Foundation; either version 2, or (at your option) any
7169689Skan * later version.
8169689Skan *
9169689Skan * In addition to the permissions in the GNU General Public License, the
10169689Skan * Free Software Foundation gives you unlimited permission to link the
11169689Skan * compiled version of this file with other programs, and to distribute
12169689Skan * those programs without any restriction coming from the use of this
13169689Skan * file.  (The General Public License restrictions do apply in other
14169689Skan * respects; for example, they cover modification of the file, and
15169689Skan * distribution when not linked into another program.)
16169689Skan *
17169689Skan * This file is distributed in the hope that it will be useful, but
18169689Skan * WITHOUT ANY WARRANTY; without even the implied warranty of
19169689Skan * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20169689Skan * General Public License for more details.
21169689Skan *
22169689Skan * You should have received a copy of the GNU General Public License
23169689Skan * along with this program; see the file COPYING.  If not, write to
24169689Skan * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25169689Skan * Boston, MA 02110-1301, USA.
26169689Skan *
27169689Skan *    As a special exception, if you link this library with files
28169689Skan *    compiled with GCC to produce an executable, this does not cause
29169689Skan *    the resulting executable to be covered by the GNU General Public License.
30169689Skan *    This exception does not however invalidate any other reasons why
31169689Skan *    the executable file might be covered by the GNU General Public License.
32169689Skan */
33169689Skan
/* Bits of the MXCSR register that set_fast_math ORs into the value it
   reads with stmxcsr and writes back with ldmxcsr.  */
34169689Skan#define MXCSR_DAZ (1 << 6)	/* Enable denormals are zero mode */
35169689Skan#define MXCSR_FTZ (1 << 15)	/* Enable flush to zero mode */
36169689Skan
/* Feature bits tested by set_fast_math against the EDX value returned
   by cpuid with EAX = 1 (32-bit builds only).  */
37169689Skan#define FXSAVE	(1 << 24)
38169689Skan#define SSE	(1 << 25)
39169689Skan
/* Constructor that enables flush-to-zero (FTZ) and, where available,
   denormals-are-zero (DAZ) in the MXCSR register.

   Takes no arguments and returns nothing; its only effect is the
   ldmxcsr at the end of whichever path is taken.

   On x86-64 both bits are set unconditionally.  On 32-bit x86 the
   function first checks that cpuid is usable and that the processor
   reports SSE; it returns without touching MXCSR if either check fails.
   FTZ is then always requested, DAZ only when the MXCSR mask stored by
   fxsave has the DAZ bit set.  */
40169689Skanstatic void __attribute__((constructor))
41169689Skan#ifndef __x86_64__
42169689Skan/* The i386 ABI only requires 4-byte stack alignment, so this is necessary
43169689Skan   to make sure the fxsave struct gets correct alignment.
44169689Skan   See PR27537 and PR28621.  */
45169689Skan__attribute__ ((force_align_arg_pointer))
46169689Skan#endif
47169689Skanset_fast_math (void)
48169689Skan{
49169689Skan#ifndef __x86_64__
50169689Skan  /* All 64-bit targets have SSE and DAZ; only check them explicitly
51169689Skan     for 32-bit ones. */
52169689Skan  unsigned int eax, ebx, ecx, edx;
53169689Skan
54169689Skan  /* See if we can use cpuid.  */
  /* The asm saves EFLAGS, copies it into both outputs, flips bit 21
     (0x00200000, the ID flag) in the first copy, loads that into EFLAGS,
     reads EFLAGS back into the first output and finally restores the
     saved EFLAGS.  Afterwards eax holds the value read back and ebx the
     original value.  */
55169689Skan  asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
56169689Skan		"pushl %0; popfl; pushfl; popl %0; popfl"
57169689Skan		: "=&r" (eax), "=&r" (ebx)
58169689Skan		: "i" (0x00200000));
59169689Skan
  /* If bit 21 did not change, the flag could not be toggled and cpuid is
     taken to be unavailable: leave MXCSR alone.  */
60169689Skan  if (((eax ^ ebx) & 0x00200000) == 0)
61169689Skan    return;
62169689Skan
63169689Skan  /* Check the highest input value for eax.  */
  /* %ebx is swapped with operand 1 around cpuid, so the register itself
     holds its original value again after the asm while the cpuid result
     ends up in the C variable ebx.  NOTE(review): presumably done because
     %ebx may be reserved (e.g. as the PIC register) -- confirm.  */
64169689Skan  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
65169689Skan		: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
66169689Skan		: "0" (0));
67169689Skan
  /* A maximum input value of 0 means leaf 1 (the feature flags queried
     below) cannot be requested.  */
68169689Skan  if (eax == 0)
69169689Skan    return;
70169689Skan
  /* Query leaf 1; only the EDX feature bits are examined afterwards.  */
71169689Skan  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
72169689Skan		: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
73169689Skan		: "0" (1));
74169689Skan
  /* MXCSR is only read and written when the SSE feature bit is set.  */
75169689Skan  if (edx & SSE)
76169689Skan    {
77169689Skan      unsigned int mxcsr = __builtin_ia32_stmxcsr ();
78169689Skan
      /* FTZ is requested whenever SSE is present; no further check.  */
79169689Skan      mxcsr |= MXCSR_FTZ;
80169689Skan
      /* The DAZ probe below executes fxsave, so it is only attempted
	 when the FXSAVE feature bit is set.  */
81169689Skan      if (edx & FXSAVE)
82169689Skan	{
83169689Skan	  /* Check if DAZ is available.  */
	  /* Buffer that fxsave stores into; only mxcsr_mask is read
	     afterwards.  Assuming a 4-byte long int (as on i386) the
	     struct is 512 bytes with mxcsr_mask at byte offset 28.  */
84169689Skan	  struct
85169689Skan	    {
86169689Skan	      unsigned short int cwd;
87169689Skan	      unsigned short int swd;
88169689Skan	      unsigned short int twd;
89169689Skan	      unsigned short int fop;
90169689Skan	      long int fip;
91169689Skan	      long int fcs;
92169689Skan	      long int foo;
93169689Skan	      long int fos;
94169689Skan	      long int mxcsr;
95169689Skan	      long int mxcsr_mask;
96169689Skan	      long int st_space[32];
97169689Skan	      long int xmm_space[32];
98169689Skan	      long int padding[56];
99169689Skan	    } __attribute__ ((aligned (16))) fxsave;
100169689Skan
	  /* Zero the buffer first.  NOTE(review): presumably so that
	     mxcsr_mask reads as 0 (DAZ bit clear) if fxsave leaves that
	     field unwritten -- confirm against the processor manual.  */
101169689Skan	  __builtin_memset (&fxsave, 0, sizeof (fxsave));
102169689Skan
103169689Skan	  asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));
104169689Skan
	  /* Only request DAZ when the stored mask has the DAZ bit set.  */
105169689Skan	  if (fxsave.mxcsr_mask & MXCSR_DAZ)
106169689Skan	    mxcsr |= MXCSR_DAZ;
107169689Skan	}
108169689Skan
      /* Write back the original MXCSR value plus the bits added above.  */
109169689Skan      __builtin_ia32_ldmxcsr (mxcsr);
110169689Skan    }
111169689Skan#else
  /* 64-bit build: set both bits without any feature detection.  */
112169689Skan  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
113169689Skan  mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
114169689Skan  __builtin_ia32_ldmxcsr (mxcsr);
115169689Skan#endif
116169689Skan}
117