/*
 * Copyright (C) 2005 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * In addition to the permissions in the GNU General Public License, the
 * Free Software Foundation gives you unlimited permission to link the
 * compiled version of this file with other programs, and to distribute
 * those programs without any restriction coming from the use of this
 * file. (The General Public License restrictions do apply in other
 * respects; for example, they cover modification of the file, and
 * distribution when not linked into another program.)
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * As a special exception, if you link this library with files
 * compiled with GCC to produce an executable, this does not cause
 * the resulting executable to be covered by the GNU General Public License.
 * This exception does not however invalidate any other reasons why
 * the executable file might be covered by the GNU General Public License.
 */

/* Bits OR-ed into the MXCSR value by set_fast_math.  */
#define MXCSR_DAZ (1 << 6)	/* Enable denormals are zero mode */
#define MXCSR_FTZ (1 << 15)	/* Enable flush to zero mode */

/* Feature bits tested against the EDX value that cpuid returns for
   input EAX = 1.  */
#define FXSAVE (1 << 24)
#define SSE (1 << 25)

/* Startup hook (runs as a constructor): turn on flush-to-zero and, where
   the hardware reports support for it, denormals-are-zero in MXCSR.

   On x86-64 both bits are set unconditionally.  On 32-bit x86 the
   function first verifies that cpuid can be used, that SSE is present,
   and (through the MXCSR mask stored by fxsave) that DAZ may be set; it
   returns without touching MXCSR when cpuid or SSE is missing.  */
static void __attribute__((constructor))
#ifndef __x86_64__
/* The i386 ABI guarantees only 4-byte stack alignment, whereas the
   fxsave save area below asks for 16-byte alignment; realign the stack
   on entry so that request is honoured.  See PR27537 and PR28621.  */
__attribute__ ((force_align_arg_pointer))
#endif
set_fast_math (void)
{
#ifdef __x86_64__
  /* Every 64-bit target has SSE and DAZ, so no probing is required.  */
  __builtin_ia32_ldmxcsr (__builtin_ia32_stmxcsr () | MXCSR_DAZ | MXCSR_FTZ);
#else
  unsigned int flags_toggled, flags_saved;
  unsigned int eax, ebx, ecx, edx;
  unsigned int csr;

  /* cpuid is usable only if bit 0x00200000 of EFLAGS can be flipped.
     Save EFLAGS, try to invert that bit, read EFLAGS back, and finally
     restore the saved value.  */
  __asm__ __volatile__ ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
			"pushl %0; popfl; pushfl; popl %0; popfl"
			: "=&r" (flags_toggled), "=&r" (flags_saved)
			: "i" (0x00200000));

  if (!((flags_toggled ^ flags_saved) & 0x00200000))
    return;

  /* Leaf 0 reports the highest supported cpuid input value in EAX.
     %ebx is exchanged with a scratch register around the instruction so
     that it holds its original value afterwards (presumably because
     %ebx may be reserved, e.g. as the PIC register).  */
  __asm__ __volatile__ ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
			: "0" (0));

  if (eax == 0)
    return;

  /* Leaf 1: the feature flags of interest come back in EDX.  */
  __asm__ __volatile__ ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
			: "0" (1));

  /* Without SSE there is no MXCSR to modify.  */
  if (!(edx & SSE))
    return;

  csr = __builtin_ia32_stmxcsr () | MXCSR_FTZ;

  if (edx & FXSAVE)
    {
      /* Save area written by fxsave; only mxcsr_mask is inspected.  */
      struct
      {
	unsigned short int cwd;
	unsigned short int swd;
	unsigned short int twd;
	unsigned short int fop;
	long int fip;
	long int fcs;
	long int foo;
	long int fos;
	long int mxcsr;
	long int mxcsr_mask;
	long int st_space[32];
	long int xmm_space[32];
	long int padding[56];
      } __attribute__ ((aligned (16))) save_area;

      /* Start from an all-zero image so that a mask field the CPU leaves
	 unwritten reads as "DAZ not available".  */
      __builtin_memset (&save_area, 0, sizeof (save_area));

      __asm__ __volatile__ ("fxsave %0" : "=m" (save_area) : "m" (save_area));

      /* Request DAZ only when the mask says the bit may be set.  */
      if (save_area.mxcsr_mask & MXCSR_DAZ)
	csr |= MXCSR_DAZ;
    }

  __builtin_ia32_ldmxcsr (csr);
#endif
}