/* ieeefp.h revision 144968 */
/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 Andrew Moore, Talke Studio
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93
 * $FreeBSD: head/sys/amd64/include/ieeefp.h 144968 2005-04-12 23:12:00Z jhb $
 */

/*
 *	IEEE floating point type and constant definitions.
 */

#ifndef _MACHINE_IEEEFP_H_
#define _MACHINE_IEEEFP_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

/*
 * FP rounding modes
 */
typedef enum {
	FP_RN=0,	/* round to nearest */
	FP_RM,		/* round down to minus infinity */
	FP_RP,		/* round up to plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;

/*
 * FP precision modes
 */
typedef enum {
	FP_PS=0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;

#define fp_except_t	int

/*
 * FP exception masks
 */
#define FP_X_INV	0x01	/* invalid operation */
#define FP_X_DNML	0x02	/* denormal */
#define FP_X_DZ		0x04	/* zero divide */
#define FP_X_OFL	0x08	/* overflow */
#define FP_X_UFL	0x10	/* underflow */
#define FP_X_IMP	0x20	/* (im)precision */
#define FP_X_STK	0x40	/* stack fault */

/*
 * FP registers
 */
#define FP_MSKS_REG	0	/* exception masks */
#define FP_PRC_REG	0	/* precision */
#define FP_RND_REG	0	/* direction */
#define FP_STKY_REG	1	/* sticky flags */

/*
 * FP register bit field masks
 */
#define FP_MSKS_FLD	0x3f	/* exception masks field */
#define FP_PRC_FLD	0x300	/* precision control field */
#define FP_RND_FLD	0xc00	/* round control field */
#define FP_STKY_FLD	0x3f	/* sticky flags field */

/*
 * SSE mxcsr register bit field masks
 */
#define	SSE_STKY_FLD	0x3f	/* exception flags */
#define	SSE_DAZ_FLD	0x40	/* Denormals are zero */
#define	SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define	SSE_RND_FLD	0x6000	/* rounding control */
#define	SSE_FZ_FLD	0x8000	/* flush to zero on underflow */

/*
 * FP register bit field offsets
 */
#define FP_MSKS_OFF	0	/* exception masks offset */
#define FP_PRC_OFF	8	/* precision control offset */
#define FP_RND_OFF	10	/* round control offset */
#define FP_STKY_OFF	0	/* sticky flags offset */

/*
 * SSE mxcsr register bit field offsets
 */
#define	SSE_STKY_OFF	0	/* exception flags offset */
#define	SSE_DAZ_OFF	6	/* denormals are zero offset */
#define	SSE_MSKS_OFF	7	/* exception masks offset */
#define	SSE_RND_OFF	13	/* rounding control offset */
#define	SSE_FZ_OFF	15	/* flush to zero offset */

#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__) \
    && !defined(__cplusplus)

/*
 * Thin wrappers around the x87 / SSE environment load and store
 * instructions.  Each takes the address of the object to load from or
 * store into.
 */
#define	__fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)))
#define	__fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)))
#define	__fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	__fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	__fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	__ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define	__stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))

/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */

/*
 * Get the rounding control.
 * Only the x87 control word is consulted.
 */
static __inline__ fp_rnd_t
__fpgetround(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_RND_FLD) >> FP_RND_OFF);
}

/*
 * Set the rounding control to _m in both the x87 control word and the
 * SSE mxcsr register.  Returns the rounding mode that was previously
 * set in the x87 control word.
 */
static __inline__ fp_rnd_t
__fpsetround(fp_rnd_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_rnd_t _p;

	/* x87: remember the old mode, then replace the field. */
	__fnstcw(&_cw);
	_p = (_cw & FP_RND_FLD) >> FP_RND_OFF;
	_cw &= ~FP_RND_FLD;
	_cw |= (_m << FP_RND_OFF) & FP_RND_FLD;
	__fldcw(&_cw);

	/* SSE: shadow the same mode into mxcsr. */
	__stmxcsr(&_mxcsr);
	_mxcsr &= ~SSE_RND_FLD;
	_mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * Get precision for fadd/fsub/fsqrt etc x87 instructions
 * There is no equivalent SSE mode or control.
 */
static __inline__ fp_prec_t
__fpgetprec(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
}

/*
 * Set precision for fadd/fsub/fsqrt etc x87 instructions
 * There is no equivalent SSE mode or control, so only the x87 control
 * word is written.  Returns the previous precision mode.
 *
 * The argument is a precision mode (fp_prec_t), matching the
 * fpsetprec() prototype declared for the non-inline case.
 */
static __inline__ fp_prec_t
__fpsetprec(fp_prec_t _m)
{
	unsigned short _cw;
	fp_prec_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_PRC_FLD) >> FP_PRC_OFF;
	_cw &= ~FP_PRC_FLD;
	_cw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
	__fldcw(&_cw);
	return (_p);
}

/*
 * Look at the exception masks
 * Note that x87 masks are inverse of the fp*() functions
 * API.  ie: mask = 1 means disable for x87 and SSE, but
 * for the fp*() api, mask = 1 means enabled.
 *
 * Only the x87 control word is consulted.
 */
static __inline__ fp_except_t
__fpgetmask(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((~_cw) & FP_MSKS_FLD);
}

/*
 * Set the exception masks from _m (fp*() sense: bit set = enabled) in
 * both the x87 control word and the SSE mxcsr register.  Returns the
 * previous mask as read from the x87 control word, in the same sense.
 */
static __inline__ fp_except_t
__fpsetmask(fp_except_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_except_t _p;

	/* x87: the hardware bits are the complement of the API bits. */
	__fnstcw(&_cw);
	_p = (~_cw) & FP_MSKS_FLD;
	_cw &= ~FP_MSKS_FLD;
	_cw |= (~_m) & FP_MSKS_FLD;
	__fldcw(&_cw);

	/* SSE: shadow the same (inverted) mask into mxcsr. */
	__stmxcsr(&_mxcsr);
	/* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
	_mxcsr &= ~SSE_MSKS_FLD;
	_mxcsr |= ((~_m) << SSE_MSKS_OFF) & SSE_MSKS_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * See which sticky exceptions are pending.
 * The result merges the x87 status word flags with the SSE mxcsr
 * exception flags.  The flags are only read here; nothing is cleared.
 */
static __inline__ fp_except_t
__fpgetsticky(void)
{
	unsigned short _sw;
	unsigned int _mxcsr;
	fp_except_t _ex;

	__fnstsw(&_sw);
	_ex = _sw & FP_STKY_FLD;
	__stmxcsr(&_mxcsr);
	_ex |= _mxcsr & SSE_STKY_FLD;
	return (_ex);
}

#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE__ && !__cplusplus */

#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) \
    && defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__)

/* Map the public fp*() names onto the inline implementations. */
#define	fpgetround()	__fpgetround()
#define	fpsetround(_m)	__fpsetround(_m)
#define	fpgetprec()	__fpgetprec()
#define	fpsetprec(_m)	__fpsetprec(_m)
#define	fpgetmask()	__fpgetmask()
#define	fpsetmask(_m)	__fpsetmask(_m)
#define	fpgetsticky()	__fpgetsticky()

/* Suppress prototypes in the MI header. */
#define	_IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
         && __CC_SUPPORTS___INLINE__ */

/* Augment the userland declarations */
__BEGIN_DECLS
extern fp_prec_t fpgetprec(void);
extern fp_prec_t fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
          && __CC_SUPPORTS___INLINE__ */

#endif /* !_MACHINE_IEEEFP_H_ */