/* ieeefp.h revision 117863 */
/*-
 * Copyright (c) 1990 Andrew Moore, Talke Studio
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#) ieeefp.h	1.0 (Berkeley) 9/23/93
 * $FreeBSD: head/sys/amd64/include/ieeefp.h 117863 2003-07-22 06:44:54Z peter $
 */

/*
 *	IEEE floating point type and constant definitions.
 */

#ifndef _MACHINE_IEEEFP_H_
#define _MACHINE_IEEEFP_H_

/*
 * FP rounding modes
 */
typedef enum {
	FP_RN=0,	/* round to nearest */
	FP_RM,		/* round down to minus infinity */
	FP_RP,		/* round up to plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;

/*
 * FP precision modes
 */
typedef enum {
	FP_PS=0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;

#define fp_except_t	int

/*
 * FP exception masks
 */
#define FP_X_INV	0x01	/* invalid operation */
#define FP_X_DNML	0x02	/* denormal */
#define FP_X_DZ		0x04	/* zero divide */
#define FP_X_OFL	0x08	/* overflow */
#define FP_X_UFL	0x10	/* underflow */
#define FP_X_IMP	0x20	/* (im)precision */
#define FP_X_STK	0x40	/* stack fault */

/*
 * FP registers
 */
#define FP_MSKS_REG	0	/* exception masks */
#define FP_PRC_REG	0	/* precision */
#define FP_RND_REG	0	/* direction */
#define FP_STKY_REG	1	/* sticky flags */

/*
 * FP register bit field masks
 */
#define FP_MSKS_FLD	0x3f	/* exception masks field */
#define FP_PRC_FLD	0x300	/* precision control field */
#define FP_RND_FLD	0xc00	/* round control field */
#define FP_STKY_FLD	0x3f	/* sticky flags field */

/*
 * SSE mxcsr register bit field masks
 */
#define	SSE_STKY_FLD	0x3f	/* exception flags */
#define	SSE_DAZ_FLD	0x40	/* Denormals are zero */
#define	SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define	SSE_RND_FLD	0x6000	/* rounding control */
#define	SSE_FZ_FLD	0x8000	/* flush to zero on underflow */

/*
 * FP register bit field offsets
 */
#define FP_MSKS_OFF	0	/* exception masks offset */
#define FP_PRC_OFF	8	/* precision control offset */
#define FP_RND_OFF	10	/* round control offset */
#define FP_STKY_OFF	0	/* sticky flags offset */

/*
 * SSE mxcsr register bit field offsets
 */
#define	SSE_STKY_OFF	0	/* exception flags offset */
#define	SSE_DAZ_OFF	6	/* DAZ exception mask offset */
#define	SSE_MSKS_OFF	7	/* other exception masks offset */
#define	SSE_RND_OFF	13	/* rounding control offset */
#define	SSE_FZ_OFF	15	/* flush to zero offset */

#if defined(__GNUC__) && !defined(__cplusplus)

/*
 * Thin wrappers around the x87/SSE control and status instructions.
 * Each takes a pointer to the memory image to load from or store to.
 *
 * The "load" instructions (fldenv, fldcw, ldmxcsr) only READ memory, so
 * their operand is an asm input; declaring it as an output would let the
 * compiler discard the stores that prepared the value.  The "store"
 * instructions (fnstenv, fnstcw, fnstsw, stmxcsr) WRITE memory, so their
 * operand is an asm output.
 *
 * The environment image used with fldenv/fnstenv is larger than the
 * single object *(addr) named by the operand (callers in this file pass
 * an array of 7 words), so those two also carry a "memory" clobber to
 * keep the compiler from caching or dropping accesses to the other words.
 */
#define	__fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)) : "memory")
#define	__fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)) : : "memory")
#define	__fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	__fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	__fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	__ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define	__stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))

/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */

/*
 * Get rounding control.
 * Returns the rounding mode currently held in the x87 control word.
 */
static __inline__ fp_rnd_t
__fpgetround(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF));
}

/*
 * Set rounding control.
 * Writes _m into the rounding field of both the x87 control word and the
 * SSE mxcsr register.  Returns the rounding mode the x87 control word
 * held before the change.
 */
static __inline__ fp_rnd_t
__fpsetround(fp_rnd_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_rnd_t _p;

	__fnstcw(&_cw);
	_p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF);
	_cw &= ~FP_RND_FLD;
	_cw |= ((unsigned int)_m << FP_RND_OFF) & FP_RND_FLD;
	__fldcw(&_cw);
	/* Shadow the same mode into the SSE state. */
	__stmxcsr(&_mxcsr);
	_mxcsr &= ~SSE_RND_FLD;
	_mxcsr |= ((unsigned int)_m << SSE_RND_OFF) & SSE_RND_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * Get precision for fadd/fsub/fsqrt etc x87 instructions.
 * There is no equivalent SSE mode or control.
 */
static __inline__ fp_prec_t
__fpgetprec(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF));
}

/*
 * Set precision for fadd/fsub/fsqrt etc x87 instructions.
 * Only the x87 control word is touched.  Returns the precision mode
 * that was in effect before the change.
 */
static __inline__ fp_prec_t
__fpsetprec(fp_prec_t _m)
{
	unsigned short _cw;
	fp_prec_t _p;

	__fnstcw(&_cw);
	_p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
	_cw &= ~FP_PRC_FLD;
	_cw |= ((unsigned int)_m << FP_PRC_OFF) & FP_PRC_FLD;
	__fldcw(&_cw);
	return (_p);
}

/*
 * Look at the exception masks
 * Note that x87 masks are inverse of the fp*() functions
 * API.  ie: mask = 1 means disable for x87 and SSE, but
 * for the fp*() api, mask = 1 means enabled.
 */
static __inline__ fp_except_t
__fpgetmask(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((~_cw) & FP_MSKS_FLD);
}

/*
 * Set the exception masks (fp*() API sense: bit set = exception enabled).
 * The inverted mask is written to the x87 control word and shadowed into
 * the mxcsr mask field.  Returns the previous mask, taken from the x87
 * control word, in the fp*() API sense.
 */
static __inline__ fp_except_t
__fpsetmask(fp_except_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	unsigned int _hwmask;
	fp_except_t _p;

	/* Invert in unsigned arithmetic; shifting a negative int is undefined. */
	_hwmask = ~(unsigned int)_m;

	__fnstcw(&_cw);
	_p = (~_cw) & FP_MSKS_FLD;
	_cw &= ~FP_MSKS_FLD;
	_cw |= _hwmask & FP_MSKS_FLD;
	__fldcw(&_cw);
	__stmxcsr(&_mxcsr);
	/* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
	_mxcsr &= ~SSE_MSKS_FLD;
	_mxcsr |= (_hwmask << SSE_MSKS_OFF) & SSE_MSKS_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * See which sticky exceptions are pending.
 * Returns the union of the x87 status word flags and the mxcsr flags;
 * nothing is cleared.
 */
static __inline__ fp_except_t
__fpgetsticky(void)
{
	unsigned short _sw;
	unsigned int _mxcsr;
	fp_except_t _ex;

	__fnstsw(&_sw);
	_ex = _sw & FP_STKY_FLD;
	__stmxcsr(&_mxcsr);
	_ex |= _mxcsr & SSE_STKY_FLD;
	return (_ex);
}

/*
 * Note that this should really be called fpresetsticky().
 * Clears the sticky flags named in _m from both the x87 status word and
 * the mxcsr register.  Returns the x87 flags selected by _m merged with
 * the mxcsr sticky flags, both sampled before the clear.
 * NOTE(review): the mxcsr part of the return value is not filtered by
 * _m while the x87 part is -- confirm which behaviour callers expect.
 */
static __inline__ fp_except_t
__fpsetsticky(fp_except_t _m)
{
	unsigned _env[7];
	unsigned int _mxcsr;
	fp_except_t _p;

	__fnstenv(_env);
	_p = _env[FP_STKY_REG] & _m;
	__stmxcsr(&_mxcsr);
	_p |= _mxcsr & SSE_STKY_FLD;
	_env[FP_STKY_REG] &= ~_m;
	__fldenv(_env);
	/*
	 * Only touch the mxcsr flag bits: FP_X_STK (0x40) has the same value
	 * as SSE_DAZ_FLD, so an unrestricted clear would turn off the DAZ
	 * control bit whenever the x87 stack-fault flag is reset.
	 */
	_mxcsr &= ~((unsigned int)_m & SSE_STKY_FLD);
	__ldmxcsr(&_mxcsr);
	return (_p);
}

#endif /* __GNUC__ && !__cplusplus */

#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) && defined(__GNUC__)

/* Public names map straight onto the inline implementations above. */
#define	fpgetround()	__fpgetround()
#define	fpsetround(_m)	__fpsetround(_m)
#define	fpgetprec()	__fpgetprec()
#define	fpsetprec(_m)	__fpsetprec(_m)
#define	fpgetmask()	__fpgetmask()
#define	fpsetmask(_m)	__fpsetmask(_m)
#define	fpgetsticky()	__fpgetsticky()
#define	fpsetsticky(_m)	__fpsetsticky(_m)

/* Suppress prototypes in the MI header. */
#define	_IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

/* Augment the userland declarations */
__BEGIN_DECLS
extern fp_prec_t fpgetprec(void);
extern fp_prec_t fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

#endif /* !_MACHINE_IEEEFP_H_ */