1130144Sdas/*-
2143708Sdas * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3130144Sdas * All rights reserved.
4130144Sdas *
5130144Sdas * Redistribution and use in source and binary forms, with or without
6130144Sdas * modification, are permitted provided that the following conditions
7130144Sdas * are met:
8130144Sdas * 1. Redistributions of source code must retain the above copyright
9130144Sdas *    notice, this list of conditions and the following disclaimer.
10130144Sdas * 2. Redistributions in binary form must reproduce the above copyright
11130144Sdas *    notice, this list of conditions and the following disclaimer in the
12130144Sdas *    documentation and/or other materials provided with the distribution.
13130144Sdas *
14130144Sdas * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15130144Sdas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16130144Sdas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17130144Sdas * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18130144Sdas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19130144Sdas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20130144Sdas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21130144Sdas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22130144Sdas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23130144Sdas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24130144Sdas * SUCH DAMAGE.
25130144Sdas *
26130144Sdas * $FreeBSD$
27130144Sdas */
28130144Sdas
29130144Sdas#ifndef	_FENV_H_
30130144Sdas#define	_FENV_H_
31130144Sdas
32130144Sdas#include <sys/cdefs.h>
33130144Sdas#include <sys/_types.h>
34130144Sdas
/*
 * Saved floating-point environment.
 *
 * sizeof(fenv_t) has to stay what it was in FreeBSD 5.3 for binary
 * compatibility, so the 32-bit MXCSR is not given a field of its own.
 * It is split into two halfwords, stored in slots that were previously
 * reserved; use __get_mxcsr()/__set_mxcsr() to access it.
 */
typedef struct {
	__uint16_t	__control;	/* x87 control word */
	__uint16_t	__mxcsr_hi;	/* MXCSR, upper 16 bits */
	__uint16_t	__status;	/* x87 status word */
	__uint16_t	__mxcsr_lo;	/* MXCSR, lower 16 bits */
	__uint32_t	__tag;		/* x87 tag word */
	char		__other[16];	/* rest of the x87 environment image */
} fenv_t;
47130144Sdas
/*
 * Accessors for the MXCSR image that is split across the __mxcsr_hi and
 * __mxcsr_lo halfwords of a fenv_t.
 *
 * __get_mxcsr(env) yields the reassembled 32-bit value as a __uint32_t.
 * The high half is widened to __uint32_t before it is shifted: shifting
 * the int-promoted __uint16_t left by 16 would overflow a signed int
 * whenever bit 15 of __mxcsr_hi is set.  env is evaluated twice.
 *
 * __set_mxcsr(env, x) stores the low 32 bits of x into the two halfwords.
 * x is evaluated exactly once; env must be a modifiable lvalue and is
 * evaluated twice.
 */
#define	__get_mxcsr(env)	(((__uint32_t)(env).__mxcsr_hi << 16) |	\
				 (__uint32_t)(env).__mxcsr_lo)
#define	__set_mxcsr(env, x)	do {					\
	const __uint32_t __mxcsr_val_ = (__uint32_t)(x);		\
	(env).__mxcsr_hi = (__uint16_t)(__mxcsr_val_ >> 16);		\
	(env).__mxcsr_lo = (__uint16_t)__mxcsr_val_;			\
} while (0)
54143769Sdas
/* Holds a saved set of exception flags, as written by fegetexceptflag(). */
typedef __uint16_t fexcept_t;
56130144Sdas
/*
 * Exception flags.  The inline functions in this header apply these same
 * masks to both the x87 status word and the MXCSR.
 */
#define	FE_INVALID	0x01
#define	FE_DENORMAL	0x02
#define	FE_DIVBYZERO	0x04
#define	FE_OVERFLOW	0x08
#define	FE_UNDERFLOW	0x10
#define	FE_INEXACT	0x20

/* Union of every exception flag defined above. */
#define	FE_ALL_EXCEPT	(FE_INVALID | FE_DENORMAL | FE_DIVBYZERO | \
			 FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
66130144Sdas
/*
 * Rounding modes, positioned as they appear in the x87 control word
 * (fegetround() simply masks the control word with _ROUND_MASK).
 */
#define	FE_TONEAREST	0x0000
#define	FE_DOWNWARD	0x0400
#define	FE_UPWARD	0x0800
#define	FE_TOWARDZERO	0x0c00

/* Every bit that takes part in selecting the rounding mode. */
#define	_ROUND_MASK	(FE_TOWARDZERO | FE_UPWARD | \
			 FE_DOWNWARD | FE_TONEAREST)
74130144Sdas
/*
 * Translation between x87 control-word bit positions and the SSE unit's
 * control word: shift a rounding-mode value left by _SSE_ROUND_SHIFT and
 * an exception-mask value left by _SSE_EMASK_SHIFT to obtain the
 * corresponding SSE bits.
 */
#define	_SSE_ROUND_SHIFT	3	/* rounding control */
#define	_SSE_EMASK_SHIFT	7	/* exception masks */
82143769Sdas
83163358Sbde__BEGIN_DECLS
84163358Sbde
/*
 * SSE availability.
 *
 * The answer is probed once and then cached in __has_sse.  __HAS_SSE()
 * only calls __test_sse() while the cached state is still __SSE_UNK.
 * When the compiler itself targets SSE (__SSE__ is defined), no run-time
 * check is made at all.
 */
enum __sse_support {
	__SSE_YES = 0,
	__SSE_NO = 1,
	__SSE_UNK = 2
};
extern enum __sse_support __has_sse;
int __test_sse(void);

#ifndef __SSE__
#define	__HAS_SSE()	(__has_sse == __SSE_YES ||			\
			 (__has_sse == __SSE_UNK && __test_sse()))
#else
#define	__HAS_SSE()	1
#endif
95143769Sdas
96130144Sdas/* Default floating-point environment */
97130144Sdasextern const fenv_t	__fe_dfl_env;
98130144Sdas#define	FE_DFL_ENV	(&__fe_dfl_env)
99130144Sdas
/*
 * One-instruction inline-assembly wrappers.
 *
 * The "load" wrappers take the object itself as a memory operand; the
 * "store" wrappers take a pointer to the object the instruction writes.
 */

/* x87 control word. */
#define	__fldcw(__word)		__asm __volatile("fldcw %0" : : "m" (__word))
#define	__fnstcw(__wordp)	__asm __volatile("fnstcw %0" : "=m" (*(__wordp)))

/*
 * x87 environment.  __fldenvx() is __fldenv() with the whole x87
 * register stack listed as clobbered.
 */
#define	__fldenv(__image)	__asm __volatile("fldenv %0" : : "m" (__image))
#define	__fldenvx(__image)	__asm __volatile("fldenv %0" : : "m" (__image) \
				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
				"st(5)", "st(6)", "st(7)")
#define	__fnstenv(__imagep)	__asm __volatile("fnstenv %0" : "=m" (*(__imagep)))

/* x87 status word and exception state. */
#define	__fnclex()		__asm __volatile("fnclex")
#define	__fnstsw(__wordp)	__asm __volatile("fnstsw %0" : "=am" (*(__wordp)))
#define	__fwait()		__asm __volatile("fwait")

/* SSE control/status register. */
#define	__ldmxcsr(__word)	__asm __volatile("ldmxcsr %0" : : "m" (__word))
#define	__stmxcsr(__wordp)	__asm __volatile("stmxcsr %0" : "=m" (*(__wordp)))
112130144Sdas
113130144Sdasstatic __inline int
114130144Sdasfeclearexcept(int __excepts)
115130144Sdas{
116130144Sdas	fenv_t __env;
117203441Skib	__uint32_t __mxcsr;
118130144Sdas
119130144Sdas	if (__excepts == FE_ALL_EXCEPT) {
120130144Sdas		__fnclex();
121130144Sdas	} else {
122130144Sdas		__fnstenv(&__env);
123130144Sdas		__env.__status &= ~__excepts;
124130144Sdas		__fldenv(__env);
125130144Sdas	}
126143769Sdas	if (__HAS_SSE()) {
127143769Sdas		__stmxcsr(&__mxcsr);
128143769Sdas		__mxcsr &= ~__excepts;
129143769Sdas		__ldmxcsr(__mxcsr);
130143769Sdas	}
131130144Sdas	return (0);
132130144Sdas}
133130144Sdas
134130144Sdasstatic __inline int
135130144Sdasfegetexceptflag(fexcept_t *__flagp, int __excepts)
136130144Sdas{
137203441Skib	__uint32_t __mxcsr;
138203441Skib	__uint16_t __status;
139130144Sdas
140130144Sdas	__fnstsw(&__status);
141143769Sdas	if (__HAS_SSE())
142143769Sdas		__stmxcsr(&__mxcsr);
143143769Sdas	else
144143769Sdas		__mxcsr = 0;
145143769Sdas	*__flagp = (__mxcsr | __status) & __excepts;
146130144Sdas	return (0);
147130144Sdas}
148130144Sdas
149143769Sdasint fesetexceptflag(const fexcept_t *__flagp, int __excepts);
150143769Sdasint feraiseexcept(int __excepts);
151130144Sdas
/*
 * Return the subset of __excepts that is currently raised, looking at the
 * x87 status word and, when SSE is available, the MXCSR as well.
 */
static __inline int
fetestexcept(int __excepts)
{
	__uint16_t __x87_sw;
	__uint32_t __sse_csr = 0;	/* contributes nothing without SSE */

	__fnstsw(&__x87_sw);
	if (__HAS_SSE())
		__stmxcsr(&__sse_csr);

	return ((__x87_sw | __sse_csr) & __excepts);
}
165130144Sdas
166130144Sdasstatic __inline int
167130144Sdasfegetround(void)
168130144Sdas{
169203441Skib	__uint16_t __control;
170130144Sdas
171143769Sdas	/*
172143769Sdas	 * We assume that the x87 and the SSE unit agree on the
173143769Sdas	 * rounding mode.  Reading the control word on the x87 turns
174143769Sdas	 * out to be about 5 times faster than reading it on the SSE
175143769Sdas	 * unit on an Opteron 244.
176143769Sdas	 */
177130144Sdas	__fnstcw(&__control);
178130144Sdas	return (__control & _ROUND_MASK);
179130144Sdas}
180130144Sdas
181130144Sdasstatic __inline int
182130144Sdasfesetround(int __round)
183130144Sdas{
184203441Skib	__uint32_t __mxcsr;
185203441Skib	__uint16_t __control;
186130144Sdas
187130144Sdas	if (__round & ~_ROUND_MASK)
188130144Sdas		return (-1);
189143769Sdas
190130144Sdas	__fnstcw(&__control);
191130144Sdas	__control &= ~_ROUND_MASK;
192130144Sdas	__control |= __round;
193130144Sdas	__fldcw(__control);
194130144Sdas
195143769Sdas	if (__HAS_SSE()) {
196143769Sdas		__stmxcsr(&__mxcsr);
197143769Sdas		__mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
198143769Sdas		__mxcsr |= __round << _SSE_ROUND_SHIFT;
199143769Sdas		__ldmxcsr(__mxcsr);
200143769Sdas	}
201130144Sdas
202130144Sdas	return (0);
203130144Sdas}
204130144Sdas
205143769Sdasint fegetenv(fenv_t *__envp);
206143769Sdasint feholdexcept(fenv_t *__envp);
207130144Sdas
208130144Sdasstatic __inline int
209130144Sdasfesetenv(const fenv_t *__envp)
210130144Sdas{
211143769Sdas	fenv_t __env = *__envp;
212203441Skib	__uint32_t __mxcsr;
213130144Sdas
214143769Sdas	__mxcsr = __get_mxcsr(__env);
215143769Sdas	__set_mxcsr(__env, 0xffffffff);
216165841Sdas	/*
217165841Sdas	 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
218165841Sdas	 * instruction clobbers the i387 register stack.  This happens because
219165841Sdas	 * we restore the tag word from the saved environment.  Normally, this
220165841Sdas	 * would happen anyway and we wouldn't care, because the ABI allows
221165841Sdas	 * function calls to clobber the i387 regs.  However, fesetenv() is
222165841Sdas	 * inlined, so we need to be more careful.
223165841Sdas	 */
224165841Sdas	__fldenvx(__env);
225143769Sdas	if (__HAS_SSE())
226143769Sdas		__ldmxcsr(__mxcsr);
227130144Sdas	return (0);
228130144Sdas}
229130144Sdas
230143769Sdasint feupdateenv(const fenv_t *__envp);
231130144Sdas
232130144Sdas#if __BSD_VISIBLE
233130144Sdas
/* Declared only; this header provides no inline body for these two. */
int	feenableexcept(int __mask);
int	fedisableexcept(int __mask);
236130144Sdas
237130144Sdasstatic __inline int
238143708Sdasfegetexcept(void)
239143708Sdas{
240203441Skib	__uint16_t __control;
241143708Sdas
242143769Sdas	/*
243143769Sdas	 * We assume that the masks for the x87 and the SSE unit are
244143769Sdas	 * the same.
245143769Sdas	 */
246143708Sdas	__fnstcw(&__control);
247143708Sdas	return (~__control & FE_ALL_EXCEPT);
248143708Sdas}
249143708Sdas
250130144Sdas#endif /* __BSD_VISIBLE */
251130144Sdas
252130144Sdas__END_DECLS
253130144Sdas
254130144Sdas#endif	/* !_FENV_H_ */
255