1/*	$OpenBSD: fenv.c,v 1.7 2022/12/27 17:10:07 jmc Exp $	*/
2/*	$NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $	*/
3
4/*-
5 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/sysctl.h>
33#include <machine/cpu.h>
34#include <machine/npx.h>
35
36#include <fenv.h>
37
38/*
39 * The following constant represents the default floating-point environment
40 * (that is, the one installed at program startup) and has type pointer to
41 * const-qualified fenv_t.
42 *
43 * It can be used as an argument to the functions within the <fenv.h> header
44 * that manage the floating-point environment, namely fesetenv() and
45 * feupdateenv().
46 *
47 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
48 * RESERVED.
49 */
50fenv_t __fe_dfl_env = {
51	{
52		0xffff0000 | __INITIAL_NPXCW__,	/* Control word register */
53		0xffff0000,			/* Status word register */
54		0xffffffff,			/* Tag word register */
55		{
56			0x00000000,
57			0x00000000,
58			0x00000000,
59			0xffff0000
60		}
61	},
62	__INITIAL_MXCSR__		/* MXCSR register */
63};
64
65/*
66 * Test for SSE support on this processor.
67 *
68 * We need to use ldmxcsr/stmxcsr to get correct results if any part
69 * of the program was compiled to use SSE floating-point, but we can't
70 * use SSE on older processors.
71 *
72 * In order to do so, we need to query the processor capabilities via the CPUID
73 * instruction. We can make it even simpler though, by querying the machdep.sse
74 * sysctl.
75 */
76static int __HAS_SSE = 0;
77
78static void __test_sse(void) __attribute__ ((constructor));
79
80static void __test_sse(void)
81{
82	size_t oldlen = sizeof(__HAS_SSE);
83	int mib[2] = { CTL_MACHDEP, CPU_SSE };
84	int rv;
85
86	rv = sysctl(mib, 2, &__HAS_SSE, &oldlen, NULL, 0);
87	if (rv == -1)
88		__HAS_SSE = 0;
89}
90
91/*
92 * The feclearexcept() function clears the supported floating-point exceptions
93 * represented by `excepts'.
94 */
95int
96feclearexcept(int excepts)
97{
98	fenv_t fenv;
99	unsigned int mxcsr;
100
101	excepts &= FE_ALL_EXCEPT;
102
103	/* Store the current x87 floating-point environment */
104	__asm__ volatile ("fnstenv %0" : "=m" (fenv));
105
106	/* Clear the requested floating-point exceptions */
107	fenv.__x87.__status &= ~excepts;
108
109	/* Load the x87 floating-point environment */
110	__asm__ volatile ("fldenv %0" : : "m" (fenv));
111
112	/* Same for SSE environment */
113	if (__HAS_SSE) {
114		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
115		mxcsr &= ~excepts;
116		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
117	}
118
119	return (0);
120}
121DEF_STD(feclearexcept);
122
123/*
124 * The fegetexceptflag() function stores an implementation-defined
125 * representation of the states of the floating-point status flags indicated by
126 * the argument excepts in the object pointed to by the argument flagp.
127 */
128int
129fegetexceptflag(fexcept_t *flagp, int excepts)
130{
131	unsigned short status;
132	unsigned int mxcsr = 0;
133
134	excepts &= FE_ALL_EXCEPT;
135
136	/* Store the current x87 status register */
137	__asm__ volatile ("fnstsw %0" : "=am" (status));
138
139	/* Store the MXCSR register */
140	if (__HAS_SSE)
141		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
142
143	/* Store the results in flagp */
144	*flagp = (status | mxcsr) & excepts;
145
146	return (0);
147}
148
/*
 * The feraiseexcept() function raises the supported floating-point exceptions
 * represented by the argument `excepts'.
 *
 * The standard explicitly allows us to execute an instruction that has the
 * exception as a side effect, but we choose to manipulate the status register
 * directly: the requested flags are set through fesetexceptflag() and an
 * fwait is then issued.  Per the Intel SDM, FWAIT checks for pending unmasked
 * x87 exceptions, so a flag that was just set and is unmasked is acted upon
 * here.
 *
 * Bits outside FE_ALL_EXCEPT are ignored.
 *
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	excepts &= FE_ALL_EXCEPT;

	/*
	 * Hand fesetexceptflag() a real fexcept_t object instead of viewing
	 * the int parameter through a casted pointer; the result does not
	 * depend on fexcept_t having the same representation as int.
	 */
	flags = (fexcept_t)excepts;

	fesetexceptflag(&flags, excepts);
	__asm__ volatile ("fwait");

	return (0);
}
DEF_STD(feraiseexcept);
170
171/*
172 * This function sets the floating-point status flags indicated by the argument
173 * `excepts' to the states stored in the object pointed to by `flagp'. It does
174 * NOT raise any floating-point exceptions, but only sets the state of the flags.
175 */
176int
177fesetexceptflag(const fexcept_t *flagp, int excepts)
178{
179	fenv_t fenv;
180	unsigned int mxcsr;
181
182	excepts &= FE_ALL_EXCEPT;
183
184	/* Store the current x87 floating-point environment */
185	__asm__ volatile ("fnstenv %0" : "=m" (fenv));
186
187	/* Set the requested status flags */
188	fenv.__x87.__status &= ~excepts;
189	fenv.__x87.__status |= *flagp & excepts;
190
191	/* Load the x87 floating-point environment */
192	__asm__ volatile ("fldenv %0" : : "m" (fenv));
193
194	/* Same for SSE environment */
195	if (__HAS_SSE) {
196		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
197		mxcsr &= ~excepts;
198		mxcsr |= *flagp & excepts;
199		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
200	}
201
202	return (0);
203}
204DEF_STD(fesetexceptflag);
205
206/*
207 * The fetestexcept() function determines which of a specified subset of the
208 * floating-point exception flags are currently set. The `excepts' argument
209 * specifies the floating-point status flags to be queried.
210 */
211int
212fetestexcept(int excepts)
213{
214	unsigned short status;
215	unsigned int mxcsr = 0;
216
217	excepts &= FE_ALL_EXCEPT;
218
219	/* Store the current x87 status register */
220	__asm__ volatile ("fnstsw %0" : "=am" (status));
221
222	/* Store the MXCSR register state */
223	if (__HAS_SSE)
224		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
225
226	return ((status | mxcsr) & excepts);
227}
228DEF_STD(fetestexcept);
229
230/*
231 * The fegetround() function gets the current rounding direction.
232 */
233int
234fegetround(void)
235{
236	unsigned short control;
237
238	/*
239	 * We assume that the x87 and the SSE unit agree on the
240	 * rounding mode.  Reading the control word on the x87 turns
241	 * out to be about 5 times faster than reading it on the SSE
242	 * unit on an Opteron 244.
243	 */
244	__asm__ volatile ("fnstcw %0" : "=m" (control));
245
246	return (control & _X87_ROUND_MASK);
247}
248DEF_STD(fegetround);
249
250/*
251 * The fesetround() function establishes the rounding direction represented by
252 * its argument `round'. If the argument is not equal to the value of a rounding
253 * direction macro, the rounding direction is not changed.
254 */
255int
256fesetround(int round)
257{
258	unsigned short control;
259	unsigned int mxcsr;
260
261	/* Check whether requested rounding direction is supported */
262	if (round & ~_X87_ROUND_MASK)
263		return (-1);
264
265	/* Store the current x87 control word register */
266	__asm__ volatile ("fnstcw %0" : "=m" (control));
267
268	/* Set the rounding direction */
269	control &= ~_X87_ROUND_MASK;
270	control |= round;
271
272	/* Load the x87 control word register */
273	__asm__ volatile ("fldcw %0" : : "m" (control));
274
275	/* Same for the SSE environment */
276	if (__HAS_SSE) {
277		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
278		mxcsr &= ~(_X87_ROUND_MASK << _SSE_ROUND_SHIFT);
279		mxcsr |= round << _SSE_ROUND_SHIFT;
280		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
281	}
282
283	return (0);
284}
285DEF_STD(fesetround);
286
287/*
288 * The fegetenv() function attempts to store the current floating-point
289 * environment in the object pointed to by envp.
290 */
291int
292fegetenv(fenv_t *envp)
293{
294	/* Store the current x87 floating-point environment */
295	__asm__ volatile ("fnstenv %0" : "=m" (*envp));
296
297	/* Store the MXCSR register state */
298	if (__HAS_SSE)
299		__asm__ volatile ("stmxcsr %0" : "=m" (envp->__mxcsr));
300
301	/*
302	 * When an FNSTENV instruction is executed, all pending exceptions are
303	 * essentially lost (either the x87 FPU status register is cleared or
304	 * all exceptions are masked).
305	 *
306	 * 8.6 X87 FPU EXCEPTION SYNCHRONIZATION -
307	 * Intel(R) 64 and IA-32 Architectures Softare Developer's Manual - Vol1
308	 */
309	__asm__ volatile ("fldcw %0" : : "m" (envp->__x87.__control));
310
311	return (0);
312}
313DEF_STD(fegetenv);
314
315/*
316 * The feholdexcept() function saves the current floating-point environment
317 * in the object pointed to by envp, clears the floating-point status flags, and
318 * then installs a non-stop (continue on floating-point exceptions) mode, if
319 * available, for all floating-point exceptions.
320 */
321int
322feholdexcept(fenv_t *envp)
323{
324	unsigned int mxcsr;
325
326	/* Store the current x87 floating-point environment */
327	__asm__ volatile ("fnstenv %0" : "=m" (*envp));
328
329	/* Clear all exception flags in FPU */
330	__asm__ volatile ("fnclex");
331
332	if (__HAS_SSE) {
333		/* Store the MXCSR register state */
334		__asm__ volatile ("stmxcsr %0" : "=m" (envp->__mxcsr));
335
336		/* Clear exception flags in MXCSR */
337		mxcsr = envp->__mxcsr;
338		mxcsr &= ~FE_ALL_EXCEPT;
339
340		/* Mask all exceptions */
341		mxcsr |= FE_ALL_EXCEPT << _SSE_MASK_SHIFT;
342
343		/* Store the MXCSR register */
344		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
345	}
346
347	return (0);
348}
349DEF_STD(feholdexcept);
350
351/*
352 * The fesetenv() function attempts to establish the floating-point environment
353 * represented by the object pointed to by envp. The argument `envp' points
354 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
355 * floating-point environment macro. The fesetenv() function does not raise
356 * floating-point exceptions, but only installs the state of the floating-point
357 * status flags represented through its argument.
358 */
359int
360fesetenv(const fenv_t *envp)
361{
362	/* Load the x87 floating-point environment */
363	__asm__ volatile ("fldenv %0" : : "m" (*envp));
364
365	/* Store the MXCSR register */
366	if (__HAS_SSE)
367		__asm__ volatile ("ldmxcsr %0" : : "m" (envp->__mxcsr));
368
369	return (0);
370}
371DEF_STD(fesetenv);
372
373/*
374 * The feupdateenv() function saves the currently raised floating-point
375 * exceptions in its automatic storage, installs the floating-point environment
376 * represented by the object pointed to by `envp', and then raises the saved
377 * floating-point exceptions. The argument `envp' shall point to an object set
378 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
379 * environment macro.
380 */
381int
382feupdateenv(const fenv_t *envp)
383{
384	unsigned short status;
385	unsigned int mxcsr = 0;
386
387	/* Store the x87 status register */
388	__asm__ volatile ("fnstsw %0" : "=am" (status));
389
390	/* Store the MXCSR register */
391	if (__HAS_SSE)
392		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
393
394	/* Install new floating-point environment */
395	fesetenv(envp);
396
397	/* Raise any previously accumulated exceptions */
398	feraiseexcept(status | mxcsr);
399
400	return (0);
401}
402DEF_STD(feupdateenv);
403
404/*
405 * The following functions are extensions to the standard
406 */
407int
408feenableexcept(int mask)
409{
410	unsigned int mxcsr = 0, omask;
411	unsigned short control;
412
413	mask &= FE_ALL_EXCEPT;
414
415	__asm__ volatile ("fnstcw %0" : "=m" (control));
416	if (__HAS_SSE)
417		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
418
419	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
420	control &= ~mask;
421	__asm__ volatile ("fldcw %0" : : "m" (control));
422
423	if (__HAS_SSE) {
424		mxcsr &= ~(mask << _SSE_MASK_SHIFT);
425		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
426	}
427
428	return (omask);
429}
430
431int
432fedisableexcept(int mask)
433{
434	unsigned int mxcsr = 0, omask;
435	unsigned short control;
436
437	mask &= FE_ALL_EXCEPT;
438
439	__asm__ volatile ("fnstcw %0" : "=m" (control));
440	if (__HAS_SSE)
441		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
442
443	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
444	control |= mask;
445	__asm__ volatile ("fldcw %0" : : "m" (control));
446
447	if (__HAS_SSE) {
448		mxcsr |= mask << _SSE_MASK_SHIFT;
449		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
450	}
451
452	return (omask);
453}
454
/*
 * fegetexcept() -- return the set of currently enabled (unmasked)
 * floating-point exceptions.
 *
 * Only the x87 control word is consulted; the masks of the x87 and the SSE
 * unit are assumed to be the same.
 */
int
fegetexcept(void)
{
	unsigned short x87_cw;
	int enabled;

	__asm__ volatile ("fnstcw %0" : "=m" (x87_cw));

	/* A clear mask bit means the exception is enabled */
	enabled = ~x87_cw & FE_ALL_EXCEPT;

	return (enabled);
}
468