1/* $NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $ */
2
3/*-
4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__RCSID("$NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $");
31
32#include "namespace.h"
33
34#include <sys/param.h>
35#include <sys/sysctl.h>
36#include <assert.h>
37#include <fenv.h>
38#include <stddef.h>
39#include <string.h>
40
41#ifdef __weak_alias
42__weak_alias(feclearexcept,_feclearexcept)
43__weak_alias(fedisableexcept,_fedisableexcept)
44__weak_alias(feenableexcept,_feenableexcept)
45__weak_alias(fegetenv,_fegetenv)
46__weak_alias(fegetexcept,_fegetexcept)
47__weak_alias(fegetexceptflag,_fegetexceptflag)
48__weak_alias(fegetround,_fegetround)
49__weak_alias(feholdexcept,_feholdexcept)
50__weak_alias(feraiseexcept,_feraiseexcept)
51__weak_alias(fesetenv,_fesetenv)
52__weak_alias(fesetexceptflag,_fesetexceptflag)
53__weak_alias(fesetround,_fesetround)
54__weak_alias(fetestexcept,_fetestexcept)
55__weak_alias(feupdateenv,_feupdateenv)
56#endif
57
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 *
 * The "store" wrappers (__fnstcw, __fnstsw, __fnstenv, __stmxcsr) take a
 * pointer to the destination object.  The "load" wrappers (__fldcw,
 * __fldenv, __ldmxcsr) take the object itself, which is handed to the
 * instruction as a memory operand.
 */

/* Load x87 Control Word */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word (into %ax or memory, at the compiler's choice) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The instruction has no operands, so the macro takes no argument.
 */
#define	__fwait()		__asm__	__volatile__	\
	("fwait")

/* Load the MXCSR register */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
93
94/*
95 * The following constant represents the default floating-point environment
96 * (that is, the one installed at program startup) and has type pointer to
97 * const-qualified fenv_t.
98 *
99 * It can be used as an argument to the functions within the <fenv.h> header
100 * that manage the floating-point environment, namely fesetenv() and
101 * feupdateenv().
102 *
103 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
104 * RESERVED. We provide a partial floating-point environment, where we
105 * define only the lower bits. The reserved bits are extracted and set by the
106 * consumers of FE_DFL_ENV, during runtime.
107 */
108fenv_t __fe_dfl_env = {
109	.x87 = {
110		.control = __NetBSD_NPXCW__,    /* Control word register */
111		.unused1 = 0,			/* Unused */
112		.status = 0,  		     	/* Status word register */
113		.unused2 = 0,			/* Unused */
114		.tag = 0xffff,          	/* Tag word register */
115		.unused3 = 0,			/* Unused */
116		.others = {
117			0, 0, 0, 0x0000ffff,
118		}
119	},
120	.mxcsr = __INITIAL_MXCSR__		/* MXCSR register */
121};
122
123/*
124 * Test for SSE support on this processor.
125 *
126 * We need to use ldmxcsr/stmxcsr to get correct results if any part
127 * of the program was compiled to use SSE floating-point, but we can't
128 * use SSE on older processors.
129 *
130 * In order to do so, we need to query the processor capabilities via the CPUID
131 * instruction. We can make it even simpler though, by querying the machdep.sse
132 * sysctl.
133 */
134static int __HAS_SSE = 0;
135
136static void __init_libm(void) __attribute__ ((constructor, used));
137
138static void __init_libm(void)
139{
140	size_t oldlen = sizeof(__HAS_SSE);
141	int rv;
142	uint16_t control;
143
144	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
145	if (rv == -1)
146		__HAS_SSE = 0;
147
148	__fnstcw(&control);
149	__fe_dfl_env.x87.control = control;
150}
151
152/*
153 * The feclearexcept() function clears the supported floating-point exceptions
154 * represented by `excepts'.
155 */
156int
157feclearexcept(int excepts)
158{
159	fenv_t env;
160	uint32_t mxcsr;
161	int ex;
162
163	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
164
165	ex = excepts & FE_ALL_EXCEPT;
166
167	/* It's ~3x faster to call fnclex, than store/load fp env */
168	if (ex == FE_ALL_EXCEPT) {
169		__fnclex();
170	} else {
171		__fnstenv(&env);
172		env.x87.status &= ~ex;
173		__fldenv(env);
174	}
175
176	if (__HAS_SSE) {
177		__stmxcsr(&mxcsr);
178		mxcsr &= ~ex;
179		__ldmxcsr(mxcsr);
180	}
181
182	/* Success */
183	return (0);
184}
185
186/*
187 * The fegetexceptflag() function stores an implementation-defined
188 * representation of the states of the floating-point status flags indicated by
189 * the argument excepts in the object pointed to by the argument flagp.
190 */
191int
192fegetexceptflag(fexcept_t *flagp, int excepts)
193{
194	uint32_t mxcsr;
195	uint16_t status;
196	int ex;
197
198	_DIAGASSERT(flagp != NULL);
199	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
200
201	ex = excepts & FE_ALL_EXCEPT;
202
203	__fnstsw(&status);
204	if (__HAS_SSE)
205		__stmxcsr(&mxcsr);
206	else
207		mxcsr = 0;
208
209	*flagp = (mxcsr | status) & ex;
210
211	/* Success */
212	return (0);
213}
214
/*
 * feraiseexcept() raises the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard permits raising an exception by executing an instruction
 * that produces it as a side effect; here the status flags are written
 * directly instead, via fesetexceptflag(), which also takes care of
 * masking the argument.  The return value of fesetexceptflag() is not
 * examined; this function always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags = excepts & FE_ALL_EXCEPT;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	fesetexceptflag(&flags, excepts);

	/* Let the FPU check for, and handle, any now-pending unmasked exception */
	__fwait();

	return (0);
}
239
240/*
241 * This function sets the floating-point status flags indicated by the argument
242 * `excepts' to the states stored in the object pointed to by `flagp'. It does
243 * NOT raise any floating-point exceptions, but only sets the state of the flags.
244 */
245int
246fesetexceptflag(const fexcept_t *flagp, int excepts)
247{
248	fenv_t env;
249	uint32_t mxcsr;
250	int ex;
251
252	_DIAGASSERT(flagp != NULL);
253	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
254
255	ex = excepts & FE_ALL_EXCEPT;
256
257	__fnstenv(&env);
258	env.x87.status &= ~ex;
259	env.x87.status |= *flagp & ex;
260	__fldenv(env);
261
262	if (__HAS_SSE) {
263		__stmxcsr(&mxcsr);
264		mxcsr &= ~ex;
265		mxcsr |= *flagp & ex;
266		__ldmxcsr(mxcsr);
267	}
268
269	/* Success */
270	return (0);
271}
272
273/*
274 * The fetestexcept() function determines which of a specified subset of the
275 * floating-point exception flags are currently set. The `excepts' argument
276 * specifies the floating-point status flags to be queried.
277 */
278int
279fetestexcept(int excepts)
280{
281	uint32_t mxcsr;
282	uint16_t status;
283	int ex;
284
285	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
286
287	ex = excepts & FE_ALL_EXCEPT;
288
289	__fnstsw(&status);
290	if (__HAS_SSE)
291		__stmxcsr(&mxcsr);
292	else
293		mxcsr = 0;
294
295	return ((status | mxcsr) & ex);
296}
297
298int
299fegetround(void)
300{
301	uint16_t control;
302
303	/*
304	 * We assume that the x87 and the SSE unit agree on the
305	 * rounding mode.  Reading the control word on the x87 turns
306	 * out to be about 5 times faster than reading it on the SSE
307	 * unit on an Opteron 244.
308	 */
309	__fnstcw(&control);
310
311	return (control & __X87_ROUND_MASK);
312}
313
314/*
315 * The fesetround() function shall establish the rounding direction represented
316 * by its argument round. If the argument is not equal to the value of a
317 * rounding direction macro, the rounding direction is not changed.
318 */
319int
320fesetround(int round)
321{
322	uint32_t mxcsr;
323	uint16_t control;
324
325	if (round & ~__X87_ROUND_MASK) {
326		/* Failure */
327		return (-1);
328	}
329
330	__fnstcw(&control);
331	control &= ~__X87_ROUND_MASK;
332	control |= round;
333	__fldcw(control);
334
335	if (__HAS_SSE) {
336		__stmxcsr(&mxcsr);
337		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
338		mxcsr |= round << __SSE_ROUND_SHIFT;
339		__ldmxcsr(mxcsr);
340	}
341
342	/* Success */
343	return (0);
344}
345
346/*
347 * The fegetenv() function attempts to store the current floating-point
348 * environment in the object pointed to by envp.
349 */
350int
351fegetenv(fenv_t *envp)
352{
353	uint32_t mxcsr;
354
355	_DIAGASSERT(flagp != NULL);
356
357	/*
358	 * fnstenv masks all exceptions, so we need to restore the old control
359	 * word to avoid this side effect.
360	 */
361	__fnstenv(envp);
362	__fldcw(envp->x87.control);
363	if (__HAS_SSE) {
364		__stmxcsr(&mxcsr);
365		envp->mxcsr = mxcsr;
366	}
367
368	/* Success */
369	return (0);
370}
371
372/*
373 * The feholdexcept() function saves the current floating-point environment in
374 * the object pointed to by envp, clears the floating-point status flags, and
375 * then installs a non-stop (continue on floating-point exceptions) mode, if
376 * available, for all floating-point exceptions.
377 */
378int
379feholdexcept(fenv_t *envp)
380{
381	uint32_t mxcsr;
382
383	_DIAGASSERT(envp != NULL);
384
385	__fnstenv(envp);
386	__fnclex();
387	if (__HAS_SSE) {
388		__stmxcsr(&mxcsr);
389		envp->mxcsr = mxcsr;
390		mxcsr &= ~FE_ALL_EXCEPT;
391		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
392		__ldmxcsr(mxcsr);
393	}
394
395	/* Success */
396	return (0);
397}
398
399/*
400 * The fesetenv() function attempts to establish the floating-point environment
401 * represented by the object pointed to by envp. The argument `envp' points
402 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
403 * floating-point environment macro. The fesetenv() function does not raise
404 * floating-point exceptions, but only installs the state of the floating-point
405 * status flags represented through its argument.
406 */
407int
408fesetenv(const fenv_t *envp)
409{
410	fenv_t env;
411
412	_DIAGASSERT(envp != NULL);
413
414	/* Store the x87 floating-point environment */
415	memset(&env, 0, sizeof(env));
416	__fnstenv(&env);
417
418	__fe_dfl_env.x87.unused1 = env.x87.unused1;
419	__fe_dfl_env.x87.unused2 = env.x87.unused2;
420	__fe_dfl_env.x87.unused3 = env.x87.unused3;
421	memcpy(__fe_dfl_env.x87.others, env.x87.others,
422	    sizeof(__fe_dfl_env.x87.others));
423
424	__fldenv(envp->x87);
425	if (__HAS_SSE)
426		__ldmxcsr(envp->mxcsr);
427
428	/* Success */
429	return (0);
430}
431
432/*
433 * The feupdateenv() function saves the currently raised floating-point
434 * exceptions in its automatic storage, installs the floating-point environment
435 * represented by the object pointed to by `envp', and then raises the saved
436 * floating-point exceptions. The argument `envp' shall point to an object set
437 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
438 * environment macro.
439 */
440int
441feupdateenv(const fenv_t *envp)
442{
443	fenv_t env;
444	uint32_t mxcsr;
445	uint16_t status;
446
447	_DIAGASSERT(envp != NULL);
448
449	/* Store the x87 floating-point environment */
450	memset(&env, 0, sizeof(env));
451	__fnstenv(&env);
452
453	__fe_dfl_env.x87.unused1 = env.x87.unused1;
454	__fe_dfl_env.x87.unused2 = env.x87.unused2;
455	__fe_dfl_env.x87.unused3 = env.x87.unused3;
456	memcpy(__fe_dfl_env.x87.others, env.x87.others,
457	    sizeof(__fe_dfl_env.x87.others));
458
459	__fnstsw(&status);
460	if (__HAS_SSE)
461		__stmxcsr(&mxcsr);
462	else
463		mxcsr = 0;
464	fesetenv(envp);
465	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
466
467	/* Success */
468	return (0);
469}
470
471/*
472 * The following functions are extensions to the standard
473 */
474int
475feenableexcept(int mask)
476{
477	uint32_t mxcsr, omask;
478	uint16_t control;
479
480	mask &= FE_ALL_EXCEPT;
481	__fnstcw(&control);
482	if (__HAS_SSE)
483		__stmxcsr(&mxcsr);
484	else
485		mxcsr = 0;
486
487	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
488	control &= ~mask;
489	__fldcw(control);
490	if (__HAS_SSE) {
491		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
492		__ldmxcsr(mxcsr);
493	}
494
495	return (FE_ALL_EXCEPT & ~omask);
496}
497
498int
499fedisableexcept(int mask)
500{
501	uint32_t mxcsr, omask;
502	uint16_t control;
503
504	mask &= FE_ALL_EXCEPT;
505	__fnstcw(&control);
506	if (__HAS_SSE)
507		__stmxcsr(&mxcsr);
508	else
509		mxcsr = 0;
510
511	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
512	control |= mask;
513	__fldcw(control);
514	if (__HAS_SSE) {
515		mxcsr |= mask << __SSE_EMASK_SHIFT;
516		__ldmxcsr(mxcsr);
517	}
518
519	return (FE_ALL_EXCEPT & ~omask);
520}
521
/*
 * fegetexcept() returns the exceptions whose mask bit is clear in the x87
 * control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * Only the x87 control word is read; the SSE unit is assumed to
	 * carry the same masks.
	 */
	__fnstcw(&cw);

	return (~cw & FE_ALL_EXCEPT);
}
535