171345Sgshapiro/* $NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $ */
271345Sgshapiro
394334Sgshapiro/*-
471345Sgshapiro * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
571345Sgshapiro * All rights reserved.
671345Sgshapiro *
771345Sgshapiro * Redistribution and use in source and binary forms, with or without
871345Sgshapiro * modification, are permitted provided that the following conditions
971345Sgshapiro * are met:
1071345Sgshapiro * 1. Redistributions of source code must retain the above copyright
1171345Sgshapiro *    notice, this list of conditions and the following disclaimer.
1271345Sgshapiro * 2. Redistributions in binary form must reproduce the above copyright
1371345Sgshapiro *    notice, this list of conditions and the following disclaimer in the
1471345Sgshapiro *    documentation and/or other materials provided with the distribution.
1571345Sgshapiro *
1671345Sgshapiro * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1771345Sgshapiro * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1871345Sgshapiro * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1971345Sgshapiro * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2071345Sgshapiro * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2171345Sgshapiro * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2271345Sgshapiro * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2371345Sgshapiro * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2471345Sgshapiro * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2571345Sgshapiro * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2671345Sgshapiro * SUCH DAMAGE.
2771345Sgshapiro */
2871345Sgshapiro
2971345Sgshapiro#include <sys/cdefs.h>
3094334Sgshapiro__RCSID("$NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $");
3171345Sgshapiro
3271345Sgshapiro#include "namespace.h"
3371345Sgshapiro
3471345Sgshapiro#include <sys/param.h>
3571345Sgshapiro#include <sys/sysctl.h>
3671345Sgshapiro#include <assert.h>
3771345Sgshapiro#include <fenv.h>
3894334Sgshapiro#include <stddef.h>
3971345Sgshapiro#include <string.h>
4071345Sgshapiro
/*
 * When the toolchain provides __weak_alias, publish every public <fenv.h>
 * entry point as a weak alias of its underscore-prefixed symbol.
 * NOTE(review): presumably "namespace.h" renames the definitions below to
 * the underscore-prefixed names -- confirm against that header.
 */
#ifdef __weak_alias
__weak_alias(feclearexcept,_feclearexcept)
__weak_alias(fedisableexcept,_fedisableexcept)
__weak_alias(feenableexcept,_feenableexcept)
__weak_alias(fegetenv,_fegetenv)
__weak_alias(fegetexcept,_fegetexcept)
__weak_alias(fegetexceptflag,_fegetexceptflag)
__weak_alias(fegetround,_fegetround)
__weak_alias(feholdexcept,_feholdexcept)
__weak_alias(feraiseexcept,_feraiseexcept)
__weak_alias(fesetenv,_fesetenv)
__weak_alias(fesetexceptflag,_fesetexceptflag)
__weak_alias(fesetround,_fesetround)
__weak_alias(fetestexcept,_fetestexcept)
__weak_alias(feupdateenv,_feupdateenv)
#endif
5771345Sgshapiro
/*
 * One-instruction wrappers around the x87 and SSE control/status
 * instructions.  The "store" wrappers take a pointer and write through it;
 * the "load" wrappers take the object itself and read it from memory.
 */

/* fldcw: load the x87 control word from `__word' */
#define	__fldcw(__word)							\
	__asm__ __volatile__ ("fldcw %0" : : "m" (__word))

/* fnstcw: store the x87 control word to `*__wordp' without waiting */
#define	__fnstcw(__wordp)						\
	__asm__ __volatile__ ("fnstcw %0" : "=m" (*(__wordp)))

/* fnstsw: store the x87 status word to `*__wordp' (via %ax or memory) */
#define	__fnstsw(__wordp)						\
	__asm__ __volatile__ ("fnstsw %0" : "=am" (*(__wordp)))

/* fnclex: clear the x87 exception flags without waiting */
#define	__fnclex()							\
	__asm__ __volatile__ ("fnclex")

/* fldenv: load the x87 environment from `__image' */
#define	__fldenv(__image)						\
	__asm__ __volatile__ ("fldenv %0" : : "m" (__image))

/* fnstenv: store the x87 environment to `*__imagep' without waiting */
#define	__fnstenv(__imagep)						\
	__asm__ __volatile__ ("fnstenv %0" : "=m" (*(__imagep)))

/*
 * fwait: check for and handle pending unmasked x87 exceptions.
 * The parameter is not used by the expansion; it is kept so the macro's
 * shape stays the same for existing callers.
 */
#define	__fwait(__ignored)						\
	__asm__ __volatile__ ("fwait")

/* ldmxcsr: load the MXCSR register from `__value' */
#define	__ldmxcsr(__value)						\
	__asm__ __volatile__ ("ldmxcsr %0" : : "m" (__value))

/* stmxcsr: store the MXCSR register to `*__valuep' */
#define	__stmxcsr(__valuep)						\
	__asm__ __volatile__ ("stmxcsr %0" : "=m" (*(__valuep)))
9371345Sgshapiro
9471345Sgshapiro/*
9571345Sgshapiro * The following constant represents the default floating-point environment
9671345Sgshapiro * (that is, the one installed at program startup) and has type pointer to
9771345Sgshapiro * const-qualified fenv_t.
9871345Sgshapiro *
9971345Sgshapiro * It can be used as an argument to the functions within the <fenv.h> header
10071345Sgshapiro * that manage the floating-point environment, namely fesetenv() and
10173188Sgshapiro * feupdateenv().
10294334Sgshapiro *
10373188Sgshapiro * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
10471345Sgshapiro * RESERVED. We provide a partial floating-point environment, where we
10571345Sgshapiro * define only the lower bits. The reserved bits are extracted and set by the
10671345Sgshapiro * consumers of FE_DFL_ENV, during runtime.
10771345Sgshapiro */
10871345Sgshapirofenv_t __fe_dfl_env = {
10971345Sgshapiro	.x87 = {
11071345Sgshapiro		.control = __NetBSD_NPXCW__,    /* Control word register */
11171345Sgshapiro		.unused1 = 0,			/* Unused */
11271345Sgshapiro		.status = 0,  		     	/* Status word register */
11371345Sgshapiro		.unused2 = 0,			/* Unused */
11473188Sgshapiro		.tag = 0xffff,          	/* Tag word register */
11594334Sgshapiro		.unused3 = 0,			/* Unused */
11671345Sgshapiro		.others = {
11794334Sgshapiro			0, 0, 0, 0x0000ffff,
11894334Sgshapiro		}
11971345Sgshapiro	},
12071345Sgshapiro	.mxcsr = __INITIAL_MXCSR__		/* MXCSR register */
12171345Sgshapiro};
12271345Sgshapiro
12371345Sgshapiro/*
12471345Sgshapiro * Test for SSE support on this processor.
12571345Sgshapiro *
12671345Sgshapiro * We need to use ldmxcsr/stmxcsr to get correct results if any part
12771345Sgshapiro * of the program was compiled to use SSE floating-point, but we can't
12871345Sgshapiro * use SSE on older processors.
12971345Sgshapiro *
13071345Sgshapiro * In order to do so, we need to query the processor capabilities via the CPUID
13171345Sgshapiro * instruction. We can make it even simpler though, by querying the machdep.sse
13271345Sgshapiro * sysctl.
13371345Sgshapiro */
13471345Sgshapirostatic int __HAS_SSE = 0;
13571345Sgshapiro
13671345Sgshapirostatic void __init_libm(void) __attribute__ ((constructor, used));
13771345Sgshapiro
13871345Sgshapirostatic void __init_libm(void)
13971345Sgshapiro{
14071345Sgshapiro	size_t oldlen = sizeof(__HAS_SSE);
14171345Sgshapiro	int rv;
14273188Sgshapiro	uint16_t control;
14371345Sgshapiro
14471345Sgshapiro	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
14571345Sgshapiro	if (rv == -1)
14671345Sgshapiro		__HAS_SSE = 0;
14773188Sgshapiro
14871345Sgshapiro	__fnstcw(&control);
14971345Sgshapiro	__fe_dfl_env.x87.control = control;
15071345Sgshapiro}
15171345Sgshapiro
15271345Sgshapiro/*
15371345Sgshapiro * The feclearexcept() function clears the supported floating-point exceptions
15471345Sgshapiro * represented by `excepts'.
15571345Sgshapiro */
15673188Sgshapiroint
15773188Sgshapirofeclearexcept(int excepts)
15873188Sgshapiro{
15973188Sgshapiro	fenv_t env;
16073188Sgshapiro	uint32_t mxcsr;
16171345Sgshapiro	int ex;
16271345Sgshapiro
16371345Sgshapiro	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
16471345Sgshapiro
16571345Sgshapiro	ex = excepts & FE_ALL_EXCEPT;
16671345Sgshapiro
16794334Sgshapiro	/* It's ~3x faster to call fnclex, than store/load fp env */
16871345Sgshapiro	if (ex == FE_ALL_EXCEPT) {
16971345Sgshapiro		__fnclex();
17071345Sgshapiro	} else {
17171345Sgshapiro		__fnstenv(&env);
17271345Sgshapiro		env.x87.status &= ~ex;
17371345Sgshapiro		__fldenv(env);
17471345Sgshapiro	}
17571345Sgshapiro
17671345Sgshapiro	if (__HAS_SSE) {
17771345Sgshapiro		__stmxcsr(&mxcsr);
17871345Sgshapiro		mxcsr &= ~ex;
17971345Sgshapiro		__ldmxcsr(mxcsr);
18071345Sgshapiro	}
18171345Sgshapiro
18271345Sgshapiro	/* Success */
18371345Sgshapiro	return (0);
18471345Sgshapiro}
18571345Sgshapiro
18671345Sgshapiro/*
18771345Sgshapiro * The fegetexceptflag() function stores an implementation-defined
188 * representation of the states of the floating-point status flags indicated by
189 * the argument excepts in the object pointed to by the argument flagp.
190 */
191int
192fegetexceptflag(fexcept_t *flagp, int excepts)
193{
194	uint32_t mxcsr;
195	uint16_t status;
196	int ex;
197
198	_DIAGASSERT(flagp != NULL);
199	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
200
201	ex = excepts & FE_ALL_EXCEPT;
202
203	__fnstsw(&status);
204	if (__HAS_SSE)
205		__stmxcsr(&mxcsr);
206	else
207		mxcsr = 0;
208
209	*flagp = (mxcsr | status) & ex;
210
211	/* Success */
212	return (0);
213}
214
215/*
216 * The feraiseexcept() function raises the supported floating-point exceptions
217 * represented by the argument `excepts'.
218 *
219 * The standard explicitly allows us to execute an instruction that has the
220 * exception as a side effect, but we choose to manipulate the status register
221 * directly.
222 *
223 * The validation of input is being deferred to fesetexceptflag().
224 */
225int
226feraiseexcept(int excepts)
227{
228	fexcept_t ex;
229
230	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
231
232	ex = excepts & FE_ALL_EXCEPT;
233	fesetexceptflag(&ex, excepts);
234	__fwait();
235
236	/* Success */
237	return (0);
238}
239
240/*
241 * This function sets the floating-point status flags indicated by the argument
242 * `excepts' to the states stored in the object pointed to by `flagp'. It does
243 * NOT raise any floating-point exceptions, but only sets the state of the flags.
244 */
245int
246fesetexceptflag(const fexcept_t *flagp, int excepts)
247{
248	fenv_t env;
249	uint32_t mxcsr;
250	int ex;
251
252	_DIAGASSERT(flagp != NULL);
253	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
254
255	ex = excepts & FE_ALL_EXCEPT;
256
257	__fnstenv(&env);
258	env.x87.status &= ~ex;
259	env.x87.status |= *flagp & ex;
260	__fldenv(env);
261
262	if (__HAS_SSE) {
263		__stmxcsr(&mxcsr);
264		mxcsr &= ~ex;
265		mxcsr |= *flagp & ex;
266		__ldmxcsr(mxcsr);
267	}
268
269	/* Success */
270	return (0);
271}
272
273/*
274 * The fetestexcept() function determines which of a specified subset of the
275 * floating-point exception flags are currently set. The `excepts' argument
276 * specifies the floating-point status flags to be queried.
277 */
278int
279fetestexcept(int excepts)
280{
281	uint32_t mxcsr;
282	uint16_t status;
283	int ex;
284
285	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
286
287	ex = excepts & FE_ALL_EXCEPT;
288
289	__fnstsw(&status);
290	if (__HAS_SSE)
291		__stmxcsr(&mxcsr);
292	else
293		mxcsr = 0;
294
295	return ((status | mxcsr) & ex);
296}
297
298int
299fegetround(void)
300{
301	uint16_t control;
302
303	/*
304	 * We assume that the x87 and the SSE unit agree on the
305	 * rounding mode.  Reading the control word on the x87 turns
306	 * out to be about 5 times faster than reading it on the SSE
307	 * unit on an Opteron 244.
308	 */
309	__fnstcw(&control);
310
311	return (control & __X87_ROUND_MASK);
312}
313
314/*
315 * The fesetround() function shall establish the rounding direction represented
316 * by its argument round. If the argument is not equal to the value of a
317 * rounding direction macro, the rounding direction is not changed.
318 */
319int
320fesetround(int round)
321{
322	uint32_t mxcsr;
323	uint16_t control;
324
325	if (round & ~__X87_ROUND_MASK) {
326		/* Failure */
327		return (-1);
328	}
329
330	__fnstcw(&control);
331	control &= ~__X87_ROUND_MASK;
332	control |= round;
333	__fldcw(control);
334
335	if (__HAS_SSE) {
336		__stmxcsr(&mxcsr);
337		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
338		mxcsr |= round << __SSE_ROUND_SHIFT;
339		__ldmxcsr(mxcsr);
340	}
341
342	/* Success */
343	return (0);
344}
345
346/*
347 * The fegetenv() function attempts to store the current floating-point
348 * environment in the object pointed to by envp.
349 */
350int
351fegetenv(fenv_t *envp)
352{
353	uint32_t mxcsr;
354
355	_DIAGASSERT(flagp != NULL);
356
357	/*
358	 * fnstenv masks all exceptions, so we need to restore the old control
359	 * word to avoid this side effect.
360	 */
361	__fnstenv(envp);
362	__fldcw(envp->x87.control);
363	if (__HAS_SSE) {
364		__stmxcsr(&mxcsr);
365		envp->mxcsr = mxcsr;
366	}
367
368	/* Success */
369	return (0);
370}
371
372/*
373 * The feholdexcept() function saves the current floating-point environment in
374 * the object pointed to by envp, clears the floating-point status flags, and
375 * then installs a non-stop (continue on floating-point exceptions) mode, if
376 * available, for all floating-point exceptions.
377 */
378int
379feholdexcept(fenv_t *envp)
380{
381	uint32_t mxcsr;
382
383	_DIAGASSERT(envp != NULL);
384
385	__fnstenv(envp);
386	__fnclex();
387	if (__HAS_SSE) {
388		__stmxcsr(&mxcsr);
389		envp->mxcsr = mxcsr;
390		mxcsr &= ~FE_ALL_EXCEPT;
391		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
392		__ldmxcsr(mxcsr);
393	}
394
395	/* Success */
396	return (0);
397}
398
399/*
400 * The fesetenv() function attempts to establish the floating-point environment
401 * represented by the object pointed to by envp. The argument `envp' points
402 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
403 * floating-point environment macro. The fesetenv() function does not raise
404 * floating-point exceptions, but only installs the state of the floating-point
405 * status flags represented through its argument.
406 */
407int
408fesetenv(const fenv_t *envp)
409{
410	fenv_t env;
411
412	_DIAGASSERT(envp != NULL);
413
414	/* Store the x87 floating-point environment */
415	memset(&env, 0, sizeof(env));
416	__fnstenv(&env);
417
418	__fe_dfl_env.x87.unused1 = env.x87.unused1;
419	__fe_dfl_env.x87.unused2 = env.x87.unused2;
420	__fe_dfl_env.x87.unused3 = env.x87.unused3;
421	memcpy(__fe_dfl_env.x87.others, env.x87.others,
422	    sizeof(__fe_dfl_env.x87.others));
423
424	__fldenv(envp->x87);
425	if (__HAS_SSE)
426		__ldmxcsr(envp->mxcsr);
427
428	/* Success */
429	return (0);
430}
431
432/*
433 * The feupdateenv() function saves the currently raised floating-point
434 * exceptions in its automatic storage, installs the floating-point environment
435 * represented by the object pointed to by `envp', and then raises the saved
436 * floating-point exceptions. The argument `envp' shall point to an object set
437 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
438 * environment macro.
439 */
440int
441feupdateenv(const fenv_t *envp)
442{
443	fenv_t env;
444	uint32_t mxcsr;
445	uint16_t status;
446
447	_DIAGASSERT(envp != NULL);
448
449	/* Store the x87 floating-point environment */
450	memset(&env, 0, sizeof(env));
451	__fnstenv(&env);
452
453	__fe_dfl_env.x87.unused1 = env.x87.unused1;
454	__fe_dfl_env.x87.unused2 = env.x87.unused2;
455	__fe_dfl_env.x87.unused3 = env.x87.unused3;
456	memcpy(__fe_dfl_env.x87.others, env.x87.others,
457	    sizeof(__fe_dfl_env.x87.others));
458
459	__fnstsw(&status);
460	if (__HAS_SSE)
461		__stmxcsr(&mxcsr);
462	else
463		mxcsr = 0;
464	fesetenv(envp);
465	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
466
467	/* Success */
468	return (0);
469}
470
471/*
472 * The following functions are extensions to the standard
473 */
474int
475feenableexcept(int mask)
476{
477	uint32_t mxcsr, omask;
478	uint16_t control;
479
480	mask &= FE_ALL_EXCEPT;
481	__fnstcw(&control);
482	if (__HAS_SSE)
483		__stmxcsr(&mxcsr);
484	else
485		mxcsr = 0;
486
487	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
488	control &= ~mask;
489	__fldcw(control);
490	if (__HAS_SSE) {
491		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
492		__ldmxcsr(mxcsr);
493	}
494
495	return (FE_ALL_EXCEPT & ~omask);
496}
497
498int
499fedisableexcept(int mask)
500{
501	uint32_t mxcsr, omask;
502	uint16_t control;
503
504	mask &= FE_ALL_EXCEPT;
505	__fnstcw(&control);
506	if (__HAS_SSE)
507		__stmxcsr(&mxcsr);
508	else
509		mxcsr = 0;
510
511	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
512	control |= mask;
513	__fldcw(control);
514	if (__HAS_SSE) {
515		mxcsr |= mask << __SSE_EMASK_SHIFT;
516		__ldmxcsr(mxcsr);
517	}
518
519	return (FE_ALL_EXCEPT & ~omask);
520}
521
/*
 * fegetexcept(): return the set of currently unmasked (enabled)
 * exceptions, as recorded in the x87 control word.  This is an extension
 * to the standard.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * Only the x87 unit is consulted; its masks are assumed to be the
	 * same as those of the SSE unit.
	 */
	__fnstcw(&cw);

	/* A cleared mask bit means the exception is enabled */
	return ~cw & FE_ALL_EXCEPT;
}
535