1/* $NetBSD: fenv.c,v 1.3.8.1 2012/08/12 18:53:11 martin Exp $ */
2
3/*-
4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__RCSID("$NetBSD: fenv.c,v 1.3.8.1 2012/08/12 18:53:11 martin Exp $");
31
32#include <sys/param.h>
33#include <sys/sysctl.h>
34#include <assert.h>
35#include <fenv.h>
36#include <stddef.h>
37#include <string.h>
38
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 *
 * Operand convention: the "load" wrappers (__fldcw, __fldenv, __ldmxcsr)
 * take the object itself, whereas the "store" wrappers (__fnstcw, __fnstsw,
 * __fnstenv, __stmxcsr) take a pointer to the object that is written.
 */

/* Load x87 Control Word */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The instruction has no operand, so the wrapper takes no argument.
 */
#define	__fwait()		__asm__	__volatile__	\
	("fwait")

/* Load the MXCSR register */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
74
/*
 * The following object represents the default floating-point environment
 * (that is, the one installed at program startup).
 *
 * It can be used as an argument to the functions within the <fenv.h> header
 * that manage the floating-point environment, namely fesetenv() and
 * feupdateenv().
 *
 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
 * RESERVED. We provide a partial floating-point environment, where we
 * define only the lower bits. The reserved bits are extracted and set by the
 * consumers of FE_DFL_ENV, during runtime.
 *
 * Because of that runtime patching (see the writes to the unused1..unused3
 * and others members in fesetenv() and feupdateenv()), the object itself is
 * deliberately not const-qualified.
 */
fenv_t __fe_dfl_env = {
	/* x87 part of the environment */
	{
		__NetBSD_NPXCW__,       /* Control word register */
		0x0,			/* Unused (reserved), patched at runtime */
		0x0000,                 /* Status word register: no flags set */
		0x0,			/* Unused (reserved), patched at runtime */
		0x0000ffff,             /* Tag word register */
		0x0,			/* Unused (reserved), patched at runtime */
		/* Remaining environment words (presumably `others'), patched at runtime */
		{
			0x0000, 0x0000,
			0x0000, 0xffff
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
104
/*
 * SSE availability probe.
 *
 * Whenever any part of the program was compiled to use SSE floating-point,
 * correct results require going through ldmxcsr/stmxcsr as well; those
 * instructions must not be executed on processors lacking SSE, though.
 *
 * Instead of issuing CPUID ourselves, we simply ask the kernel through the
 * machdep.sse sysctl. The probe runs once, as a constructor, and leaves its
 * answer in __HAS_SSE (non-zero means SSE may be used).
 */
static int __HAS_SSE = 0;

static void __test_sse(void) __attribute__ ((constructor));

static void
__test_sse(void)
{
	size_t len = sizeof(__HAS_SSE);

	/* If the query fails, fall back to "no SSE". */
	if (sysctlbyname("machdep.sse", &__HAS_SSE, &len, NULL, 0) == -1)
		__HAS_SSE = 0;
}
129
130/*
131 * The feclearexcept() function clears the supported floating-point exceptions
132 * represented by `excepts'.
133 */
134int
135feclearexcept(int excepts)
136{
137	fenv_t env;
138	uint32_t mxcsr;
139	int ex;
140
141	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
142
143	ex = excepts & FE_ALL_EXCEPT;
144
145	/* It's ~3x faster to call fnclex, than store/load fp env */
146	if (ex == FE_ALL_EXCEPT) {
147		__fnclex();
148	} else {
149		__fnstenv(&env);
150		env.x87.status &= ~ex;
151		__fldenv(env);
152	}
153
154	if (__HAS_SSE) {
155		__stmxcsr(&mxcsr);
156		mxcsr &= ~ex;
157		__ldmxcsr(mxcsr);
158	}
159
160	/* Success */
161	return (0);
162}
163
164/*
165 * The fegetexceptflag() function stores an implementation-defined
166 * representation of the states of the floating-point status flags indicated by
167 * the argument excepts in the object pointed to by the argument flagp.
168 */
169int
170fegetexceptflag(fexcept_t *flagp, int excepts)
171{
172	uint32_t mxcsr;
173	uint16_t status;
174	int ex;
175
176	_DIAGASSERT(flagp != NULL);
177	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
178
179	ex = excepts & FE_ALL_EXCEPT;
180
181	__fnstsw(&status);
182	if (__HAS_SSE)
183		__stmxcsr(&mxcsr);
184	else
185		mxcsr = 0;
186
187	*flagp = (mxcsr | status) & ex;
188
189	/* Success */
190	return (0);
191}
192
/*
 * feraiseexcept(): raise the supported floating-point exceptions given by
 * `excepts'.
 *
 * Although the standard lets us execute an instruction that produces the
 * exception as a side effect, the flags are written directly into the status
 * registers through fesetexceptflag(); the following fwait then gives any
 * pending unmasked x87 exception the chance to be delivered.
 *
 * Input validation is left to fesetexceptflag(). Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;
	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	/* Success */
	return (0);
}
217
218/*
219 * This function sets the floating-point status flags indicated by the argument
220 * `excepts' to the states stored in the object pointed to by `flagp'. It does
221 * NOT raise any floating-point exceptions, but only sets the state of the flags.
222 */
223int
224fesetexceptflag(const fexcept_t *flagp, int excepts)
225{
226	fenv_t env;
227	uint32_t mxcsr;
228	int ex;
229
230	_DIAGASSERT(flagp != NULL);
231	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
232
233	ex = excepts & FE_ALL_EXCEPT;
234
235	__fnstenv(&env);
236	env.x87.status &= ~ex;
237	env.x87.status |= *flagp & ex;
238	__fldenv(env);
239
240	if (__HAS_SSE) {
241		__stmxcsr(&mxcsr);
242		mxcsr &= ~ex;
243		mxcsr |= *flagp & ex;
244		__ldmxcsr(mxcsr);
245	}
246
247	/* Success */
248	return (0);
249}
250
251/*
252 * The fetestexcept() function determines which of a specified subset of the
253 * floating-point exception flags are currently set. The `excepts' argument
254 * specifies the floating-point status flags to be queried.
255 */
256int
257fetestexcept(int excepts)
258{
259	uint32_t mxcsr;
260	uint16_t status;
261	int ex;
262
263	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
264
265	ex = excepts & FE_ALL_EXCEPT;
266
267	__fnstsw(&status);
268	if (__HAS_SSE)
269		__stmxcsr(&mxcsr);
270	else
271		mxcsr = 0;
272
273	return ((status | mxcsr) & ex);
274}
275
276int
277fegetround(void)
278{
279	uint16_t control;
280
281	/*
282	 * We assume that the x87 and the SSE unit agree on the
283	 * rounding mode.  Reading the control word on the x87 turns
284	 * out to be about 5 times faster than reading it on the SSE
285	 * unit on an Opteron 244.
286	 */
287	__fnstcw(&control);
288
289	return (control & __X87_ROUND_MASK);
290}
291
292/*
293 * The fesetround() function shall establish the rounding direction represented
294 * by its argument round. If the argument is not equal to the value of a
295 * rounding direction macro, the rounding direction is not changed.
296 */
297int
298fesetround(int round)
299{
300	uint32_t mxcsr;
301	uint16_t control;
302
303	if (round & ~__X87_ROUND_MASK) {
304		/* Failure */
305		return (-1);
306	}
307
308	__fnstcw(&control);
309	control &= ~__X87_ROUND_MASK;
310	control |= round;
311	__fldcw(control);
312
313	if (__HAS_SSE) {
314		__stmxcsr(&mxcsr);
315		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
316		mxcsr |= round << __SSE_ROUND_SHIFT;
317		__ldmxcsr(mxcsr);
318	}
319
320	/* Success */
321	return (0);
322}
323
324/*
325 * The fegetenv() function attempts to store the current floating-point
326 * environment in the object pointed to by envp.
327 */
328int
329fegetenv(fenv_t *envp)
330{
331	uint32_t mxcsr;
332
333	_DIAGASSERT(flagp != NULL);
334
335	/*
336	 * fnstenv masks all exceptions, so we need to restore the old control
337	 * word to avoid this side effect.
338	 */
339	__fnstenv(envp);
340	__fldcw(envp->x87.control);
341	if (__HAS_SSE) {
342		__stmxcsr(&mxcsr);
343		envp->mxcsr = mxcsr;
344	}
345
346	/* Success */
347	return (0);
348}
349
350/*
351 * The feholdexcept() function saves the current floating-point environment in
352 * the object pointed to by envp, clears the floating-point status flags, and
353 * then installs a non-stop (continue on floating-point exceptions) mode, if
354 * available, for all floating-point exceptions.
355 */
356int
357feholdexcept(fenv_t *envp)
358{
359	uint32_t mxcsr;
360
361	_DIAGASSERT(envp != NULL);
362
363	__fnstenv(envp);
364	__fnclex();
365	if (__HAS_SSE) {
366		__stmxcsr(&mxcsr);
367		envp->mxcsr = mxcsr;
368		mxcsr &= ~FE_ALL_EXCEPT;
369		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
370		__ldmxcsr(mxcsr);
371	}
372
373	/* Success */
374	return (0);
375}
376
377/*
378 * The fesetenv() function attempts to establish the floating-point environment
379 * represented by the object pointed to by envp. The argument `envp' points
380 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
381 * floating-point environment macro. The fesetenv() function does not raise
382 * floating-point exceptions, but only installs the state of the floating-point
383 * status flags represented through its argument.
384 */
385int
386fesetenv(const fenv_t *envp)
387{
388	fenv_t env;
389
390	_DIAGASSERT(envp != NULL);
391
392	/* Store the x87 floating-point environment */
393	memset(&env, 0, sizeof(env));
394	__fnstenv(&env);
395
396	__fe_dfl_env.x87.unused1 = env.x87.unused1;
397	__fe_dfl_env.x87.unused2 = env.x87.unused2;
398	__fe_dfl_env.x87.unused3 = env.x87.unused3;
399	memcpy(__fe_dfl_env.x87.others,
400	       env.x87.others,
401	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
402
403	__fldenv(envp->x87);
404	if (__HAS_SSE)
405		__ldmxcsr(envp->mxcsr);
406
407	/* Success */
408	return (0);
409}
410
411/*
412 * The feupdateenv() function saves the currently raised floating-point
413 * exceptions in its automatic storage, installs the floating-point environment
414 * represented by the object pointed to by `envp', and then raises the saved
415 * floating-point exceptions. The argument `envp' shall point to an object set
416 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
417 * environment macro.
418 */
419int
420feupdateenv(const fenv_t *envp)
421{
422	fenv_t env;
423	uint32_t mxcsr;
424	uint16_t status;
425
426	_DIAGASSERT(envp != NULL);
427
428	/* Store the x87 floating-point environment */
429	memset(&env, 0, sizeof(env));
430	__fnstenv(&env);
431
432	__fe_dfl_env.x87.unused1 = env.x87.unused1;
433	__fe_dfl_env.x87.unused2 = env.x87.unused2;
434	__fe_dfl_env.x87.unused3 = env.x87.unused3;
435	memcpy(__fe_dfl_env.x87.others,
436	       env.x87.others,
437	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
438
439	__fnstsw(&status);
440	if (__HAS_SSE)
441		__stmxcsr(&mxcsr);
442	else
443		mxcsr = 0;
444	fesetenv(envp);
445	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
446
447	/* Success */
448	return (0);
449}
450
451/*
452 * The following functions are extentions to the standard
453 */
454int
455feenableexcept(int mask)
456{
457	uint32_t mxcsr, omask;
458	uint16_t control;
459
460	mask &= FE_ALL_EXCEPT;
461	__fnstcw(&control);
462	if (__HAS_SSE)
463		__stmxcsr(&mxcsr);
464	else
465		mxcsr = 0;
466
467	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
468	control &= ~mask;
469	__fldcw(control);
470	if (__HAS_SSE) {
471		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
472		__ldmxcsr(mxcsr);
473	}
474
475	return (FE_ALL_EXCEPT & ~omask);
476}
477
478int
479fedisableexcept(int mask)
480{
481	uint32_t mxcsr, omask;
482	uint16_t control;
483
484	mask &= FE_ALL_EXCEPT;
485	__fnstcw(&control);
486	if (__HAS_SSE)
487		__stmxcsr(&mxcsr);
488	else
489		mxcsr = 0;
490
491	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
492	control |= mask;
493	__fldcw(control);
494	if (__HAS_SSE) {
495		mxcsr |= mask << __SSE_EMASK_SHIFT;
496		__ldmxcsr(mxcsr);
497	}
498
499	return (FE_ALL_EXCEPT & ~omask);
500}
501
/*
 * fegetexcept(): return the set of exceptions whose mask bits are clear in
 * the x87 control word.
 *
 * Only the x87 unit is consulted; the masks for the x87 and the SSE unit
 * are assumed to be the same.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	__fnstcw(&cw);

	return (FE_ALL_EXCEPT & ~cw);
}
515