1/* $NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $ */ 2 3/*- 4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__RCSID("$NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $"); 31 32#include "namespace.h" 33 34#include <sys/param.h> 35#include <sys/sysctl.h> 36#include <assert.h> 37#include <fenv.h> 38#include <stddef.h> 39#include <string.h> 40 41#ifdef __weak_alias 42__weak_alias(feclearexcept,_feclearexcept) 43__weak_alias(fedisableexcept,_fedisableexcept) 44__weak_alias(feenableexcept,_feenableexcept) 45__weak_alias(fegetenv,_fegetenv) 46__weak_alias(fegetexcept,_fegetexcept) 47__weak_alias(fegetexceptflag,_fegetexceptflag) 48__weak_alias(fegetround,_fegetround) 49__weak_alias(feholdexcept,_feholdexcept) 50__weak_alias(feraiseexcept,_feraiseexcept) 51__weak_alias(fesetenv,_fesetenv) 52__weak_alias(fesetexceptflag,_fesetexceptflag) 53__weak_alias(fesetround,_fesetround) 54__weak_alias(fetestexcept,_fetestexcept) 55__weak_alias(feupdateenv,_feupdateenv) 56#endif 57 58/* Load x87 Control Word */ 59#define __fldcw(__cw) __asm__ __volatile__ \ 60 ("fldcw %0" : : "m" (__cw)) 61 62/* No-Wait Store Control Word */ 63#define __fnstcw(__cw) __asm__ __volatile__ \ 64 ("fnstcw %0" : "=m" (*(__cw))) 65 66/* No-Wait Store Status Word */ 67#define __fnstsw(__sw) __asm__ __volatile__ \ 68 ("fnstsw %0" : "=am" (*(__sw))) 69 70/* No-Wait Clear Exception Flags */ 71#define __fnclex() __asm__ __volatile__ \ 72 ("fnclex") 73 74/* Load x87 Environment */ 75#define __fldenv(__env) __asm__ __volatile__ \ 76 ("fldenv %0" : : "m" (__env)) 77 78/* No-Wait Store x87 environment */ 79#define __fnstenv(__env) __asm__ __volatile__ \ 80 ("fnstenv %0" : "=m" (*(__env))) 81 82/* Check for and handle pending unmasked x87 pending FPU exceptions */ 83#define __fwait(__env) __asm__ __volatile__ \ 84 ("fwait") 85 86/* Load the MXCSR register */ 87#define __ldmxcsr(__mxcsr) __asm__ __volatile__ \ 88 ("ldmxcsr %0" : : "m" (__mxcsr)) 89 90/* Store the MXCSR register state */ 91#define __stmxcsr(__mxcsr) __asm__ __volatile__ \ 92 ("stmxcsr %0" : "=m" (*(__mxcsr))) 93 94/* 95 * The following constant represents the default floating-point environment 96 * (that is, the one installed at program startup) and has type pointer to 97 * const-qualified fenv_t. 98 * 99 * It can be used as an argument to the functions within the <fenv.h> header 100 * that manage the floating-point environment, namely fesetenv() and 101 * feupdateenv(). 102 * 103 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as 104 * RESERVED. We provide a partial floating-point environment, where we 105 * define only the lower bits. The reserved bits are extracted and set by the 106 * consumers of FE_DFL_ENV, during runtime. 107 */ 108fenv_t __fe_dfl_env = { 109 .x87 = { 110 .control = __NetBSD_NPXCW__, /* Control word register */ 111 .unused1 = 0, /* Unused */ 112 .status = 0, /* Status word register */ 113 .unused2 = 0, /* Unused */ 114 .tag = 0xffff, /* Tag word register */ 115 .unused3 = 0, /* Unused */ 116 .others = { 117 0, 0, 0, 0x0000ffff, 118 } 119 }, 120 .mxcsr = __INITIAL_MXCSR__ /* MXCSR register */ 121}; 122 123/* 124 * Test for SSE support on this processor. 125 * 126 * We need to use ldmxcsr/stmxcsr to get correct results if any part 127 * of the program was compiled to use SSE floating-point, but we can't 128 * use SSE on older processors. 129 * 130 * In order to do so, we need to query the processor capabilities via the CPUID 131 * instruction. We can make it even simpler though, by querying the machdep.sse 132 * sysctl. 133 */ 134static int __HAS_SSE = 0; 135 136static void __init_libm(void) __attribute__ ((constructor, used)); 137 138static void __init_libm(void) 139{ 140 size_t oldlen = sizeof(__HAS_SSE); 141 int rv; 142 uint16_t control; 143 144 rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0); 145 if (rv == -1) 146 __HAS_SSE = 0; 147 148 __fnstcw(&control); 149 __fe_dfl_env.x87.control = control; 150} 151 152/* 153 * The feclearexcept() function clears the supported floating-point exceptions 154 * represented by `excepts'. 155 */ 156int 157feclearexcept(int excepts) 158{ 159 fenv_t env; 160 uint32_t mxcsr; 161 int ex; 162 163 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 164 165 ex = excepts & FE_ALL_EXCEPT; 166 167 /* It's ~3x faster to call fnclex, than store/load fp env */ 168 if (ex == FE_ALL_EXCEPT) { 169 __fnclex(); 170 } else { 171 __fnstenv(&env); 172 env.x87.status &= ~ex; 173 __fldenv(env); 174 } 175 176 if (__HAS_SSE) { 177 __stmxcsr(&mxcsr); 178 mxcsr &= ~ex; 179 __ldmxcsr(mxcsr); 180 } 181 182 /* Success */ 183 return (0); 184} 185 186/* 187 * The fegetexceptflag() function stores an implementation-defined 188 * representation of the states of the floating-point status flags indicated by 189 * the argument excepts in the object pointed to by the argument flagp. 190 */ 191int 192fegetexceptflag(fexcept_t *flagp, int excepts) 193{ 194 uint32_t mxcsr; 195 uint16_t status; 196 int ex; 197 198 _DIAGASSERT(flagp != NULL); 199 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 200 201 ex = excepts & FE_ALL_EXCEPT; 202 203 __fnstsw(&status); 204 if (__HAS_SSE) 205 __stmxcsr(&mxcsr); 206 else 207 mxcsr = 0; 208 209 *flagp = (mxcsr | status) & ex; 210 211 /* Success */ 212 return (0); 213} 214 215/* 216 * The feraiseexcept() function raises the supported floating-point exceptions 217 * represented by the argument `excepts'. 218 * 219 * The standard explicitly allows us to execute an instruction that has the 220 * exception as a side effect, but we choose to manipulate the status register 221 * directly. 222 * 223 * The validation of input is being deferred to fesetexceptflag(). 224 */ 225int 226feraiseexcept(int excepts) 227{ 228 fexcept_t ex; 229 230 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 231 232 ex = excepts & FE_ALL_EXCEPT; 233 fesetexceptflag(&ex, excepts); 234 __fwait(); 235 236 /* Success */ 237 return (0); 238} 239 240/* 241 * This function sets the floating-point status flags indicated by the argument 242 * `excepts' to the states stored in the object pointed to by `flagp'. It does 243 * NOT raise any floating-point exceptions, but only sets the state of the flags. 244 */ 245int 246fesetexceptflag(const fexcept_t *flagp, int excepts) 247{ 248 fenv_t env; 249 uint32_t mxcsr; 250 int ex; 251 252 _DIAGASSERT(flagp != NULL); 253 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 254 255 ex = excepts & FE_ALL_EXCEPT; 256 257 __fnstenv(&env); 258 env.x87.status &= ~ex; 259 env.x87.status |= *flagp & ex; 260 __fldenv(env); 261 262 if (__HAS_SSE) { 263 __stmxcsr(&mxcsr); 264 mxcsr &= ~ex; 265 mxcsr |= *flagp & ex; 266 __ldmxcsr(mxcsr); 267 } 268 269 /* Success */ 270 return (0); 271} 272 273/* 274 * The fetestexcept() function determines which of a specified subset of the 275 * floating-point exception flags are currently set. The `excepts' argument 276 * specifies the floating-point status flags to be queried. 277 */ 278int 279fetestexcept(int excepts) 280{ 281 uint32_t mxcsr; 282 uint16_t status; 283 int ex; 284 285 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 286 287 ex = excepts & FE_ALL_EXCEPT; 288 289 __fnstsw(&status); 290 if (__HAS_SSE) 291 __stmxcsr(&mxcsr); 292 else 293 mxcsr = 0; 294 295 return ((status | mxcsr) & ex); 296} 297 298int 299fegetround(void) 300{ 301 uint16_t control; 302 303 /* 304 * We assume that the x87 and the SSE unit agree on the 305 * rounding mode. Reading the control word on the x87 turns 306 * out to be about 5 times faster than reading it on the SSE 307 * unit on an Opteron 244. 308 */ 309 __fnstcw(&control); 310 311 return (control & __X87_ROUND_MASK); 312} 313 314/* 315 * The fesetround() function shall establish the rounding direction represented 316 * by its argument round. If the argument is not equal to the value of a 317 * rounding direction macro, the rounding direction is not changed. 318 */ 319int 320fesetround(int round) 321{ 322 uint32_t mxcsr; 323 uint16_t control; 324 325 if (round & ~__X87_ROUND_MASK) { 326 /* Failure */ 327 return (-1); 328 } 329 330 __fnstcw(&control); 331 control &= ~__X87_ROUND_MASK; 332 control |= round; 333 __fldcw(control); 334 335 if (__HAS_SSE) { 336 __stmxcsr(&mxcsr); 337 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT); 338 mxcsr |= round << __SSE_ROUND_SHIFT; 339 __ldmxcsr(mxcsr); 340 } 341 342 /* Success */ 343 return (0); 344} 345 346/* 347 * The fegetenv() function attempts to store the current floating-point 348 * environment in the object pointed to by envp. 349 */ 350int 351fegetenv(fenv_t *envp) 352{ 353 uint32_t mxcsr; 354 355 _DIAGASSERT(flagp != NULL); 356 357 /* 358 * fnstenv masks all exceptions, so we need to restore the old control 359 * word to avoid this side effect. 360 */ 361 __fnstenv(envp); 362 __fldcw(envp->x87.control); 363 if (__HAS_SSE) { 364 __stmxcsr(&mxcsr); 365 envp->mxcsr = mxcsr; 366 } 367 368 /* Success */ 369 return (0); 370} 371 372/* 373 * The feholdexcept() function saves the current floating-point environment in 374 * the object pointed to by envp, clears the floating-point status flags, and 375 * then installs a non-stop (continue on floating-point exceptions) mode, if 376 * available, for all floating-point exceptions. 377 */ 378int 379feholdexcept(fenv_t *envp) 380{ 381 uint32_t mxcsr; 382 383 _DIAGASSERT(envp != NULL); 384 385 __fnstenv(envp); 386 __fnclex(); 387 if (__HAS_SSE) { 388 __stmxcsr(&mxcsr); 389 envp->mxcsr = mxcsr; 390 mxcsr &= ~FE_ALL_EXCEPT; 391 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT; 392 __ldmxcsr(mxcsr); 393 } 394 395 /* Success */ 396 return (0); 397} 398 399/* 400 * The fesetenv() function attempts to establish the floating-point environment 401 * represented by the object pointed to by envp. The argument `envp' points 402 * to an object set by a call to fegetenv() or feholdexcept(), or equal a 403 * floating-point environment macro. The fesetenv() function does not raise 404 * floating-point exceptions, but only installs the state of the floating-point 405 * status flags represented through its argument. 406 */ 407int 408fesetenv(const fenv_t *envp) 409{ 410 fenv_t env; 411 412 _DIAGASSERT(envp != NULL); 413 414 /* Store the x87 floating-point environment */ 415 memset(&env, 0, sizeof(env)); 416 __fnstenv(&env); 417 418 __fe_dfl_env.x87.unused1 = env.x87.unused1; 419 __fe_dfl_env.x87.unused2 = env.x87.unused2; 420 __fe_dfl_env.x87.unused3 = env.x87.unused3; 421 memcpy(__fe_dfl_env.x87.others, env.x87.others, 422 sizeof(__fe_dfl_env.x87.others)); 423 424 __fldenv(envp->x87); 425 if (__HAS_SSE) 426 __ldmxcsr(envp->mxcsr); 427 428 /* Success */ 429 return (0); 430} 431 432/* 433 * The feupdateenv() function saves the currently raised floating-point 434 * exceptions in its automatic storage, installs the floating-point environment 435 * represented by the object pointed to by `envp', and then raises the saved 436 * floating-point exceptions. The argument `envp' shall point to an object set 437 * by a call to feholdexcept() or fegetenv(), or equal a floating-point 438 * environment macro. 439 */ 440int 441feupdateenv(const fenv_t *envp) 442{ 443 fenv_t env; 444 uint32_t mxcsr; 445 uint16_t status; 446 447 _DIAGASSERT(envp != NULL); 448 449 /* Store the x87 floating-point environment */ 450 memset(&env, 0, sizeof(env)); 451 __fnstenv(&env); 452 453 __fe_dfl_env.x87.unused1 = env.x87.unused1; 454 __fe_dfl_env.x87.unused2 = env.x87.unused2; 455 __fe_dfl_env.x87.unused3 = env.x87.unused3; 456 memcpy(__fe_dfl_env.x87.others, env.x87.others, 457 sizeof(__fe_dfl_env.x87.others)); 458 459 __fnstsw(&status); 460 if (__HAS_SSE) 461 __stmxcsr(&mxcsr); 462 else 463 mxcsr = 0; 464 fesetenv(envp); 465 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 466 467 /* Success */ 468 return (0); 469} 470 471/* 472 * The following functions are extensions to the standard 473 */ 474int 475feenableexcept(int mask) 476{ 477 uint32_t mxcsr, omask; 478 uint16_t control; 479 480 mask &= FE_ALL_EXCEPT; 481 __fnstcw(&control); 482 if (__HAS_SSE) 483 __stmxcsr(&mxcsr); 484 else 485 mxcsr = 0; 486 487 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 488 control &= ~mask; 489 __fldcw(control); 490 if (__HAS_SSE) { 491 mxcsr &= ~(mask << __SSE_EMASK_SHIFT); 492 __ldmxcsr(mxcsr); 493 } 494 495 return (FE_ALL_EXCEPT & ~omask); 496} 497 498int 499fedisableexcept(int mask) 500{ 501 uint32_t mxcsr, omask; 502 uint16_t control; 503 504 mask &= FE_ALL_EXCEPT; 505 __fnstcw(&control); 506 if (__HAS_SSE) 507 __stmxcsr(&mxcsr); 508 else 509 mxcsr = 0; 510 511 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 512 control |= mask; 513 __fldcw(control); 514 if (__HAS_SSE) { 515 mxcsr |= mask << __SSE_EMASK_SHIFT; 516 __ldmxcsr(mxcsr); 517 } 518 519 return (FE_ALL_EXCEPT & ~omask); 520} 521 522int 523fegetexcept(void) 524{ 525 uint16_t control; 526 527 /* 528 * We assume that the masks for the x87 and the SSE unit are 529 * the same. 530 */ 531 __fnstcw(&control); 532 533 return (~control & FE_ALL_EXCEPT); 534} 535