171345Sgshapiro/* $NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $ */ 271345Sgshapiro 394334Sgshapiro/*- 471345Sgshapiro * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> 571345Sgshapiro * All rights reserved. 671345Sgshapiro * 771345Sgshapiro * Redistribution and use in source and binary forms, with or without 871345Sgshapiro * modification, are permitted provided that the following conditions 971345Sgshapiro * are met: 1071345Sgshapiro * 1. Redistributions of source code must retain the above copyright 1171345Sgshapiro * notice, this list of conditions and the following disclaimer. 1271345Sgshapiro * 2. Redistributions in binary form must reproduce the above copyright 1371345Sgshapiro * notice, this list of conditions and the following disclaimer in the 1471345Sgshapiro * documentation and/or other materials provided with the distribution. 1571345Sgshapiro * 1671345Sgshapiro * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1771345Sgshapiro * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1871345Sgshapiro * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1971345Sgshapiro * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2071345Sgshapiro * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2171345Sgshapiro * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2271345Sgshapiro * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2371345Sgshapiro * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2471345Sgshapiro * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2571345Sgshapiro * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2671345Sgshapiro * SUCH DAMAGE. 2771345Sgshapiro */ 2871345Sgshapiro 2971345Sgshapiro#include <sys/cdefs.h> 3094334Sgshapiro__RCSID("$NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $"); 3171345Sgshapiro 3271345Sgshapiro#include "namespace.h" 3371345Sgshapiro 3471345Sgshapiro#include <sys/param.h> 3571345Sgshapiro#include <sys/sysctl.h> 3671345Sgshapiro#include <assert.h> 3771345Sgshapiro#include <fenv.h> 3894334Sgshapiro#include <stddef.h> 3971345Sgshapiro#include <string.h> 4071345Sgshapiro 4171345Sgshapiro#ifdef __weak_alias 4271345Sgshapiro__weak_alias(feclearexcept,_feclearexcept) 4371345Sgshapiro__weak_alias(fedisableexcept,_fedisableexcept) 4471345Sgshapiro__weak_alias(feenableexcept,_feenableexcept) 4571345Sgshapiro__weak_alias(fegetenv,_fegetenv) 4671345Sgshapiro__weak_alias(fegetexcept,_fegetexcept) 4771345Sgshapiro__weak_alias(fegetexceptflag,_fegetexceptflag) 4873188Sgshapiro__weak_alias(fegetround,_fegetround) 4973188Sgshapiro__weak_alias(feholdexcept,_feholdexcept) 5073188Sgshapiro__weak_alias(feraiseexcept,_feraiseexcept) 5173188Sgshapiro__weak_alias(fesetenv,_fesetenv) 5271345Sgshapiro__weak_alias(fesetexceptflag,_fesetexceptflag) 5394334Sgshapiro__weak_alias(fesetround,_fesetround) 5494334Sgshapiro__weak_alias(fetestexcept,_fetestexcept) 5594334Sgshapiro__weak_alias(feupdateenv,_feupdateenv) 5671345Sgshapiro#endif 5771345Sgshapiro 5871345Sgshapiro/* Load x87 Control Word */ 5971345Sgshapiro#define __fldcw(__cw) __asm__ __volatile__ \ 6071345Sgshapiro ("fldcw %0" : : "m" (__cw)) 6171345Sgshapiro 6271345Sgshapiro/* No-Wait Store Control Word */ 6371345Sgshapiro#define __fnstcw(__cw) __asm__ __volatile__ \ 6471345Sgshapiro ("fnstcw %0" : "=m" (*(__cw))) 6571345Sgshapiro 6671345Sgshapiro/* No-Wait Store Status Word */ 6771345Sgshapiro#define __fnstsw(__sw) __asm__ __volatile__ \ 6871345Sgshapiro ("fnstsw %0" : "=am" (*(__sw))) 6971345Sgshapiro 7071345Sgshapiro/* No-Wait Clear Exception Flags */ 7171345Sgshapiro#define __fnclex() __asm__ __volatile__ \ 7271345Sgshapiro ("fnclex") 7371345Sgshapiro 7471345Sgshapiro/* Load x87 Environment */ 7573188Sgshapiro#define __fldenv(__env) __asm__ __volatile__ \ 7673188Sgshapiro ("fldenv %0" : : "m" (__env)) 7773188Sgshapiro 7871345Sgshapiro/* No-Wait Store x87 environment */ 7971345Sgshapiro#define __fnstenv(__env) __asm__ __volatile__ \ 8071345Sgshapiro ("fnstenv %0" : "=m" (*(__env))) 8171345Sgshapiro 8271345Sgshapiro/* Check for and handle pending unmasked x87 pending FPU exceptions */ 8373188Sgshapiro#define __fwait(__env) __asm__ __volatile__ \ 8471345Sgshapiro ("fwait") 8571345Sgshapiro 8671345Sgshapiro/* Load the MXCSR register */ 8771345Sgshapiro#define __ldmxcsr(__mxcsr) __asm__ __volatile__ \ 8871345Sgshapiro ("ldmxcsr %0" : : "m" (__mxcsr)) 8973188Sgshapiro 9071345Sgshapiro/* Store the MXCSR register state */ 9171345Sgshapiro#define __stmxcsr(__mxcsr) __asm__ __volatile__ \ 9271345Sgshapiro ("stmxcsr %0" : "=m" (*(__mxcsr))) 9371345Sgshapiro 9471345Sgshapiro/* 9571345Sgshapiro * The following constant represents the default floating-point environment 9671345Sgshapiro * (that is, the one installed at program startup) and has type pointer to 9771345Sgshapiro * const-qualified fenv_t. 9871345Sgshapiro * 9971345Sgshapiro * It can be used as an argument to the functions within the <fenv.h> header 10071345Sgshapiro * that manage the floating-point environment, namely fesetenv() and 10173188Sgshapiro * feupdateenv(). 10294334Sgshapiro * 10373188Sgshapiro * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as 10471345Sgshapiro * RESERVED. We provide a partial floating-point environment, where we 10571345Sgshapiro * define only the lower bits. The reserved bits are extracted and set by the 10671345Sgshapiro * consumers of FE_DFL_ENV, during runtime. 10771345Sgshapiro */ 10871345Sgshapirofenv_t __fe_dfl_env = { 10971345Sgshapiro .x87 = { 11071345Sgshapiro .control = __NetBSD_NPXCW__, /* Control word register */ 11171345Sgshapiro .unused1 = 0, /* Unused */ 11271345Sgshapiro .status = 0, /* Status word register */ 11371345Sgshapiro .unused2 = 0, /* Unused */ 11473188Sgshapiro .tag = 0xffff, /* Tag word register */ 11594334Sgshapiro .unused3 = 0, /* Unused */ 11671345Sgshapiro .others = { 11794334Sgshapiro 0, 0, 0, 0x0000ffff, 11894334Sgshapiro } 11971345Sgshapiro }, 12071345Sgshapiro .mxcsr = __INITIAL_MXCSR__ /* MXCSR register */ 12171345Sgshapiro}; 12271345Sgshapiro 12371345Sgshapiro/* 12471345Sgshapiro * Test for SSE support on this processor. 12571345Sgshapiro * 12671345Sgshapiro * We need to use ldmxcsr/stmxcsr to get correct results if any part 12771345Sgshapiro * of the program was compiled to use SSE floating-point, but we can't 12871345Sgshapiro * use SSE on older processors. 12971345Sgshapiro * 13071345Sgshapiro * In order to do so, we need to query the processor capabilities via the CPUID 13171345Sgshapiro * instruction. We can make it even simpler though, by querying the machdep.sse 13271345Sgshapiro * sysctl. 13371345Sgshapiro */ 13471345Sgshapirostatic int __HAS_SSE = 0; 13571345Sgshapiro 13671345Sgshapirostatic void __init_libm(void) __attribute__ ((constructor, used)); 13771345Sgshapiro 13871345Sgshapirostatic void __init_libm(void) 13971345Sgshapiro{ 14071345Sgshapiro size_t oldlen = sizeof(__HAS_SSE); 14171345Sgshapiro int rv; 14273188Sgshapiro uint16_t control; 14371345Sgshapiro 14471345Sgshapiro rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0); 14571345Sgshapiro if (rv == -1) 14671345Sgshapiro __HAS_SSE = 0; 14773188Sgshapiro 14871345Sgshapiro __fnstcw(&control); 14971345Sgshapiro __fe_dfl_env.x87.control = control; 15071345Sgshapiro} 15171345Sgshapiro 15271345Sgshapiro/* 15371345Sgshapiro * The feclearexcept() function clears the supported floating-point exceptions 15471345Sgshapiro * represented by `excepts'. 15571345Sgshapiro */ 15673188Sgshapiroint 15773188Sgshapirofeclearexcept(int excepts) 15873188Sgshapiro{ 15973188Sgshapiro fenv_t env; 16073188Sgshapiro uint32_t mxcsr; 16171345Sgshapiro int ex; 16271345Sgshapiro 16371345Sgshapiro _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 16471345Sgshapiro 16571345Sgshapiro ex = excepts & FE_ALL_EXCEPT; 16671345Sgshapiro 16794334Sgshapiro /* It's ~3x faster to call fnclex, than store/load fp env */ 16871345Sgshapiro if (ex == FE_ALL_EXCEPT) { 16971345Sgshapiro __fnclex(); 17071345Sgshapiro } else { 17171345Sgshapiro __fnstenv(&env); 17271345Sgshapiro env.x87.status &= ~ex; 17371345Sgshapiro __fldenv(env); 17471345Sgshapiro } 17571345Sgshapiro 17671345Sgshapiro if (__HAS_SSE) { 17771345Sgshapiro __stmxcsr(&mxcsr); 17871345Sgshapiro mxcsr &= ~ex; 17971345Sgshapiro __ldmxcsr(mxcsr); 18071345Sgshapiro } 18171345Sgshapiro 18271345Sgshapiro /* Success */ 18371345Sgshapiro return (0); 18471345Sgshapiro} 18571345Sgshapiro 18671345Sgshapiro/* 18771345Sgshapiro * The fegetexceptflag() function stores an implementation-defined 188 * representation of the states of the floating-point status flags indicated by 189 * the argument excepts in the object pointed to by the argument flagp. 190 */ 191int 192fegetexceptflag(fexcept_t *flagp, int excepts) 193{ 194 uint32_t mxcsr; 195 uint16_t status; 196 int ex; 197 198 _DIAGASSERT(flagp != NULL); 199 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 200 201 ex = excepts & FE_ALL_EXCEPT; 202 203 __fnstsw(&status); 204 if (__HAS_SSE) 205 __stmxcsr(&mxcsr); 206 else 207 mxcsr = 0; 208 209 *flagp = (mxcsr | status) & ex; 210 211 /* Success */ 212 return (0); 213} 214 215/* 216 * The feraiseexcept() function raises the supported floating-point exceptions 217 * represented by the argument `excepts'. 218 * 219 * The standard explicitly allows us to execute an instruction that has the 220 * exception as a side effect, but we choose to manipulate the status register 221 * directly. 222 * 223 * The validation of input is being deferred to fesetexceptflag(). 224 */ 225int 226feraiseexcept(int excepts) 227{ 228 fexcept_t ex; 229 230 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 231 232 ex = excepts & FE_ALL_EXCEPT; 233 fesetexceptflag(&ex, excepts); 234 __fwait(); 235 236 /* Success */ 237 return (0); 238} 239 240/* 241 * This function sets the floating-point status flags indicated by the argument 242 * `excepts' to the states stored in the object pointed to by `flagp'. It does 243 * NOT raise any floating-point exceptions, but only sets the state of the flags. 244 */ 245int 246fesetexceptflag(const fexcept_t *flagp, int excepts) 247{ 248 fenv_t env; 249 uint32_t mxcsr; 250 int ex; 251 252 _DIAGASSERT(flagp != NULL); 253 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 254 255 ex = excepts & FE_ALL_EXCEPT; 256 257 __fnstenv(&env); 258 env.x87.status &= ~ex; 259 env.x87.status |= *flagp & ex; 260 __fldenv(env); 261 262 if (__HAS_SSE) { 263 __stmxcsr(&mxcsr); 264 mxcsr &= ~ex; 265 mxcsr |= *flagp & ex; 266 __ldmxcsr(mxcsr); 267 } 268 269 /* Success */ 270 return (0); 271} 272 273/* 274 * The fetestexcept() function determines which of a specified subset of the 275 * floating-point exception flags are currently set. The `excepts' argument 276 * specifies the floating-point status flags to be queried. 277 */ 278int 279fetestexcept(int excepts) 280{ 281 uint32_t mxcsr; 282 uint16_t status; 283 int ex; 284 285 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 286 287 ex = excepts & FE_ALL_EXCEPT; 288 289 __fnstsw(&status); 290 if (__HAS_SSE) 291 __stmxcsr(&mxcsr); 292 else 293 mxcsr = 0; 294 295 return ((status | mxcsr) & ex); 296} 297 298int 299fegetround(void) 300{ 301 uint16_t control; 302 303 /* 304 * We assume that the x87 and the SSE unit agree on the 305 * rounding mode. Reading the control word on the x87 turns 306 * out to be about 5 times faster than reading it on the SSE 307 * unit on an Opteron 244. 308 */ 309 __fnstcw(&control); 310 311 return (control & __X87_ROUND_MASK); 312} 313 314/* 315 * The fesetround() function shall establish the rounding direction represented 316 * by its argument round. If the argument is not equal to the value of a 317 * rounding direction macro, the rounding direction is not changed. 318 */ 319int 320fesetround(int round) 321{ 322 uint32_t mxcsr; 323 uint16_t control; 324 325 if (round & ~__X87_ROUND_MASK) { 326 /* Failure */ 327 return (-1); 328 } 329 330 __fnstcw(&control); 331 control &= ~__X87_ROUND_MASK; 332 control |= round; 333 __fldcw(control); 334 335 if (__HAS_SSE) { 336 __stmxcsr(&mxcsr); 337 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT); 338 mxcsr |= round << __SSE_ROUND_SHIFT; 339 __ldmxcsr(mxcsr); 340 } 341 342 /* Success */ 343 return (0); 344} 345 346/* 347 * The fegetenv() function attempts to store the current floating-point 348 * environment in the object pointed to by envp. 349 */ 350int 351fegetenv(fenv_t *envp) 352{ 353 uint32_t mxcsr; 354 355 _DIAGASSERT(flagp != NULL); 356 357 /* 358 * fnstenv masks all exceptions, so we need to restore the old control 359 * word to avoid this side effect. 360 */ 361 __fnstenv(envp); 362 __fldcw(envp->x87.control); 363 if (__HAS_SSE) { 364 __stmxcsr(&mxcsr); 365 envp->mxcsr = mxcsr; 366 } 367 368 /* Success */ 369 return (0); 370} 371 372/* 373 * The feholdexcept() function saves the current floating-point environment in 374 * the object pointed to by envp, clears the floating-point status flags, and 375 * then installs a non-stop (continue on floating-point exceptions) mode, if 376 * available, for all floating-point exceptions. 377 */ 378int 379feholdexcept(fenv_t *envp) 380{ 381 uint32_t mxcsr; 382 383 _DIAGASSERT(envp != NULL); 384 385 __fnstenv(envp); 386 __fnclex(); 387 if (__HAS_SSE) { 388 __stmxcsr(&mxcsr); 389 envp->mxcsr = mxcsr; 390 mxcsr &= ~FE_ALL_EXCEPT; 391 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT; 392 __ldmxcsr(mxcsr); 393 } 394 395 /* Success */ 396 return (0); 397} 398 399/* 400 * The fesetenv() function attempts to establish the floating-point environment 401 * represented by the object pointed to by envp. The argument `envp' points 402 * to an object set by a call to fegetenv() or feholdexcept(), or equal a 403 * floating-point environment macro. The fesetenv() function does not raise 404 * floating-point exceptions, but only installs the state of the floating-point 405 * status flags represented through its argument. 406 */ 407int 408fesetenv(const fenv_t *envp) 409{ 410 fenv_t env; 411 412 _DIAGASSERT(envp != NULL); 413 414 /* Store the x87 floating-point environment */ 415 memset(&env, 0, sizeof(env)); 416 __fnstenv(&env); 417 418 __fe_dfl_env.x87.unused1 = env.x87.unused1; 419 __fe_dfl_env.x87.unused2 = env.x87.unused2; 420 __fe_dfl_env.x87.unused3 = env.x87.unused3; 421 memcpy(__fe_dfl_env.x87.others, env.x87.others, 422 sizeof(__fe_dfl_env.x87.others)); 423 424 __fldenv(envp->x87); 425 if (__HAS_SSE) 426 __ldmxcsr(envp->mxcsr); 427 428 /* Success */ 429 return (0); 430} 431 432/* 433 * The feupdateenv() function saves the currently raised floating-point 434 * exceptions in its automatic storage, installs the floating-point environment 435 * represented by the object pointed to by `envp', and then raises the saved 436 * floating-point exceptions. The argument `envp' shall point to an object set 437 * by a call to feholdexcept() or fegetenv(), or equal a floating-point 438 * environment macro. 439 */ 440int 441feupdateenv(const fenv_t *envp) 442{ 443 fenv_t env; 444 uint32_t mxcsr; 445 uint16_t status; 446 447 _DIAGASSERT(envp != NULL); 448 449 /* Store the x87 floating-point environment */ 450 memset(&env, 0, sizeof(env)); 451 __fnstenv(&env); 452 453 __fe_dfl_env.x87.unused1 = env.x87.unused1; 454 __fe_dfl_env.x87.unused2 = env.x87.unused2; 455 __fe_dfl_env.x87.unused3 = env.x87.unused3; 456 memcpy(__fe_dfl_env.x87.others, env.x87.others, 457 sizeof(__fe_dfl_env.x87.others)); 458 459 __fnstsw(&status); 460 if (__HAS_SSE) 461 __stmxcsr(&mxcsr); 462 else 463 mxcsr = 0; 464 fesetenv(envp); 465 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 466 467 /* Success */ 468 return (0); 469} 470 471/* 472 * The following functions are extensions to the standard 473 */ 474int 475feenableexcept(int mask) 476{ 477 uint32_t mxcsr, omask; 478 uint16_t control; 479 480 mask &= FE_ALL_EXCEPT; 481 __fnstcw(&control); 482 if (__HAS_SSE) 483 __stmxcsr(&mxcsr); 484 else 485 mxcsr = 0; 486 487 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 488 control &= ~mask; 489 __fldcw(control); 490 if (__HAS_SSE) { 491 mxcsr &= ~(mask << __SSE_EMASK_SHIFT); 492 __ldmxcsr(mxcsr); 493 } 494 495 return (FE_ALL_EXCEPT & ~omask); 496} 497 498int 499fedisableexcept(int mask) 500{ 501 uint32_t mxcsr, omask; 502 uint16_t control; 503 504 mask &= FE_ALL_EXCEPT; 505 __fnstcw(&control); 506 if (__HAS_SSE) 507 __stmxcsr(&mxcsr); 508 else 509 mxcsr = 0; 510 511 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 512 control |= mask; 513 __fldcw(control); 514 if (__HAS_SSE) { 515 mxcsr |= mask << __SSE_EMASK_SHIFT; 516 __ldmxcsr(mxcsr); 517 } 518 519 return (FE_ALL_EXCEPT & ~omask); 520} 521 522int 523fegetexcept(void) 524{ 525 uint16_t control; 526 527 /* 528 * We assume that the masks for the x87 and the SSE unit are 529 * the same. 530 */ 531 __fnstcw(&control); 532 533 return (~control & FE_ALL_EXCEPT); 534} 535