1/* $NetBSD: fenv.c,v 1.3.8.1 2012/08/12 18:53:11 martin Exp $ */ 2 3/*- 4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__RCSID("$NetBSD: fenv.c,v 1.3.8.1 2012/08/12 18:53:11 martin Exp $"); 31 32#include <sys/param.h> 33#include <sys/sysctl.h> 34#include <assert.h> 35#include <fenv.h> 36#include <stddef.h> 37#include <string.h> 38 39/* Load x87 Control Word */ 40#define __fldcw(__cw) __asm__ __volatile__ \ 41 ("fldcw %0" : : "m" (__cw)) 42 43/* No-Wait Store Control Word */ 44#define __fnstcw(__cw) __asm__ __volatile__ \ 45 ("fnstcw %0" : "=m" (*(__cw))) 46 47/* No-Wait Store Status Word */ 48#define __fnstsw(__sw) __asm__ __volatile__ \ 49 ("fnstsw %0" : "=am" (*(__sw))) 50 51/* No-Wait Clear Exception Flags */ 52#define __fnclex() __asm__ __volatile__ \ 53 ("fnclex") 54 55/* Load x87 Environment */ 56#define __fldenv(__env) __asm__ __volatile__ \ 57 ("fldenv %0" : : "m" (__env)) 58 59/* No-Wait Store x87 environment */ 60#define __fnstenv(__env) __asm__ __volatile__ \ 61 ("fnstenv %0" : "=m" (*(__env))) 62 63/* Check for and handle pending unmasked x87 pending FPU exceptions */ 64#define __fwait(__env) __asm__ __volatile__ \ 65 ("fwait") 66 67/* Load the MXCSR register */ 68#define __ldmxcsr(__mxcsr) __asm__ __volatile__ \ 69 ("ldmxcsr %0" : : "m" (__mxcsr)) 70 71/* Store the MXCSR register state */ 72#define __stmxcsr(__mxcsr) __asm__ __volatile__ \ 73 ("stmxcsr %0" : "=m" (*(__mxcsr))) 74 75/* 76 * The following constant represents the default floating-point environment 77 * (that is, the one installed at program startup) and has type pointer to 78 * const-qualified fenv_t. 79 * 80 * It can be used as an argument to the functions within the <fenv.h> header 81 * that manage the floating-point environment, namely fesetenv() and 82 * feupdateenv(). 83 * 84 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as 85 * RESERVED. We provide a partial floating-point environment, where we 86 * define only the lower bits. The reserved bits are extracted and set by the 87 * consumers of FE_DFL_ENV, during runtime. 88 */ 89fenv_t __fe_dfl_env = { 90 { 91 __NetBSD_NPXCW__, /* Control word register */ 92 0x0, /* Unused */ 93 0x0000, /* Status word register */ 94 0x0, /* Unused */ 95 0x0000ffff, /* Tag word register */ 96 0x0, /* Unused */ 97 { 98 0x0000, 0x0000, 99 0x0000, 0xffff 100 } 101 }, 102 __INITIAL_MXCSR__ /* MXCSR register */ 103}; 104 105/* 106 * Test for SSE support on this processor. 107 * 108 * We need to use ldmxcsr/stmxcsr to get correct results if any part 109 * of the program was compiled to use SSE floating-point, but we can't 110 * use SSE on older processors. 111 * 112 * In order to do so, we need to query the processor capabilities via the CPUID 113 * instruction. We can make it even simpler though, by querying the machdep.sse 114 * sysctl. 115 */ 116static int __HAS_SSE = 0; 117 118static void __test_sse(void) __attribute__ ((constructor)); 119 120static void __test_sse(void) 121{ 122 size_t oldlen = sizeof(__HAS_SSE); 123 int rv; 124 125 rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0); 126 if (rv == -1) 127 __HAS_SSE = 0; 128} 129 130/* 131 * The feclearexcept() function clears the supported floating-point exceptions 132 * represented by `excepts'. 133 */ 134int 135feclearexcept(int excepts) 136{ 137 fenv_t env; 138 uint32_t mxcsr; 139 int ex; 140 141 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 142 143 ex = excepts & FE_ALL_EXCEPT; 144 145 /* It's ~3x faster to call fnclex, than store/load fp env */ 146 if (ex == FE_ALL_EXCEPT) { 147 __fnclex(); 148 } else { 149 __fnstenv(&env); 150 env.x87.status &= ~ex; 151 __fldenv(env); 152 } 153 154 if (__HAS_SSE) { 155 __stmxcsr(&mxcsr); 156 mxcsr &= ~ex; 157 __ldmxcsr(mxcsr); 158 } 159 160 /* Success */ 161 return (0); 162} 163 164/* 165 * The fegetexceptflag() function stores an implementation-defined 166 * representation of the states of the floating-point status flags indicated by 167 * the argument excepts in the object pointed to by the argument flagp. 168 */ 169int 170fegetexceptflag(fexcept_t *flagp, int excepts) 171{ 172 uint32_t mxcsr; 173 uint16_t status; 174 int ex; 175 176 _DIAGASSERT(flagp != NULL); 177 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 178 179 ex = excepts & FE_ALL_EXCEPT; 180 181 __fnstsw(&status); 182 if (__HAS_SSE) 183 __stmxcsr(&mxcsr); 184 else 185 mxcsr = 0; 186 187 *flagp = (mxcsr | status) & ex; 188 189 /* Success */ 190 return (0); 191} 192 193/* 194 * The feraiseexcept() function raises the supported floating-point exceptions 195 * represented by the argument `excepts'. 196 * 197 * The standard explicitly allows us to execute an instruction that has the 198 * exception as a side effect, but we choose to manipulate the status register 199 * directly. 200 * 201 * The validation of input is being deferred to fesetexceptflag(). 202 */ 203int 204feraiseexcept(int excepts) 205{ 206 fexcept_t ex; 207 208 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 209 210 ex = excepts & FE_ALL_EXCEPT; 211 fesetexceptflag(&ex, excepts); 212 __fwait(); 213 214 /* Success */ 215 return (0); 216} 217 218/* 219 * This function sets the floating-point status flags indicated by the argument 220 * `excepts' to the states stored in the object pointed to by `flagp'. It does 221 * NOT raise any floating-point exceptions, but only sets the state of the flags. 222 */ 223int 224fesetexceptflag(const fexcept_t *flagp, int excepts) 225{ 226 fenv_t env; 227 uint32_t mxcsr; 228 int ex; 229 230 _DIAGASSERT(flagp != NULL); 231 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 232 233 ex = excepts & FE_ALL_EXCEPT; 234 235 __fnstenv(&env); 236 env.x87.status &= ~ex; 237 env.x87.status |= *flagp & ex; 238 __fldenv(env); 239 240 if (__HAS_SSE) { 241 __stmxcsr(&mxcsr); 242 mxcsr &= ~ex; 243 mxcsr |= *flagp & ex; 244 __ldmxcsr(mxcsr); 245 } 246 247 /* Success */ 248 return (0); 249} 250 251/* 252 * The fetestexcept() function determines which of a specified subset of the 253 * floating-point exception flags are currently set. The `excepts' argument 254 * specifies the floating-point status flags to be queried. 255 */ 256int 257fetestexcept(int excepts) 258{ 259 uint32_t mxcsr; 260 uint16_t status; 261 int ex; 262 263 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 264 265 ex = excepts & FE_ALL_EXCEPT; 266 267 __fnstsw(&status); 268 if (__HAS_SSE) 269 __stmxcsr(&mxcsr); 270 else 271 mxcsr = 0; 272 273 return ((status | mxcsr) & ex); 274} 275 276int 277fegetround(void) 278{ 279 uint16_t control; 280 281 /* 282 * We assume that the x87 and the SSE unit agree on the 283 * rounding mode. Reading the control word on the x87 turns 284 * out to be about 5 times faster than reading it on the SSE 285 * unit on an Opteron 244. 286 */ 287 __fnstcw(&control); 288 289 return (control & __X87_ROUND_MASK); 290} 291 292/* 293 * The fesetround() function shall establish the rounding direction represented 294 * by its argument round. If the argument is not equal to the value of a 295 * rounding direction macro, the rounding direction is not changed. 296 */ 297int 298fesetround(int round) 299{ 300 uint32_t mxcsr; 301 uint16_t control; 302 303 if (round & ~__X87_ROUND_MASK) { 304 /* Failure */ 305 return (-1); 306 } 307 308 __fnstcw(&control); 309 control &= ~__X87_ROUND_MASK; 310 control |= round; 311 __fldcw(control); 312 313 if (__HAS_SSE) { 314 __stmxcsr(&mxcsr); 315 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT); 316 mxcsr |= round << __SSE_ROUND_SHIFT; 317 __ldmxcsr(mxcsr); 318 } 319 320 /* Success */ 321 return (0); 322} 323 324/* 325 * The fegetenv() function attempts to store the current floating-point 326 * environment in the object pointed to by envp. 327 */ 328int 329fegetenv(fenv_t *envp) 330{ 331 uint32_t mxcsr; 332 333 _DIAGASSERT(flagp != NULL); 334 335 /* 336 * fnstenv masks all exceptions, so we need to restore the old control 337 * word to avoid this side effect. 338 */ 339 __fnstenv(envp); 340 __fldcw(envp->x87.control); 341 if (__HAS_SSE) { 342 __stmxcsr(&mxcsr); 343 envp->mxcsr = mxcsr; 344 } 345 346 /* Success */ 347 return (0); 348} 349 350/* 351 * The feholdexcept() function saves the current floating-point environment in 352 * the object pointed to by envp, clears the floating-point status flags, and 353 * then installs a non-stop (continue on floating-point exceptions) mode, if 354 * available, for all floating-point exceptions. 355 */ 356int 357feholdexcept(fenv_t *envp) 358{ 359 uint32_t mxcsr; 360 361 _DIAGASSERT(envp != NULL); 362 363 __fnstenv(envp); 364 __fnclex(); 365 if (__HAS_SSE) { 366 __stmxcsr(&mxcsr); 367 envp->mxcsr = mxcsr; 368 mxcsr &= ~FE_ALL_EXCEPT; 369 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT; 370 __ldmxcsr(mxcsr); 371 } 372 373 /* Success */ 374 return (0); 375} 376 377/* 378 * The fesetenv() function attempts to establish the floating-point environment 379 * represented by the object pointed to by envp. The argument `envp' points 380 * to an object set by a call to fegetenv() or feholdexcept(), or equal a 381 * floating-point environment macro. The fesetenv() function does not raise 382 * floating-point exceptions, but only installs the state of the floating-point 383 * status flags represented through its argument. 384 */ 385int 386fesetenv(const fenv_t *envp) 387{ 388 fenv_t env; 389 390 _DIAGASSERT(envp != NULL); 391 392 /* Store the x87 floating-point environment */ 393 memset(&env, 0, sizeof(env)); 394 __fnstenv(&env); 395 396 __fe_dfl_env.x87.unused1 = env.x87.unused1; 397 __fe_dfl_env.x87.unused2 = env.x87.unused2; 398 __fe_dfl_env.x87.unused3 = env.x87.unused3; 399 memcpy(__fe_dfl_env.x87.others, 400 env.x87.others, 401 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t)); 402 403 __fldenv(envp->x87); 404 if (__HAS_SSE) 405 __ldmxcsr(envp->mxcsr); 406 407 /* Success */ 408 return (0); 409} 410 411/* 412 * The feupdateenv() function saves the currently raised floating-point 413 * exceptions in its automatic storage, installs the floating-point environment 414 * represented by the object pointed to by `envp', and then raises the saved 415 * floating-point exceptions. The argument `envp' shall point to an object set 416 * by a call to feholdexcept() or fegetenv(), or equal a floating-point 417 * environment macro. 418 */ 419int 420feupdateenv(const fenv_t *envp) 421{ 422 fenv_t env; 423 uint32_t mxcsr; 424 uint16_t status; 425 426 _DIAGASSERT(envp != NULL); 427 428 /* Store the x87 floating-point environment */ 429 memset(&env, 0, sizeof(env)); 430 __fnstenv(&env); 431 432 __fe_dfl_env.x87.unused1 = env.x87.unused1; 433 __fe_dfl_env.x87.unused2 = env.x87.unused2; 434 __fe_dfl_env.x87.unused3 = env.x87.unused3; 435 memcpy(__fe_dfl_env.x87.others, 436 env.x87.others, 437 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t)); 438 439 __fnstsw(&status); 440 if (__HAS_SSE) 441 __stmxcsr(&mxcsr); 442 else 443 mxcsr = 0; 444 fesetenv(envp); 445 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 446 447 /* Success */ 448 return (0); 449} 450 451/* 452 * The following functions are extentions to the standard 453 */ 454int 455feenableexcept(int mask) 456{ 457 uint32_t mxcsr, omask; 458 uint16_t control; 459 460 mask &= FE_ALL_EXCEPT; 461 __fnstcw(&control); 462 if (__HAS_SSE) 463 __stmxcsr(&mxcsr); 464 else 465 mxcsr = 0; 466 467 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 468 control &= ~mask; 469 __fldcw(control); 470 if (__HAS_SSE) { 471 mxcsr &= ~(mask << __SSE_EMASK_SHIFT); 472 __ldmxcsr(mxcsr); 473 } 474 475 return (FE_ALL_EXCEPT & ~omask); 476} 477 478int 479fedisableexcept(int mask) 480{ 481 uint32_t mxcsr, omask; 482 uint16_t control; 483 484 mask &= FE_ALL_EXCEPT; 485 __fnstcw(&control); 486 if (__HAS_SSE) 487 __stmxcsr(&mxcsr); 488 else 489 mxcsr = 0; 490 491 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 492 control |= mask; 493 __fldcw(control); 494 if (__HAS_SSE) { 495 mxcsr |= mask << __SSE_EMASK_SHIFT; 496 __ldmxcsr(mxcsr); 497 } 498 499 return (FE_ALL_EXCEPT & ~omask); 500} 501 502int 503fegetexcept(void) 504{ 505 uint16_t control; 506 507 /* 508 * We assume that the masks for the x87 and the SSE unit are 509 * the same. 510 */ 511 __fnstcw(&control); 512 513 return (~control & FE_ALL_EXCEPT); 514} 515