1/* $NetBSD: fp_complete.c,v 1.13 2011/06/07 00:48:30 matt Exp $ */ 2 3/*- 4 * Copyright (c) 2001 Ross Harvey 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the NetBSD 18 * Foundation, Inc. and its contributors. 19 * 4. Neither the name of The NetBSD Foundation nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 
/*
 * Alpha IEEE floating-point software completion.
 *
 * When an FP instruction traps with the /S (software completion) qualifier,
 * the kernel locates the trigger instruction inside the trap shadow,
 * re-executes it in software (SoftFloat), maintains the architected FP_C
 * quadword in md_flags, and decides whether a SIGFPE must be delivered.
 */

#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */

__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.13 2011/06/07 00:48:30 matt Exp $");

#include "opt_compat_osf1.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/evcnt.h>

#ifdef COMPAT_OSF1
#include <compat/osf1/osf1_exec.h>
#endif

#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/reg.h>
#include <machine/alpha.h>
#include <alpha/alpha/db_instruction.h>

#include <lib/libkern/softfloat.h>

#define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */

/*	Set Name		Opcodes			AARM C.* Symbols  */

#define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM] */
#define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC */
#define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL */	\
				 1UL << 0x19 |		/* \PAL\ */	\
				 1UL << 0x1a |		/* JSR */	\
				 1UL << 0x1b |		/* \PAL\ */	\
				 1UL << 0x1d |		/* \PAL\ */	\
				 1UL << 0x1e |		/* \PAL\ */	\
				 1UL << 0x1f |		/* \PAL\ */	\
				 0xffffUL << 0x30 |	/* branch ops */\
				 CHECKFUNCTIONCODE)

/*
 * Build an IEEE bit pattern of the given width from its fields.
 * Used only to construct the canonical quiet NaNs below.
 */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac)	\
	(u_int ## width ## _t)(sign) << ((width) - 1)			|\
	(u_int ## width ## _t)(exp) << ((width) - 1 - (expwidth))	|\
	(u_int ## width ## _t)(msb) << ((width) - 1 - (expwidth) - 1)	|\
	(u_int ## width ## _t)(rest_of_frac)

#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)

#define	IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/*
 * Input/output denormal flushing, controlled by the per-lwp IEEE
 * map-denormal-to-zero (DMZ) and map-underflow-to-zero (UMZ) modes.
 * Note the trailing "else": these macros must prefix the next statement.
 */
#define	PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ	\
				     && IS_SUBNORMAL(v))		\
					(v)->frac = 0; else

#define	POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ	\
				      && IS_SUBNORMAL(v))		\
					(v)->frac = 0; else

	/* Alpha returns 2.0 for true, all zeroes for false. */

#define	CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)

	/* Move bits from sw fp_c to hw fpcr. */

#define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))

/* FP-usage instrumentation counters, bumped in fpu_state_load(). */
struct evcnt fpevent_use;
struct evcnt fpevent_reuse;

/*
 * Temporary trap shadow instrumentation. The [un]resolved counters
 * could be kept permanently, as they provide information on whether
 * user code has met AARM trap shadow generation requirements.
 */

struct alpha_shadow {
	uint64_t resolved;	/* cases trigger pc found */
	uint64_t unresolved;	/* cases it wasn't, code problems? */
	uint64_t scans;		/* trap shadow scans */
	uint64_t len;		/* number of instructions examined */
	uint64_t uop;		/* bit mask of unexpected opcodes */
	uint64_t sqrts;		/* ev6+ square root single count */
	uint64_t sqrtt;		/* ev6+ square root double count */
	uint32_t ufunc;		/* bit mask of unexpected functions */
	uint32_t max;		/* max trap shadow scan */
	uint32_t nilswop;	/* unexpected op codes */
	uint32_t nilswfunc;	/* unexpected function codes */
	uint32_t nilanyop;	/* this "cannot happen" */
	uint32_t vax;		/* sigs from vax fp opcodes */
} alpha_shadow, alpha_shadow_zero;

static float64 float64_unk(float64, float64);
static float64 compare_un(float64, float64);
static float64 compare_eq(float64, float64);
static float64 compare_lt(float64, float64);
static float64 compare_le(float64, float64);
static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
static void cvt_gd(uint32_t, struct lwp *);
static void cvt_qt_dg_qg(uint32_t, struct lwp *);
static void cvt_tq_gq(uint32_t, struct lwp *);

/*
 * Dispatch tables indexed by the instruction's function-field "opclass"
 * (see the encoding chart before alpha_fp_interpret below).
 */
static float32 (*swfp_s[])(float32, float32) = {
	float32_add, float32_sub, float32_mul, float32_div,
};

static float64 (*swfp_t[])(float64, float64) = {
	float64_add, float64_sub, float64_mul, float64_div,
	compare_un, compare_eq, compare_lt, compare_le,
	float64_unk, float64_unk, float64_unk, float64_unk
};

static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
};

/*
 * Record an "impossible" decode event: bump the counters, remember
 * which event numbers and opcodes we have seen, and print a (heavily
 * rate-limited) report asking the user to notify the port maintainers.
 * Output is limited to one report per event number and 1000 in total,
 * so a path through the decoder can never produce unlimited output.
 */
static void
this_cannot_happen(int what_cannot_happen, int64_t bits)
{
	static int total;
	alpha_instruction inst;
	static uint64_t reported;

	inst.bits = bits;
	++alpha_shadow.nilswfunc;
	if (bits != -1)
		alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
	if (1UL << what_cannot_happen & reported)
		return;
	reported |= 1UL << what_cannot_happen;
	if (total >= 1000)
		return;		/* right now, this return "cannot happen" */
	++total;
	if (bits)
		printf("FP instruction %x\n", (unsigned int)bits);
	printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
	    alpha_shadow.uop);
	printf("Please report this to port-alpha-maintainer@NetBSD.org\n");
}

/*
 * Fetch FP register rn into *v (single precision), applying the
 * map-denormal-to-zero prefilter if the lwp has DMZ mode enabled.
 */
static inline void
sts(unsigned int rn, s_float *v, struct lwp *l)
{
	alpha_sts(rn, v);
	PREFILTER_SUBNORMAL(l, v);
}

/* As sts(), but double precision. */
static inline void
stt(unsigned int rn, t_float *v, struct lwp *l)
{
	alpha_stt(rn, v);
	PREFILTER_SUBNORMAL(l, v);
}

/*
 * Store *v (single precision) into FP register rn, first applying the
 * map-underflow-to-zero postfilter if the lwp has UMZ mode enabled.
 */
static inline void
lds(unsigned int rn, s_float *v, struct lwp *l)
{
	POSTFILTER_SUBNORMAL(l, v);
	alpha_lds(rn, v);
}

/* As lds(), but double precision. */
static inline void
ldt(unsigned int rn, t_float *v, struct lwp *l)
{
	POSTFILTER_SUBNORMAL(l, v);
	alpha_ldt(rn, v);
}

static float64
compare_lt(float64 a, float64 b)
{
	return CMP_RESULT(float64_lt(a, b));
}

static float64
compare_le(float64 a, float64 b)
{
	return CMP_RESULT(float64_le(a, b));
}

/*
 * IEEE "unordered" compare: true iff either operand is a NaN.
 * A signaling NaN additionally raises the invalid-operation flag.
 */
static float64
compare_un(float64 a, float64 b)
{
	if (float64_is_nan(a) | float64_is_nan(b)) {
		if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
			float_set_invalid();
		return CMP_RESULT(1);
	}
	return CMP_RESULT(0);
}

static float64
compare_eq(float64 a, float64 b)
{
	return CMP_RESULT(float64_eq(a, b));
}

/*
 * A note regarding the VAX FP ops.
 *
 * The AARM gives us complete leeway to set or not set status flags on VAX
 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
 * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
 * For the purely vax ones, it's hard to imagine ever running them.
 * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
 * to panic, assert, or print unlimited output based on a path through the
 * decoder, so weird cases don't become security issues.
 */

/* Emulate the opclass-12 conversions: cvt[qt]s, cvtst, cvt[gq]f. */
static void
cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	s_float sfb, sfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	/*
	 * cvtst and cvtts have the same opcode, function, and source.  The
	 * distinction for cvtst is hidden in the illegal modifier combinations.
	 * We decode even the non-/s modifier, so that the fix-up-always mode
	 * works on ev6 and later.  The rounding bits are unused and fixed for
	 * cvtst, so we check those too.
	 */
	switch(inst.float_format.function) {
	case op_cvtst:
	case op_cvtst_u:
		sts(inst.float_detail.fb, &sfb, l);
		tfc.i = float32_to_float64(sfb.i);
		ldt(inst.float_detail.fc, &tfc, l);
		return;
	}
	if(inst.float_detail.src == 2) {
		stt(inst.float_detail.fb, &tfb, l);
		sfc.i = float64_to_float32(tfb.i);
		lds(inst.float_detail.fc, &sfc, l);
		return;
	}
	/* 0: S/F */
	/* 1: /D */
	/* 3: Q/Q */
	/* remaining source combinations "cannot happen"; emit a QNaN */
	this_cannot_happen(5, inst.generic_format.opcode);
	tfc.i = FLOAT64QNAN;
	ldt(inst.float_detail.fc, &tfc, l);
	return;
}

/*
 * Emulate cvtXd (VAX only).  The narrowing conversion is done only for
 * its side effects on the flags (its inexact result is deliberately
 * discarded and FP_X_IMP cleared); the add-of-zero canonicalizes the
 * input and produces the flags a real conversion would.
 */
static void
cvt_gd(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, l);
	(void) float64_to_float32(tfb.i);
	l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
	tfc.i = float64_add(tfb.i, (float64)0);
	ldt(inst.float_detail.fc, &tfc, l);
}

/* Emulate cvtXt/g: of the four source encodings only Q/Q (cvtqt) is real. */
static void
cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	switch(inst.float_detail.src) {
	case 0: /* S/F */
		this_cannot_happen(3, inst.bits);
		/* fall thru */
	case 1: /* D */
		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
		/* We've done what's important by just not trapping */
		tfc.i = 0;
		break;
	case 2: /* T/G */
		this_cannot_happen(4, inst.bits);
		tfc.i = 0;
		break;
	case 3: /* Q/Q */
		stt(inst.float_detail.fb, &tfb, l);
		tfc.i = int64_to_float64(tfb.i);
		break;
	}
	/* raw store: the integer result must bypass the UMZ postfilter */
	alpha_ldt(inst.float_detail.fc, &tfc);
}

/*
 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
 * unfortunate habit of always returning the nontrapping result.
 * XXX: there are several apparent AARM/AAH disagreements, as well as
 * the issue of trap handler pc and trapping results.
 */
/* Emulate cvttq/cvtgq: float to 64-bit integer. */
static void
cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, l);
	tfc.i = float64_to_int64(tfb.i);
	alpha_ldt(inst.float_detail.fc, &tfc);	/* yes, ldt */
}

/*
 * Merge the software FP_C state into a hardware FPCR image: keep only
 * the dynamic rounding mode from the given fpcr and rebuild the disable
 * and summary bits from fp_c.  Returns the new FPCR value (not written
 * to hardware here).
 */
static uint64_t
fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
{
	uint64_t disables;

	/*
	 * It's hard to arrange for conforming bit fields, because the FP_C
	 * and the FPCR are both architected, with specified (and relatively
	 * scrambled) bit numbers.  Defining an internal unscrambled FP_C
	 * wouldn't help much, because every user exception requires the
	 * architected bit order in the sigcontext.
	 *
	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
	 * will lose, because those bits can be and usually are subsetted;
	 * the official home is in the fp_c.  Furthermore, the kernel puts
	 * phony enables (it lies :-) in the fpcr in order to get control when
	 * it is necessary to initially set a sticky bit.
	 */

	fpcr &= FPCR_DYN(3);

	/*
	 * enable traps = case where flag bit is clear OR program wants a trap
	 * enables = ~flags | mask
	 * disables = ~(~flags | mask)
	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
	 */
	disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);

	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);

#	if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&		\
	    FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&	\
	    FP_X_UFL << (61 - 3) == FPCR_UNFD &&			\
	    FP_X_IMP << (61 - 3) == FPCR_INED &&			\
	    FP_X_OFL << (49 - 0) == FPCR_OVFD)
#		error "Assertion failed"
	/*
	 * We don't care about the other built-in bit numbers because they
	 * have been architecturally specified.
	 */
#	endif

	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
	if (fp_c & FP_C_MIRRORED)
		fpcr |= FPCR_SUM;
	if (fp_c & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;
	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
	return fpcr;
}

/*
 * Push the lwp's software FP_C into the hardware FPCR.
 * Caller must have the FPU enabled (wrfen) and loaded for this lwp.
 */
static void
fp_c_to_fpcr(struct lwp *l)
{
	alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
}

/*
 * Install a new FP_C value for the lwp, updating the hardware FPCR to
 * match.  No-op if the FP_C bits are unchanged.
 */
void
alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
{
	uint64_t md_flags;

	fp_c &= MDLWP_FP_C;
	md_flags = l->l_md.md_flags;
	if ((md_flags & MDLWP_FP_C) == fp_c)
		return;
	l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
	fpu_load();
	alpha_pal_wrfen(1);
	fp_c_to_fpcr(l);
	alpha_pal_wrfen(0);
}

/* Return the lwp's current (software-maintained) FP_C quadword. */
uint64_t
alpha_read_fp_c(struct lwp *l)
{
	/*
	 * A possibly-desireable EV6-specific optimization would deviate from
	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
	 * but in a transparent way.  Some of the code for that would need to
	 * go right here.
	 */
	return l->l_md.md_flags & MDLWP_FP_C;
}

/*
 * Placeholder for the reserved swfp_t function slots; its result is a
 * harmless zero.  The sqrt and cvt opclasses are dispatched before the
 * table is consulted, so this should never run for valid code.
 */
static float64
float64_unk(float64 a, float64 b)
{
	return 0;
}

/*
 * The real function field encodings for IEEE and VAX FP instructions.
 *
 * Since there is only one operand type field, the cvtXX instructions
 * require a variety of special cases, and these have to be analyzed as
 * they don't always fit into the field descriptions in AARM section I.
 *
 * Lots of staring at bits in the appendix shows what's really going on.
 *
 *	   |	       |
 * 15 14 13|12 11 10 09|08 07 06 05
 * --------======------============
 *  TRAP   : RND : SRC : FUNCTION  :
 *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
 *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
 *	   |				   /V overfloat enable (if int output)
 *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
 *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
 *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
 *  1  0  1|. . .:. . . . . . . . . . . ./SU
 *	   |				   /SV
 *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
 *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output) (IEEE only)
 *	   |				   /SVI (if int output) (IEEE only)
 *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
 *	   |  not all combinations are valid.
 *	   |	       |
 * 15 14 13|12 11 10 09|08 07 06 05
 * --------======------============
 *  TRAP   : RND : SRC : FUNCTION  :
 *	   | 0	0 . . . . . . . . . . . ./C Chopped
 *	   : 0	1 . . . . . . . . . . . ./M Minus Infinity
 *	   | 1	0 . . . . . . . . . . . .  Normal
 *	   : 1	1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
 *	   |	       |
 * 15 14 13|12 11 10 09|08 07 06 05
 * --------======------============
 *  TRAP   : RND : SRC : FUNCTION  :
 *		 0  0 . . . . . . . . . . S/F
 *		 0  1 . . . . . . . . . . -/D
 *		 1  0 . . . . . . . . . . T/G
 *		 1  1 . . . . . . . . . . Q/Q
 *	   |	       |
 * 15 14 13|12 11 10 09|08 07 06 05
 * --------======------============
 *  TRAP   : RND : SRC : FUNCTION  :
 *			 0 0 0 0 . . . addX
 *			 0 0 0 1 . . . subX
 *			 0 0 1 0 . . . mulX
 *			 0 0 1 1 . . . divX
 *			 0 1 0 0 . . . cmpXun
 *			 0 1 0 1 . . . cmpXeq
 *			 0 1 1 0 . . . cmpXlt
 *			 0 1 1 1 . . . cmpXle
 *			 1 0 0 0 . . . reserved
 *			 1 0 0 1 . . . reserved
 *			 1 0 1 0 . . . sqrt[fg] (op_fix, not exactly "vax")
 *			 1 0 1 1 . . . sqrt[st] (op_fix, not exactly "ieee")
 *			 1 1 0 0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
 *			 1 1 0 1 . . . cvtXd (vax only)
 *			 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
 *			 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq)
 *	   |	       |
 * 15 14 13|12 11 10 09|08 07 06 05	the twilight zone
 * --------======------============
 *  TRAP   : RND : SRC : FUNCTION  :
 * /s /i /u  x  x  1  0  1  1  0  0	. . . cvtts, /siu only 0, 1, 5, 7
 *  0  1  0  1  0  1  0  1  1  0  0	. . . cvtst (src == T (!)) 2ac NOT /S
 *  1  1  0  1  0  1  0  1  1  0  0	. . . cvtst/s (src == T (!)) 6ac
 *  x  0  x  x  x  x  0  1  1  1  1	. . . cvttq/_ (src == T)
 */

/*
 * Execute one FP instruction in software via SoftFloat, reading and
 * writing the (already loaded) hardware FP registers, and accumulating
 * exception flags through float_raise()/the SoftFloat flag machinery.
 */
static void
alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint64_t bits)
{
	s_float sfa, sfb, sfc;
	t_float tfa, tfb, tfc;
	alpha_instruction inst;

	inst.bits = bits;
	switch(inst.generic_format.opcode) {
	default:
		/* this "cannot happen" */
		this_cannot_happen(2, inst.bits);
		return;
	case op_any_float:
		if (inst.float_format.function == op_cvtql_sv ||
		    inst.float_format.function == op_cvtql_v) {
			/* overflowed cvtql: saturate and raise invalid */
			alpha_stt(inst.float_detail.fb, &tfb);
			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
			alpha_lds(inst.float_detail.fc, &sfc);
			float_raise(FP_X_INV);
		} else {
			++alpha_shadow.nilanyop;
			this_cannot_happen(3, inst.bits);
		}
		break;
	case op_vax_float:
		++alpha_shadow.vax;	/* fall thru */
	case op_ieee_float:
	case op_fix_float:
		switch(inst.float_detail.src) {
		case op_src_sf:
			sts(inst.float_detail.fb, &sfb, l);
			if (inst.float_detail.opclass == 10)
				sfc.i = float32_sqrt(sfb.i);
			else if (inst.float_detail.opclass & ~3) {
				this_cannot_happen(1, inst.bits);
				sfc.i = FLOAT32QNAN;
			} else {
				sts(inst.float_detail.fa, &sfa, l);
				sfc.i = (*swfp_s[inst.float_detail.opclass])(
				    sfa.i, sfb.i);
			}
			lds(inst.float_detail.fc, &sfc, l);
			break;
		case op_src_xd:
		case op_src_tg:
			if (inst.float_detail.opclass >= 12)
				(*swfp_cvt[inst.float_detail.opclass - 12])(
				    inst.bits, l);
			else {
				stt(inst.float_detail.fb, &tfb, l);
				if (inst.float_detail.opclass == 10)
					tfc.i = float64_sqrt(tfb.i);
				else {
					stt(inst.float_detail.fa, &tfa, l);
					tfc.i = (*swfp_t[inst.float_detail
					    .opclass])(tfa.i, tfb.i);
				}
				ldt(inst.float_detail.fc, &tfc, l);
			}
			break;
		case op_src_qq:
			float_raise(FP_X_IMP);
			break;
		}
	}
}

/*
 * Software-complete the single instruction at trigger_pc: fetch it from
 * user space, interpret it, fold the resulting exception flags into the
 * lwp's FP_C and the hardware FPCR, and decide whether a signal is due.
 * Returns 0, SIGFPE (with *ucode = the unmasked new flags), or SIGSEGV
 * if the instruction could not be read.
 */
static int
alpha_fp_complete_at(alpha_instruction *trigger_pc, struct lwp *l,
    uint64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	uint64_t rm, fpcr, orig_fpcr;
	uint64_t orig_flags, new_flags, changed_flags, md_flags;

	if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	fpu_load();
	alpha_pal_wrfen(1);
	/*
	 * If necessary, lie about the dynamic rounding mode so emulation
	 * software need go to only one place for it, and so we don't have to
	 * lock any memory locations or pass a third parameter to every
	 * SoftFloat entry point.
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
		fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
		alpha_write_fpcr(fpcr);
	}
	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);

	alpha_fp_interpret(trigger_pc, l, inst.bits);

	md_flags = l->l_md.md_flags;

	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	/* signal only for newly-raised flags the program has unmasked */
	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}

/*
 * Main entry for an FP software-completion trap.  a0/a1 are the PALcode
 * trap summary and register write mask; on return, the trapframe PC has
 * been adjusted to where the lwp should resume (or re-raise).  Returns
 * 0 (restart), SIGFPE, or SIGSEGV.
 */
int
alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
{
	int t;
	int sig;
	uint64_t op_class;
	alpha_instruction inst;
	/* "trigger_pc" is Compaq's term for the earliest faulting op */
	alpha_instruction *trigger_pc, *usertrap_pc;
	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];

	sig = SIGFPE;
	pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
	trigger_pc = pc - 1;	/* for ALPHA_AMASK_PAT case */
	if (cpu_amask & ALPHA_AMASK_PAT) {
		/* precise arithmetic traps: the faulting op is simply pc-1 */
		if (a0 & 1 || alpha_fp_sync_complete) {
			sig = alpha_fp_complete_at(trigger_pc, l, ucode);
			goto done;
		}
	}
	*ucode = a0;
	if (!(a0 & 1))
		return sig;
/*
 * At this point we are somewhere in the trap shadow of one or more instruc-
 * tions that have trapped with software completion specified.  We have a mask
 * of the registers written by trapping instructions.
 *
 * Now step backwards through the trap shadow, clearing bits in the
 * destination write mask until the trigger instruction is found, and
 * interpret this one instruction in SW. If a SIGFPE is not required, back up
 * the PC until just after this instruction and restart. This will execute all
 * trap shadow instructions between the trigger pc and the trap pc twice.
 *
 * If a SIGFPE is generated from the OSF1 emulation, back up one more
 * instruction to the trigger pc itself.  Native binaries don't because it
 * is non-portable and completely defeats the intended purpose of IEEE
 * traps -- for example, to count the number of exponent wraps for a later
 * correction.
 */
	trigger_pc = 0;
	win_begin = pc;
	++alpha_shadow.scans;
	t = alpha_shadow.len;
	for (--pc; a1; --pc) {
		++alpha_shadow.len;
		if (pc < win_begin) {
			/* refill the TSWINSIZE-instruction copyin window */
			win_begin = pc - TSWINSIZE + 1;
			if (copyin(win_begin, tsw, sizeof tsw)) {
				/* sigh, try to get just one */
				win_begin = pc;
				if (copyin(win_begin, tsw, 4))
					return SIGSEGV;
			}
		}
		assert(win_begin <= pc && !((long)pc & 3));
		inst = tsw[pc - win_begin];
		op_class = 1UL << inst.generic_format.opcode;
		if (op_class & FPUREG_CLASS) {
			/* FP destinations occupy bits 32..63 of the mask */
			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
			trigger_pc = pc;
		} else if (op_class & CPUREG_CLASS) {
			a1 &= ~(1UL << inst.operate_generic_format.rc);
			trigger_pc = pc;
		} else if (op_class & TRAPSHADOWBOUNDARY) {
			if (op_class & CHECKFUNCTIONCODE) {
				if (inst.mem_format.displacement == op_trapb ||
				    inst.mem_format.displacement == op_excb)
					break;	/* code breaks AARM rules */
			} else
				break;		/* code breaks AARM rules */
		}
		/* Some shadow-safe op, probably load, store, or FPTI class */
	}
	t = alpha_shadow.len - t;
	if (t > alpha_shadow.max)
		alpha_shadow.max = t;
	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
		++alpha_shadow.resolved;
		sig = alpha_fp_complete_at(trigger_pc, l, ucode);
	} else {
		++alpha_shadow.unresolved;
		return sig;
	}
done:
	if (sig) {
		usertrap_pc = trigger_pc + 1;
#ifdef COMPAT_OSF1
		if (l->l_proc->p_emul == &emul_osf1)
			usertrap_pc = trigger_pc;
#endif
		l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
		return sig;
	}
	return 0;
}

/*
 * Load the floating-point context for the current lwp.
 */
void
fpu_state_load(struct lwp *l, bool used)
{
	struct pcb * const pcb = lwp_getpcb(l);

	/*
	 * Instrument FP usage -- if a process had not previously
	 * used FP, mark it as having used FP for the first time,
	 * and count this event.
	 *
	 * If a process has used FP, count a "used FP, and took
	 * a trap to use it again" event.
	 */
	if (!fpu_used_p(l)) {
		atomic_inc_ulong(&fpevent_use.ev_count);
		fpu_mark_used(l);
	} else
		atomic_inc_ulong(&fpevent_reuse.ev_count);

	alpha_pal_wrfen(1);
	restorefpstate(&pcb->pcb_fp);
	alpha_pal_wrfen(0);

	l->l_md.md_flags |= MDLWP_FPACTIVE;
}

/*
 * Save the FPU state.
 */

void
fpu_state_save(struct lwp *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	alpha_pal_wrfen(1);
	savefpstate(&pcb->pcb_fp);
	alpha_pal_wrfen(0);
}

/*
 * Release the FPU.
 */
void
fpu_state_release(struct lwp *l)
{
	l->l_md.md_flags &= ~MDLWP_FPACTIVE;
}