/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 */

/*
 * Register naming used throughout this file:
 *   xh-xl  first double operand and result (high word / low word)
 *   yh-yl  second double operand (high word / low word)
 * The mapping onto r0-r3 depends on the word order (__ARMEB__).
 */

#ifndef __ARMEB__
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif


#ifdef L_arm_negdf2

@ negdf2 / aeabi_dneg: return -x by toggling the sign bit of xh.

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_arm_addsubdf3

@ aeabi_drsub: flip the sign of the first argument, then add (y - x).
@ subdf3 / aeabi_dsub: flip the sign of the second argument, then add.
@ adddf3 / aeabi_dadd: x + y, result in xh-xl.
@ r4, r5 and lr are saved on entry and restored by RETLDM.

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	do_push	{r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
	shift1	lsl, r4, xh, #1
	shift1	lsl, r5, yh, #1
	teq	r4, r5
	do_it	eq
	teqeq	xl, yl
	do_it	ne, ttt
	COND(orr,s,ne)	ip, r4, xl
	COND(orr,s,ne)	ip, r5, yl
	COND(mvn,s,ne)	ip, r4, asr #21
	COND(mvn,s,ne)	ip, r5, asr #21
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	shift1	lsr, r4, r4, #21
	rsbs	r5, r4, r5, lsr #21
	do_it	lt
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bit to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	do_it	hi
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
#if defined(__thumb2__)
	negs	xl, xl
	sbc	xh, xh, xh, lsl #1
#else
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
#endif
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
#if defined(__thumb2__)
	negs	yl, yl
	sbc	yh, yh, yh, lsl #1
#else
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
#endif
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
	rsbs	lr, r5, #32
	blt	1f
	shift1	lsl, ip, yl, lr
	shiftop adds xl xl yl lsr r5 yl
	adc	xh, xh, #0
	shiftop adds xl xl yh lsl lr yl
	shiftop adcs xh xh yh asr r5 yh
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	shift1	lsl,ip, yh, lr
	do_it	cs
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	shiftop adds xl xl yh asr r5 yh
	adcs	xh, xh, yh, asr #31
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
#if defined(__thumb2__)
	mov	lr, #0
	negs	ip, ip
	sbcs	xl, lr, xl
	sbc	xh, lr, xh
#else
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0
#endif

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip.  If halfway between two numbers, round towards
	@ LSB of xl = 0.
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	do_it	eq
	COND(mov,s,eq)	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"

	@ Result must be shifted left and exponent adjusted.
	@ Packing through Lad_e is only valid while the decremented exponent
	@ in r4 has not dropped below zero.  If it has, the result is a
	@ denormal and must go through Lad_l, which shifts it back right.
	@ Testing mantissa bit 20 alone would send 0x1.8p-1022 - 0x1p-1022
	@ to Lad_e with r4 = -1, where adding r4 << 20 cancels the leading
	@ bit and the result collapses to zero.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	subs	r4, r4, #1		@ C clear if exponent went below 0
	do_it	hs
	cmphs	xh, #0x00100000		@ C set if leading bit is in place
	bhs	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
	@ Also entered from the integer and float conversions below with
	@ the magnitude in xh-xl, exponent in r4 and sign in r5.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	do_it	eq, t
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	do_it	eq
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	shift1	lsl, xl, xh, ip
	shift1	lsr, xh, xh, r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	do_it	le
	rsble	ip, r2, #32
	shift1	lsl, xh, xh, r2
#if defined(__thumb2__)
	lsr	ip, xl, ip
	itt	le
	orrle	xh, xh, ip
	lslle	xl, xl, r2
#else
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2
#endif

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	do_it	ge, tt
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	shift1	lsr, xl, xl, r4
	shiftop orr xl xl xh lsl r2 yh
	shiftop orr xh r5 xh lsr r4 yh
	RETLDM	"r4, r5"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	shift1	lsr, xl, xl, r2
	shiftop orr xl xl xh lsl r4 yh
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	shift1	lsr, xl, xh, r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	do_it	eq, te
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)


	@ Special operands: zero, equal magnitudes, INF or NAN.
LSYM(Lad_s):
	mvns	ip, r4, asr #21
	do_it	ne
	COND(mvn,s,ne)	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	do_it	eq
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	orrs	ip, r4, xl
	do_it	eq, t
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	do_it	ne, tt
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	do_it	cs
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	do_it	cc, t
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	do_it	ne, te
	movne	xh, yh
	movne	xl, yl
	COND(mvn,s,eq)	ip, r5, asr #21
	do_it	ne, t
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	do_it	eq, te
	COND(orr,s,eq)	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

@ floatunsidf / aeabi_ui2d: convert the unsigned 32-bit integer in r0
@ to double.  Zero is returned directly, other values are normalized
@ through Lad_l.

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	do_it	eq, t
	moveq	r1, #0
	RETc(eq)
	do_push	{r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf

@ floatsidf / aeabi_i2d: convert the signed 32-bit integer in r0 to
@ double.  The sign is kept in r5 and the absolute value is normalized
@ through Lad_l.

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	do_it	eq, t
	moveq	r1, #0
	RETc(eq)
	do_push	{r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	do_it	mi
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

@ extendsfdf2 / aeabi_f2d: widen the single-precision value in r0 to
@ double.  Denormal inputs are normalized through Lad_l.

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	do_it	ne, ttt
	COND(and,s,ne)	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.

	teq	r2, #0			@ if actually 0
	do_it	ne, e
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	do_push	{r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2

@ floatundidf / aeabi_ul2d: convert the unsigned 64-bit integer in
@ r0-r1 to double.  Shares its tail with floatdidf below.

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
	do_it	eq
	RETc(eq)

	do_push	{r4, r5, lr}

	mov	r5, #0
	b	2f

@ floatdidf / aeabi_l2d: convert the signed 64-bit integer in r0-r1
@ (named al/ah here) to double.  Rounding is done by the Lad_p path.

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
	do_it	eq
	RETc(eq)

	do_push	{r4, r5, lr}

	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
#if defined(__thumb2__)
	negs	al, al
	sbc	ah, ah, ah, lsl #1
#else
	rsbs	al, al, #0
	rsc	ah, ah, #0
#endif
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ If FP word order does not match integer word order, swap the words.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	mov	r2, #3
	movs	ip, ip, lsr #3
	do_it	ne
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	do_it	ne
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3

	rsb	r3, r2, #32
	shift1	lsl, ip, xl, r3
	shift1	lsr, xl, xl, r2
	shiftop orr xl xl xh lsl r3 lr
	shift1	lsr, xh, xh, r2
	add	r4, r4, r2
	b	LSYM(Lad_p)

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_arm_addsubdf3 */

#ifdef L_arm_muldivdf3

@ muldf3 / aeabi_dmul: x * y, result in xh-xl.
@ r4, r5, r6 and lr are saved on entry and restored by RETLDM.

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
	do_push	{r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	do_it	ne, tte
	COND(and,s,ne)	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	do_it	ne
	COND(orr,s,ne)	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and	r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr.
	teq	ip, #0
	do_it	ne
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	do_it	hi
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	do_it	eq
	COND(mov,s,eq)	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	do_it	gt, tt
	COND(rsb,s,gt)	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

	@ Common under/overflow tail, also used by divdf3.  Expects the
	@ flags of the range check, the exponent in r4 and the leftover
	@ rounding bits in lr.
LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	do_it	le, tt
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	shift1	lsl, r3, xl, r5
	shift1	lsr, xl, xl, r4
	shiftop orr xl xl xh lsl r5 r2
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	shiftop adc xh r2 xh lsr r4 r6
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl. Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	shift1	lsl, r3, xl, r4
	shift1	lsr, xl, xl, r5
	shiftop orr xl xl xh lsl r4 r2
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	shiftop orr lr lr xl lsl r5 r2
	shift1	lsr, r3, xl, r4
	shiftop orr r3 r3 xh lsl r5 r2
	shift1	lsr, xl, xh, r4
	bic	xh, xh, #0x7fffffff
	shiftop bic xl xl xh lsr r4 r2
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
	@ Reached through bleq, so RET resumes right after the trap test.
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	do_it	eq
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	do_it	ne
	RETc(ne)
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	do_it	eq
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	RET

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	do_it	ne
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	do_it	ne
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	and	xh, xh, #0x80000000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	do_it	eq, te
	moveq	xl, yl
	moveq	xh, yh
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	do_it	ne, t
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

@ divdf3 / aeabi_ddiv: x / y, result in xh-xl.
@ r4, r5, r6 and lr are saved on entry and restored by RETLDM.
@ Shares the Lml_* special-case and under/overflow tails with muldf3.

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	do_push	{r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	do_it	ne, tte
	COND(and,s,ne)	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract divisor exponent from that of the dividend.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r5-r6, divisor -> yh-yl.
	orrs	r5, yl, yh, lsl #12
	mov	xh, xh, lsl #12
	beq	LSYM(Ldv_1)
	mov	yh, yh, lsl #12
	mov	r5, #0x10000000
	orr	yh, r5, yh, lsr #4
	orr	yh, yh, yl, lsr #24
	mov	yl, yl, lsl #8
	orr	r5, r5, xh, lsr #4
	orr	r5, r5, xl, lsr #24
	mov	r6, xl, lsl #8

	@ Initialize xh with final sign bit.
	and	xh, lr, #0x80000000

	@ Ensure result will land to known bit position.
	@ Apply exponent bias accordingly.
	cmp	r5, yh
	do_it	eq
	cmpeq	r6, yl
	adc	r4, r4, #(255 - 2)
	add	r4, r4, #0x300
	bcs	1f
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
1:
	@ Perform first subtraction to align result to a nibble.
	subs	r6, r6, yl
	sbc	r5, r5, yh
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	mov	xl, #0x00100000
	mov	ip, #0x00080000

	@ The actual division loop.
	@ Each pass produces four quotient bits; ip marks the current bit.
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	do_it	eq, t
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	@ lr still holds the last "orrs lr, r5, r6" value here, so it is
	@ nonzero exactly when a remainder is left for Lml_u to consider.
	subs	ip, r4, #(254 - 1)
	do_it	hi
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	subs	ip, r5, yh
	do_it	eq, t
	COND(sub,s,eq)	ip, r6, yl
	COND(mov,s,eq)	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	do_it	gt, tt
	COND(rsb,s,gt)	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
	@ NOTE(review): no branch to this label is visible in this file;
	@ the range check above goes straight to Lml_u.  Confirm before
	@ removing.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	do_it	eq
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	do_it	ne
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)
	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_arm_muldivdf3 */

#ifdef L_arm_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.
@
@ gtdf2/gedf2, ltdf2/ledf2 and cmpdf2/nedf2/eqdf2 share one body.
@ They differ only in the value pushed on the stack, which becomes the
@ return value when either operand is a NAN (-1, 1 and 1 respectively).
@ Otherwise r0 is 0 if x == y, and -1 or 1 according to the ordering.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]!

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	do_it	ne
	COND(mvn,s,ne)	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	add	sp, sp, #4
	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	do_it	eq, e
	COND(orr,s,eq)	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	do_it	eq, tt
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0

	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	do_it	pl
	cmppl	xh, yh
	do_it	eq
	cmpeq	xl, yl

	@ Result:
	do_it	cs, e
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp], #4		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

@ aeabi_cdrcmple: swap the two operands, then fall into the flag-setting
@ comparison below.

ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

@ aeabi_cdcmpeq / aeabi_cdcmple: compare x and y and return the result
@ in the Z and C flags only.

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	do_push	{r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	do_it	mi
	cmnmi	r0, #0
	RETLDM	"r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

@ aeabi_dcmpeq / dcmplt / dcmple / dcmpge / dcmpgt: return 1 in r0 when
@ the named relation holds and 0 otherwise (including unordered).
@ Each one turns the flags from the routines above into a boolean.

ARM_FUNC_START	aeabi_dcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	eq, e
	moveq	r0, #1	@ Equal to.
	movne	r0, #0	@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START	aeabi_dcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	cc, e
	movcc	r0, #1	@ Less than.
	movcs	r0, #0	@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START	aeabi_dcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	ls, e
	movls	r0, #1  @ Less than or equal to.
	movhi	r0, #0	@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START	aeabi_dcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	do_it	ls, e
	movls	r0, #1	@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0	@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START	aeabi_dcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	do_it	cc, e
	movcc	r0, #1	@ Operand 2 is less than operand 1.
	movcs	r0, #0  @ Operand 2 is greater than or equal to operand 1,
			@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_arm_cmpdf2 */

#ifdef L_arm_unorddf2

@ unorddf2 / aeabi_dcmpun: return 1 in r0 if either operand is a NAN,
@ 0 otherwise.

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_arm_unorddf2 */

#ifdef L_arm_fixdfsi

@ fixdfsi / aeabi_d2iz: convert the double in xh-xl to a signed 32-bit
@ integer in r0, truncating toward zero.  Too-large values saturate to
@ 0x7fffffff or 0x80000000 according to sign; NAN gives 0.

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	shift1	lsr, r0, r3, r2
	do_it	ne
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	do_it	eq
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi

#endif /* L_arm_fixdfsi */

#ifdef L_arm_fixunsdfsi

@ fixunsdfsi / aeabi_d2uiz: convert the double in xh-xl to an unsigned
@ 32-bit integer in r0, truncating toward zero.  Negative values and
@ NAN give 0; too-large values and +INF saturate to 0xffffffff.

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	shift1	lsr, r0, r3, r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_arm_fixunsdfsi */

#ifdef L_arm_truncdfsf2

@ truncdfsf2 / aeabi_d2f: narrow the double in xh-xl to single precision
@ in r0, rounding to nearest with ties to even.  Overflow gives a signed
@ INF, NAN input gives a quiet NAN, and tiny values give a denormal or
@ a signed zero.

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	do_it	cs, t
	COND(sub,s,cs)	ip, r3, #(1 << 21)
	COND(rsb,s,cs)	ip, ip, #(254 << 21)
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	do_it	eq
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	do_it	lt, t
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
#if defined(__thumb2__)
	lsls	r3, xl, ip
#else
	movs	r3, xl, lsl ip
#endif
	shift1	lsr, xl, xl, r2
	do_it	ne
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	shiftop orr xl xl r3 lsl ip ip
	shift1	lsr, r3, r3, r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	do_it	ne, tt
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2

#endif /* L_arm_truncdfsf2 */