@ libgcc1 routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
@
@ Thumb-1 (ARMv4T) implementations of the libgcc software division
@ helpers (__udivsi3, __umodsi3, __divsi3, __modsi3, __div0) and the
@ ARM/Thumb interworking call veneers.  Register names are bound with
@ .req aliases per routine.  NOTE: Thumb-1 shift instructions (lsl/lsr)
@ update the condition flags; several conditional branches below rely
@ on the flags set by an immediately preceding shift.

/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

	.code 16

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* On ELF targets, emit .type/.size so the symbols are proper sized
   function symbols, and route the __div0 call through the PLT.  */
#ifdef __elf__
#define __PLT__ (PLT)
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#endif

#define RET	mov	pc, lr

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

/* Scratch register shared by all the routines below.  r4 is
   callee-saved, so each routine that uses it pushes/pops it.  */
work		.req	r4	@ XXXX is this safe ?

#ifdef L_udivsi3

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__udivsi3)
	TYPE	(__udivsi3)
	.align	0
	.thumb_func
@ ---------------------------------------------------------------------
@ unsigned __udivsi3 (unsigned dividend /* r0 */, unsigned divisor /* r1 */)
@ Returns the unsigned quotient in r0.  If divisor is zero, calls
@ SYM(__div0) and returns 0.  Clobbers r1-r3; r4 is saved on the stack.
@ Algorithm: restoring shift-and-subtract division, unrolled 4 bits
@ per iteration of Loop3.
@ ---------------------------------------------------------------------
SYM (__udivsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	bcc	Lgot_result		@ dividend < divisor => quotient is 0

	@ Load the constant 0x10000000 into our work register
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	bcc	Over1
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Over1:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Over2
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Over2:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Over3
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
Over3:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
Over4:
	cmp	dividend, #0			@ Early termination?
	beq	Lgot_result
	lsr	curbit, #4			@ No, any more bits to do?
	beq	Lgot_result			@ (lsr set Z if curbit became 0)
	lsr	divisor, #4
	b	Loop3
Lgot_result:
	mov	r0, result
	pop	{ work }
	RET

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__udivsi3)

#endif /* L_udivsi3 */

#ifdef L_umodsi3

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__umodsi3)
	TYPE	(__umodsi3)
	.align	0
	.thumb_func
@ ---------------------------------------------------------------------
@ unsigned __umodsi3 (unsigned dividend /* r0 */, unsigned divisor /* r1 */)
@ Returns the unsigned remainder in r0.  If divisor is zero, calls
@ SYM(__div0) and returns 0.  Clobbers r1-r3 and ip; r4 is saved on
@ the stack.  Same 4-bit-unrolled loop as __udivsi3, but instead of
@ building a quotient it records any over-subtractions in "overdone"
@ and undoes them at the end.
@ ---------------------------------------------------------------------
SYM (__umodsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	bcs	Over1
	RET				@ dividend < divisor => remainder is dividend

Over1:
	@ Load the constant 0x10000000 into our work register
	push	{ work }
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Over2
	sub	dividend, dividend, divisor
Over2:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Over3
	sub	dividend, dividend, work
	@ Record this subtraction: rotate curbit right by 1 and OR it
	@ into overdone.  On the final pass curbit is in the bottom
	@ nibble, so the rotated bit lands in the top bits tested below.
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over3:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over4:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Over5
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over5:
	mov	ip, curbit		@ preserve final bit position for the fixups
	cmp	dividend, #0		@ Early termination?
	beq	Over6
	lsr	curbit, #4		@ No, any more bits to do?
	beq	Over6
	lsr	divisor, #4
	b	Loop3

Over6:
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.

	mov	work, #0xe
	lsl	work, #28		@ work = 0xe0000000 mask
	and	overdone, work		@ sets Z if no fixups recorded
	bne	Over7
	pop	{ work }
	RET				@ No fixups needed
Over7:
	@ Undo each over-subtraction whose marker bit is set, adding
	@ back the same shifted divisor value that was subtracted.
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	Over8
	lsr	work, divisor, #3
	add	dividend, dividend, work
Over8:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	Over9
	lsr	work, divisor, #2
	add	dividend, dividend, work
Over9:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	Over10
	lsr	work, divisor, #1
	add	dividend, dividend, work
Over10:
	pop	{ work }
	RET

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__umodsi3)

#endif /* L_umodsi3 */

#ifdef L_divsi3

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__divsi3)
	TYPE	(__divsi3)
	.align	0
	.thumb_func
@ ---------------------------------------------------------------------
@ int __divsi3 (int dividend /* r0 */, int divisor /* r1 */)
@ Returns the signed quotient in r0.  If divisor is zero, calls
@ SYM(__div0) and returns 0.  Clobbers r1-r3 and ip; r4 is saved on
@ the stack.  Works on absolute values with the same unsigned loop as
@ __udivsi3; the result sign (dividend XOR divisor) is kept in ip and
@ applied at the end.
@ ---------------------------------------------------------------------
SYM (__divsi3):
	cmp	divisor, #0
	beq	Ldiv0

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	Over1
	neg	divisor, divisor	@ Loops below use unsigned.
Over1:
	cmp	dividend, #0
	bpl	Over2
	neg	dividend, dividend
Over2:
	cmp	dividend, divisor
	bcc	Lgot_result

	@ Load the constant 0x10000000 into our work register
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	Bcs	Lbignum
	cmp	divisor, dividend
	Bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000.
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	lsl	work, #3
Loop2:
	cmp	divisor, work
	Bcs	Loop3
	cmp	divisor, dividend
	Bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	Bcc	Over3
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Over3:
	lsr	work, divisor, #1
	cmp	dividend, work
	Bcc	Over4
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Over4:
	lsr	work, divisor, #2
	cmp	dividend, work
	Bcc	Over5
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, result, work
Over5:
	lsr	work, divisor, #3
	cmp	dividend, work
	Bcc	Over6
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, result, work
Over6:
	cmp	dividend, #0			@ Early termination?
	Beq	Lgot_result
	lsr	curbit, #4			@ No, any more bits to do?
	Beq	Lgot_result
	lsr	divisor, #4
	b	Loop3

Lgot_result:
	mov	r0, result
	@ Recover the sign saved in ip and negate the quotient if the
	@ operand signs differed.
	mov	work, ip
	cmp	work, #0
	Bpl	Over7
	neg	r0, r0
Over7:
	pop	{ work }
	RET

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__divsi3)

#endif /* L_divsi3 */

#ifdef L_modsi3

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl	SYM (__modsi3)
	TYPE	(__modsi3)
	.align	0
	.thumb_func
@ ---------------------------------------------------------------------
@ int __modsi3 (int dividend /* r0 */, int divisor /* r1 */)
@ Returns the signed remainder in r0 (sign follows the dividend).
@ If divisor is zero, calls SYM(__div0) and returns 0.  Clobbers
@ r1-r3 and ip; r4 is saved on the stack.  Same over-subtract /
@ fix-up scheme as __umodsi3, run on absolute values; the original
@ dividend is kept on the stack so its sign can be reapplied.
@ ---------------------------------------------------------------------
SYM (__modsi3):
	mov	curbit, #1
	cmp	divisor, #0
	beq	Ldiv0
	Bpl	Over1
	neg	divisor, divisor	@ Loops below use unsigned.
Over1:
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ ip later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	Bpl	Over2
	neg	dividend, dividend
Over2:
	cmp	dividend, divisor
	bcc	Lgot_result
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bcs	Lbignum
	cmp	divisor, dividend
	bcs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1

Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bcs	Loop3
	cmp	divisor, dividend
	bcs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	bcc	Over3
	sub	dividend, dividend, divisor
Over3:
	lsr	work, divisor, #1
	cmp	dividend, work
	bcc	Over4
	sub	dividend, dividend, work
	@ Record this subtraction: rotate curbit right by 1 and OR it
	@ into overdone (see __umodsi3 for the full explanation).
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over4:
	lsr	work, divisor, #2
	cmp	dividend, work
	bcc	Over5
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over5:
	lsr	work, divisor, #3
	cmp	dividend, work
	bcc	Over6
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Over6:
	mov	ip, curbit		@ preserve final bit position for the fixups
	cmp	dividend, #0		@ Early termination?
	beq	Over7
	lsr	curbit, #4		@ No, any more bits to do?
	beq	Over7
	lsr	divisor, #4
	b	Loop3

Over7:
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.
	mov	work, #0xe
	lsl	work, #28		@ work = 0xe0000000 mask
	and	overdone, work		@ sets Z if no fixups recorded
	beq	Lgot_result

	@ Undo each over-subtraction whose marker bit is set.
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	Over8
	lsr	work, divisor, #3
	add	dividend, dividend, work
Over8:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	Over9
	lsr	work, divisor, #2
	add	dividend, dividend, work
Over9:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	Lgot_result
	lsr	work, divisor, #1
	add	dividend, dividend, work
Lgot_result:
	@ First pop recovers the ORIGINAL dividend (pushed above) so we
	@ can give the remainder the dividend's sign.
	pop	{ work }
	cmp	work, #0
	bpl	Over10
	neg	dividend, dividend
Over10:
	pop	{ work }		@ restore the real r4
	RET

Ldiv0:
	push	{ lr }
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	pop	{ pc }

	SIZE	(__modsi3)

#endif /* L_modsi3 */

#ifdef L_dvmd_tls

	@ Default divide-by-zero handler: simply return.  Targets that
	@ want to trap can provide their own __div0.
	.globl	SYM (__div0)
	TYPE	(__div0)
	.align	0
	.thumb_func
SYM (__div0):
	RET

	SIZE	(__div0)

#endif /* L_dvmd_tls */


#ifdef L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0

@ Emit one trampoline: "bl _call_via_rN" dispatches through rN, with
@ BX selecting ARM or Thumb state from the target address's bottom bit.
.macro call_via register
	.globl	SYM (_call_via_\register)
	TYPE	(_call_via_\register)
	.thumb_func
SYM (_call_via_\register):
	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#ifdef L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.  */

	.text
	.align 0

	.code 32
	@ ARM-mode landing pad: an ARM-mode callee returns here (its lr
	@ was pointed at this label); pop the saved Thumb return address
	@ and BX back to it.
	.globl _arm_return
_arm_return:
	ldmia	r13!, {r12}
	bx	r12

.macro interwork register
	.code 16

	.globl	SYM (_interwork_call_via_\register)
	TYPE	(_interwork_call_via_\register)
	.thumb_func
SYM (_interwork_call_via_\register):
	bx	pc			@ switch to ARM mode at .Lchange_\register
	nop

	.code 32
	.globl .Lchange_\register
.Lchange_\register:
	@ Bit 0 of the target address selects the instruction set.
	@ If clear (EQ: ARM-mode target), save the real return address
	@ and redirect the callee's return through _arm_return.
	tst	\register, #1
	stmeqdb	r13!, {lr}
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The lr case has to be handled a little differently...  The
	   target address is in lr itself, so it must be moved aside
	   before lr can be redirected at _arm_return.  */
	.code 16
	.globl	SYM (_interwork_call_via_lr)
	TYPE	(_interwork_call_via_lr)
	.thumb_func
SYM (_interwork_call_via_lr):
	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr}		@ ARM target: save the Thumb return address
	mov	ip, lr			@ keep the call target before clobbering lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */