@ libgcc1 routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

/* File layout: every routine below lives in its own "#ifdef L_<name>"
   section, so only the sections whose L_ macro is defined get
   assembled.  The local labels (Loop1, Lbignum, Loop3, Lgot_result,
   Ldiv0) and the .req register aliases are repeated from section to
   section; presumably the build enables one section per assembly so
   that they never clash -- confirm against the makefile.

   This file is run through the C preprocessor.  Keep apostrophes and
   unbalanced quotes out of "@" comments, since the preprocessor does
   not know that "@" starts a comment.  */

/* Return-instruction helpers.  When __APCS_26__ is defined, RET and
   RETc(x) expand to the flag-setting forms "movs" / "mov<x>s" and
   RETCOND expands to "^"; otherwise they expand to plain "mov" /
   "mov<x>" and RETCOND is empty.  */
#ifdef __APCS_26__
#define RET	movs
#define RETc(x)	mov##x##s
#define RETCOND ^
#else
#define RET	mov
#define RETc(x)	mov##x
#define RETCOND
#endif

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  The two-level expansion makes sure the
   arguments are macro-expanded before they are pasted together.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

/* Object-format helpers.  For ELF, __PLT__ appends "(PLT)" to a call
   target and TYPE/SIZE emit .type/.size directives for a symbol; for
   any other format all three expand to nothing.  */
#ifdef __ELF__
#define __PLT__ (PLT)
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#endif

#ifdef L_udivsi3

/* __udivsi3 -- unsigned 32-bit division.

   In:    r0 = dividend, r1 = divisor.
   Out:   r0 = quotient.  If the divisor is zero, __div0 is called and
          0 is returned.
   Uses:  r0-r3 and the flags are overwritten; the divide-by-zero path
          pushes lr on the stack around the call to __div0.

   Method: shift-and-subtract.  The divisor and a single marker bit
   (curbit) are shifted left together until the divisor is no longer
   below the dividend; Loop3 then attempts four subtractions per pass
   (divisor, divisor>>1, divisor>>2, divisor>>3) and ORs the matching
   curbit position into the result for each one that succeeds.  */

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl SYM (__udivsi3)
	TYPE (__udivsi3)
	.align 0

SYM (__udivsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0
	cmp	dividend, divisor
	bcc	Lgot_result		@ dividend < divisor: quotient is 0
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	@ Z is set here if the dividend reached zero or if curbit was
	@ shifted out to zero; either way the loop is finished.
	bne	Loop3
Lgot_result:
	mov	r0, result
	RET	pc, lr

Ldiv0:
	@ Divide by zero: keep lr on the stack across the call, then
	@ return by loading the saved lr straight into pc.
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE (__udivsi3)

#endif /* L_udivsi3 */

#ifdef L_umodsi3

/* __umodsi3 -- unsigned 32-bit remainder.

   In:    r0 = dividend, r1 = divisor.
   Out:   r0 = remainder.  If the dividend is already below the divisor
          the routine returns at once with r0 untouched.  If the
          divisor is zero, __div0 is called and 0 is returned.
   Uses:  r0-r3, ip (r12) and the flags are overwritten; the
          divide-by-zero path pushes lr on the stack around the call.

   Method: the same shift-and-subtract loop as the division routines,
   but the remainder is what is left in the dividend register.  The
   last pass of Loop3 may subtract shifted divisors that fall below
   bit 0 of the quotient; those are recorded in "overdone" and added
   back after the loop.  */

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl SYM (__umodsi3)
	TYPE (__umodsi3)
	.align 0

SYM (__umodsi3):
	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	RETc(cc)	pc, lr		@ dividend < divisor: r0 is the answer
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	@ ror (not lsr) is used so that a curbit in the low nibble wraps
	@ round into the top bits of overdone instead of vanishing.
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit		@ remember curbit as used in this pass
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.
	ands	overdone, overdone, #0xe0000000
	RETc(eq)	pc, lr				@ No fixups needed
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
	RET	pc, lr

Ldiv0:
	@ Divide by zero: keep lr on the stack across the call, then
	@ return by loading the saved lr straight into pc.
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE (__umodsi3)

#endif /* L_umodsi3 */

#ifdef L_divsi3

/* __divsi3 -- signed 32-bit division.

   In:    r0 = dividend, r1 = divisor.
   Out:   r0 = quotient.  If the divisor is zero, __div0 is called and
          0 is returned.
   Uses:  r0-r3, ip (r12) and the flags are overwritten; the
          divide-by-zero path pushes lr on the stack around the call.

   Method: ip receives dividend EOR divisor, so its top bit is set
   exactly when the operands have different signs.  Both operands are
   then negated if negative, the unsigned shift-and-subtract loop
   runs, and the quotient is negated at the end if ip is negative.  */

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl SYM (__divsi3)
	TYPE (__divsi3)
	.align 0

SYM (__divsi3):
	eor	ip, dividend, divisor		@ Save the sign of the result.
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0			@ flags are still those of the cmp
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result

Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3
Lgot_result:
	mov	r0, result
	cmp	ip, #0
	rsbmi	r0, r0, #0		@ operand signs differed: negate
	RET	pc, lr

Ldiv0:
	@ Divide by zero: keep lr on the stack across the call, then
	@ return by loading the saved lr straight into pc.
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE (__divsi3)

#endif /* L_divsi3 */

#ifdef L_modsi3

/* __modsi3 -- signed 32-bit remainder.

   In:    r0 = dividend, r1 = divisor.
   Out:   r0 = remainder, negated when the original dividend was
          negative (the sign of the divisor is discarded).  If the
          divisor is zero, __div0 is called and 0 is returned.
   Uses:  r0-r3, ip (r12) and the flags are overwritten.  One word of
          stack holds the original dividend for the duration of the
          routine; it is pushed after the zero-divisor test and popped
          at Lgot_result, so every path that reaches Lgot_result has
          a balanced stack.  */

dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

	.text
	.globl SYM (__modsi3)
	TYPE (__modsi3)
	.align 0

SYM (__modsi3):
	mov	curbit, #1
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0			@ flags are still those of the cmp
	@ Need to save the sign of the dividend, unfortunately, we need
	@ ip later on; this is faster than pushing lr and using that.
	str	dividend, [sp, #-4]!
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	bcc	Lgot_result

Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend, so keep track of which
	@ subtractions are done, we can fix them up afterwards...
	mov	overdone, #0
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	@ ror (not lsr) is used so that a curbit in the low nibble wraps
	@ round into the top bits of overdone instead of vanishing.
	orrcs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	overdone, overdone, curbit, ror #3
	mov	ip, curbit		@ remember curbit as used in this pass
	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	@ If we terminated early, because dividend became zero,
	@ then none of the below will match, since the bit in ip will not be
	@ in the bottom nibble.
	ands	overdone, overdone, #0xe0000000
	beq	Lgot_result
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
Lgot_result:
	ldr	ip, [sp], #4		@ pop the original dividend
	cmp	ip, #0
	rsbmi	dividend, dividend, #0	@ it was negative: negate remainder
	RET	pc, lr

Ldiv0:
	@ Divide by zero: reached before the dividend is pushed, so only
	@ lr is on the stack here.
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}RETCOND

	SIZE (__modsi3)

#endif /* L_modsi3 */

#ifdef L_dvmd_tls

/* __div0 -- divide-by-zero hook called by the division routines in
   this file.  This version does nothing: it returns to its caller
   immediately.  */

	.globl SYM (__div0)
	TYPE (__div0)
	.align 0
SYM (__div0):
	RET	pc, lr

	SIZE (__div0)

#endif /* L_dvmd_tls */

#ifdef L_dvmd_lnx
@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls

/* __div0 -- issues the getpid system call and, unless the value that
   comes back in r0 lies in the range -1000..-1, issues the kill
   system call with r0 unchanged and r1 = SIGFPE.  r1 and lr are saved
   on entry and restored on both exit paths; r0 is not preserved.
   NOTE(review): treating -1000..-1 as "the system call failed" is
   inferred from the "not much we can do" remark -- confirm against
   the kernel syscall return convention.  */

#include <asm/unistd.h>

#define SIGFPE	8			@ cant use <asm/signal.h> as it
					@ contains too much C rubbish
	.globl SYM (__div0)
	TYPE (__div0)
	.align 0
SYM (__div0):
	stmfd	sp!, {r1, lr}
	swi	__NR_getpid
	cmn	r0, #1000		@ carry set if r0 + 1000 wraps
	ldmhsfd	sp!, {r1, pc}RETCOND	@ not much we can do
	mov	r1, #SIGFPE
	swi	__NR_kill
	ldmfd	sp!, {r1, pc}RETCOND

	SIZE (__div0)

#endif /* L_dvmd_lnx */

/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

#ifdef L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

/* The call_via macro emits one global Thumb entry point named
   _call_via_<register> whose body is "bx <register>" followed by a
   nop.  It is instantiated below for r0-r9, sl, fp, ip, sp and lr.  */

	.text
	.align 0
	.code 16
.macro call_via register
	.globl	SYM (_call_via_\register)
	TYPE (_call_via_\register)
	.thumb_func
SYM (_call_via_\register):
	bx	\register
	nop

	SIZE (_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#ifdef L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.  */

	.text
	.align 0

/* _arm_return -- assembled as ARM code.  Pops one word from the stack
   (r13) into r12 and branches to it with bx.  The interwork stubs
   below point lr here after pushing the real return address.  */
	.code 32
	.globl _arm_return
_arm_return:
	ldmia	r13!, {r12}
	bx	r12
	.code 16

/* The interwork macro emits one global Thumb entry point named
   _interwork_call_via_<register>.  Its Thumb part is "bx pc; nop";
   everything from .Lchange_<register> onwards is assembled as ARM
   code.  NOTE(review): the "bx pc" is presumably what moves the
   processor into ARM state at .Lchange_<register>, relying on how
   Thumb reads pc -- confirm against the architecture manual.

   The ARM part tests bit 0 of the target address.  When it is clear
   (the "eq" instructions execute), lr is pushed and then replaced by
   the address of _arm_return, so the callee returns through
   _arm_return.  When it is set, lr is left alone.  Either way the
   target is entered with bx.  */
.macro interwork register
	.code 16
	.globl	SYM (_interwork_call_via_\register)
	TYPE (_interwork_call_via_\register)
	.thumb_func
SYM (_interwork_call_via_\register):
	bx	pc
	nop

	.code 32
	.globl .Lchange_\register
.Lchange_\register:
	tst	\register, #1
	stmeqdb	r13!, {lr}
	adreq	lr, _arm_return
	bx	\register

	SIZE (_interwork_call_via_\register)
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The lr case has to be handled a little differently...
	   Here lr holds the target address itself, so it is copied to
	   ip before lr may be overwritten with the address of
	   _arm_return, and the final branch goes through ip.  */
	.code 16
	.globl	SYM (_interwork_call_via_lr)
	TYPE (_interwork_call_via_lr)
	.thumb_func
SYM (_interwork_call_via_lr):
	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE (_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */