/* ieee754-sf.S single-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combine
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 *
 * Conventions visible in this file: single-precision operands are taken
 * in r0 (first) and r1 (second) as raw IEEE 754 bit patterns and results
 * are returned in r0.  ARM_FUNC_START, ARM_FUNC_ALIAS, FUNC_END, RET,
 * RETc, RETLDM, ARM_CALL, LSYM and the ah/al register names are provided
 * by the file that includes this one.
 */

#ifdef L_negsf2

	@ negsf2 / aeabi_fneg: return r0 with its sign bit inverted.

ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2

	eor	r0, r0, #0x80000000	@ flip sign bit
	RET

	FUNC_END aeabi_fneg
	FUNC_END negsf2

#endif

#ifdef L_addsubsf3

	@ aeabi_frsub: flip the sign of the first argument, then add
	@ (i.e. compute r1 - r0).

ARM_FUNC_START aeabi_frsub

	eor	r0, r0, #0x80000000	@ flip sign bit of first arg
	b	1f

	@ subsf3 / aeabi_fsub: flip the sign of the second argument, then
	@ fall into (or branch to) the addition code below.

ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3

	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

	@ addsf3 / aeabi_fadd: r0 = r0 + r1.
	@ Clobbers r1, r2, r3, ip and the flags.

ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3

1:	@ Look for zeroes, equal values, INF, or NAN.
	movs	r2, r0, lsl #1
	movnes	r3, r1, lsl #1
	teqne	r2, r3
	mvnnes	ip, r2, asr #24
	mvnnes	ip, r3, asr #24
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r2,
	@ corresponding arg in r0, and positive exponent difference in r3.
	mov	r2, r2, lsr #24
	rsbs	r3, r2, r3, lsr #24
	addgt	r2, r2, r3
	eorgt	r1, r0, r1
	eorgt	r0, r1, r0
	eorgt	r1, r0, r1
	rsblt	r3, r3, #0

	@ If exponent difference is too large, return largest argument
	@ already in r0.  We need up to 25 bit to handle proper rounding
	@ of 0x1p25 - 1.1.
	cmp	r3, #25
	RETc(hi)

	@ Convert mantissa to signed integer.
	tst	r0, #0x80000000
	orr	r0, r0, #0x00800000
	bic	r0, r0, #0xff000000
	rsbne	r0, r0, #0
	tst	r1, #0x80000000
	orr	r1, r1, #0x00800000
	bic	r1, r1, #0xff000000
	rsbne	r1, r1, #0

	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not common case, rescale them off line.
	teq	r2, r3
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r2, r2, #1

	@ Shift and add second arg to first arg in r0.
	@ Keep leftover bits into r1.
	adds	r0, r0, r1, asr r3
	rsb	r3, r3, #32
	mov	r1, r1, lsl r3

	@ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
	and	r3, r0, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	r1, r1, #0
	rsc	r0, r0, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	r0, #0x00800000
	bcc	LSYM(Lad_a)
	cmp	r0, #0x01000000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	r0, r0, lsr #1
	mov	r1, r1, rrx
	add	r2, r2, #1

	@ Make sure we did not bust our exponent.
	cmp	r2, #254
	bhs	LSYM(Lad_o)

	@ Our result is now properly aligned into r0, remaining bits in r1.
	@ Pack final result together.
	@ Round with MSB of r1. If halfway between two numbers, round towards
	@ LSB of r0 = 0.
LSYM(Lad_e):
	cmp	r1, #0x80000000
	adc	r0, r0, r2, lsl #23
	biceq	r0, r0, #1
	orr	r0, r0, r3
	RET

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	r1, r1, lsl #1
	adc	r0, r0, r0
	tst	r0, #0x00800000
	sub	r2, r2, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	movs	ip, r0, lsr #12
	moveq	r0, r0, lsl #12
	subeq	r2, r2, #12
	tst	r0, #0x00ff0000
	moveq	r0, r0, lsl #8
	subeq	r2, r2, #8
	tst	r0, #0x00f00000
	moveq	r0, r0, lsl #4
	subeq	r2, r2, #4
	tst	r0, #0x00c00000
	moveq	r0, r0, lsl #2
	subeq	r2, r2, #2
	cmp	r0, #0x00800000
	movcc	r0, r0, lsl #1
	sbcs	r2, r2, #0

#else

	clz	ip, r0
	sub	ip, ip, #8
	subs	r2, r2, ip
	mov	r0, r0, lsl ip

#endif

	@ Final result with sign
	@ If exponent negative, denormalize result.
	addge	r0, r0, r2, lsl #23
	rsblt	r2, r2, #0
	orrge	r0, r0, r3
	orrlt	r0, r3, r0, lsr r2
	RET

	@ Fixup and adjust bit position for denormalized arguments.
	@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
	teq	r2, #0
	eor	r1, r1, #0x00800000
	eoreq	r0, r0, #0x00800000
	addeq	r2, r2, #1
	subne	r3, r3, #1
	b	LSYM(Lad_x)

	@ Special cases: a zero operand, equal magnitudes, INF or NAN.
LSYM(Lad_s):
	mov	r3, r1, lsl #1

	mvns	ip, r2, asr #24
	mvnnes	ip, r3, asr #24
	beq	LSYM(Lad_i)

	teq	r2, r3
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	teq	r2, #0
	moveq	r0, r1
	RET

1:	teq	r0, r1

	@ Result is x - x = 0.
	movne	r0, #0
	RETc(ne)

	@ Result is x + x = 2x.
	tst	r2, #0xff000000
	bne	2f
	movs	r0, r0, lsl #1
	orrcs	r0, r0, #0x80000000
	RET
2:	adds	r2, r2, #(2 << 24)
	addcc	r0, r0, #(1 << 23)
	RETc(cc)
	and	r3, r0, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	r0, r3, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ At least one of r0/r1 is INF/NAN.
	@   if r0 != INF/NAN: return r1 (which is INF/NAN)
	@   if r1 != INF/NAN: return r0 (which is INF/NAN)
	@   if r0 or r1 is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
	mvns	r2, r2, asr #24
	movne	r0, r1
	mvneqs	r3, r3, asr #24
	movne	r1, r0
	movs	r2, r0, lsl #9
	moveqs	r3, r1, lsl #9
	teqeq	r0, r1
	orrne	r0, r0, #0x00400000	@ quiet NAN
	RET

	FUNC_END aeabi_frsub
	FUNC_END aeabi_fadd
	FUNC_END addsf3
	FUNC_END aeabi_fsub
	FUNC_END subsf3

	@ floatunsisf / aeabi_ui2f: convert the unsigned 32-bit integer in
	@ r0 to single precision.  Shares the normalization tail (label 2)
	@ of the 64-bit conversion code further down.

ARM_FUNC_START floatunsisf
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf

	mov	r3, #0
	b	1f

	@ floatsisf / aeabi_i2f: convert the signed 32-bit integer in r0
	@ to single precision.  The sign is kept in r3 and the magnitude
	@ is converted.

ARM_FUNC_START floatsisf
ARM_FUNC_ALIAS aeabi_i2f floatsisf

	ands	r3, r0, #0x80000000
	rsbmi	r0, r0, #0

1:	movs	ip, r0
	RETc(eq)

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23) << 23)

	.ifnc	ah, r0
	mov	ah, r0
	.endif
	mov	al, #0
	b	2f

	FUNC_END aeabi_i2f
	FUNC_END floatsisf
	FUNC_END aeabi_ui2f
	FUNC_END floatunsisf

	@ floatundisf / aeabi_ul2f: convert the unsigned 64-bit integer in
	@ r0/r1 (accessed below through the ah/al names) to single precision.

ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqs	f0, #0.0
#endif
	RETc(eq)

	mov	r3, #0
	b	1f

	@ floatdisf / aeabi_l2f: convert the signed 64-bit integer in r0/r1
	@ to single precision.  The sign is kept in r3 and the magnitude is
	@ converted.

ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqs	f0, #0.0
#endif
	RETc(eq)

	ands	r3, ah, #0x80000000	@ sign bit in r3
	bpl	1f
	rsbs	al, al, #0
	rsc	ah, ah, #0
1:
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0 for backwards
	@ compatibility.
	str	lr, [sp, #-8]!
	adr	lr, LSYM(f0_ret)
#endif

	movs	ip, ah
	moveq	ip, al
	moveq	ah, al
	moveq	al, #0

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23 + 32) << 23)
	subeq	r3, r3, #(32 << 23)
2:	sub	r3, r3, #(1 << 23)

#if __ARM_ARCH__ < 5

	mov	r2, #23
	cmp	ip, #(1 << 16)
	movhs	ip, ip, lsr #16
	subhs	r2, r2, #16
	cmp	ip, #(1 << 8)
	movhs	ip, ip, lsr #8
	subhs	r2, r2, #8
	cmp	ip, #(1 << 4)
	movhs	ip, ip, lsr #4
	subhs	r2, r2, #4
	cmp	ip, #(1 << 2)
	subhs	r2, r2, #2
	sublo	r2, r2, ip, lsr #1
	subs	r2, r2, ip, lsr #3

#else

	clz	r2, ip
	subs	r2, r2, #8

#endif

	sub	r3, r3, r2, lsl #23
	blt	3f

	add	r3, r3, ah, lsl r2
	mov	ip, al, lsl r2
	rsb	r2, r2, #32
	cmp	ip, #0x80000000
	adc	r0, r3, al, lsr r2
	biceq	r0, r0, #1
	RET

3:	add	r2, r2, #32
	mov	ip, ah, lsl r2
	rsb	r2, r2, #32
	orrs	al, al, ip, lsl #1
	adc	r0, r3, ah, lsr r2
	biceq	r0, r0, ip, lsr #31
	RET

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Copy the result from r0 into f0 through the stack, then return
	@ using the lr saved above.
LSYM(f0_ret):
	str	r0, [sp, #-4]!
	ldfs	f0, [sp], #4
	RETLDM

#endif

	FUNC_END floatdisf
	FUNC_END aeabi_l2f
	FUNC_END floatundisf
	FUNC_END aeabi_ul2f

#endif /* L_addsubsf3 */

#ifdef L_muldivsf3

	@ mulsf3 / aeabi_fmul: r0 = r0 * r1.
	@ Clobbers r1, r2, r3, ip and the flags.

ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	ands	r2, ip, r0, lsr #23
	andnes	r3, ip, r1, lsr #23
	teqne	r2, ip
	teqne	r3, ip
	beq	LSYM(Lml_s)
LSYM(Lml_x):

	@ Add exponents together
	add	r2, r2, r3

	@ Determine final sign.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	@ Make up for final alignment.
	movs	r0, r0, lsl #9
	movnes	r1, r1, lsl #9
	beq	LSYM(Lml_1)
	mov	r3, #0x08000000
	orr	r0, r3, r0, lsr #5
	orr	r1, r3, r1, lsr #5

#if __ARM_ARCH__ < 4

	@ Put sign bit in r3, which will be restored into r0 later.
	and	r3, ip, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r3, r4, r5}
	mov	r4, r0, lsr #16
	mov	r5, r1, lsr #16
	bic	r0, r0, r4, lsl #16
	bic	r1, r1, r5, lsl #16
	mul	ip, r4, r5
	mul	r3, r0, r1
	mul	r0, r5, r0
	mla	r0, r4, r1, r0
	adds	r3, r3, r0, lsl #16
	adc	r1, ip, r0, lsr #16
	ldmfd	sp!, {r0, r4, r5}

#else

	@ The actual multiplication.
	umull	r3, r1, r0, r1

	@ Put final sign in r0.
	and	r0, ip, #0x80000000

#endif

	@ Adjust result upon the MSB position.
	cmp	r1, #(1 << 23)
	movcc	r1, r1, lsl #1
	orrcc	r1, r1, r3, lsr #31
	movcc	r3, r3, lsl #1

	@ Add sign to result.
	orr	r0, r0, r1

	@ Apply exponent bias, check for under/overflow.
	sbc	r2, r2, #127
	cmp	r2, #(254 - 1)
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	r3, #0x80000000
	adc	r0, r0, r2, lsl #23
	biceq	r0, r0, #1
	RET

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	teq	r0, #0
	and	ip, ip, #0x80000000
	moveq	r1, r1, lsl #9
	orr	r0, ip, r0, lsr #9
	orr	r0, r0, r1, lsr #9
	subs	r2, r2, #127
	rsbgts	r3, r2, #255
	orrgt	r0, r0, r2, lsl #23
	RETc(gt)

	@ Under/overflow: fix things up for the code below.
	orr	r0, r0, #0x00800000
	mov	r3, #0
	subs	r2, r2, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r2, #(24 + 1)
	bicle	r0, r0, #0x7fffffff
	RETc(le)

	@ Shift value right, round, etc.
	rsb	r2, r2, #0
	movs	r1, r0, lsl #1
	mov	r1, r1, lsr r2
	rsb	r2, r2, #32
	mov	ip, r0, lsl r2
	movs	r0, r1, rrx
	adc	r0, r0, #0
	orrs	r3, r3, ip, lsl #1
	biceq	r0, r0, ip, lsr #31
	RET

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #1
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #1
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Lml_x)

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	and	r3, ip, r1, lsr #23
	teq	r2, ip
	teqne	r3, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	bics	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	r0, r0, r1
	bic	r0, r0, #0x7fffffff
	RET

1:	@ One or both args are INF or NAN.
	teq	r0, #0x0
	teqne	r0, #0x80000000
	moveq	r0, r1
	teqne	r1, #0x0
	teqne	r1, #0x80000000
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r2, ip
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r3, ip
	bne	LSYM(Lml_i)
	movs	r3, r1, lsl #9
	movne	r0, r1
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	r0, r0, r1

	@ Overflow: return INF (sign already in r0).
LSYM(Lml_o):
	and	r0, r0, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00c00000
	RET

	FUNC_END aeabi_fmul
	FUNC_END mulsf3

	@ divsf3 / aeabi_fdiv: r0 = r0 / r1.
	@ Clobbers r1, r2, r3, ip and the flags.  Reuses the Lml_u, Lml_i,
	@ Lml_z and Lml_n tails of the multiplication code above.

ARM_FUNC_START divsf3
ARM_FUNC_ALIAS aeabi_fdiv divsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	ands	r2, ip, r0, lsr #23
	andnes	r3, ip, r1, lsr #23
	teqne	r2, ip
	teqne	r3, ip
	beq	LSYM(Ldv_s)
LSYM(Ldv_x):

	@ Subtract divisor exponent from dividend's
	sub	r2, r2, r3

	@ Preserve final sign into ip.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r3, divisor -> r1.
	movs	r1, r1, lsl #9
	mov	r0, r0, lsl #9
	beq	LSYM(Ldv_1)
	mov	r3, #0x10000000
	orr	r1, r3, r1, lsr #4
	orr	r3, r3, r0, lsr #4

	@ Initialize r0 (result) with final sign bit.
	and	r0, ip, #0x80000000

	@ Ensure result will land to known bit position.
	@ Apply exponent bias accordingly.
	cmp	r3, r1
	movcc	r3, r3, lsl #1
	adc	r2, r2, #(127 - 2)

	@ The actual division loop.
	mov	ip, #0x00800000
1:	cmp	r3, r1
	subcs	r3, r3, r1
	orrcs	r0, r0, ip
	cmp	r3, r1, lsr #1
	subcs	r3, r3, r1, lsr #1
	orrcs	r0, r0, ip, lsr #1
	cmp	r3, r1, lsr #2
	subcs	r3, r3, r1, lsr #2
	orrcs	r0, r0, ip, lsr #2
	cmp	r3, r1, lsr #3
	subcs	r3, r3, r1, lsr #3
	orrcs	r0, r0, ip, lsr #3
	movs	r3, r3, lsl #4
	movnes	ip, ip, lsr #4
	bne	1b

	@ Check exponent for under/overflow.
	cmp	r2, #(254 - 1)
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	r3, r1
	adc	r0, r0, r2, lsl #23
	biceq	r0, r0, #1
	RET

	@ Division by 0x1p*: let's shortcut a lot of code.
LSYM(Ldv_1):
	and	ip, ip, #0x80000000
	orr	r0, ip, r0, lsr #9
	adds	r2, r2, #127
	rsbgts	r3, r2, #255
	orrgt	r0, r0, r2, lsl #23
	RETc(gt)

	orr	r0, r0, #0x00800000
	mov	r3, #0
	subs	r2, r2, #1
	b	LSYM(Lml_u)

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Ldv_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #1
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #1
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Ldv_x)

	@ One or both arguments are either INF, NAN, zero or denormalized.
LSYM(Ldv_s):
	and	r3, ip, r1, lsr #23
	teq	r2, ip
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r3, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	r0, r1
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r3, ip
	bne	2f
	movs	r3, r1, lsl #9
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	r0, r1
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	bics	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000
	bne	LSYM(Ldv_d)
	@ One or both arguments are zero.
	bics	r2, r0, #0x80000000
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	bics	r3, r1, #0x80000000
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_fdiv
	FUNC_END divsf3

#endif /* L_muldivsf3 */

#ifdef L_cmpsf2

	@ The return value in r0 is
	@
	@   0  if the operands are equal
	@   1  if the first operand is greater than the second, or
	@      the operands are unordered and the operation is
	@      CMP, LT, LE, NE, or EQ.
	@  -1  if the first operand is less than the second, or
	@      the operands are unordered and the operation is GT
	@      or GE.
	@
	@ The Z flag will be set iff the operands are equal.
	@
	@ The following registers are clobbered by this function:
	@   ip, r0, r1, r2, r3

ARM_FUNC_START gtsf2
ARM_FUNC_ALIAS gesf2 gtsf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltsf2
ARM_FUNC_ALIAS lesf2 ltsf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpsf2
ARM_FUNC_ALIAS nesf2 cmpsf2
ARM_FUNC_ALIAS eqsf2 cmpsf2
	mov	ip, #1			@ how should we specify unordered here?

	@ Push the unordered result code.  The store uses writeback so
	@ that the slot lies inside the live stack: data kept below sp
	@ may be overwritten by an interrupt or signal handler running
	@ on the same stack.  Every exit path below pops it again.
1:	str	ip, [sp, #-4]!

	@ Trap any INF/NAN first.
	mov	r2, r0, lsl #1
	mov	r3, r1, lsl #1
	mvns	ip, r2, asr #24
	mvnnes	ip, r3, asr #24
	beq	3f

	@ Compare values.
	@ Note that 0.0 is equal to -0.0.
2:	add	sp, sp, #4		@ discard the saved code (flags untouched)
	orrs	ip, r2, r3, lsr #1	@ test if both are 0, clear C flag
	teqne	r0, r1			@ if not 0 compare sign
	subpls	r0, r2, r3		@ if same sign compare values, set r0

	@ Result:
	movhi	r0, r1, asr #31
	mvnlo	r0, r1, asr #31
	orrne	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mvns	ip, r2, asr #24
	bne	4f
	movs	ip, r0, lsl #9
	bne	5f			@ r0 is NAN
4:	mvns	ip, r3, asr #24
	bne	2b
	movs	ip, r1, lsl #9
	beq	2b			@ r1 is not NAN
5:	ldr	r0, [sp], #4		@ pop and return unordered code.
	RET

	FUNC_END gesf2
	FUNC_END gtsf2
	FUNC_END lesf2
	FUNC_END ltsf2
	FUNC_END nesf2
	FUNC_END eqsf2
	FUNC_END cmpsf2

	@ aeabi_cfrcmple: same as aeabi_cfcmple below, with the two
	@ operands swapped first.

ARM_FUNC_START aeabi_cfrcmple

	mov	ip, r0
	mov	r0, r1
	mov	r1, ip
	b	6f

	@ aeabi_cfcmpeq / aeabi_cfcmple: compare r0 with r1 through cmpsf2
	@ and return the outcome in the Z and C flags only.

ARM_FUNC_START aeabi_cfcmpeq
ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, r1, r2, r3, lr}
	ARM_CALL cmpsf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	cmnmi	r0, #0
	RETLDM	"r0, r1, r2, r3"

	FUNC_END aeabi_cfcmple
	FUNC_END aeabi_cfcmpeq
	FUNC_END aeabi_cfrcmple

	@ The aeabi_fcmp* routines below return 1 in r0 when the named
	@ relation holds and 0 otherwise, decoding the flags produced by
	@ aeabi_cfcmple / aeabi_cfrcmple.

ARM_FUNC_START	aeabi_fcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	moveq	r0, #1	@ Equal to.
	movne	r0, #0	@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_fcmpeq

ARM_FUNC_START	aeabi_fcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	movcc	r0, #1	@ Less than.
	movcs	r0, #0	@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_fcmplt

ARM_FUNC_START	aeabi_fcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	movls	r0, #1  @ Less than or equal to.
	movhi	r0, #0	@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_fcmple

ARM_FUNC_START	aeabi_fcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfrcmple
	movls	r0, #1	@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0	@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_fcmpge

ARM_FUNC_START	aeabi_fcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfrcmple
	movcc	r0, #1	@ Operand 2 is less than operand 1.
	movcs	r0, #0  @ Operand 2 is greater than or equal to operand 1,
			@ or they are unordered.
	RETLDM

	FUNC_END aeabi_fcmpgt

#endif /* L_cmpsf2 */

#ifdef L_unordsf2

	@ unordsf2 / aeabi_fcmpun: return 1 in r0 if either r0 or r1 is a
	@ NAN, 0 otherwise.  Clobbers r2, r3, ip and the flags.

ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2

	mov	r2, r0, lsl #1
	mov	r3, r1, lsl #1
	mvns	ip, r2, asr #24
	bne	1f
	movs	ip, r0, lsl #9
	bne	3f			@ r0 is NAN
1:	mvns	ip, r3, asr #24
	bne	2f
	movs	ip, r1, lsl #9
	bne	3f			@ r1 is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET
3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_fcmpun
	FUNC_END unordsf2

#endif /* L_unordsf2 */

#ifdef L_fixsfsi

	@ fixsfsi / aeabi_f2iz: convert the single-precision value in r0 to
	@ a signed 32-bit integer, truncating toward zero.  Magnitudes
	@ below 1.0 and NANs give 0; values that are too large give
	@ 0x7fffffff or 0x80000000 depending on the sign.
	@ Clobbers r2, r3 and the flags.

ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi

	@ check exponent range.
	mov	r2, r0, lsl #1
	cmp	r2, #(127 << 24)
	bcc	1f			@ value is too small
	mov	r3, #(127 + 31)
	subs	r2, r3, r2, lsr #24
	bls	2f			@ value is too large

	@ scale value
	mov	r3, r0, lsl #8
	orr	r3, r3, #0x80000000
	tst	r0, #0x80000000		@ the sign bit
	mov	r0, r3, lsr r2
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	cmp	r2, #(127 + 31 - 0xff)
	bne	3f
	movs	r2, r0, lsl #9
	bne	4f			@ r0 is NAN.
3:	ands	r0, r0, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ the maximum signed positive si
	RET

4:	mov	r0, #0			@ What should we convert NAN to?
	RET

	FUNC_END aeabi_f2iz
	FUNC_END fixsfsi

#endif /* L_fixsfsi */

#ifdef L_fixunssfsi

	@ fixunssfsi / aeabi_f2uiz: convert the single-precision value in
	@ r0 to an unsigned 32-bit integer, truncating toward zero.
	@ Negative values, values below 1.0 and NANs give 0; values that
	@ are too large give 0xffffffff.  Clobbers r2, r3 and the flags.

ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi

	@ check exponent range.
	movs	r2, r0, lsl #1
	bcs	1f			@ value is negative
	cmp	r2, #(127 << 24)
	bcc	1f			@ value is too small
	mov	r3, #(127 + 31)
	subs	r2, r3, r2, lsr #24
	bmi	2f			@ value is too large

	@ scale the value
	mov	r3, r0, lsl #8
	orr	r3, r3, #0x80000000
	mov	r0, r3, lsr r2
	RET

1:	mov	r0, #0
	RET

2:	cmp	r2, #(127 + 31 - 0xff)
	bne	3f
	movs	r2, r0, lsl #9
	bne	4f			@ r0 is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ What should we convert NAN to?
	RET

	FUNC_END aeabi_f2uiz
	FUNC_END fixunssfsi

#endif /* L_fixunssfsi */