# lib1funcs.S revision 1.3
# ieee754 sf routines for FT32

/* Copyright (C) 1995-2017 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
# for implementation details of all except division which is detailed below
#

#ifdef L_fp_tools
# Shared constant pool for the soft-float routines (loaded with lpm).
// .global __cmpsf2_
nan:           .long 0x7FFFFFFF   # also abs mask
inf:           .long 0x7F800000
sign_mask:     .long 0x80000000
m_mask:        .long 0x007FFFFF
exp_bias:      .long 127
edge_case:     .long 0x00FFFFFF
smallest_norm: .long 0x00800000   # implicit bit
high_FF:       .long 0xFF000000
high_uint:     .long 0xFFFFFFFF

# Lookup table for the ntz macro below: index is a 6-bit hash of the
# isolated lowest set bit, value is the trailing-zero count.
ntz_table:
	.byte 32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
	.byte 10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
	.byte 31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
	.byte 30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0

#endif

# Supply a few 'missing' instructions

# not: bitwise complement via xor with all-ones
.macro not rd,r1
	xor	\rd,\r1,-1
.endm

# negate: two's-complement negation (complement then increment)
.macro neg x
	not	\x, \x
	add	\x, \x, 1
.endm

# set $cc from the result of "ashl reg,dist"
# Hand-encoded instruction word; reg/dist are spliced into the
# register and immediate fields of the encoding.
.macro ashlcc reg,dist
	.long 0x5de04008 | (\reg << 15) | (\dist << 4)
.endm

# converts an unsigned number x to a signed rep based on the bits in sign
# sign should be 0x00000000 or 0xffffffff.
.macro to_signed x, sign
	add	\x,\x,\sign	# conditionally decrement x
	xor	\x,\x,\sign	# conditionally complement x
.endm

# Load a 32-bit constant v into register r in two instructions
# (ldk takes the high 22 bits, ldl shifts in the low 10).
.macro ld32 r,v
	ldk	\r,(\v>>10)
	ldl	\r,\r,(\v & 1023)
.endm

# calculate trailing zero count in x, also uses scr.
# Using Seal's algorithm: isolate the lowest set bit, hash it into the
# top 6 bits, then look the count up in ntz_table (program memory).
.macro ntz x, scr
	not	\scr, \x
	add	\scr, \scr, 1
	and	\x, \x, \scr	# x = lowest set bit of x
	ashl	\scr, \x, 4
	add	\x, \scr, \x
	ashl	\scr, \x, 6
	add	\x, \scr, \x
	ashl	\scr, \x, 16
	sub	\x, \scr, \x
	lshr	\x, \x, 26	# 6-bit table index
	ldk	\scr, ntz_table
	add	\x, \x, \scr
	lpmi.b	\x, \x, 0	# fetch count byte from program memory
.endm

# calculate leading zero count (bit-reverse, then count trailing zeros)
.macro nlz x, scr
	flip	\x, \x, 31
	ntz	\x, \scr
.endm

# Round 26 bit mantissa to nearest
# | 23 bits frac | G | R | S |
# The 0xc8 constant is a truth table indexed by the low 3 bits (G,R,S)
# selecting when to add a rounding increment (round-to-nearest-even).
.macro round m, s1, s2
	ldk	\s1,0xc8
	and	\s2,\m,7
	lshr	\s1,\s1,\s2
	and	\s1,\s1,1
	lshr	\m,\m,2
	add	\m,\m,\s1
.endm

# If NZ, set the LSB of reg
.macro sticky reg
	jmpc	z,1f
	or	\reg,\reg,1	# set the sticky bit to 1
1:
.endm

##########################################################################
##########################################################################
## addition & subtraction

#if defined(L_subsf3) || defined(L_addsub_sf)
.global __subsf3
__subsf3:
	# this is subtraction, so we just change the sign of r1
	lpm	$r2,sign_mask
	xor	$r1,$r1,$r2
	jmp	__addsf3
#endif

#if defined(L_addsf3) || defined(L_addsub_sf)
.global __addsf3
__addsf3:
	# x in $r0, y in $r1, result z in $r0 --||| 100 instructions +/- |||--
	# unpack e, calc d
	bextu	$r2,$r0,(8<<5)|23	# ex in r2
	bextu	$r3,$r1,(8<<5)|23	# ey in r3
	sub	$r5,$r2,$r3		# d = ex - ey

	# Special values are 0x00 and 0xff in ex and ey.
	# If (ex&ey) != 0 or (ex|ey)=255 then there may be
	# a special value.
	tst	$r2,$r3
	jmpc	nz,1f
	jmp	slow
1:	or	$r4,$r2,$r3
	cmp	$r4,255
	jmpc	nz,no_special_vals
slow:
	# Check for early exit
	cmp	$r2,0
	jmpc	z,test_if_not_255
	cmp	$r3,0
	jmpc	nz,no_early_exit
test_if_not_255:
	cmp	$r2,255
	jmpc	z,no_early_exit
	cmp	$r3,255
	jmpc	z,no_early_exit
	or	$r6,$r2,$r3
	cmp	$r6,0
	jmpc	nz,was_not_zero
	# both operands are +/-0: result is +0 unless both signs set
	and	$r0,$r0,$r1
	lpm	$r1,sign_mask
	and	$r0,$r0,$r1
	return
was_not_zero:
	cmp	$r2,0
	jmpc	nz,ret_x	# x nonzero, y zero -> return x
	move	$r0,$r1		# x zero -> return y
	return
ret_x:
	return
no_early_exit:
	# setup to test for special values
	sub	$r6,$r2,1
	and	$r6,$r6,0xFE
	sub	$r7,$r3,1
	and	$r7,$r7,0xFE
	# test for special values
	cmp	$r6,$r7
	jmpc	gte,ex_spec_is_gte
	move	$r6,$r7
ex_spec_is_gte:
	cmp	$r6,0xFE
	jmpc	nz,no_special_vals
	cmp	$r5,0
	jmpc	ns,d_gte_0
	cmp	$r3,0xFF
	jmpc	z,ret_y
	cmp	$r2,0
	jmpc	z,ret_y
ret_y:
	move	$r0,$r1
	return
d_gte_0:
	cmp	$r5,0
	jmpc	z,d_is_0
	cmp	$r2,0xFF
	jmpc	z,ret_x
	cmp	$r3,0
	jmpc	z,ret_x
d_is_0:
	cmp	$r2,0xFF
	jmpc	nz,no_special_vals
	# both exponents are 0xFF: inf/NaN combinations
	ashl	$r6,$r0,9	# clear all except x frac
	ashl	$r7,$r1,9	# clear all except y frac
	or	$r6,$r6,$r7
	cmp	$r6,0
	jmpc	nz,ret_nan	# either frac nonzero -> NaN
	lshr	$r4,$r0,31	# sx in r4
	lshr	$r5,$r1,31	# sy in r5
	cmp	$r4,$r5
	jmpc	nz,ret_nan	# inf + (-inf) -> NaN
	return
ret_nan:
	lpm	$r0,nan
	return
no_special_vals:
	ldk	$r8,(1<<10)|(9<<5)|26	# setup implicit bit and mask for e
	#----------------------
	ashr	$r4,$r0,31	# sx in r4
	ashl	$r0,$r0,3	# shift mx 3 for GRS bits
	bins	$r0,$r0,$r8	# clear sx, ex and add implicit bit mx
	# change mx to signed mantissa
	to_signed $r0,$r4
	#----------------------
	ashr	$r4,$r1,31	# sy in r4
	ashl	$r1,$r1,3	# shift my 3 for GRS bits
	bins	$r1,$r1,$r8	# clear sy, ey and add implicit bit my
	# change my to signed mantissa
	to_signed $r1,$r4
	#----------------------
	# test if we swap ms based on d sign
	cmp	$r5,0
	jmpc	gte,noswap
	# swap mx & my (three-xor swap)
	xor	$r0,$r0,$r1
	xor	$r1,$r0,$r1
	xor	$r0,$r0,$r1
	# d positive means that ex>=ey, so ez = ex
	# d negative means that ey>ex, so ez = ey
	move	$r2,$r3
	# |d|
	neg	$r5
noswap:
	# now $r2 = ez = max(ex,ey)
	cmp	$r5,26	# max necessary alignment shift is 26
	jmpc	lt,under_26
	ldk	$r5,26
under_26:
	ldk	$r7,-1
	ashl	$r7,$r7,$r5	# create inverse of mask for test of S bit value in discarded my
	not	$r7,$r7
	tst	$r1,$r7		# determine value of sticky bit
	# shift my >> |d|
	ashr	$r1,$r1,$r5
	sticky	$r1

	# add ms
	add	$r0,$r0,$r1

	# $r4 = sign(mx), mx = |mx|
	ashr	$r4,$r0,31
	xor	$r0,$r0,$r4
	sub	$r0,$r0,$r4

	# realign mantissa using leading zero count
	flip	$r7,$r0,31
	ntz	$r7,$r8
	ashl	$r0,$r0,$r7
	btst	$r0,(6<<5)|0	# test low bits for sticky again
	lshr	$r0,$r0,6
	sticky	$r0

	# update exponent
	add	$r2,$r2,5
	sub	$r2,$r2,$r7

	# Round to nearest
	round	$r0,$r7,$r6

	# detect_exp_update: carry out of the 24-bit mantissa bumps ez
	lshr	$r6,$r0,24
	add	$r2,$r2,$r6

	# final tests
	# mz == 0? if so, we just bail with a +0
	cmp	$r0,0
	jmpc	nz,msum_not_zero
	ldk	$r0,0
	return
msum_not_zero:
	# Combined check that (1 <= ez <= 254)
	sub	$r3,$r2,1
	cmp	$r3,254
	jmpc	b,no_special_ret
	# underflow?
	cmp	$r2,0
	jmpc	gt,no_under
	ldk	$r0,0	# flush to zero, keep sign
	jmp	pack_sz
no_under:
	# overflow?
	cmp	$r2,255
	jmpc	lt,no_special_ret
	ldk	$r0,0x7F8	# 0x7F8 << 20 = 0x7F800000 = inf
	ashl	$r0,$r0,20
	jmp	pack_sz
no_special_ret:
	# Pack ez
	ldl	$r2,$r2,(8<<5)|23
	bins	$r0,$r0,$r2	# width = 8, pos = 23 pack ez
	# Pack sz
pack_sz:
	ldl	$r4,$r4,(1<<5)|31
	bins	$r0,$r0,$r4	# width = 1, pos = 31 pack sz
	return
#endif

##########################################################################
##########################################################################
## multiplication

#ifdef L_mulsf3
.global __mulsf3
__mulsf3:
	# x in $r0, y in $r1, result z in $r0 --||| 61 instructions +/- |||--

	# unpack e
	bextu	$r2,$r0,(8<<5)|23	# ex in r2
	bextu	$r3,$r1,(8<<5)|23	# ey in r3
	# calc result sign
	xor	$r4,$r0,$r1
	lpm	$r5,sign_mask
	and	$r4,$r4,$r5	# sz in r4

	# unpack m add implicit bit
	ldk	$r5,(1<<10)|(9<<5)|23	# setup implicit bit and mask for e
	#----------------------
	bins	$r0,$r0,$r5	# clear sx, ex and add implicit bit mx

	# fast path: both exponents in [1,254] (normal x normal)
	sub	$r6,$r2,1
	cmp	$r6,254
	jmpc	b,1f
	jmp	slow_mul
1:	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_vals_mul

slow_mul:
	# Check for early exit
	cmp	$r2,0
	jmpc	z,op_is_zero
	cmp	$r3,0
	jmpc	nz,no_early_exit_mul
op_is_zero:
	cmp	$r2,255
	jmpc	z,no_early_exit_mul
	cmp	$r3,255
	jmpc	z,no_early_exit_mul
	move	$r0,$r4	# zero operand, finite other -> signed zero
	return
no_early_exit_mul:
	# setup to test for special values
	sub	$r6,$r2,1
	and	$r6,$r6,0xFE
	sub	$r7,$r3,1
	and	$r7,$r7,0xFE
	# test for special values
	cmp	$r6,$r7
	jmpc	gte,ex_spec_is_gte_ey_mul
	move	$r6,$r7
ex_spec_is_gte_ey_mul:
	cmp	$r6,0xFE
	jmpc	nz,no_special_vals_mul
	cmp	$r2,0xFF
	jmpc	nz,ex_not_FF_mul
	# x is inf/NaN
	ashl	$r6,$r0,9
	cmp	$r6,0
	jmpc	nz,ret_nan	# x is NaN
	cmp	$r3,0
	jmpc	z,ret_nan	# inf * 0 -> NaN
	ashl	$r6,$r1,1
	lpm	$r7,high_FF
	cmp	$r6,$r7
	jmpc	a,ret_nan	# y is NaN
	cmp	$r6,0
	jmpc	z,ret_nan	# y is zero
	# infinity
	lpm	$r0,inf
	or	$r0,$r0,$r4
	return
ex_not_FF_mul:
	cmp	$r2,0
	jmpc	nz,no_nan_mul
	cmp	$r3,0xFF
	jmpc	nz,no_nan_mul
	jmp	ret_nan	# 0 * inf -> NaN
no_nan_mul:
	lpm	$r0,nan
	and	$r0,$r0,$r1
	or	$r0,$r0,$r4
	return

ret_nan:
	lpm	$r0,nan
	return

no_special_vals_mul:
	bins	$r1,$r1,$r5	# clear sy, ey and add implicit bit my
	# calc ez
	add	$r3,$r2,$r3
	sub	$r3,$r3,127	# ez in r3

	# (r1,r2) = R0 * R1
	mul	$r2,$r0,$r1	# low 32 bits
	muluh	$r1,$r0,$r1	# high 32 bits

	btst	$r1,(1<<5)|15	# XXX use jmpx
	jmpc	z,mul_z0

	# mz is 1X.XX...X
	# 48-bit product is in (r1,r2). The low 22 bits of r2
	# are discarded.
	lshr	$r0,$r2,22
	ashl	$r1,$r1,10
	or	$r0,$r0,$r1	# r0 = (r1,r2) >> 22
	ashlcc	2,10	# set $cc from the discarded low bits of r2
	sticky	$r0
	add	$r3,$r3,1	# bump exponent

	# Round to nearest
	round	$r0, $r1, $r2
	lshr	$r6,$r0,24
	add	$r3,$r3,$r6	# carry from rounding bumps ez

	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_ret_mul

special_ret_mul:
	# When the final exponent <= 0, result is flushed to 0 except
	# for the border case 0x00FFFFFF which is promoted to next higher
	# FP no., that is, the smallest "normalized" number.
	cmp	$r3,0
	jmpc	gt,exp_normal
	# Pack ez
	ldl	$r3,$r3,(8<<5)|23
	bins	$r0,$r0,$r3	# width = 8, pos = 23 pack ez
	lpm	$r2,edge_case
	cmp	$r0,$r2
	jmpc	nz,no_edge_case
	lpm	$r0,smallest_norm
	jmp	pack_sz_mul
no_edge_case:
	ldk	$r0,0
	jmp	pack_sz_mul
exp_normal:
	# overflow?
	cmp	$r3,255
	jmpc	lt,no_special_ret_mul
	ldk	$r0,0x7F8	# 0x7F8 << 20 = inf
	ashl	$r0,$r0,20
	jmp	pack_sz_mul
no_special_ret_mul:
	# Pack ez
	ldl	$r3,$r3,(8<<5)|23
	bins	$r0,$r0,$r3	# width = 8, pos = 23 pack ez
	# Pack sz
pack_sz_mul:
	or	$r0,$r0,$r4
	return

mul_z0:
	# mz is 0X.XX...X
	# 48-bit product is in (r1,r2). The low 21 bits of r2
	# are discarded.
	lshr	$r0,$r2,21
	ashl	$r1,$r1,11
	or	$r0,$r0,$r1	# r0 = (r1,r2) >> 21
	ashlcc	2,11	# set $cc from the discarded low bits of r2
	sticky	$r0
	# Round to nearest
	round	$r0, $r1, $r2
	lshr	$r6,$r0,24
	add	$r3,$r3,$r6

	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_ret_mul
	jmp	special_ret_mul
#endif

##########################################################################
##########################################################################
## division

## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
## for implementation details

#ifdef L_divsf3
# Polynomial coefficients for the reciprocal approximation (see paper).
dc_1:	.long 0xffffe7d7
dc_2:	.long 0xffffffe8
dc_3:	.long 0xffbad86f
dc_4:	.long 0xfffbece7
dc_5:	.long 0xf3672b51
dc_6:	.long 0xfd9d3a3e
dc_7:	.long 0x9a3c4390
dc_8:	.long 0xd4d2ce9b
dc_9:	.long 0x1bba92b3
dc_10:	.long 0x525a1a8b
dc_11:	.long 0x0452b1bf
dc_12:	.long 0xFFFFFFC0
spec_val_test:	.long 0x7F7FFFFF

.global __divsf3
__divsf3:
	push	$r13
	# x in $r0, y in $r1, result z in $r0 --||| 73 instructions +/- |||-
	bextu	$r10,$r0,(8<<5)|23	# ex in r10
	bextu	$r11,$r1,(8<<5)|23	# ey in r11
	lpm	$r6, m_mask
	and	$r2, $r0, $r6	# mx
	and	$r3, $r1, $r6	# my
	cmp	$r2,$r3
	bextu	$r2,$r30,(1<<5)|4	# c = Tx >= T;  (extract flag bit from $r30)
	ashl	$r3,$r3,9	# T = X << 9;
	lpm	$r13, sign_mask
	ashl	$r4,$r0,8	# X8 = X << 8;
	or	$r4,$r4,$r13	# Mx = X8 | 0x80000000;
	lshr	$r5,$r4,$r2	# S = Mx >> c;
	# calc D
	sub	$r2, $r11, $r2
	add	$r12, $r10, 125
	sub	$r2, $r12, $r2	# int D = (Ex + 125) - (Ey - c);
	# calc result sign
	xor	$r12,$r0,$r1
	and	$r12,$r12,$r13	# Sr = ( X ^ Y ) & 0x80000000;
	# check early exit
	cmp	$r10, 0
	jmpc	nz, no_early_ret_dev
	cmp	$r11, 0
	jmpc	z, no_early_ret_dev
	cmp	$r11, 255
	jmpc	z, no_early_ret_dev
	move	$r0, $r12	# 0 / finite -> signed zero
	pop	$r13
	return
no_early_ret_dev:
	# setup to test for special values
	sub	$r8,$r10,1
	and	$r8,$r8,0xFE
	sub	$r9,$r11,1
	and	$r9,$r9,0xFE
	# test for special values
	cmp	$r8, $r9
	jmpc	gte, absXm1_gte_absYm1
	move	$r8, $r9
absXm1_gte_absYm1:
	cmp	$r8, 0xFE
	jmpc	nz, no_spec_ret_div
	cmp	$r10, 0xFF
	jmpc	nz, ex_not_FF_div
	# x is inf/NaN
	lpm	$r6, m_mask
	and	$r2, $r0, $r6	# mx
	cmp	$r2, 0
	jmpc	nz, ret_nan_div	# x is NaN
	cmp	$r11, 0xFF
	jmpc	z, ret_nan_div	# inf / inf-or-NaN -> NaN
	jmp	ret_inf_div
ex_not_FF_div:
	cmp	$r11, 0xFF
	jmpc	nz, ey_not_FF_div
	ashl	$r13, $r1, 9
	cmp	$r13, 0
	jmpc	nz, ret_nan_div	# y is NaN
	move	$r0, $r12	# finite / inf -> signed zero
	pop	$r13
	return
ey_not_FF_div:
	or	$r10, $r10, $r11
	cmp	$r10, 0
	jmpc	z, ret_nan_div	# 0 / 0 -> NaN
ret_inf_div:
	lpm	$r6, inf
	move	$r0, $r6
	or	$r0, $r0, $r12
	pop	$r13
	return
ret_nan_div:
	lpm	$r0, nan
	pop	$r13
	return

no_spec_ret_div:
# check for overflow
	ldk	$r6, 0xFE
	cmp	$r2, $r6
	jmpc	lt, no_overflow_div
	lpm	$r6, inf
	or	$r0, $r12, $r6
	pop	$r13
	return
no_overflow_div:
# check for underflow
	cmp	$r2, 0
	jmpc	ns, no_underflow_div
	xnor	$r6, $r6, $r6	# -1
	cmp	$r2, $r6
	jmpc	nz, ret_sr_div
	ldk	$r7, 0xFF
	xor	$r6, $r6, $r7	# 0xFF ^ -1 = 0xFFFFFF00
	cmp	$r4, $r6
	jmpc	nz, ret_sr_div
	# NOTE(review): $r4 cannot equal both 0xFFFFFF00 (above) and
	# 0x80000000 (below); the lshr path looks unreachable as written.
	# TODO: verify against upstream libgcc.
	lpm	$r6, sign_mask
	cmp	$r4, $r6
	jmpc	nz, ret_sr_div
	lshr	$r0, $r6, 8
	or	$r0, $r0, $r12
	pop	$r13
	return
ret_sr_div:
	move	$r0, $r12	# underflow: flush to signed zero
	pop	$r13
	return
no_underflow_div:
	# Reciprocal polynomial evaluation; i0..i25 follow the paper's
	# dataflow, with mul(a,b) = high 32 bits of a*b (muluh).
	lpm	$r6, dc_1
	muluh	$r7, $r3, $r6	# i0 = mul( T , 0xffffe7d7 );
	lpm	$r6, dc_2
	sub	$r7, $r6, $r7	# i1 = 0xffffffe8 - i0;
	muluh	$r7, $r5, $r7	# i2 = mul( S , i1 );
	add	$r7, $r7, 0x20	# i3 = 0x00000020 + i2;
	muluh	$r8, $r3, $r3	# i4 = mul( T , T );
	muluh	$r9, $r5, $r8	# i5 = mul( S , i4 );
	lpm	$r6, dc_3
	muluh	$r10, $r3, $r6	# i6 = mul( T , 0xffbad86f );
	lpm	$r6, dc_4
	sub	$r10, $r6, $r10	# i7 = 0xfffbece7 - i6;
	muluh	$r10, $r9, $r10	# i8 = mul( i5 , i7 );
	add	$r7, $r7, $r10	# i9 = i3 + i8;
	muluh	$r9, $r8, $r9	# i10 = mul( i4 , i5 );
	lpm	$r6, dc_5
	muluh	$r10, $r3, $r6	# i11 = mul( T , 0xf3672b51 );
	lpm	$r6, dc_6
	sub	$r10, $r6, $r10	# i12 = 0xfd9d3a3e - i11;
	lpm	$r6, dc_7
	muluh	$r11, $r3, $r6	# i13 = mul( T , 0x9a3c4390 );
	lpm	$r6, dc_8
	sub	$r11, $r6, $r11	# i14 = 0xd4d2ce9b - i13
	muluh	$r11, $r8, $r11	# i15 = mul( i4 , i14 );
	add	$r10, $r10, $r11	# i16 = i12 + i15;
	muluh	$r10, $r9, $r10	# i17 = mul( i10 , i16 )
	add	$r7, $r7, $r10	# i18 = i9 + i17;
	muluh	$r10, $r8, $r8	# i19 = mul( i4 , i4 );
	lpm	$r6, dc_9
	muluh	$r11, $r3, $r6	# i20 = mul( T , 0x1bba92b3 );
	lpm	$r6, dc_10
	sub	$r11, $r6, $r11	# i21 = 0x525a1a8b - i20;
	lpm	$r6, dc_11
	muluh	$r8, $r8, $r6	# i22 = mul( i4 , 0x0452b1bf );
	add	$r8, $r11, $r8	# i23 = i21 + i22;
	muluh	$r8, $r10, $r8	# i24 = mul( i19 , i23 );
	muluh	$r8, $r9, $r8	# i25 = mul( i10 , i24 );
	add	$r3, $r7, $r8	# V = i18 + i25;
# W = V & 0xFFFFFFC0;
	lpm	$r6, dc_12
	and	$r3, $r3, $r6	# W
# round and pack final values
	ashl	$r0, $r2, 23	# pack D
	or	$r0, $r0, $r12	# pack Sr
	ashl	$r12, $r1, 8
	or	$r12, $r12, $r13	# My
	muluh	$r10, $r3, $r12
	lshr	$r11, $r5, 1
	cmp	$r10, $r11
	jmpc	gte, div_ret_1
	add	$r3, $r3, 0x40	# rounding increment
div_ret_1:
	lshr	$r3, $r3, 7
	add	$r0, $r0, $r3
	pop	$r13
	return
#endif

##########################################################################
##########################################################################
## Negate

#ifdef L_negsf
.global __negsf
__negsf:
	# Flip the sign bit of $r0.
	lpm	$r1, sign_mask
	xor	$r0, $r0, $r1
	return
#endif

##########################################################################
##########################################################################
## float to int & unsigned int

#ifdef L_fixsfsi
.global __fixsfsi
__fixsfsi:	# 20 instructions
	# float in $r0 -> signed int in $r0 (truncation toward zero)
	bextu	$r1,$r0,(8<<5)|23	# e in r1
	lshr	$r2,$r0,31	# s in r2
	lpm	$r3, m_mask
	and	$r0,$r0,$r3	# m in r0
	# test nan
	cmp	$r1,0xFF
	jmpc	nz, int_not_nan
	cmp	$r0,0
	jmpc	z, int_not_nan
	ldk	$r0,0	# NaN -> 0
	return
int_not_nan:
	# test edges
	cmp	$r1, 127
	jmpc	gte, int_not_zero	# lower limit: |x| < 1 -> 0
	ldk	$r0,0
	return
int_not_zero:
	cmp	$r1, 158
	jmpc	lt, int_not_max	# upper limit
	lpm	$r0, nan	# 0x7FFFFFFF = INT_MAX
	cmp	$r2, 0
	jmpc	z, int_positive
	xnor	$r0, $r0, 0	# negative overflow -> INT_MIN
	return
int_not_max:
	lpm	$r3, smallest_norm
	or	$r0, $r0, $r3	# set implicit bit
	sub	$r1, $r1, 150	# shift amount = e - (127 + 23)
	cmp	$r1, 0
	jmpc	s, shift_right
	ashl	$r0, $r0, $r1
	jmp	set_int_sign
shift_right:
	xnor	$r1, $r1, 0	# negate shift count
	add	$r1, $r1, 1
	lshr	$r0, $r0, $r1
set_int_sign:
	cmp	$r2, 0
	jmpc	z, int_positive
	xnor	$r0, $r0, 0	# two's-complement negate for negative input
	add	$r0, $r0, 1
int_positive:
	return
#endif

#ifdef L_fixunssfsi
.global __fixunssfsi
__fixunssfsi:	# 19 instructions
	# float in $r0 -> unsigned int in $r0; negative inputs -> 0
	lshr	$r2, $r0, 31	# s in r2
	cmp	$r2, 0
	jmpc	z, uint_not_neg
	ldk	$r0, 0
	return
uint_not_neg:
	bextu	$r1, $r0, (8<<5)|23	# e in r1
	sub	$r1, $r1, 127	# unbias
	lpm	$r3, m_mask
	and	$r0, $r0, $r3	# m in r0
	# test nan
	# NOTE(review): $r1 already had 127 subtracted above, so it can
	# never equal 0xFF here (max is 128) -- this NaN test appears
	# ineffective. TODO: verify against upstream libgcc.
	cmp	$r1, 0xFF
	jmpc	nz, uint_not_nan
	cmp	$r0, 0
	jmpc	z, uint_not_nan
	ldk	$r0, 0
	return
uint_not_nan:
	# test edges
	cmp	$r1, 0
	jmpc	ns, uint_not_zero	# lower limit: |x| < 1 -> 0
	ldk	$r0, 0
	return
uint_not_zero:
	lpm	$r3, smallest_norm
	or	$r0, $r0, $r3	# set implicit bit
	cmp	$r1, 23
	jmpc	lt, shift_uint_right
	sub	$r1, $r1, 23
	ashl	$r0, $r0, $r1
	return
shift_uint_right:
	ldk	$r3, 23
	sub	$r1, $r3, $r1
	lshr	$r0, $r0, $r1
	return
#endif

##########################################################################
##########################################################################
## int & unsigned int to float

# Convert non-zero unsigned integer x to packed float (sign not packed).
# s1,s2,s3 are scratch; lbl makes the internal labels unique per expansion.
.macro i2f x, s1, s2, s3, lbl
	move	\s1, \x
	nlz	\s1, \s2	# s1 = leading zero count of x
	cmp	\s1, 8
	jmpc	s, float_round\lbl
	# nlz >= 8: value fits in 24 bits, no rounding needed
	sub	\s2, \s1, 8
	ashl	\x, \x, \s2
	jmp	float_no_round\lbl
float_round\lbl:
	cmp	\s1, 6
	jmpc	s, float_shift_right\lbl
	sub	\s2, \s1, 6
	ashl	\x, \x, \s2
	jmp	float_round_and_pack\lbl
float_shift_right\lbl:
	ldk	\s2, 6
	sub	\s2, \s2, \s1
	xnor	\s3, \s3 ,\s3	# 0xFFFFFFFF
	ashl	\s3, \s3 ,\s2	# create inverse of mask for test of S bit value in discarded my
	xnor	\s3, \s3 ,0	# NOT
	tst	\x, \s3	# determine value of sticky bit
	lshr	\x, \x, \s2
	jmpc	z,float_round_and_pack\lbl
	or	\x, \x, 1	# set the sticky bit to 1
float_round_and_pack\lbl:
	bextu	\s2, \x, (1<<5)|2	# extract low bit of m
	or	\x, \x, \s2	# or p into r (round-to-nearest-even)
	add	\x, \x, 1
	lshr	\x, \x, 2
	btst	\x, (1<<5)|24	# test for carry from round
	jmpc	z, float_no_round\lbl
	sub	\s1, \s1, 1	# inc e for carry (actually dec nlz)
	lshr	\x, \x, 1
float_no_round\lbl:
	ldk	\s2, 158
	sub	\s1, \s2, \s1	# e = 158 - nlz
	# Pack e
	ldl	\s1, \s1, (8<<5)|23
	bins	\x, \x, \s1
.endm

#ifdef L_floatsisf
.global __floatsisf
__floatsisf:	# 32 instructions
	# signed int in $r0 -> float in $r0
	cmp	$r0, 0
	jmpc	nz, float_not_zero
	return
float_not_zero:
	ashr	$r1, $r0, 31	# s in r1
	xor	$r0, $r0, $r1	# cond neg: $r0 = |$r0|
	sub	$r0, $r0, $r1
	i2f	$r0, $r2, $r3, $r4, 1
	ldl	$r1, $r1, (1<<5)|31
	bins	$r0, $r0, $r1	# pack sign
	return
#endif

#ifdef L_floatunsisf
.global __floatunsisf
__floatunsisf:	# 26 instructions
	# unsigned int in $r0 -> float in $r0
	cmp	$r0, 0
	jmpc	nz, float_not_zero2
	return
float_not_zero2:
	i2f	$r0, $r1, $r2, $r3, 2
	return
#endif

#if 0
##########################################################################
##########################################################################
## float compare  (disabled)

__cmpsf2_:
	# calc abs vals
	lpm	$r3, nan	# also abs mask
	and	$r2, $r0, $r3
	and	$r3, $r1, $r3
	# test if either abs is nan
	lpm	$r4, inf
	cmp	$r2, $r4
	jmpc	gt, cmp_is_gt
	cmp	$r3, $r4
	jmpc	gt, cmp_is_gt
	# test if both are 0
	or	$r2, $r2, $r3
	cmp	$r2, 0
	jmpc	z, cmp_is_eq
	# test if eq
	cmp	$r0, $r1
	jmpc	z, cmp_is_eq
	# -- if either is pos
	and	$r2, $r0, $r1
	cmp	$r2, 0
	jmpc	s, cmp_both_neg
	cmp	$r0, $r1
	jmpc	gt, cmp_is_gt
	# r0 < r1
	lpm	$r0, high_uint
	return
cmp_both_neg:
	cmp	$r0, $r1
	jmpc	lt, cmp_is_gt
	# r0 < r1
	lpm	$r0, high_uint
	return
cmp_is_gt:
	ldk	$r0, 1
	return
cmp_is_eq:
	ldk	$r0, 0
	return
#endif

#ifdef L_udivsi3
.global __udivsi3
__udivsi3:
	# Unsigned 32-bit divide: classic shift-subtract restoring division,
	# 32 iterations counted in $r28 (callee-saved, so pushed).
	# $r0 is dividend
	# $r1 is divisor
	ldk	$r2,0
	push	$r28
	ldk	$r28,-32
0:
	lshr	$r3,$r0,31	# Shift $r2:$r0 left one
	ashl	$r0,$r0,1
	ashl	$r2,$r2,1
	or	$r2,$r2,$r3
	cmp	$r2,$r1
	jmpc	b,1f
2:
	sub	$r2,$r2,$r1	# remainder fits: subtract, set quotient bit
	add	$r0,$r0,1
1:
	add	$r28,$r28,1
	jmpx	31,$r28,1,0b	# loop until counter reaches 0
	pop	$r28
	# $r0: quotient
	# $r2: remainder
	return
#endif

#ifdef L_umodsi3
.global __umodsi3
__umodsi3:
	# Unsigned modulo: __udivsi3 leaves the remainder in $r2.
	call	__udivsi3
	move	$r0,$r2
	return
#endif

#ifdef L_divsi3
.global __divsi3
__divsi3:
	# Signed divide: divide absolute values, then apply the sign of
	# (dividend ^ divisor) to the quotient.
	xor	$r5,$r0,$r1	# $r5 is sign of result
	ashr	$r2,$r0,31	# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	call	__udivsi3
	ashr	$r5,$r5,31
	xor	$r0,$r0,$r5	# conditionally negate quotient
	sub	$r0,$r0,$r5
	return

#endif

#ifdef L_modsi3
.global __modsi3
__modsi3:
	# Signed modulo: result takes the sign of the dividend.
	move	$r5,$r0	# $r5 is sign of result
	ashr	$r2,$r0,31	# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	call	__umodsi3
	ashr	$r5,$r5,31
	xor	$r0,$r0,$r5	# conditionally negate remainder
	sub	$r0,$r0,$r5
	return
#endif