# ieee754 sf routines for FT32

/* Copyright (C) 1995-2018 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
# for implementation details of all except division which is detailed below
#

#ifdef L_fp_tools
// .global __cmpsf2_
# Constant pool shared by the float routines in this file.  The routines
# fetch these words by label with lpm.
nan:            .long 0x7FFFFFFF    # also abs mask
inf:            .long 0x7F800000    # exponent all ones, fraction zero
sign_mask:      .long 0x80000000    # bit 31 only
m_mask:         .long 0x007FFFFF    # low 23 bits (fraction field)
exp_bias:       .long 127
edge_case:      .long 0x00FFFFFF    # compared against in __mulsf3 underflow handling
smallest_norm:  .long 0x00800000    # implicit bit
high_FF:        .long 0xFF000000
high_uint:      .long 0xFFFFFFFF

# Lookup table for the ntz macro: 64 byte entries indexed by the 6-bit
# value that macro computes.  Entry 0 is 32, which is what the macro
# yields for an all-zero input.
ntz_table:
    .byte   32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
    .byte   10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
    .byte   31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
    .byte   30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0

#endif

# Supply a few 'missing' instructions

# Bitwise complement: rd = ~r1.
# Written as an exclusive-nor with zero, the same form used for NOT
# elsewhere in this file.
.macro      not rd,r1
    xnor    \rd,\r1,0
.endm

# Two's complement negate in place: x = ~x + 1.
.macro      neg x
    xor     \x, \x, -1              # complement every bit
    add     \x, \x, 1               # plus one completes the negation
.endm

# set $cc from the result of "ashl reg,dist"
# The instruction is emitted as a raw 32-bit word, so both arguments must
# be plain numbers: reg is a register number (e.g. 2 for $r2, as in the
# "ashlcc 2,10" uses in __mulsf3) and dist is the shift distance.  The
# register number is placed at bit 15 and the distance at bit 4 of the
# base word 0x5de04008.
# NOTE(review): the base opcode word is taken as given here; confirm
# against the FT32 instruction encoding before changing it.
.macro  ashlcc reg,dist
    .long   0x5de04008 | (\reg << 15) | (\dist << 4)
.endm


# Conditionally negate x in place.
# sign must be 0x00000000 (x is left unchanged) or 0xffffffff (x becomes -x).
# Any other value of sign is outside the contract of this macro.
.macro      to_signed x, sign
    xor     \x,\x,\sign    # complement x when sign is all ones
    sub     \x,\x,\sign    # subtracting -1 adds the 1 that completes -x
.endm


# Load the constant v into register r in two steps: first everything
# above the low 10 bits, then the low 10 bits.
# NOTE(review): presumably ldl shifts its source left by 10 and inserts
# the immediate below it - confirm against the FT32 instruction set.
.macro  ld32    r,v
    ldk     \r,(\v >> 10)
    ldl     \r,\r,(\v & 0x3FF)
.endm

# calculate trailing zero count in x, also uses scr.
# Using Seal's algorithm
# On exit x holds the count looked up from ntz_table and scr is clobbered.
# Steps:
#   1. x & -x isolates the lowest set bit (zero stays zero).
#   2. The three shift/add/sub pairs multiply that value by 17, by 65 and
#      by 65535 in turn.
#   3. The top 6 bits of the product index the 64-entry ntz_table.
# An input of zero gives index 0, whose table entry is 32.
.macro      ntz x, scr
    not     \scr, \x
    add     \scr, \scr, 1
    and     \x, \x, \scr
    ashl    \scr, \x, 4
    add     \x, \scr, \x
    ashl    \scr, \x, 6
    add     \x, \scr, \x
    ashl    \scr, \x, 16
    sub     \x, \scr, \x
    lshr    \x, \x, 26
    ldk     \scr, ntz_table
    add     \x, \x, \scr
    lpmi.b  \x, \x, 0
.endm

# calculate leading zero count
# x is replaced by the count; scr is clobbered (it is handed to ntz).
# The count is obtained by applying flip to x and then counting trailing
# zeros of the result.
# NOTE(review): assumes "flip x,x,31" reverses the order of all 32 bits -
# confirm against the FT32 instruction set.
.macro      nlz x, scr
    flip    \x, \x, 31
    ntz     \x, \scr
.endm


# Round 26 bit mantissa to nearest
# | 23 bits frac | G | R | S |
# m is shifted right by two places and then incremented by a rounding
# decision; s1 and s2 are scratch.
# The decision is read from the constant 0xc8 used as an 8-entry bit
# table indexed by the low three bits of m (before the shift).  Bits 3, 6
# and 7 of 0xc8 are set, so the increment is 1 exactly when those three
# bits are 011, 110 or 111: the upper discarded bit is set, and either the
# lower discarded bit or the lowest kept bit is set as well (ties go to
# even).
.macro      round m,  s1, s2
    ldk     \s1,0xc8
    and     \s2,\m,7
    lshr    \s1,\s1,\s2
    and     \s1,\s1,1
    lshr    \m,\m,2
    add     \m,\m,\s1
.endm

# If NZ, set the LSB of reg
# The macro consumes the condition codes left by the instruction that
# precedes it (a tst, btst or ashlcc at the call sites in this file), so
# nothing that changes the flags may be placed between that instruction
# and the macro.  It defines the numeric local label 1.
.macro      sticky reg
    jmpc    z,1f
    or      \reg,\reg,1             # set the sticky bit to 1
1:
.endm

##########################################################################
##########################################################################
## addition & subtraction

#if defined(L_subsf3) || defined(L_addsub_sf)
.global __subsf3
# __subsf3: single-precision subtraction, z = x - y.
#   In:  $r0 = x, $r1 = y
#   Out: $r0 = z (produced by __addsf3)
# Implemented as x + (-y): the sign bit of y is toggled and control is
# transferred to __addsf3 with a jump, so __addsf3 returns directly to
# the caller.  $r2 is used as scratch for the sign mask.
__subsf3:
    # this is subtraction, so we just change the sign of r1
    lpm     $r2,sign_mask
    xor     $r1,$r1,$r2
    jmp     __addsf3
#endif

#if defined(L_addsf3) || defined(L_addsub_sf)
.global __addsf3
# __addsf3: single-precision addition, z = x + y.
#   In:  $r0 = x, $r1 = y (binary32 bit patterns)
#   Out: $r0 = z
#   Scratch: $r2-$r8 are overwritten.
# An operand whose exponent field is 0 is treated as zero (its fraction is
# ignored) and an operand whose exponent field is 255 is treated as
# infinity or NaN; those cases are resolved in the "slow" path.  All other
# inputs take the arithmetic path at no_special_vals.  A result exponent
# of 0 or less yields a signed zero and one of 255 or more yields a signed
# infinity.
__addsf3:
    # x in $r0, y in $r1, result z in $r0       --||| 100 instructions +/- |||--
    # unpack e, calc d
    bextu   $r2,$r0,(8<<5)|23   # ex in r2
    bextu   $r3,$r1,(8<<5)|23   # ey in r3
    sub     $r5,$r2,$r3         # d = ex - ey

    # Special values are 0x00 and 0xff in ex and ey.
    # If (ex&ey) == 0 or (ex|ey) == 255 then there may be
    # a special value, so the slow path is taken.
    tst     $r2,$r3
    jmpc    nz,1f
    jmp     slow
1:  or      $r4,$r2,$r3
    cmp     $r4,255
    jmpc    nz,no_special_vals
slow:
    # Check for early exit
    # The early exit applies when at least one exponent is 0 and neither
    # exponent is 255.
    cmp     $r2,0
    jmpc    z,test_if_not_255
    cmp     $r3,0
    jmpc    nz,no_early_exit
test_if_not_255:
    cmp     $r2,255
    jmpc    z,no_early_exit
    cmp     $r3,255
    jmpc    z,no_early_exit
    or      $r6,$r2,$r3
    cmp     $r6,0
    jmpc    nz,was_not_zero
    # both exponents are 0: the result is a zero whose sign bit is the
    # AND of the two operand sign bits
    and     $r0,$r0,$r1
    lpm     $r1,sign_mask
    and     $r0,$r0,$r1
    return
was_not_zero:
    # exactly one exponent is 0: return the other operand unchanged
    cmp     $r2,0
    jmpc    nz,ret_x
    move    $r0,$r1
    return
ret_x:
    return
no_early_exit:
    # setup to test for special values
    # (e - 1) & 0xFE equals 0xFE exactly when e is 0 or 255
    sub     $r6,$r2,1
    and     $r6,$r6,0xFE
    sub     $r7,$r3,1
    and     $r7,$r7,0xFE
    # test for special values
    cmp     $r6,$r7
    jmpc    gte,ex_spec_is_gte
    move    $r6,$r7
ex_spec_is_gte:
    cmp     $r6,0xFE
    jmpc    nz,no_special_vals
    cmp     $r5,0
    jmpc    ns,d_gte_0
    # d < 0.  Both conditional jumps below target the label that
    # immediately follows them, so y is returned on every path through here.
    cmp     $r3,0xFF
    jmpc    z,ret_y
    cmp     $r2,0
    jmpc    z,ret_y
ret_y:
    move    $r0,$r1
    return
d_gte_0:
    cmp     $r5,0
    jmpc    z,d_is_0
    cmp     $r2,0xFF
    jmpc    z,ret_x
    cmp     $r3,0
    jmpc    z,ret_x
d_is_0:
    cmp     $r2,0xFF
    jmpc    nz,no_special_vals
    # both exponents are 255: the result is NaN when either fraction is
    # nonzero or when the signs differ, otherwise x is returned
    ashl    $r6,$r0,9           # clear all except x frac
    ashl    $r7,$r1,9           # clear all except y frac
    or      $r6,$r6,$r7
    cmp     $r6,0
    jmpc    nz,ret_nan
    lshr    $r4,$r0,31          # sx in r4
    lshr    $r5,$r1,31          # sy in r5
    cmp     $r4,$r5
    jmpc    nz,ret_nan
    return
ret_nan:
    lpm     $r0,nan
    return
no_special_vals:
    ldk     $r8,(1<<10)|(9<<5)|26   # setup implicit bit and mask for e
    #----------------------
    ashr    $r4,$r0,31              # sx in r4
    ashl    $r0,$r0,3               # shift mx 3 for GRS bits
    bins    $r0,$r0,$r8             # clear sx, ex and add implicit bit mx
    # change mx to signed mantissa
    to_signed $r0,$r4
    #----------------------
    ashr    $r4,$r1,31              # sy in r4
    ashl    $r1,$r1,3               # shift my 3 for GRS bits
    bins    $r1,$r1,$r8             # clear sy, ey and add implicit bit my
    # change my to signed mantissa
    to_signed $r1,$r4
    #----------------------
    # test if we swap ms based on d sign
    cmp     $r5,0
    jmpc    gte,noswap
    # swap mx & my
    xor     $r0,$r0,$r1
    xor     $r1,$r0,$r1
    xor     $r0,$r0,$r1
    # d positive means that ex>=ey, so ez = ex
    # d negative means that ey>ex, so ez = ey
    move    $r2,$r3
    # |d|
    neg     $r5
noswap:
                                    # now $r2 = ez = max(ex,ey)
    cmp     $r5,26                  # max necessary alignment shift is 26
    jmpc    lt,under_26
    ldk     $r5,26
under_26:
    ldk     $r7,-1
    ashl    $r7,$r7,$r5             # create inverse of mask for test of S bit value in discarded my
    not     $r7,$r7
    tst     $r1,$r7                 # determine value of sticky bit
    # shift my >> |d|
    ashr    $r1,$r1,$r5
    sticky  $r1                     # uses the flags from the tst above

    # add ms
    add     $r0,$r0,$r1

    # $r4 = sign(mx), mx = |mx|
    ashr    $r4,$r0,31
    xor     $r0,$r0,$r4
    sub     $r0,$r0,$r4

    # realign mantissa using leading zero count
    flip    $r7,$r0,31
    ntz     $r7,$r8
    ashl    $r0,$r0,$r7
    btst    $r0,(6<<5)|0            # test low bits for sticky again
    lshr    $r0,$r0,6
    sticky  $r0                     # uses the flags from the btst above

    # update exponent
    add     $r2,$r2,5
    sub     $r2,$r2,$r7

    # Round to nearest
    round   $r0,$r7,$r6

    # detect_exp_update
    # bit 24 of the rounded mantissa is added to the exponent
    lshr    $r6,$r0,24
    add     $r2,$r2,$r6

    # final tests
    # mz == 0? Then we just bail with a +0
    cmp     $r0,0
    jmpc    nz,msum_not_zero
    ldk     $r0,0
    return
msum_not_zero:
    # Combined check that (1 <= ez <= 254)
    sub     $r3,$r2,1
    cmp     $r3,254
    jmpc    b,no_special_ret
    # underflow?
    cmp     $r2,0
    jmpc    gt,no_under
    ldk     $r0,0
    jmp     pack_sz
no_under:
    # overflow?
    cmp     $r2,255
    jmpc    lt,no_special_ret
    ldk     $r0,0x7F8               # 0x7F8 << 20 = 0x7F800000 (infinity)
    ashl    $r0,$r0,20
    jmp     pack_sz
no_special_ret:
    # Pack ez
    ldl     $r2,$r2,(8<<5)|23
    bins    $r0,$r0,$r2             # width = 8, pos = 23 pack ez
    # Pack sz
pack_sz:
    ldl     $r4,$r4,(1<<5)|31
    bins    $r0,$r0,$r4             # width = 1, pos = 31 set sz from the sign of the sum in r4
    return
#endif

##########################################################################
##########################################################################
## multiplication

#ifdef  L_mulsf3
.global __mulsf3
# __mulsf3: single-precision multiplication, z = x * y.
#   In:  $r0 = x, $r1 = y (binary32 bit patterns)
#   Out: $r0 = z
#   Scratch: $r1-$r7 are overwritten.
# Operands with an exponent field of 0 or 255 are resolved in slow_mul;
# everything else is multiplied at no_special_vals_mul.  A result exponent
# of 0 or less is flushed to a signed zero, apart from one border case
# that becomes the smallest normalized number, and a result exponent of
# 255 or more yields a signed infinity.
__mulsf3:
    # x in $r0, y in $r1, result z in $r0       --||| 61 instructions +/- |||--

    # unpack e
    bextu   $r2,$r0,(8<<5)|23   # ex in r2
    bextu   $r3,$r1,(8<<5)|23   # ey in r3
    # calc result sign
    xor     $r4,$r0,$r1
    lpm     $r5,sign_mask
    and     $r4,$r4,$r5         # sz in r4

    # unpack m add implicit bit
    ldk     $r5,(1<<10)|(9<<5)|23   # setup implicit bit and mask for e
    #----------------------
    bins    $r0,$r0,$r5             # clear sx, ex and add implicit bit mx

    # fast path requires 1 <= ex <= 254 and 1 <= ey <= 254
    sub     $r6,$r2,1
    cmp     $r6,254
    jmpc    b,1f
    jmp     slow_mul
1:  sub     $r6,$r3,1
    cmp     $r6,254
    jmpc    b,no_special_vals_mul

slow_mul:
    # Check for early exit
    # An exponent of 0 in either operand with no exponent of 255 gives a
    # signed zero.
    cmp     $r2,0
    jmpc    z,op_is_zero
    cmp     $r3,0
    jmpc    nz,no_early_exit_mul
op_is_zero:
    cmp     $r2,255
    jmpc    z,no_early_exit_mul
    cmp     $r3,255
    jmpc    z,no_early_exit_mul
    move    $r0,$r4
    return
no_early_exit_mul:
    # setup to test for special values
    # (e - 1) & 0xFE equals 0xFE exactly when e is 0 or 255
    sub     $r6,$r2,1
    and     $r6,$r6,0xFE
    sub     $r7,$r3,1
    and     $r7,$r7,0xFE
    # test for special values
    cmp     $r6,$r7
    jmpc    gte,ex_spec_is_gte_ey_mul
    move    $r6,$r7
ex_spec_is_gte_ey_mul:
    cmp     $r6,0xFE
    jmpc    nz,no_special_vals_mul
    cmp     $r2,0xFF
    jmpc    nz,ex_not_FF_mul
    # ex is 255: NaN when x has a nonzero fraction, when ey is 0, or when
    # y is itself a NaN; otherwise the result is a signed infinity
    ashl    $r6,$r0,9
    cmp     $r6,0
    jmpc    nz,ret_nan
    cmp     $r3,0
    jmpc    z,ret_nan
    ashl    $r6,$r1,1           # y with its sign bit shifted out
    lpm     $r7,high_FF
    cmp     $r6,$r7
    jmpc    a,ret_nan
    cmp     $r6,0
    jmpc    z,ret_nan
    # infinity
    lpm     $r0,inf
    or      $r0,$r0,$r4
    return
ex_not_FF_mul:
    # ex is not 255, so ey is 255 here; ex == 0 gives NaN
    cmp     $r2,0
    jmpc    nz,no_nan_mul
    cmp     $r3,0xFF
    jmpc    nz,no_nan_mul
    jmp     ret_nan
no_nan_mul:
    # return y with its sign replaced by the result sign
    lpm     $r0,nan
    and     $r0,$r0,$r1
    or      $r0,$r0,$r4
    return

ret_nan:
    lpm     $r0,nan
    return

no_special_vals_mul:
    bins    $r1,$r1,$r5         # clear sy, ey and add implicit bit my
    # calc ez
    add     $r3,$r2,$r3
    sub     $r3,$r3,127         # ez in r3

    # (r1,r2) = R0 * R1
    mul     $r2,$r0,$r1
    muluh   $r1,$r0,$r1

    # bit 15 of the high word is bit 47 of the 48-bit product
    btst    $r1,(1<<5)|15       # XXX use jmpx
    jmpc    z,mul_z0

    # mz is 1X.XX...X
    # 48-bit product is in (r1,r2). The low 22 bits of r2
    # are discarded.
    lshr    $r0,$r2,22
    ashl    $r1,$r1,10
    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 22
    ashlcc  2,10                # flags from $r2 << 10, i.e. from the discarded bits
    sticky  $r0
    add     $r3,$r3,1           # bump exponent

    # Round to nearest
    round   $r0, $r1, $r2
    lshr    $r6,$r0,24          # carry out of the rounded mantissa
    add     $r3,$r3,$r6

    sub     $r6,$r3,1
    cmp     $r6,254
    jmpc    b,no_special_ret_mul

special_ret_mul:
    # When the final exponent <= 0, result is flushed to 0 except
    # for the border case 0x00FFFFFF which is promoted to next higher
    # FP no., that is, the smallest "normalized" number.
    cmp     $r3,0
    jmpc    gt,exp_normal
    # Pack ez
    ldl     $r3,$r3,(8<<5)|23
    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
    lpm     $r2,edge_case
    cmp     $r0,$r2
    jmpc    nz,no_edge_case
    lpm     $r0,smallest_norm
    jmp     pack_sz_mul
no_edge_case:
    ldk     $r0,0
    jmp     pack_sz_mul
exp_normal:
    # overflow?
    cmp     $r3,255
    jmpc    lt,no_special_ret_mul
    ldk     $r0,0x7F8           # 0x7F8 << 20 = 0x7F800000 (infinity)
    ashl    $r0,$r0,20
    jmp     pack_sz_mul
no_special_ret_mul:
    # Pack ez
    ldl     $r3,$r3,(8<<5)|23
    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
    # Pack sz
pack_sz_mul:
    or    $r0,$r0,$r4
    return

mul_z0:
    # mz is 0X.XX...X
    # 48-bit product is in (r1,r2). The low 21 bits of r2
    # are discarded.
    lshr    $r0,$r2,21
    ashl    $r1,$r1,11
    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 21
    ashlcc  2,11                # flags from $r2 << 11, i.e. from the discarded bits
    sticky  $r0
    # Round to nearest
    round   $r0, $r1, $r2
    lshr    $r6,$r0,24          # carry out of the rounded mantissa
    add     $r3,$r3,$r6

    sub     $r6,$r3,1
    cmp     $r6,254
    jmpc    b,no_special_ret_mul
    jmp     special_ret_mul
#endif

##########################################################################
##########################################################################
## division

## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
## for implementation details


#ifdef  L_divsf3
# Polynomial coefficients and masks used by the quotient approximation in
# __divsf3 (see the paper referenced above this section).
dc_1: .long             0xffffe7d7
dc_2: .long             0xffffffe8
dc_3: .long             0xffbad86f
dc_4: .long             0xfffbece7
dc_5: .long             0xf3672b51
dc_6: .long             0xfd9d3a3e
dc_7: .long             0x9a3c4390
dc_8: .long             0xd4d2ce9b
dc_9: .long             0x1bba92b3
dc_10: .long            0x525a1a8b
dc_11: .long            0x0452b1bf
dc_12: .long            0xFFFFFFC0
spec_val_test:  .long   0x7F7FFFFF

.global __divsf3
# __divsf3: single-precision division, z = x / y.
#   In:  $r0 = x, $r1 = y (binary32 bit patterns)
#   Out: $r0 = z
#   Scratch: $r2-$r12 are overwritten; $r13 is saved on entry and
#   restored on every exit path.
# Operands with an exponent field of 0 or 255 are resolved before the
# polynomial evaluation.  A quotient above the normal range yields a
# signed infinity.  A quotient below the normal range yields a signed
# zero, except for the single border case that rounds up to the smallest
# normalized number.
__divsf3:
    push    $r13
    # x in $r0, y in $r1, result z in $r0       --||| 73 instructions +/- |||-
    bextu   $r10,$r0,(8<<5)|23   # ex in r10
    bextu   $r11,$r1,(8<<5)|23   # ey in r11
    lpm     $r6, m_mask
    and     $r2, $r0, $r6        # mx
    and     $r3, $r1, $r6        # my
    cmp     $r2,$r3
    bextu   $r2,$r30,(1<<5)|4   # c = mx >= my (bit 4 of the flags left by the cmp)
    ashl    $r3,$r3,9           # T = Y << 9 (fraction of y, left aligned)
    lpm     $r13, sign_mask
    ashl    $r4,$r0,8           # X8 = X << 8;
    or      $r4,$r4,$r13        # Mx = X8 | 0x80000000;
    lshr    $r5,$r4,$r2         # S = Mx >> c;
    # calc D
    sub     $r2, $r11, $r2
    add     $r12, $r10, 125
    sub     $r2, $r12, $r2      # int D = (Ex + 125) - (Ey - c);
    # calc result sign
    xor     $r12,$r0,$r1
    and     $r12,$r12,$r13      # Sr = ( X ^ Y ) & 0x80000000;
    # check early exit
    # ex == 0 with 1 <= ey <= 254 gives a signed zero
    cmp     $r10, 0
    jmpc    nz, no_early_ret_dev
    cmp     $r11, 0
    jmpc    z, no_early_ret_dev
    cmp     $r11, 255
    jmpc    z, no_early_ret_dev
    move    $r0, $r12
    pop     $r13
    return
no_early_ret_dev:
 # setup to test for special values
 # (e - 1) & 0xFE equals 0xFE exactly when e is 0 or 255
    sub     $r8,$r10,1
    and     $r8,$r8,0xFE
    sub     $r9,$r11,1
    and     $r9,$r9,0xFE
    # test for special values
    cmp     $r8, $r9
    jmpc    gte, absXm1_gte_absYm1
    move    $r8, $r9
absXm1_gte_absYm1:
    cmp     $r8, 0xFE
    jmpc    nz, no_spec_ret_div
    cmp     $r10, 0xFF
    jmpc    nz, ex_not_FF_div
    # ex is 255: NaN when x has a nonzero fraction or ey is also 255,
    # otherwise a signed infinity
    lpm     $r6, m_mask
    and     $r2, $r0, $r6        # mx
    cmp     $r2, 0
    jmpc    nz, ret_nan_div
    cmp     $r11, 0xFF
    jmpc    z, ret_nan_div
    jmp     ret_inf_div
ex_not_FF_div:
    cmp     $r11, 0xFF
    jmpc    nz, ey_not_FF_div
    # ey is 255: NaN when y has a nonzero fraction, otherwise a signed zero
    ashl    $r13, $r1, 9
    cmp     $r13, 0
    jmpc    nz, ret_nan_div
    move    $r0, $r12
    pop     $r13
    return
ey_not_FF_div:
    # neither exponent is 255: both exponents 0 gives NaN, otherwise a
    # signed infinity
    or      $r10, $r10, $r11
    cmp     $r10, 0
    jmpc    z, ret_nan_div
ret_inf_div:
    lpm     $r6, inf
    move    $r0, $r6
    or      $r0, $r0, $r12
    pop     $r13
    return
ret_nan_div:
    lpm     $r0, nan
    pop     $r13
    return

no_spec_ret_div:
# check for overflow
    ldk     $r6, 0xFE
    cmp     $r2, $r6
    jmpc    lt, no_overflow_div
    lpm     $r6, inf
    or      $r0, $r12, $r6
    pop     $r13
    return
no_overflow_div:
# check for underflow
    cmp     $r2, 0
    jmpc    ns, no_underflow_div
    # D < 0: the quotient is below the normal range and is flushed to a
    # signed zero.  The one exception is D == -1 with Mx == 0xFFFFFF00
    # (fraction of x all ones) and My == 0x80000000 (fraction of y zero):
    # that quotient rounds up to the smallest normalized number.
    xnor    $r6, $r6, $r6       # -1
    cmp     $r2, $r6
    jmpc    nz, ret_sr_div
    ldk     $r7, 0xFF
    xor     $r6, $r6, $r7       # 0xFF ^ -1 = 0xFFFFFF00
    cmp     $r4, $r6
    jmpc    nz, ret_sr_div
    # My == 0x80000000 exactly when the fraction of y is zero, i.e. when
    # T (in r3) is zero.  This test previously compared Mx a second time,
    # which could never match, so the border case was unreachable.
    cmp     $r3, 0
    jmpc    nz, ret_sr_div
    lpm     $r6, sign_mask
    lshr    $r0, $r6, 8         # 0x00800000, the smallest normalized number
    or      $r0, $r0, $r12
    pop     $r13
    return
ret_sr_div:
    move    $r0, $r12
    pop     $r13
    return
no_underflow_div:
    lpm     $r6, dc_1
    muluh   $r7, $r3, $r6       # i0 = mul( T , 0xffffe7d7 );
    lpm     $r6, dc_2
    sub     $r7, $r6, $r7       # i1 = 0xffffffe8 - i0;
    muluh   $r7, $r5, $r7       # i2 = mul( S , i1 );
    add     $r7, $r7, 0x20      # i3 = 0x00000020 + i2;
    muluh   $r8, $r3, $r3       # i4 = mul( T , T );
    muluh   $r9, $r5, $r8       # i5 = mul( S , i4 );
    lpm     $r6, dc_3
    muluh   $r10, $r3, $r6      # i6 = mul( T , 0xffbad86f );
    lpm     $r6, dc_4
    sub     $r10, $r6, $r10     # i7 = 0xfffbece7 - i6;
    muluh   $r10, $r9, $r10     # i8 = mul( i5 , i7 );
    add     $r7, $r7, $r10      # i9 = i3 + i8;
    muluh   $r9, $r8, $r9       # i10 = mul( i4 , i5 );
    lpm     $r6, dc_5
    muluh   $r10, $r3, $r6      # i11 = mul( T , 0xf3672b51 );
    lpm     $r6, dc_6
    sub     $r10, $r6, $r10     # i12 = 0xfd9d3a3e - i11;
    lpm     $r6, dc_7
    muluh   $r11, $r3, $r6      # i13 = mul( T , 0x9a3c4390 );
    lpm     $r6, dc_8
    sub     $r11, $r6, $r11     # i14 = 0xd4d2ce9b - i13
    muluh   $r11, $r8, $r11     # i15 = mul( i4 , i14 );
    add     $r10, $r10, $r11    # i16 = i12 + i15;
    muluh   $r10, $r9, $r10     # i17 = mul( i10 , i16 )
    add     $r7, $r7, $r10      # i18 = i9 + i17;
    muluh   $r10, $r8, $r8      # i19 = mul( i4 , i4 );
    lpm     $r6, dc_9
    muluh   $r11, $r3, $r6      # i20 = mul( T , 0x1bba92b3 );
    lpm     $r6, dc_10
    sub     $r11, $r6, $r11     # i21 = 0x525a1a8b - i20;
    lpm     $r6, dc_11
    muluh   $r8, $r8, $r6       # i22 = mul( i4 , 0x0452b1bf );
    add     $r8, $r11, $r8      # i23 = i21 + i22;
    muluh   $r8, $r10, $r8      # i24 = mul( i19 , i23 );
    muluh   $r8, $r9, $r8       # i25 = mul( i10 , i24 );
    add     $r3, $r7, $r8       # V = i18 + i25;
# W = V & 0xFFFFFFC0;
    lpm     $r6, dc_12
    and     $r3, $r3, $r6   # W
# round and pack final values
    ashl    $r0, $r2, 23        # pack D
    or      $r0, $r0, $r12      # pack Sr
    ashl    $r12, $r1, 8
    or      $r12, $r12, $r13    # My
    muluh   $r10, $r3, $r12     # high word of W * My
    lshr    $r11, $r5, 1        # S >> 1
    cmp     $r10, $r11
    jmpc    gte, div_ret_1
    add     $r3, $r3, 0x40      # W * My fell short of S >> 1: step W up by 0x40
div_ret_1:
    lshr    $r3, $r3, 7
    add     $r0, $r0, $r3       # the leading bit of W carries into the packed exponent
    pop     $r13
    return
#endif

##########################################################################
##########################################################################
## Negate

#ifdef L_negsf
.global __negsf
# __negsf: negate a single-precision value by toggling its sign bit.
#   In:  $r0 = x
#   Out: $r0 = x with bit 31 inverted
#   Scratch: $r1 (holds the sign mask)
__negsf:
    lpm     $r1, sign_mask
    xor     $r0, $r0, $r1
    return
#endif

##########################################################################
##########################################################################
## float to int & unsigned int

#ifdef L_fixsfsi
.global __fixsfsi
# __fixsfsi: convert the binary32 bit pattern in $r0 to a signed 32-bit
# integer in $r0, discarding the fractional part.
#   NaN, or biased exponent below 127  -> 0
#   biased exponent of 158 or more     -> 0x7FFFFFFF (positive input)
#                                         0x80000000 (negative input)
# Scratch: $r1 (exponent, then shift count), $r2 (sign), $r3 (constants).
__fixsfsi:
    lshr    $r2,$r0,31              # r2 = sign bit
    bextu   $r1,$r0,(8<<5)|23       # r1 = biased exponent
    lpm     $r3, m_mask
    and     $r0,$r0,$r3             # r0 = fraction bits
    # NaN: exponent 0xFF together with a nonzero fraction
    cmp     $r1,0xFF
    jmpc    nz, fixsf_check_small
    cmp     $r0,0
    jmpc    nz, fixsf_ret_zero
fixsf_check_small:
    # magnitude below 1.0 truncates to zero
    cmp     $r1, 127
    jmpc    gte, fixsf_check_big
fixsf_ret_zero:
    ldk     $r0,0
    return
fixsf_check_big:
    # magnitude of 2^31 or more saturates
    cmp     $r1, 158
    jmpc    gte, fixsf_saturate
    lpm     $r3, smallest_norm
    or      $r0, $r0, $r3           # restore the implicit leading one
    sub     $r1, $r1, 150           # shift needed to align the binary point
    cmp     $r1, 0
    jmpc    ns, fixsf_shift_left
    neg     $r1                     # negative count: shift right by its magnitude
    lshr    $r0, $r0, $r1
    jmp     fixsf_apply_sign
fixsf_shift_left:
    ashl    $r0, $r0, $r1
fixsf_apply_sign:
    cmp     $r2, 0
    jmpc    z, fixsf_done
    neg     $r0
fixsf_done:
    return
fixsf_saturate:
    lpm     $r0, nan                # 0x7FFFFFFF
    cmp     $r2, 0
    jmpc    z, fixsf_done
    xnor    $r0, $r0, 0             # complement gives 0x80000000
    return
#endif

#ifdef L_fixunssfsi
.global __fixunssfsi
# __fixunssfsi: convert the binary32 bit pattern in $r0 to an unsigned
# 32-bit integer in $r0, discarding the fractional part.
#   sign bit set, NaN, or magnitude below 1.0 -> 0
# No upper range check is made: inputs of 2^32 or more (including
# infinity) are shifted like any other value.
# Scratch: $r1 (exponent), $r2 (sign), $r3 (constants).
__fixunssfsi: # 19 instructions
    lshr    $r2, $r0, 31          # s in r2
    cmp     $r2, 0
    jmpc    z, uint_not_neg
    ldk     $r0, 0
    return
uint_not_neg:
    bextu   $r1, $r0, (8<<5)|23   # biased e in r1
    lpm     $r3, m_mask
    and     $r0, $r0, $r3         # m in r0
    # test nan
    # The exponent must still be biased here: once 127 has been
    # subtracted it can no longer equal 0xFF and the test would never fire.
    cmp     $r1, 0xFF
    jmpc    nz, uint_not_nan
    cmp     $r0, 0
    jmpc    z, uint_not_nan
    ldk     $r0, 0
    return
uint_not_nan:
    sub     $r1, $r1, 127         # remove the exponent bias
    # test edges
    cmp     $r1, 0
    jmpc    ns, uint_not_zero   # lower limit
    ldk     $r0, 0
    return
uint_not_zero:
    lpm     $r3, smallest_norm
    or      $r0, $r0, $r3       # set implicit bit
    cmp     $r1, 23
    jmpc    lt, shift_uint_right
    sub     $r1, $r1, 23
    ashl    $r0, $r0, $r1
    return
shift_uint_right:
    ldk     $r3, 23
    sub     $r1, $r3, $r1
    lshr    $r0, $r0, $r1
    return
#endif

##########################################################################
##########################################################################
## int & unsigned int to float


# Convert the nonzero unsigned integer in x to a binary32 bit pattern with
# a clear sign bit, leaving the result in x.
#   s1, s2, s3  scratch registers
#   lbl         suffix appended to every label so the macro can be
#               expanded more than once in a file
# Outline:
#   - s1 = leading zero count of x.
#   - With 8 or more leading zeros the value is shifted left so that its
#     leading one lands on bit 23, and no rounding is needed.
#   - Otherwise the value is positioned with its leading one on bit 25
#     (shifting left, or shifting right while folding the discarded bits
#     into bit 0) and rounded to nearest with ties to even.
#   - The exponent field is 158 minus the leading zero count and is
#     inserted over bits 23..30, replacing the leading one.
# Both callers in this file skip the macro when x is zero.
.macro  i2f x, s1, s2, s3, lbl
    move    \s1, \x
    nlz     \s1, \s2
    cmp     \s1, 8
    jmpc    s, float_round\lbl
    sub     \s2, \s1, 8
    ashl    \x, \x, \s2
    jmp     float_no_round\lbl
float_round\lbl:
    cmp     \s1, 6
    jmpc    s, float_shift_right\lbl
    sub     \s2, \s1, 6
    ashl    \x, \x, \s2
    jmp     float_round_and_pack\lbl
float_shift_right\lbl:
    ldk     \s2, 6
    sub     \s2, \s2, \s1
    xnor    \s3, \s3 ,\s3           # 0xFFFFFFFF
    ashl    \s3, \s3 ,\s2           # create inverse of mask for test of S bit value in the discarded bits
    xnor    \s3, \s3 ,0             # NOT
    tst     \x, \s3                # determine value of sticky bit
    lshr    \x, \x, \s2
    jmpc    z,float_round_and_pack\lbl    # still uses the flags from the tst above
    or      \x, \x, 1               # set the sticky bit to 1
float_round_and_pack\lbl:
    bextu   \s2, \x, (1<<5)|2      # extract low bit of m
    or      \x, \x, \s2           # or it into bit 0 so that ties round to even
    add     \x, \x, 1
    lshr    \x, \x, 2
    btst    \x, (1<<5)|24          # test for carry from round
    jmpc    z, float_no_round\lbl
    sub     \s1, \s1, 1             # inc e for carry (actually dec nlz)
    lshr    \x, \x, 1
float_no_round\lbl:
    ldk     \s2, 158
    sub     \s1, \s2, \s1
    # Pack e
    ldl     \s1, \s1, (8<<5)|23
    bins    \x, \x, \s1
.endm


#ifdef L_floatsisf
.global __floatsisf
# __floatsisf: convert the signed 32-bit integer in $r0 to a binary32 bit
# pattern in $r0.
#   Zero is returned unchanged (all bits clear).
#   Otherwise the magnitude is converted with the i2f macro and the
#   original sign is inserted at bit 31.
# Scratch: $r1 (sign), $r2-$r4 (handed to i2f).
__floatsisf:                       # 32 instructions
    cmp     $r0, 0
    jmpc    nz, float_not_zero
    return
float_not_zero:
    ashr    $r1, $r0, 31            # s in r1
    xor     $r0, $r0, $r1           # cond neg
    sub     $r0, $r0, $r1
    i2f     $r0, $r2, $r3, $r4, 1
    ldl     $r1, $r1, (1<<5)|31
    bins    $r0, $r0, $r1           # width = 1, pos = 31 insert the sign
    return
#endif

#ifdef L_floatunsisf
.global __floatunsisf
# __floatunsisf: convert the unsigned 32-bit integer in $r0 to a binary32
# bit pattern in $r0.
#   Zero is returned unchanged (all bits clear).
#   Any other value is converted with the i2f macro.
# Scratch: $r1-$r3 (handed to i2f).
__floatunsisf:                        # 26 instructions
    cmp     $r0, 0
    jmpc    nz, float_not_zero2
    return
float_not_zero2:
    i2f     $r0, $r1, $r2, $r3, 2
    return
#endif

#if 0
##########################################################################
##########################################################################
## float compare

# This routine is disabled by the enclosing conditional and is never
# assembled.
# As written it compares the floats in $r0 and $r1 and returns in $r0:
#    1           when either operand is a NaN, or when r0 > r1
#    0           when the operands are equal (two zeros of either sign
#                count as equal)
#    0xFFFFFFFF  when r0 < r1
# Scratch: $r2-$r4.

__cmpsf2_:
    # calc abs vals
    lpm     $r3, nan                # also abs mask
    and     $r2, $r0, $r3
    and     $r3, $r1, $r3
    # test if either abs is nan
    lpm     $r4, inf
    cmp     $r2, $r4
    jmpc    gt, cmp_is_gt
    cmp     $r3, $r4
    jmpc    gt, cmp_is_gt
    # test if both are 0
    or      $r2, $r2, $r3
    cmp     $r2, 0
    jmpc    z, cmp_is_eq
    # test if eq
    cmp     $r0, $r1
    jmpc    z, cmp_is_eq
    # -- if either is pos
    and     $r2, $r0, $r1           # sign bit survives only when both are negative
    cmp     $r2, 0
    jmpc    s, cmp_both_neg
    cmp     $r0, $r1
    jmpc    gt, cmp_is_gt
    # r0 < r1
    lpm     $r0, high_uint
    return
cmp_both_neg:
    # both negative: the integer ordering of the bit patterns is the
    # reverse of the float ordering
    cmp     $r0, $r1
    jmpc    lt, cmp_is_gt
    # r0 < r1
    lpm     $r0, high_uint
    return
cmp_is_gt:
    ldk     $r0, 1
    return
cmp_is_eq:
    ldk     $r0, 0
    return
#endif

#ifdef  L_udivsi3
.global __udivsi3
# __udivsi3: unsigned 32-bit division by shift and subtract.
#   In:  $r0 = dividend, $r1 = divisor
#   Out: $r0 = quotient, $r2 = remainder
#   Scratch: $r3; $r28 is saved on the stack and restored.
# Each pass shifts the pair $r2:$r0 left by one bit, moving the next
# dividend bit into the running remainder in $r2, and sets the new low
# bit of $r0 when the divisor could be subtracted.
# The other divide and modulo routines in this file call this one and
# rely on the remainder being left in $r2.
__udivsi3:
	# $r0 is dividend
	# $r1 is divisor
	ldk	$r2,0
	push	$r28
	ldk	$r28,-32	# loop counter, counts up from -32
0:
	lshr	$r3,$r0,31	# Shift $r2:$r0 left one
	ashl	$r0,$r0,1
	ashl	$r2,$r2,1
	or	$r2,$r2,$r3
	cmp	$r2,$r1
	jmpc	b,1f		# remainder below divisor: quotient bit stays 0
2:
	sub	$r2,$r2,$r1
	add	$r0,$r0,1
1:
	add	$r28,$r28,1
	# NOTE(review): presumably this jumps back while bit 31 of $r28 is 1,
	# i.e. while the counter is still negative - confirm jmpx semantics.
	jmpx	31,$r28,1,0b
	pop	$r28
	# $r0: quotient
	# $r2: remainder
	return
#endif

#ifdef	L_umodsi3
.global	__umodsi3
# __umodsi3: unsigned 32-bit remainder.
#   In:  $r0 = dividend, $r1 = divisor
#   Out: $r0 = remainder
# Calls __udivsi3 and returns the remainder that routine leaves in $r2.
__umodsi3:
	call	__udivsi3
	move	$r0,$r2
	return
#endif

#ifdef	L_divsi3
.global	__divsi3
# __divsi3: signed 32-bit division.
#   In:  $r0 = dividend, $r1 = divisor
#   Out: $r0 = quotient
#   Scratch: $r2, $r5, plus whatever __udivsi3 uses.
# Both operands are replaced by their absolute values, divided with
# __udivsi3, and the quotient is negated when the operand signs differ.
# $r5 carries the sign across the call; __udivsi3 as defined in this file
# does not write $r5.
__divsi3:
	xor	$r5,$r0,$r1	# $r5 is sign of result
	ashr	$r2,$r0,31	# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	call	__udivsi3
	ashr	$r5,$r5,31	# all ones when the result must be negative
	xor	$r0,$r0,$r5	# conditional negate of the quotient
	sub	$r0,$r0,$r5
	return
	
#endif

#ifdef	L_modsi3
.global	__modsi3
# __modsi3: signed 32-bit remainder.
#   In:  $r0 = dividend, $r1 = divisor
#   Out: $r0 = remainder, carrying the sign of the dividend
#   Scratch: $r2, $r5, plus whatever __umodsi3 uses.
# Both operands are replaced by their absolute values, the remainder is
# taken with __umodsi3, and the result is negated when the dividend was
# negative.  $r5 carries the dividend across the call.
__modsi3:
	move	$r5,$r0		# $r5 is sign of result
	ashr	$r2,$r0,31	# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	call	__umodsi3
	ashr	$r5,$r5,31	# all ones when the dividend was negative
	xor	$r0,$r0,$r5	# conditional negate of the remainder
	sub	$r0,$r0,$r5
	return
#endif