lib1funcs.S revision 1.3
1# ieee754 sf routines for FT32
2
3/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This file is distributed in the hope that it will be useful, but
11WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13General Public License for more details.
14
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22<http://www.gnu.org/licenses/>.  */
23
24# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
25# for implementation details of all except division which is detailed below
26#
27
28#ifdef L_fp_tools
# Shared read-only constants and the trailing-zero lookup table used by the
# soft-float routines in this file (they are fetched with lpm / lpmi.b).
# NOTE(review): no .global directive for these labels is visible in this
# file, while the routines that use them sit under other L_* guards -
# confirm how the labels are meant to resolve when each guard is built as
# its own object.
29// .global __cmpsf2_
30nan:            .long 0x7FFFFFFF    # also abs mask
31inf:            .long 0x7F800000    # exponent field all ones, fraction zero
32sign_mask:      .long 0x80000000    # bit 31 only
33m_mask:         .long 0x007FFFFF    # low 23 bits (fraction field)
34exp_bias:       .long 127
35edge_case:      .long 0x00FFFFFF    # compared against in the __mulsf3 underflow path
36smallest_norm:  .long 0x00800000    # implicit bit
37high_FF:        .long 0xFF000000    # compared with (y << 1) in __mulsf3
38high_uint:      .long 0xFFFFFFFF    # all ones
39
# Lookup table for the ntz macro, indexed by the 6-bit value that macro
# computes (so it has 64 one-byte slots). Slot 0, reached when the input
# was zero, holds 32.
40ntz_table:
41    .byte   32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
42    .byte   10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
43    .byte   31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
44    .byte   30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0
45
46#endif
47
48# Supply a few 'missing' instructions
49
50# not: rd = bitwise complement of r1 (implemented as xor with -1)
51.macro      not rd,r1
52    xor     \rd,\r1,-1
53.endm
54
55# negate: x = -x in place (complement via the not macro, then add 1)
56.macro      neg x
57    not     \x, \x
58    add     \x, \x, 1
59.endm
60
61# set $cc from the result of "ashl reg,dist"
# The instruction is emitted as a raw .long with reg inserted at bit 15 and
# dist at bit 4, so both arguments must be plain numbers (a register number
# and a shift distance), as in "ashlcc 2,10" further down.
# NOTE(review): 0x5de04008 is presumably the encoding of a flag-setting
# shift whose result is discarded - confirm against the FT32 ISA reference.
62.macro  ashlcc reg,dist
63    .long   0x5de04008 | (\reg << 15) | (\dist << 4)
64.endm
65
66
67# converts an unsigned number x to a signed rep based on the bits in sign
68# sign should be 0x00000000 or 0xffffffff.
# With sign == 0 both steps leave x unchanged; with sign == 0xffffffff the
# result is ~(x - 1), which equals -x.
69.macro      to_signed x, sign
70    add     \x,\x,\sign    # conditionally decrement x
71    xor     \x,\x,\sign    # conditionally complement x
72.endm
73
74
# ld32: load the constant v into register r in two steps, the part above
# the low 10 bits first (v>>10), then the low 10 bits (v & 1023).
# NOTE(review): this presumes ldl shifts its source left by 10 and merges
# the immediate into the low bits - confirm against the FT32 ISA reference.
75.macro  ld32    r,v
76    ldk     \r,(\v>>10)
77    ldl     \r,\r,(\v & 1023)
78.endm
79
80# calculate trailing zero count in x, also uses scr.
81# Using Seal's algorithm
# Steps: isolate the lowest set bit (x & -x), multiply it by 17*65*65535
# using the three shift/add/sub pairs, keep the top 6 bits of the product
# as an index, and read the count from ntz_table (defined in the L_fp_tools
# section). The result replaces x; scr is overwritten. An input of zero
# indexes slot 0 of the table, which holds 32.
82.macro      ntz x, scr
83    not     \scr, \x
84    add     \scr, \scr, 1
85    and     \x, \x, \scr
86    ashl    \scr, \x, 4
87    add     \x, \scr, \x
88    ashl    \scr, \x, 6
89    add     \x, \scr, \x
90    ashl    \scr, \x, 16
91    sub     \x, \scr, \x
92    lshr    \x, \x, 26
93    ldk     \scr, ntz_table
94    add     \x, \x, \scr
95    lpmi.b  \x, \x, 0
96.endm
97
98# calculate leading zero count
# The count replaces x; scr is overwritten. The value is transformed with
# "flip" and then handed to the ntz macro.
# NOTE(review): this relies on flip with operand 31 reversing the order of
# all 32 bits - confirm against the FT32 ISA reference.
99.macro      nlz x, scr
100    flip    \x, \x, 31
101    ntz     \x, \scr
102.endm
103
104
105# Round 26 bit mantissa to nearest
106# | 23 bits frac | G | R | S |
# m is rounded in place; s1 and s2 are overwritten.
# The low three bits of m select one bit of the constant 0xc8: bit k of
# 0xc8 is the increment for low-bit pattern k. Bits 3, 6 and 7 are set, so
# 1 is added when bit 1 of m is set together with bit 0 or bit 2. m is then
# shifted right by two and the increment added (round half to even on the
# value that remains). A carry out of the increment is not handled here;
# callers test for it afterwards.
107.macro      round m,  s1, s2
108    ldk     \s1,0xc8
109    and     \s2,\m,7
110    lshr    \s1,\s1,\s2
111    and     \s1,\s1,1
112    lshr    \m,\m,2
113    add     \m,\m,\s1
114.endm
115
116# If NZ, set the LSB of reg
# Reads the flags left by the preceding instruction, so it must directly
# follow the tst/btst/ashlcc (or equivalent) that examined the discarded bits.
117.macro      sticky reg
118    jmpc    z,1f
119    or      \reg,\reg,1             # set the sticky bit to 1
1201:
121.endm
122
123##########################################################################
124##########################################################################
125## addition & subtraction
126
127#if defined(L_subsf3) || defined(L_addsub_sf)
128.global __subsf3
# __subsf3: single-precision subtraction, result = x - y.
#   In:  $r0 = x, $r1 = y.  Out: $r0 (produced by __addsf3).
#   Flips the sign bit of y, then jumps to __addsf3, which returns to the
#   original caller. $r2 is overwritten with sign_mask.
129__subsf3:
130    # this is subtraction, so we just change the sign of r1
131    lpm     $r2,sign_mask
132    xor     $r1,$r1,$r2
133    jmp     __addsf3
134#endif
135
136#if defined(L_addsf3) || defined(L_addsub_sf)
137.global __addsf3
# __addsf3: single-precision addition, result = x + y.
#   In:  $r0 = x, $r1 = y (raw 32-bit float bit patterns)
#   Out: $r0 = sum
#   Registers written as scratch: $r2 - $r8.
#   An operand whose exponent field is 0 is handled as zero, a result whose
#   exponent would be 0 or less is returned as a signed zero, and a result
#   exponent of 255 or more is returned as a signed infinity.
#   Uses the ntz macro, so ntz_table from the L_fp_tools section is needed.
138__addsf3:
139    # x in $r0, y in $r1, result z in $r0       --||| 100 instructions +/- |||--
140    # unpack e, calc d
141    bextu   $r2,$r0,(8<<5)|23   # ex in r2
142    bextu   $r3,$r1,(8<<5)|23   # ey in r3
143    sub     $r5,$r2,$r3         # d = ex - ey
144
145    # Special values are 0x00 and 0xff in ex and ey.
146    # The slow path is taken when (ex&ey) == 0 or (ex|ey) == 255; any pair
147    # containing a 0x00 or 0xff exponent satisfies one of these two tests.
148    tst     $r2,$r3
149    jmpc    nz,1f
150    jmp     slow
1511:  or      $r4,$r2,$r3
152    cmp     $r4,255
153    jmpc    nz,no_special_vals
154slow:
155    # Check for early exit
    # Taken when at least one exponent is 0 and neither exponent is 255.
156    cmp     $r2,0
157    jmpc    z,test_if_not_255
158    cmp     $r3,0
159    jmpc    nz,no_early_exit
160test_if_not_255:
161    cmp     $r2,255
162    jmpc    z,no_early_exit
163    cmp     $r3,255
164    jmpc    z,no_early_exit
165    or      $r6,$r2,$r3
166    cmp     $r6,0
167    jmpc    nz,was_not_zero
    # Both exponents are 0: return a zero that is negative only when both
    # inputs have the sign bit set.
168    and     $r0,$r0,$r1
169    lpm     $r1,sign_mask
170    and     $r0,$r0,$r1
171    return
172was_not_zero:
    # Exactly one exponent is 0: return the other operand unchanged.
173    cmp     $r2,0
174    jmpc    nz,ret_x
175    move    $r0,$r1
176    return
177ret_x:
178    return
179no_early_exit:
180    # setup to test for special values
    # (e-1) & 0xFE equals 0xFE exactly when e is 0 or 255.
181    sub     $r6,$r2,1
182    and     $r6,$r6,0xFE
183    sub     $r7,$r3,1
184    and     $r7,$r7,0xFE
185    # test for special values
186    cmp     $r6,$r7
187    jmpc    gte,ex_spec_is_gte
188    move    $r6,$r7
189ex_spec_is_gte:
    # r6 now holds the larger of the two masked values.
190    cmp     $r6,0xFE
191    jmpc    nz,no_special_vals
192    cmp     $r5,0
193    jmpc    ns,d_gte_0
    # d < 0: y is returned. Both tests below lead to ret_y, which is also
    # the fall-through target, so their outcome does not change the result.
194    cmp     $r3,0xFF
195    jmpc    z,ret_y
196    cmp     $r2,0
197    jmpc    z,ret_y
198ret_y:
199    move    $r0,$r1
200    return
201d_gte_0:
202    cmp     $r5,0
203    jmpc    z,d_is_0
204    cmp     $r2,0xFF
205    jmpc    z,ret_x
206    cmp     $r3,0
207    jmpc    z,ret_x
208d_is_0:
209    cmp     $r2,0xFF
210    jmpc    nz,no_special_vals
    # Reached with ex == 0xFF: NaN results when either fraction is non-zero
    # or when the two signs differ; otherwise x is returned as it is.
211    ashl    $r6,$r0,9           # clear all except x frac
212    ashl    $r7,$r1,9           # clear all except y frac
213    or      $r6,$r6,$r7
214    cmp     $r6,0
215    jmpc    nz,ret_nan
216    lshr    $r4,$r0,31          # sx in r4
217    lshr    $r5,$r1,31          # sy in r5
218    cmp     $r4,$r5
219    jmpc    nz,ret_nan
220    return
221ret_nan:
222    lpm     $r0,nan
223    return
224no_special_vals:
225    ldk     $r8,(1<<10)|(9<<5)|26   # setup implicit bit and mask for e
226    #----------------------
227    ashr    $r4,$r0,31              # sx in r4
228    ashl    $r0,$r0,3               # shift mx 3 for GRS bits
229    bins    $r0,$r0,$r8             # clear sx, ex and add implicit bit mx
230    # change mx to signed mantissa
231    to_signed $r0,$r4
232    #----------------------
233    ashr    $r4,$r1,31              # sy in r4
234    ashl    $r1,$r1,3               # shift my 3 for GRS bits
235    bins    $r1,$r1,$r8             # clear sy, ey and add implicit bit my
236    # change my to signed mantissa
237    to_signed $r1,$r4
238    #----------------------
239    # test if we swap ms based on d sign
240    cmp     $r5,0
241    jmpc    gte,noswap
242    # swap mx & my
243    xor     $r0,$r0,$r1
244    xor     $r1,$r0,$r1
245    xor     $r0,$r0,$r1
246    # d positive means that ex>=ey, so ez = ex
247    # d negative means that ey>ex, so ez = ey
248    move    $r2,$r3
249    # |d|
250    neg     $r5
251noswap:
252                                    # now $r2 = ez = max(ex,ey)
253    cmp     $r5,26                  # max necessary alignment shift is 26
254    jmpc    lt,under_26
255    ldk     $r5,26
256under_26:
257    ldk     $r7,-1
258    ashl    $r7,$r7,$r5             # create inverse of mask for test of S bit value in discarded my
259    not     $r7,$r7
260    tst     $r1,$r7                 # determine value of sticky bit
261    # shift my >> |d|
262    ashr    $r1,$r1,$r5
263    sticky  $r1
264
265    # add ms
266    add     $r0,$r0,$r1
267
268    # $r4 = sign(mx), mx = |mx|
269    ashr    $r4,$r0,31
270    xor     $r0,$r0,$r4
271    sub     $r0,$r0,$r4
272
273    # realign mantissa using leading zero count
274    flip    $r7,$r0,31
275    ntz     $r7,$r8
276    ashl    $r0,$r0,$r7
277    btst    $r0,(6<<5)|0            # test low bits for sticky again
278    lshr    $r0,$r0,6
279    sticky  $r0
280
281    # update exponent
282    add     $r2,$r2,5
283    sub     $r2,$r2,$r7
284
285    # Round to nearest
286    round   $r0,$r7,$r6
287
288    # detect_exp_update
    # Bit 24 of the rounded mantissa (carry out of rounding) is added to ez.
289    lshr    $r6,$r0,24
290    add     $r2,$r2,$r6
291
292    # final tests
293    # mz == 0? if so, we just bail with a +0
294    cmp     $r0,0
295    jmpc    nz,msum_not_zero
296    ldk     $r0,0
297    return
298msum_not_zero:
299    # Combined check that (1 <= ez <= 254)
300    sub     $r3,$r2,1
301    cmp     $r3,254
302    jmpc    b,no_special_ret
303    # underflow?
304    cmp     $r2,0
305    jmpc    gt,no_under
306    ldk     $r0,0
307    jmp     pack_sz
308no_under:
309    # overflow?
310    cmp     $r2,255
311    jmpc    lt,no_special_ret
    # 0x7F8 << 20 is 0x7F800000, the infinity pattern.
312    ldk     $r0,0x7F8
313    ashl    $r0,$r0,20
314    jmp     pack_sz
315no_special_ret:
316    # Pack ez
317    ldl     $r2,$r2,(8<<5)|23
318    bins    $r0,$r0,$r2             # width = 8, pos = 23 pack ez
319    # Pack sz
320pack_sz:
321    ldl     $r4,$r4,(1<<5)|31
322    bins    $r0,$r0,$r4             # width = 1, pos = 31 set sz from the sign held in r4
323    return
324#endif
325
326##########################################################################
327##########################################################################
328## multiplication
329
330#ifdef  L_mulsf3
331.global __mulsf3
# __mulsf3: single-precision multiplication, result = x * y.
#   In:  $r0 = x, $r1 = y (raw 32-bit float bit patterns)
#   Out: $r0 = product
#   Registers written as scratch: $r2 - $r7.
#   If either exponent field is 0 and neither is 255, a signed zero is
#   returned. A result exponent of 255 or more gives a signed infinity;
#   a result exponent of 0 or less goes through special_ret_mul.
332__mulsf3:
333    # x in $r0, y in $r1, result z in $r0       --||| 61 instructions +/- |||--
334
335    # unpack e
336    bextu   $r2,$r0,(8<<5)|23   # ex in r2
337    bextu   $r3,$r1,(8<<5)|23   # ey in r3
338    # calc result sign
339    xor     $r4,$r0,$r1
340    lpm     $r5,sign_mask
341    and     $r4,$r4,$r5         # sz in r4
342
343    # unpack m add implicit bit
344    ldk     $r5,(1<<10)|(9<<5)|23   # setup implicit bit and mask for e
345    #----------------------
346    bins    $r0,$r0,$r5             # clear sx, ex and add implicit bit mx
347
    # Fast path only when both exponents lie in 1..254 (unsigned e-1 < 254).
348    sub     $r6,$r2,1
349    cmp     $r6,254
350    jmpc    b,1f
351    jmp     slow_mul
3521:  sub     $r6,$r3,1
353    cmp     $r6,254
354    jmpc    b,no_special_vals_mul
355
356slow_mul:
357    # Check for early exit
358    cmp     $r2,0
359    jmpc    z,op_is_zero
360    cmp     $r3,0
361    jmpc    nz,no_early_exit_mul
362op_is_zero:
363    cmp     $r2,255
364    jmpc    z,no_early_exit_mul
365    cmp     $r3,255
366    jmpc    z,no_early_exit_mul
    # An exponent is 0 and neither is 255: return zero with the result sign.
367    move    $r0,$r4
368    return
369no_early_exit_mul:
370    # setup to test for special values
    # (e-1) & 0xFE equals 0xFE exactly when e is 0 or 255.
371    sub     $r6,$r2,1
372    and     $r6,$r6,0xFE
373    sub     $r7,$r3,1
374    and     $r7,$r7,0xFE
375    # test for special values
376    cmp     $r6,$r7
377    jmpc    gte,ex_spec_is_gte_ey_mul
378    move    $r6,$r7
379ex_spec_is_gte_ey_mul:
380    cmp     $r6,0xFE
381    jmpc    nz,no_special_vals_mul
382    cmp     $r2,0xFF
383    jmpc    nz,ex_not_FF_mul
    # ex == 0xFF. NaN when x has a non-zero fraction, when ey == 0, or when
    # y is itself a NaN or a zero; otherwise the result is a signed infinity.
384    ashl    $r6,$r0,9
385    cmp     $r6,0
386    jmpc    nz,ret_nan
387    cmp     $r3,0
388    jmpc    z,ret_nan
389    ashl    $r6,$r1,1
390    lpm     $r7,high_FF
391    cmp     $r6,$r7
392    jmpc    a,ret_nan
393    cmp     $r6,0
394    jmpc    z,ret_nan
395    # infinity
396    lpm     $r0,inf
397    or      $r0,$r0,$r4
398    return
399ex_not_FF_mul:
    # ex != 0xFF. With ex == 0 and ey == 0xFF the result is NaN; otherwise
    # y is returned with its sign bit replaced by the result sign.
400    cmp     $r2,0
401    jmpc    nz,no_nan_mul
402    cmp     $r3,0xFF
403    jmpc    nz,no_nan_mul
404    jmp     ret_nan
405no_nan_mul:
406    lpm     $r0,nan
407    and     $r0,$r0,$r1
408    or      $r0,$r0,$r4
409    return
410
411ret_nan:
412    lpm     $r0,nan
413    return
414
415no_special_vals_mul:
416    bins    $r1,$r1,$r5         # clear sy, ey and add implicit bit my
417    # calc ez
418    add     $r3,$r2,$r3
419    sub     $r3,$r3,127         # ez in r3
420
421    # (r1,r2) = R0 * R1
422    mul     $r2,$r0,$r1
423    muluh   $r1,$r0,$r1
424
    # Bit 15 of the high word is bit 47 of the 48-bit product.
425    btst    $r1,(1<<5)|15       # XXX use jmpx
426    jmpc    z,mul_z0
427
428    # mz is 1X.XX...X
429    # 48-bit product is in (r1,r2). The low 22 bits of r2
430    # are discarded.
431    lshr    $r0,$r2,22
432    ashl    $r1,$r1,10
433    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 22
    # Flags from "ashl $r2,10": non-zero when any discarded bit was set.
434    ashlcc  2,10
435    sticky  $r0
436    add     $r3,$r3,1           # bump exponent
437
438    # Round to nearest
439    round   $r0, $r1, $r2
440    lshr    $r6,$r0,24
441    add     $r3,$r3,$r6
442
443    sub     $r6,$r3,1
444    cmp     $r6,254
445    jmpc    b,no_special_ret_mul
446
447special_ret_mul:
448    # When the final exponent <= 0, result is flushed to 0 except
449    # for the border case 0x00FFFFFF which is promoted to next higher
450    # FP no., that is, the smallest "normalized" number.
451    cmp     $r3,0
452    jmpc    gt,exp_normal
453    # Pack ez
454    ldl     $r3,$r3,(8<<5)|23
455    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
    # NOTE(review): this point is only reached with ez <= 0, and the bins
    # above has just written ez into bits 23..30 - confirm that the packed
    # value can actually equal edge_case (0x00FFFFFF) here.
456    lpm     $r2,edge_case
457    cmp     $r0,$r2
458    jmpc    nz,no_edge_case
459    lpm     $r0,smallest_norm
460    jmp     pack_sz_mul
461no_edge_case:
462    ldk     $r0,0
463    jmp     pack_sz_mul
464exp_normal:
465    # overflow?
466    cmp     $r3,255
467    jmpc    lt,no_special_ret_mul
    # 0x7F8 << 20 is 0x7F800000, the infinity pattern.
468    ldk     $r0,0x7F8
469    ashl    $r0,$r0,20
470    jmp     pack_sz_mul
471no_special_ret_mul:
472    # Pack ez
473    ldl     $r3,$r3,(8<<5)|23
474    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
475    # Pack sz
476pack_sz_mul:
477    or    $r0,$r0,$r4
478    return
479
480mul_z0:
481    # mz is 0X.XX...X
482    # 48-bit product is in (r1,r2). The low 21 bits of r2
483    # are discarded.
484    lshr    $r0,$r2,21
485    ashl    $r1,$r1,11
486    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 21
    # Flags from "ashl $r2,11": non-zero when any discarded bit was set.
487    ashlcc  2,11
488    sticky  $r0
489    # Round to nearest
490    round   $r0, $r1, $r2
491    lshr    $r6,$r0,24
492    add     $r3,$r3,$r6
493
494    sub     $r6,$r3,1
495    cmp     $r6,254
496    jmpc    b,no_special_ret_mul
497    jmp     special_ret_mul
498#endif
499
500##########################################################################
501##########################################################################
502## division
503
504## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
505## for implementation details
506
507
508
509
510#ifdef  L_divsf3
# Constants used by the quotient approximation in __divsf3 (dc_1 .. dc_11
# are the multipliers/offsets named in the i0 .. i25 comments, dc_12 is the
# mask that produces W).
511dc_1: .long             0xffffe7d7
512dc_2: .long             0xffffffe8
513dc_3: .long             0xffbad86f
514dc_4: .long             0xfffbece7
515dc_5: .long             0xf3672b51
516dc_6: .long             0xfd9d3a3e
517dc_7: .long             0x9a3c4390
518dc_8: .long             0xd4d2ce9b
519dc_9: .long             0x1bba92b3
520dc_10: .long            0x525a1a8b
521dc_11: .long            0x0452b1bf
522dc_12: .long            0xFFFFFFC0
523spec_val_test:  .long   0x7F7FFFFF
524
525.global __divsf3
# __divsf3: single-precision division, result = x / y.
#   In:  $r0 = x, $r1 = y (raw 32-bit float bit patterns)
#   Out: $r0 = quotient
#   Registers written as scratch: $r2 - $r12. $r13 is pushed on entry and
#   popped on every return path.
#   Special cases handled before the main computation:
#     ex == 0 with ey in 1..254            -> signed zero
#     x NaN, y NaN, inf/inf, or ex == ey == 0 -> NaN
#     x inf, or ey == 0 with ex in 1..254  -> signed infinity
#     y inf with x finite                  -> signed zero
#   A result exponent D >= 0xFE gives a signed infinity. D < 0 gives a
#   signed zero, except for the single border case D == -1 with an
#   all-ones dividend mantissa and a zero divisor fraction, which gives the
#   smallest normal number.
526__divsf3:
527    push    $r13
528    # x in $r0, y in $r1, result z in $r0       --||| 73 instructions +/- |||-
529    bextu   $r10,$r0,(8<<5)|23   # ex in r10
530    bextu   $r11,$r1,(8<<5)|23   # ey in r11
531    lpm     $r6, m_mask
532    and     $r2, $r0, $r6        # mx
533    and     $r3, $r1, $r6        # my
534    cmp     $r2,$r3
    # One flag bit is read out of $r30 right after the compare.
    # NOTE(review): presumably bit 4 of $r30 is the flag meaning mx >= my -
    # confirm against the FT32 ISA reference.
535    bextu   $r2,$r30,(1<<5)|4   # c = mx >= my;
536    ashl    $r3,$r3,9           # T = my << 9;
537    lpm     $r13, sign_mask
538    ashl    $r4,$r0,8           # X8 = X << 8;
539    or      $r4,$r4,$r13        # Mx = X8 | 0x80000000;
540    lshr    $r5,$r4,$r2         # S = Mx >> c;
541    # calc D
542    sub     $r2, $r11, $r2
543    add     $r12, $r10, 125
544    sub     $r2, $r12, $r2      # int D = (Ex + 125) - (Ey - c);
545    # calc result sign
546    xor     $r12,$r0,$r1
547    and     $r12,$r12,$r13      # Sr = ( X ^ Y ) & 0x80000000;
548    # check early exit
    # ex == 0 while ey is neither 0 nor 255: return zero with sign Sr.
549    cmp     $r10, 0
550    jmpc    nz, no_early_ret_dev
551    cmp     $r11, 0
552    jmpc    z, no_early_ret_dev
553    cmp     $r11, 255
554    jmpc    z, no_early_ret_dev
555    move    $r0, $r12
556    pop     $r13
557    return
558no_early_ret_dev:
559 # setup to test for special values
    # (e-1) & 0xFE equals 0xFE exactly when e is 0 or 255.
560    sub     $r8,$r10,1
561    and     $r8,$r8,0xFE
562    sub     $r9,$r11,1
563    and     $r9,$r9,0xFE
564    # test for special values
565    cmp     $r8, $r9
566    jmpc    gte, absXm1_gte_absYm1
567    move    $r8, $r9
568absXm1_gte_absYm1:
569    cmp     $r8, 0xFE
570    jmpc    nz, no_spec_ret_div
571    cmp     $r10, 0xFF
572    jmpc    nz, ex_not_FF_div
    # ex == 0xFF: NaN when x has a non-zero fraction or ey == 0xFF,
    # otherwise a signed infinity.
573    lpm     $r6, m_mask
574    and     $r2, $r0, $r6        # mx
575    cmp     $r2, 0
576    jmpc    nz, ret_nan_div
577    cmp     $r11, 0xFF
578    jmpc    z, ret_nan_div
579    jmp     ret_inf_div
580ex_not_FF_div:
581    cmp     $r11, 0xFF
582    jmpc    nz, ey_not_FF_div
    # ey == 0xFF: NaN when y has a non-zero fraction, otherwise signed zero.
583    ashl    $r13, $r1, 9
584    cmp     $r13, 0
585    jmpc    nz, ret_nan_div
586    move    $r0, $r12
587    pop     $r13
588    return
589ey_not_FF_div:
    # Neither exponent is 0xFF here: NaN when both are 0, else infinity.
590    or      $r10, $r10, $r11
591    cmp     $r10, 0
592    jmpc    z, ret_nan_div
593ret_inf_div:
594    lpm     $r6, inf
595    move    $r0, $r6
596    or      $r0, $r0, $r12
597    pop     $r13
598    return
599ret_nan_div:
600    lpm     $r0, nan
601    pop     $r13
602    return
603
604no_spec_ret_div:
605# check for overflow
606    ldk     $r6, 0xFE
607    cmp     $r2, $r6
608    jmpc    lt, no_overflow_div
609    lpm     $r6, inf
610    or      $r0, $r12, $r6
611    pop     $r13
612    return
613no_overflow_div:
614# check for underflow
615    cmp     $r2, 0
616    jmpc    ns, no_underflow_div
    # D < 0: the result is a signed zero unless D == -1,
    # Mx == 0xFFFFFF00 and My == 0x80000000 all hold.
617    xnor    $r6, $r6, $r6       # -1
618    cmp     $r2, $r6
619    jmpc    nz, ret_sr_div
620    ldk     $r7, 0xFF
621    xor     $r6, $r6, $r7       # 0xFF ^ -1 = 0xFFFFFF00
622    cmp     $r4, $r6
623    jmpc    nz, ret_sr_div
624    lpm     $r6, sign_mask
    # The second operand of the border-case test is My, not Mx again: Mx
    # cannot equal both 0xFFFFFF00 and 0x80000000, so comparing $r4 twice
    # made the return below unreachable. My is built the same way as in the
    # rounding code at the end of this routine; $r7 is free at this point.
    ashl    $r7, $r1, 8
    or      $r7, $r7, $r6       # My = (Y << 8) | 0x80000000
625    cmp     $r7, $r6
626    jmpc    nz, ret_sr_div
627    lshr    $r0, $r6, 8         # 0x80000000 >> 8 = 0x00800000
628    or      $r0, $r0, $r12
629    pop     $r13
630    return
631ret_sr_div:
632    move    $r0, $r12
633    pop     $r13
634    return
635no_underflow_div:
636    lpm     $r6, dc_1
637    muluh   $r7, $r3, $r6       # i0 = mul( T , 0xffffe7d7 );
638    lpm     $r6, dc_2
639    sub     $r7, $r6, $r7       # i1 = 0xffffffe8 - i0;
640    muluh   $r7, $r5, $r7       # i2 = mul( S , i1 );
641    add     $r7, $r7, 0x20      # i3 = 0x00000020 + i2;
642    muluh   $r8, $r3, $r3       # i4 = mul( T , T );
643    muluh   $r9, $r5, $r8       # i5 = mul( S , i4 );
644    lpm     $r6, dc_3
645    muluh   $r10, $r3, $r6      # i6 = mul( T , 0xffbad86f );
646    lpm     $r6, dc_4
647    sub     $r10, $r6, $r10     # i7 = 0xfffbece7 - i6;
648    muluh   $r10, $r9, $r10     # i8 = mul( i5 , i7 );
649    add     $r7, $r7, $r10      # i9 = i3 + i8;
650    muluh   $r9, $r8, $r9       # i10 = mul( i4 , i5 );
651    lpm     $r6, dc_5
652    muluh   $r10, $r3, $r6      # i11 = mul( T , 0xf3672b51 );
653    lpm     $r6, dc_6
654    sub     $r10, $r6, $r10     # i12 = 0xfd9d3a3e - i11;
655    lpm     $r6, dc_7
656    muluh   $r11, $r3, $r6      # i13 = mul( T , 0x9a3c4390 );
657    lpm     $r6, dc_8
658    sub     $r11, $r6, $r11     # i14 = 0xd4d2ce9b - i13
659    muluh   $r11, $r8, $r11     # i15 = mul( i4 , i14 );
660    add     $r10, $r10, $r11    # i16 = i12 + i15;
661    muluh   $r10, $r9, $r10     # i17 = mul( i10 , i16 )
662    add     $r7, $r7, $r10      # i18 = i9 + i17;
663    muluh   $r10, $r8, $r8      # i19 = mul( i4 , i4 );
664    lpm     $r6, dc_9
665    muluh   $r11, $r3, $r6      # i20 = mul( T , 0x1bba92b3 );
666    lpm     $r6, dc_10
667    sub     $r11, $r6, $r11     # i21 = 0x525a1a8b - i20;
668    lpm     $r6, dc_11
669    muluh   $r8, $r8, $r6       # i22 = mul( i4 , 0x0452b1bf );
670    add     $r8, $r11, $r8      # i23 = i21 + i22;
671    muluh   $r8, $r10, $r8      # i24 = mul( i19 , i23 );
672    muluh   $r8, $r9, $r8       # i25 = mul( i10 , i24 );
673    add     $r3, $r7, $r8       # V = i18 + i25;
674# W = V & 0xFFFFFFC0;
675    lpm     $r6, dc_12
676    and     $r3, $r3, $r6   # W
677# round and pack final values
678    ashl    $r0, $r2, 23        # pack D
679    or      $r0, $r0, $r12      # pack Sr
680    ashl    $r12, $r1, 8
681    or      $r12, $r12, $r13    # My
    # Compare the high word of W * My with S >> 1; when it is smaller, W is
    # bumped by 0x40 before the final shift.
682    muluh   $r10, $r3, $r12
683    lshr    $r11, $r5, 1
684    cmp     $r10, $r11
685    jmpc    gte, div_ret_1
686    add     $r3, $r3, 0x40
687div_ret_1:
688    lshr    $r3, $r3, 7
689    add     $r0, $r0, $r3
690    pop     $r13
691    return
692#endif
693
694##########################################################################
695##########################################################################
696## Negate
697
698#ifdef L_negsf
699.global __negsf
# __negsf: single-precision negation.
#   In:  $r0 = x.  Out: $r0 = x with bit 31 inverted.
#   $r1 is overwritten with sign_mask. No special handling is applied to
#   any input pattern.
700__negsf:
701    lpm     $r1, sign_mask
702    xor     $r0, $r0, $r1
703    return
704#endif
705
706##########################################################################
707##########################################################################
708## float to int & unsigned int
709
710#ifdef L_fixsfsi
711.global __fixsfsi
# __fixsfsi: convert a single-precision float to a signed 32-bit integer,
# discarding the fraction bits that are shifted out.
#   In:  $r0 = x.  Out: $r0 = integer result.
#   Registers written as scratch: $r1 - $r3.
#   NaN gives 0. An exponent field below 127 gives 0. An exponent field of
#   158 or more gives 0x7FFFFFFF for a clear sign bit and its complement
#   (0x80000000) for a set sign bit.
712__fixsfsi: # 20 instructions
713    bextu   $r1,$r0,(8<<5)|23   # e in r1
714    lshr    $r2,$r0,31          # s in r2
715    lpm     $r3, m_mask
716    and     $r0,$r0,$r3         # m in r0
717    # test nan
718    cmp     $r1,0xFF
719    jmpc    nz, int_not_nan
720    cmp     $r0,0
721    jmpc    z, int_not_nan
722    ldk     $r0,0
723    return
724int_not_nan:
725    # test edges
726    cmp     $r1, 127
727    jmpc    gte, int_not_zero   # lower limit
728    ldk     $r0,0
729    return
730int_not_zero:
731    cmp     $r1, 158
732    jmpc    lt, int_not_max    # upper limit
    # Saturate: the "nan" constant doubles as 0x7FFFFFFF here.
733    lpm     $r0, nan
734    cmp     $r2, 0
735    jmpc    z, int_positive
736    xnor    $r0, $r0, 0         # complement (xnor with 0)
737    return
738int_not_max:
739    lpm     $r3, smallest_norm
740    or      $r0, $r0, $r3       # set implicit bit
    # The 24-bit mantissa is shifted left by e-150, or right by 150-e when
    # that difference is negative.
741    sub     $r1, $r1, 150
742    cmp     $r1, 0
743    jmpc    s, shift_right
744    ashl    $r0, $r0, $r1
745    jmp     set_int_sign
746shift_right:
747    xnor    $r1, $r1, 0
748    add     $r1, $r1, 1         # r1 = -r1
749    lshr    $r0, $r0, $r1
750set_int_sign:
751    cmp     $r2, 0
752    jmpc    z, int_positive
753    xnor    $r0, $r0, 0
754    add     $r0, $r0, 1         # r0 = -r0 for a negative input
755int_positive:
756    return
757#endif
758
759#ifdef L_fixunssfsi
760.global __fixunssfsi
# __fixunssfsi: convert a single-precision float to an unsigned 32-bit
# integer, discarding the fraction bits that are shifted out.
#   In:  $r0 = x.  Out: $r0 = integer result.
#   Registers written as scratch: $r1 - $r3.
#   Any input with the sign bit set gives 0. NaN gives 0. An exponent
#   field below 127 gives 0.
#   NOTE(review): there is no upper-range check; for an unbiased exponent
#   of 32 or more (including infinity) the value is still passed to ashl
#   with a shift count above 31 - confirm what result is wanted there.
761__fixunssfsi: # 19 instructions
762    lshr    $r2, $r0, 31          # s in r2
763    cmp     $r2, 0
764    jmpc    z, uint_not_neg
765    ldk     $r0, 0
766    return
767uint_not_neg:
768    bextu   $r1, $r0, (8<<5)|23   # e in r1
769    sub     $r1, $r1, 127         # r1 = unbiased exponent from here on
770    lpm     $r3, m_mask
771    and     $r0, $r0, $r3         # m in r0
772    # test nan
    # The bias was already removed above, so an exponent field of 0xFF now
    # reads 0xFF-127. Comparing against 0xFF itself could never match and
    # let NaN fall through to the shift code.
773    cmp     $r1, 0xFF-127
774    jmpc    nz, uint_not_nan
775    cmp     $r0, 0
776    jmpc    z, uint_not_nan
777    ldk     $r0, 0
778    return
779uint_not_nan:
780    # test edges
781    cmp     $r1, 0
782    jmpc    ns, uint_not_zero   # lower limit
783    ldk     $r0, 0
784    return
785uint_not_zero:
786    lpm     $r3, smallest_norm
787    or      $r0, $r0, $r3       # set implicit bit
    # The 24-bit mantissa is shifted left by e-23, or right by 23-e when the
    # unbiased exponent e is below 23.
788    cmp     $r1, 23
789    jmpc    lt, shift_uint_right
790    sub     $r1, $r1, 23
791    ashl    $r0, $r0, $r1
792    return
793shift_uint_right:
794    ldk     $r3, 23
795    sub     $r1, $r3, $r1
796    lshr    $r0, $r0, $r1
797    return
798#endif
799
800##########################################################################
801##########################################################################
802## int & unsigned int to float
803
804
# i2f: convert the unsigned integer in x to a packed single-precision
# magnitude (fraction and exponent fields; the sign bit is left to the
# caller).
#   x        value in, packed result out
#   s1,s2,s3 scratch registers, all overwritten
#   lbl      suffix appended to the internal labels so that each expansion
#            gets its own label names
# The leading-zero count decides the path: 8 or more means the value fits
# in 24 bits and is only shifted left; fewer than 8 means low bits have to
# be dropped, so the value is aligned to 26 bits (with a sticky bit when
# shifting right), rounded to nearest with ties to even, and renormalized
# if rounding carried into bit 24. The exponent is 158 minus the
# (possibly adjusted) leading-zero count.
# Both callers in this file return early for a zero input before expanding
# this macro. Uses nlz, so ntz_table from the L_fp_tools section is needed.
805.macro  i2f x, s1, s2, s3, lbl
806    move    \s1, \x
807    nlz     \s1, \s2
808    cmp     \s1, 8
809    jmpc    s, float_round\lbl
810    sub     \s2, \s1, 8
811    ashl    \x, \x, \s2
812    jmp     float_no_round\lbl
813float_round\lbl:
814    cmp     \s1, 6
815    jmpc    s, float_shift_right\lbl
816    sub     \s2, \s1, 6
817    ashl    \x, \x, \s2
818    jmp     float_round_and_pack\lbl
819float_shift_right\lbl:
820    ldk     \s2, 6
821    sub     \s2, \s2, \s1
822    xnor    \s3, \s3 ,\s3           # 0xFFFFFFFF
823    ashl    \s3, \s3 ,\s2           # create inverse of mask for test of S bit value in discarded my
824    xnor    \s3, \s3 ,0             # NOT
825    tst     \x, \s3                # determine value of sticky bit
826    lshr    \x, \x, \s2
827    jmpc    z,float_round_and_pack\lbl
828    or      \x, \x, 1               # set the sticky bit to 1
829float_round_and_pack\lbl:
830    bextu   \s2, \x, (1<<5)|2      # extract low bit of m
831    or      \x, \x, \s2           # or p into r
832    add     \x, \x, 1
833    lshr    \x, \x, 2
834    btst    \x, (1<<5)|24          # test for carry from round
835    jmpc    z, float_no_round\lbl
836    sub     \s1, \s1, 1             # inc e for carry (actually dec nlz)
837    lshr    \x, \x, 1
838float_no_round\lbl:
839    ldk     \s2, 158
840    sub     \s1, \s2, \s1
841    # Pack e
842    ldl     \s1, \s1, (8<<5)|23
843    bins    \x, \x, \s1
844.endm
845
846
847#ifdef L_floatsisf
848.global __floatsisf
# __floatsisf: convert a signed 32-bit integer to single precision.
#   In:  $r0 = integer.  Out: $r0 = float bit pattern.
#   Registers written as scratch: $r1 - $r4.
#   Zero is returned unchanged (all bits clear). Otherwise the magnitude is
#   converted with the i2f macro and the original sign is inserted at bit 31.
849__floatsisf:                       # 32 instructions
850    cmp     $r0, 0
851    jmpc    nz, float_not_zero
852    return
853float_not_zero:
854    ashr    $r1, $r0, 31            # s in r1
855    xor     $r0, $r0, $r1           # cond neg
856    sub     $r0, $r0, $r1
857    i2f     $r0, $r2, $r3, $r4, 1
    # Build a bins control word from the sign and insert it: width 1, pos 31.
858    ldl     $r1, $r1, (1<<5)|31
859    bins    $r0, $r0, $r1
860    return
861#endif
862
863#ifdef L_floatunsisf
864.global __floatunsisf
# __floatunsisf: convert an unsigned 32-bit integer to single precision.
#   In:  $r0 = integer.  Out: $r0 = float bit pattern.
#   Registers written as scratch: $r1 - $r3.
#   Zero is returned unchanged; any other value goes through the i2f macro.
865__floatunsisf:                        # 26 instructions
866    cmp     $r0, 0
867    jmpc    nz, float_not_zero2
868    return
869float_not_zero2:
870    i2f     $r0, $r1, $r2, $r3, 2
871    return
872#endif
873
874#if 0
875##########################################################################
876##########################################################################
877## float compare
878
879
# This whole routine is excluded from the build by the enclosing "#if 0".
# __cmpsf2_: compare x ($r0) with y ($r1).
#   Out: $r0 = 1 when either input is a NaN or x > y, 0 when the inputs are
#   equal (two zeros of either sign count as equal), 0xFFFFFFFF otherwise.
#   Registers written as scratch: $r2 - $r4.
880__cmpsf2_:
881    # calc abs vals
882    lpm     $r3, nan                # also abs mask
883    and     $r2, $r0, $r3
884    and     $r3, $r1, $r3
885    # test if either abs is nan
886    lpm     $r4, inf
887    cmp     $r2, $r4
888    jmpc    gt, cmp_is_gt
889    cmp     $r3, $r4
890    jmpc    gt, cmp_is_gt
891    # test if both are 0
892    or      $r2, $r2, $r3
893    cmp     $r2, 0
894    jmpc    z, cmp_is_eq
895    # test if eq
896    cmp     $r0, $r1
897    jmpc    z, cmp_is_eq
898    # -- if either is pos
    # The sign of (x & y) is set only when both inputs are negative.
899    and     $r2, $r0, $r1
900    cmp     $r2, 0
901    jmpc    s, cmp_both_neg
902    cmp     $r0, $r1
903    jmpc    gt, cmp_is_gt
904    # r0 < r1
905    lpm     $r0, high_uint
906    return
907cmp_both_neg:
    # Both negative: the ordering of the raw patterns is reversed.
908    cmp     $r0, $r1
909    jmpc    lt, cmp_is_gt
910    # r0 < r1
911    lpm     $r0, high_uint
912    return
913cmp_is_gt:
914    ldk     $r0, 1
915    return
916cmp_is_eq:
917    ldk     $r0, 0
918    return
919#endif
920
921#ifdef  L_udivsi3
922.global __udivsi3
# __udivsi3: unsigned 32-bit division by shift and subtract.
#   In:  $r0 = dividend, $r1 = divisor
#   Out: $r0 = quotient, $r2 = remainder
#   $r3 is overwritten; $r28 is used as the loop counter and is saved and
#   restored on the stack. $r1 is not modified.
#   The loop body runs while the counter, which starts at -32 and is
#   incremented once per pass, is still negative.
#   A zero divisor is not checked for.
#   NOTE(review): jmpx 31,$r28,1,0b presumably branches back while bit 31
#   of $r28 is 1 - confirm against the FT32 ISA reference.
923__udivsi3:
924	# $r0 is dividend
925	# $r1 is divisor
926	ldk	$r2,0
927	push	$r28
928	ldk	$r28,-32
9290:
930	lshr	$r3,$r0,31	# Shift $r2:$r0 left one
931	ashl	$r0,$r0,1
932	ashl	$r2,$r2,1
933	or	$r2,$r2,$r3
934	cmp	$r2,$r1
935	jmpc	b,1f		# partial remainder below divisor: quotient bit stays 0
9362:
937	sub	$r2,$r2,$r1
938	add	$r0,$r0,1	# set the quotient bit just shifted in
9391:
940	add	$r28,$r28,1
941	jmpx	31,$r28,1,0b
942	pop	$r28
943	# $r0: quotient
944	# $r2: remainder
945	return
946#endif
947
948#ifdef	L_umodsi3
949.global	__umodsi3
# __umodsi3: unsigned 32-bit remainder.
#   In:  $r0 = dividend, $r1 = divisor.  Out: $r0 = remainder.
#   Calls __udivsi3 and returns the remainder it leaves in $r2.
950__umodsi3:
951	call	__udivsi3
952	move	$r0,$r2
953	return
954#endif
955
956#ifdef	L_divsi3
957.global	__divsi3
# __divsi3: signed 32-bit division.
#   In:  $r0 = dividend, $r1 = divisor.  Out: $r0 = quotient.
#   Divides the absolute values with __udivsi3, then negates the quotient
#   when the operand signs differ. $r5 carries the sign across the call and
#   is overwritten, as is $r2.
958__divsi3:
959	xor	$r5,$r0,$r1	# $r5 is sign of result
960	ashr	$r2,$r0,31	# $r0 = abs($r0)
961	xor	$r0,$r0,$r2
962	sub	$r0,$r0,$r2
963	ashr	$r2,$r1,31	# $r1 = abs($r1)
964	xor	$r1,$r1,$r2
965	sub	$r1,$r1,$r2
966	call	__udivsi3
967	ashr	$r5,$r5,31	# 0 or -1 from the sign bit
968	xor	$r0,$r0,$r5	# conditional negate of the quotient
969	sub	$r0,$r0,$r5
970	return
971
972#endif
973
974#ifdef	L_modsi3
975.global	__modsi3
# __modsi3: signed 32-bit remainder.
#   In:  $r0 = dividend, $r1 = divisor.  Out: $r0 = remainder.
#   Takes the remainder of the absolute values with __umodsi3, then gives
#   it the sign of the dividend. $r5 carries that sign across the call and
#   is overwritten, as is $r2.
976__modsi3:
977	move	$r5,$r0		# $r5 is sign of result
978	ashr	$r2,$r0,31	# $r0 = abs($r0)
979	xor	$r0,$r0,$r2
980	sub	$r0,$r0,$r2
981	ashr	$r2,$r1,31	# $r1 = abs($r1)
982	xor	$r1,$r1,$r2
983	sub	$r1,$r1,$r2
984	call	__umodsi3
985	ashr	$r5,$r5,31	# 0 or -1 from the sign bit of the dividend
986	xor	$r0,$r0,$r5	# conditional negate of the remainder
987	sub	$r0,$r0,$r5
988	return
989#endif
990