175584Sru#include "or1k-asm.h"
/*
 * Assembly functions for software multiplication and division.
 */
575584Sru
/*
 * ENTRY(symbol) - open a global function.
 *
 * Expands to: an `.align 4` directive, a `.global` export of `symbol`,
 * a `.type symbol, @function` annotation, and finally the label `symbol:`
 * itself.  The matching `.size` directive is written by hand at the end
 * of each routine.
 */
#define ENTRY(symbol)	\
	.align 4	;\
	.global symbol	;\
	.type	symbol, @function ;\
symbol:
1175584Sru
#ifdef L__mulsi3
/*
 * __mulsi3: software multiply by shift-and-add.
 *
 * In:   r3 = multiplier, r4 = multiplicand, r9 = return address
 * Out:  r11 = r3 * r4, truncated to the register width
 * Uses: r3, r4, r5, r11 and the compare flag are overwritten.
 *
 * The multiplier is consumed one bit at a time from the least significant
 * end; for every set bit the (progressively left-shifted) multiplicand is
 * added to the running sum.  The loop stops as soon as no set bits remain
 * in the multiplier, and is skipped entirely when the multiplier is zero.
 *
 * r0 is used directly as the zero operand of the comparisons, as it is
 * elsewhere in this file (e.g. `l.addi rX,r0,0` to clear a register).
 *
 * NOTE(review): OR1K_DELAYED / OR1K_DELAYED_NOP / OR1K_INST are defined in
 * or1k-asm.h, which is not visible here.  The code is written on the
 * assumption that the first argument of OR1K_DELAYED is executed whether
 * or not the branch in the second argument is taken - confirm against
 * the header.
 */
ENTRY(__mulsi3)
	l.addi	r11,r0,0		/* running sum = 0 */
	l.sfne	r3,r0			/* flag = (multiplier != 0) */
	/* r5 = working copy of the multiplier; leave at once if it is zero */
OR1K_DELAYED(
	OR1K_INST(l.ori r5,r3,0),
	OR1K_INST(l.bnf 3f)
)
1:
	l.andi	r3,r5,1			/* r3 = lowest remaining multiplier bit */
	l.sfeq	r3,r0			/* flag = (that bit is clear) */
	/* drop the bit just examined; skip the add when it was clear */
OR1K_DELAYED(
	OR1K_INST(l.srli r5,r5,1),
	OR1K_INST(l.bf 2f)
)
	l.add	r11,r11,r4		/* bit was set: sum += multiplicand */
2:
	l.sfne	r5,r0			/* flag = (multiplier bits remain) */
	/* multiplicand <<= 1 for the next bit position; loop while bits remain */
OR1K_DELAYED(
	OR1K_INST(l.slli r4,r4,1),
	OR1K_INST(l.bf 1b)
)
3:
OR1K_DELAYED_NOP(
	OR1K_INST(l.jr r9)
)
.size __mulsi3,.-__mulsi3
#endif
4175584Sru
#ifdef L__udivsi3
/*
 * __udivsi3 / __udivsi3_internal: unsigned division by shift-and-subtract.
 *
 * In:   r3 = dividend, r4 = divisor, r9 = return address
 * Out:  r11 = quotient
 *       r7  = remainder (the modulo routines in this file call
 *             __udivsi3_internal and read the remainder back from r7)
 * Uses: r3-r8, r11, r13, r15, r17 and the compare flag are overwritten.
 *       r9 is reused as scratch inside the routine, so the return address
 *       is parked in a 4-byte stack slot and reloaded before returning.
 *
 * A zero divisor is not trapped: the routine returns r11 = 0 and r7 = 0.
 *
 * __udivsi3_internal is a hidden alias of the same entry point, used by the
 * signed/modulo helpers.  It is defined *after* ENTRY() so that it shares
 * the aligned address with __udivsi3; a label placed in front of the
 * `.align` inside ENTRY() would refer to alignment padding whenever the
 * assembler has to insert any.
 *
 * NOTE(review): OR1K_DELAYED / OR1K_DELAYED_NOP / OR1K_INST are defined in
 * or1k-asm.h, which is not visible here.  The code is written on the
 * assumption that the first argument of OR1K_DELAYED is executed whether
 * or not the branch in the second argument is taken - confirm against
 * the header.
 */
.global __udivsi3_internal
.hidden __udivsi3_internal
.type	__udivsi3_internal, @function
ENTRY(__udivsi3)
__udivsi3_internal:
	l.addi	r1,r1,-4
	l.sw	0(r1),r9		/* park return address; r9 is scratch below */
	l.addi	r11,r0,0		/* quotient = 0 */
	l.addi	r8,r4,0			/* r8 = divisor */
	l.addi	r5,r3,0			/* r5 = dividend (shifted left as it is consumed) */
	l.sfne	r8,r11			/* flag = (divisor != 0) */
	/* remainder = 0; divisor == 0 -> return immediately with 0 / 0 */
OR1K_DELAYED(
	OR1K_INST(l.addi r7,r0,0),
	OR1K_INST(l.bnf 4f)
)
	/*
	 * The following work equally on delay and no-delay implementations:
	 * the instruction that follows each l.bf only rewrites the flag, and
	 * neither branch target (5: / 6:) reads the flag.
	 */
	l.sfgtu	r8,r5			/* divisor > dividend  -> 5: (q = 0, rem = dividend) */
	l.bf	5f
	l.sfeq	r8,r5			/* divisor == dividend -> 6: (q = 1, rem = 0) */
	l.bf	6f
	l.sfltu	r11,r8			/* flag = (0 < divisor) */

	/* r13 = number of dividend bits not yet shifted into r7 */
OR1K_DELAYED(
	OR1K_INST(l.addi r13,r0,32),
	OR1K_INST(l.bnf 2f)
)
	l.movhi	r9,hi(0x80000000)	/* r9 = top-bit mask */
	l.addi	r6,r0,-1		/* r6 = -1, added to r13 to count down */
	/*
	 * Normalisation loop: move dividend bits, most significant first,
	 * into r7 until r7 is no longer below the divisor.  r15 keeps the
	 * dividend as it was before the most recent shift.
	 */
1:
	l.and	r3,r5,r9
	l.slli	r4,r7,1
	l.addi	r15,r5,0
	l.srli	r3,r3,31		/* r3 = current top bit of the dividend */
	l.add	r13,r13,r6		/* one bit fewer left */
	l.or	r7,r4,r3		/* r7 = (r7 << 1) | bit */
	l.sfltu	r7,r8
OR1K_DELAYED(
	OR1K_INST(l.slli r5,r5,1),
	OR1K_INST(l.bf 1b)
)
	/*
	 * Step back by one bit (r7 >>= 1, r13 += 1, r5 = r15) so that the
	 * main loop reprocesses it with a trial subtraction.  r13 is now the
	 * number of quotient bits to produce and r9 becomes the loop counter.
	 */
2:
	l.srli	r7,r7,1
	l.addi	r13,r13,1
	l.addi	r9,r0,0
	l.sfltu	r9,r13
OR1K_DELAYED(
	OR1K_INST(l.addi r5,r15,0),
	OR1K_INST(l.bnf 4f)
)
	l.movhi	r15,hi(0x80000000)	/* r15 = top-bit mask */
	l.addi	r17,r0,0		/* r17 = 0, compared against the quotient bit */
	/*
	 * Main loop, one quotient bit per pass:
	 *   r7 = (r7 << 1) | next dividend bit
	 *   r6 = r7 - divisor
	 *   r4 = 0 if the top bit of r6 is set (subtraction went negative),
	 *        otherwise 1
	 *   r11 = (r11 << 1) | r4, and r7 = r6 when r4 is 1
	 */
3:
	l.and	r3,r5,r15
	l.slli	r4,r7,1
	l.srli	r3,r3,31
	l.or	r7,r4,r3
	l.sub	r6,r7,r8
	l.and	r3,r6,r15
	l.srli	r3,r3,31		/* r3 = top bit of the trial difference */
	l.addi	r4,r0,0			/* assume quotient bit 0 */
	l.sfne	r3,r4
	/* r3 = quotient << 1; keep bit 0 when the difference was negative */
OR1K_DELAYED(
	OR1K_INST(l.slli r3,r11,1),
	OR1K_INST(l.bf 1f)
)
	l.addi	r4,r0,1			/* difference non-negative: quotient bit 1 */
1:
	l.slli	r5,r5,1			/* consume the dividend bit */
	l.sfne	r4,r17			/* flag = (quotient bit != 0) */
	/* append the bit to the quotient; keep old r7 when the bit is 0 */
OR1K_DELAYED(
	OR1K_INST(l.or r11,r3,r4),
	OR1K_INST(l.bnf 2f)
)
	l.addi	r7,r6,0			/* bit 1: accept the subtraction */
2:
	l.addi	r9,r9,1
	l.sfltu	r9,r13			/* more quotient bits to produce? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bf 3b)
)
OR1K_DELAYED_NOP(
	OR1K_INST(l.j 4f)
)
	/* divisor == dividend: quotient 1, remainder stays 0 */
6:
OR1K_DELAYED(
	OR1K_INST(l.addi r11,r0,1),
	OR1K_INST(l.j 4f)
)
	/* divisor > dividend: quotient stays 0, remainder = dividend */
5:
	l.addi	r7,r5,0
	/* common exit: reload the return address and release the stack slot */
4:
	l.lwz	r9,0(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi r1,r1,4),
	OR1K_INST(l.jr r9)
)
.size __udivsi3,.-__udivsi3
.size __udivsi3_internal,.-__udivsi3_internal
#endif
14075584Sru
14175584Sru
#ifdef L__divsi3
/*
 * __divsi3: signed division built on __udivsi3_internal.
 *
 * In:   r3 = dividend, r4 = divisor, r9 = return address
 * Out:  r11 = quotient
 *
 * Each negative operand is replaced by its negation, the unsigned routine
 * is called with the operands in r3/r4, and the quotient it leaves in r11
 * is negated when exactly one of the operands was negative.
 *
 * r14 counts the negative operands (0, 1 or 2).  Its previous value and the
 * return address in r9 are kept in an 8-byte stack frame across the call
 * and restored before returning.
 *
 * The operands are negated in place, the same way __modsi3 does it; no
 * intermediate copy of the dividend is needed.
 *
 * NOTE(review): OR1K_DELAYED / OR1K_DELAYED_NOP / OR1K_INST are defined in
 * or1k-asm.h, which is not visible here.  The code is written on the
 * assumption that the first argument of OR1K_DELAYED is executed whether
 * or not the branch in the second argument is taken - confirm against
 * the header.
 */
ENTRY(__divsi3)
	l.addi	r1,r1,-8
	l.sw	0(r1),r9
	l.sw	4(r1),r14
	l.addi	r14,r0,0		/* negative-operand count = 0 */
	l.sflts	r3,r0			/* dividend < 0 ? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf 1f)
)
	l.addi	r14,r0,1
	l.sub	r3,r0,r3		/* dividend = -dividend */
1:
	l.sflts	r4,r0			/* divisor < 0 ? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf 1f)
)
	l.addi	r14,r14,1
	l.sub	r4,r0,r4		/* divisor = -divisor */
1:
OR1K_DELAYED_NOP(
	OR1K_INST(l.jal __udivsi3_internal)
)
	l.sfeqi	r14,1			/* exactly one operand was negative ? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf 1f)
)
	l.sub	r11,r0,r11		/* yes: quotient = -quotient */
1:
	l.lwz	r9,0(r1)
	l.lwz	r14,4(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi r1,r1,8),
	OR1K_INST(l.jr r9)
)
.size __divsi3,.-__divsi3
#endif
18275584Sru
18375584Sru
#ifdef L__umodsi3
/*
 * __umodsi3: unsigned remainder built on __udivsi3_internal.
 *
 * In:   r3 = dividend, r4 = divisor, r9 = return address
 * Out:  r11 = remainder
 *
 * The operands are passed through untouched to __udivsi3_internal; on
 * return the value it left in r7 is copied into r11.  The return address
 * is kept in a 4-byte stack slot across the call.
 */
ENTRY(__umodsi3)
	l.addi	r1,r1,-4
	l.sw	0(r1),r9
OR1K_DELAYED_NOP(
	OR1K_INST(l.jal __udivsi3_internal)
)
	l.addi	r11,r7,0		/* result = remainder left in r7 */
	l.lwz	r9,0(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi r1,r1,4),
	OR1K_INST(l.jr r9)
)
.size __umodsi3,.-__umodsi3
#endif
19975584Sru
20075584Sru
#ifdef L__modsi3
/*
 * __modsi3: signed remainder built on __udivsi3_internal.
 *
 * In:   r3 = dividend, r4 = divisor, r9 = return address
 * Out:  r11 = remainder
 *
 * Each negative operand is replaced by its negation before the unsigned
 * routine is called.  The remainder it leaves in r7 is copied to r11 and
 * negated when the dividend was negative; the sign of the divisor does not
 * influence the sign of the result.
 *
 * r14 records whether the dividend was negative (0 or 1).  Its previous
 * value and the return address in r9 are kept in an 8-byte stack frame
 * across the call and restored before returning.
 *
 * NOTE(review): OR1K_DELAYED / OR1K_DELAYED_NOP / OR1K_INST are defined in
 * or1k-asm.h, which is not visible here.  The code is written on the
 * assumption that the first argument of OR1K_DELAYED is executed whether
 * or not the branch in the second argument is taken - confirm against
 * the header.
 */
ENTRY(__modsi3)
	l.addi	r1,r1,-8
	l.sw	0(r1),r9
	l.sw	4(r1),r14
	l.addi	r14,r0,0		/* dividend-was-negative = 0 */
	l.sflts	r3,r0			/* dividend < 0 ? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf 1f)
)
	l.addi	r14,r0,1
	l.sub	r3,r0,r3		/* dividend = -dividend */
1:
	l.sflts	r4,r0			/* divisor < 0 ? */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf 1f)
)
	l.sub	r4,r0,r4		/* divisor = -divisor */
1:
OR1K_DELAYED_NOP(
	OR1K_INST(l.jal __udivsi3_internal)
)
	l.sfeqi	r14,1			/* was the dividend negative ? */
	/* result = remainder from r7, copied on both paths of the branch */
OR1K_DELAYED(
	OR1K_INST(l.addi r11,r7,0),
	OR1K_INST(l.bnf 1f)
)
	l.sub	r11,r0,r11		/* yes: remainder = -remainder */
1:
	l.lwz	r9,0(r1)
	l.lwz	r14,4(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi r1,r1,8),
	OR1K_INST(l.jr r9)
)
.size __modsi3,.-__modsi3
#endif
23875584Sru