#include "or1k-asm.h"
/*
 * Assembly functions for software multiplication and division.
 *
 * Each routine is wrapped in its own #ifdef L__<name> guard, so a given
 * compilation of this file emits only the routines whose guard macro is
 * defined.
 *
 * Register usage as seen in the code below:
 *   r3, r4  operands on entry
 *   r11     result on return
 *   r9      return address (every routine leaves through l.jr r9)
 *   r1      stack pointer (moved down on entry, moved back up on exit)
 *   r0      used wherever a zero operand is needed
 *
 * OR1K_DELAYED / OR1K_DELAYED_NOP / OR1K_INST are provided by or1k-asm.h.
 * NOTE(review): presumably OR1K_DELAYED(a, b) pairs instruction a with
 * branch b so that a is executed whether or not b is taken, on both
 * delay-slot and no-delay targets, and OR1K_DELAYED_NOP(b) does the same
 * with a nop. The comments below rely on that reading; confirm against the
 * header.
 */

/* Emit an aligned, global, function-typed label named `symbol`. */
#define ENTRY(symbol)			 \
	.align 4			;\
	.global symbol			;\
	.type symbol, @function		;\
symbol:

#ifdef L__mulsi3
/*
 * __mulsi3: 32-bit multiply by shift-and-add.
 *   In:      r3, r4 = factors
 *   Out:     r11    = low 32 bits of r3 * r4
 *   Scratch: r3, r4, r5, r6
 */
ENTRY(__mulsi3)
	l.addi		r11,r0,0x0		/* product = 0 */
	l.sfne		r3,r11			/* flag = (r3 != 0) */
	/* r5 = working copy of r3; a zero factor returns 0 right away */
OR1K_DELAYED(
	OR1K_INST(l.ori	r5,r3,0x0),
	OR1K_INST(l.bnf	3f)
)
	l.addi		r6,r0,0x0		/* r6 = constant 0 for the compares */
1:
	l.andi		r3,r5,0x1		/* lowest remaining bit of r5 */
	l.sfeq		r3,r6			/* flag = (that bit is clear) */
	/* drop the bit just examined; skip the add when it was clear */
OR1K_DELAYED(
	OR1K_INST(l.srli	r5,r5,0x1),
	OR1K_INST(l.bf	2f)
)
	l.add		r11,r11,r4		/* product += shifted r4 */
2:
	l.sfne		r5,r6			/* flag = (bits of r5 remain) */
	/* r4 <<= 1 for the next bit position; loop while r5 != 0 */
OR1K_DELAYED(
	OR1K_INST(l.slli	r4,r4,0x1),
	OR1K_INST(l.bf	1b)
)
3:
OR1K_DELAYED_NOP(
	OR1K_INST(l.jr	r9)
)
.size __mulsi3,.-__mulsi3
#endif

#ifdef L__udivsi3
/*
 * __udivsi3 / __udivsi3_internal: unsigned 32-bit divide by
 * shift-and-subtract.
 *   In:      r3 = dividend, r4 = divisor
 *   Out:     r11 = quotient
 *            r7  = remainder (the modulo routines in this file read r7
 *                  after calling __udivsi3_internal)
 *   Scratch: r3, r4, r5, r6, r8, r13, r15, r17
 *   Stack:   4 bytes; r9 is used as a scratch register in the body, so it
 *            is stored on entry and reloaded before the return.
 *
 * A zero divisor does not trap here: the routine returns with r11 = 0 and
 * r7 = 0.
 *
 * __udivsi3_internal is a hidden global label that the signed-divide and
 * modulo routines in this file call.
 * NOTE(review): the label is placed before the .align emitted by ENTRY, so
 * it coincides with __udivsi3 only when no padding is inserted at that
 * point.
 */
.global	__udivsi3_internal
.hidden	__udivsi3_internal
__udivsi3_internal:
ENTRY(__udivsi3)
	l.addi		r1,r1,-4
	l.sw		0(r1),r9		/* r9 is reused as scratch below */
	l.addi		r11,r0,0		/* quotient = 0 */
	l.addi		r8,r4,0			/* r8 = divisor */
	l.addi		r5,r3,0			/* r5 = dividend, shifted left as consumed */
	l.sfne		r8,r11			/* flag = (divisor != 0) */
	/* remainder = 0; a zero divisor exits with quotient 0 */
OR1K_DELAYED(
	OR1K_INST(l.addi	r7,r0,0),
	OR1K_INST(l.bnf	4f)
)
	/* The following work equally on delay and no-delay implementations */
	l.sfgtu		r8,r5			/* divisor > dividend ... */
	l.bf		5f			/* ... quotient 0, remainder = dividend */
	l.sfeq		r8,r5			/* divisor == dividend ... */
	l.bf		6f			/* ... quotient 1, remainder 0 */
	l.sfltu		r11,r8			/* flag = (0 < divisor) */

	/* r13 = 32, the number of dividend bits */
OR1K_DELAYED(
	OR1K_INST(l.addi	r13,r0,32),
	OR1K_INST(l.bnf	2f)
)
	l.movhi		r9,hi(0x80000000)	/* r9 = mask for bit 31 */
	l.addi		r6,r0,-1		/* r6 = -1, the step applied to r13 */
	/*
	 * Phase 1: move dividend bits, most significant first, into r7 until
	 * r7 is no longer below the divisor. r13 drops by one per bit moved
	 * and r15 keeps the dividend as it was before the latest shift.
	 */
1:
	l.and		r3,r5,r9
	l.slli		r4,r7,1
	l.addi		r15,r5,0		/* remember r5 before it is shifted */
	l.srli		r3,r3,31		/* r3 = top bit of r5 */
	l.add		r13,r13,r6		/* r13 -= 1 */
	l.or		r7,r4,r3		/* r7 = (r7 << 1) | top bit */
	l.sfltu		r7,r8			/* flag = (r7 < divisor) */
	/* shift the dividend; keep going while r7 is below the divisor */
OR1K_DELAYED(
	OR1K_INST(l.slli	r5,r5,1),
	OR1K_INST(l.bf	1b)
)
	/*
	 * Back out the last bit taken in phase 1 (r7 >>= 1, r13 += 1, r5
	 * restored from r15) so that the main loop processes it. r13 is now
	 * the number of iterations of the main loop; r9 becomes its counter.
	 */
2:
	l.srli		r7,r7,1
	l.addi		r13,r13,1
	l.addi		r9,r0,0			/* iteration counter = 0 */
	l.sfltu		r9,r13			/* flag = (counter < r13) */
OR1K_DELAYED(
	OR1K_INST(l.addi	r5,r15,0),
	OR1K_INST(l.bnf	4f)
)
	l.movhi		r15,hi(0x80000000)	/* r15 = mask for bit 31 */
	l.addi		r17,r0,0		/* r17 = constant 0 */
	/*
	 * Main loop, one quotient bit per pass: bring the next dividend bit
	 * into r7, compute r6 = r7 - divisor, and use the sign bit of r6 to
	 * pick the quotient bit (clear sign bit means the subtraction fits).
	 */
3:
	l.and		r3,r5,r15
	l.slli		r4,r7,1
	l.srli		r3,r3,31		/* r3 = top bit of r5 */
	l.or		r7,r4,r3		/* r7 = (r7 << 1) | top bit */
	l.sub		r6,r7,r8		/* r6 = trial remainder */
	l.and		r3,r6,r15
	l.srli		r3,r3,31		/* r3 = sign bit of r6 */
	l.addi		r4,r0,0			/* quotient bit defaults to 0 */
	l.sfne		r3,r4			/* flag = (r6 has its sign bit set) */
	/* r3 = quotient << 1; keep bit 0 when the trial went negative */
OR1K_DELAYED(
	OR1K_INST(l.slli	r3,r11,1),
	OR1K_INST(l.bf	1f)
)
	l.addi		r4,r0,1			/* subtraction fits: quotient bit = 1 */
1:
	l.slli		r5,r5,1			/* consume the dividend bit */
	l.sfne		r4,r17			/* flag = (quotient bit == 1) */
	/* quotient = (quotient << 1) | bit; skip the commit when bit is 0 */
OR1K_DELAYED(
	OR1K_INST(l.or	r11,r3,r4),
	OR1K_INST(l.bnf	2f)
)
	l.addi		r7,r6,0			/* commit: remainder = trial remainder */
2:
	l.addi		r9,r9,1
	l.sfltu		r9,r13			/* flag = (more iterations left) */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bf	3b)
)
OR1K_DELAYED_NOP(
	OR1K_INST(l.j	4f)
)
	/* divisor == dividend: quotient 1, r7 is still 0 from the entry code */
6:
OR1K_DELAYED(
	OR1K_INST(l.addi	r11,r0,1),
	OR1K_INST(l.j	4f)
)
	/* divisor > dividend: r11 is still 0, remainder is the dividend */
5:
	l.addi		r7,r5,0
	/* common exit: reload r9, release the stack slot, return */
4:
	l.lwz		r9,0(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi	r1,r1,4),
	OR1K_INST(l.jr	r9)
)
.size __udivsi3,.-__udivsi3
#endif


#ifdef L__divsi3
/*
 * __divsi3: signed 32-bit divide built on __udivsi3_internal.
 *   In:    r3 = dividend, r4 = divisor
 *   Out:   r11 = quotient
 *   Stack: 8 bytes holding r9 and r14, both reloaded before the return.
 *
 * r14 counts the negative operands (0, 1 or 2). Both operands are replaced
 * by their negation when negative, the unsigned divide is called, and the
 * quotient is negated only when exactly one operand was negative. r14 is
 * not written by the __udivsi3 code in this file, so the count is still
 * valid after the call.
 */
ENTRY(__divsi3)
	l.addi		r1,r1,-8
	l.sw		0(r1),r9
	l.sw		4(r1),r14
	l.addi		r5,r3,0			/* r5 = dividend */
	l.addi		r14,r0,0		/* negative-operand count = 0 */
	l.sflts		r5,r0			/* flag = (dividend < 0) */
	/* r3 is cleared here and reloaded from r5 just before the call */
OR1K_DELAYED(
	OR1K_INST(l.addi	r3,r0,0),
	OR1K_INST(l.bnf	1f)
)
	l.addi		r14,r0,1
	l.sub		r5,r0,r5		/* r5 = -dividend */
1:
	l.sflts		r4,r0			/* flag = (divisor < 0) */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf	1f)
)
	l.addi		r14,r14,1
	l.sub		r4,r0,r4		/* r4 = -divisor */
1:
	/* r3 = adjusted dividend, then call the unsigned divide */
OR1K_DELAYED(
	OR1K_INST(l.addi	r3,r5,0),
	OR1K_INST(l.jal	__udivsi3_internal)
)
	l.sfeqi		r14,1			/* flag = (exactly one operand was negative) */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf	1f)
)
	l.sub		r11,r0,r11		/* negate the quotient */
1:
	l.lwz		r9,0(r1)
	l.lwz		r14,4(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi	r1,r1,8),
	OR1K_INST(l.jr	r9)
)
.size __divsi3,.-__divsi3
#endif


#ifdef L__umodsi3
/*
 * __umodsi3: unsigned 32-bit remainder.
 *   In:    r3 = dividend, r4 = divisor (passed through untouched)
 *   Out:   r11 = value left in r7 by __udivsi3_internal
 *   Stack: 4 bytes holding r9, which l.jal overwrites.
 */
ENTRY(__umodsi3)
	l.addi		r1,r1,-4
	l.sw		0(r1),r9
OR1K_DELAYED_NOP(
	OR1K_INST(l.jal	__udivsi3_internal)
)
	l.addi		r11,r7,0		/* result = remainder from the divide */
	l.lwz		r9,0(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi	r1,r1,4),
	OR1K_INST(l.jr	r9)
)
.size __umodsi3,.-__umodsi3
#endif


#ifdef L__modsi3
/*
 * __modsi3: signed 32-bit remainder built on __udivsi3_internal.
 *   In:    r3 = dividend, r4 = divisor
 *   Out:   r11 = remainder, negated when the dividend was negative
 *   Stack: 8 bytes holding r9 and r14, both reloaded before the return.
 *
 * r14 is set to 1 only for a negative dividend; a negative divisor is
 * negated but does not affect the sign of the result.
 */
ENTRY(__modsi3)
	l.addi		r1,r1,-8
	l.sw		0(r1),r9
	l.sw		4(r1),r14
	l.addi		r14,r0,0		/* r14 = 0: dividend not negative (so far) */
	l.sflts		r3,r0			/* flag = (dividend < 0) */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf	1f)
)
	l.addi		r14,r0,1
	l.sub		r3,r0,r3		/* r3 = -dividend */
1:
	l.sflts		r4,r0			/* flag = (divisor < 0) */
OR1K_DELAYED_NOP(
	OR1K_INST(l.bnf	1f)
)
	l.sub		r4,r0,r4		/* r4 = -divisor */
1:
OR1K_DELAYED_NOP(
	OR1K_INST(l.jal	__udivsi3_internal)
)
	l.sfeqi		r14,1			/* flag = (dividend was negative) */
	/* r11 = remainder from the divide; negate it only when flagged */
OR1K_DELAYED(
	OR1K_INST(l.addi	r11,r7,0),
	OR1K_INST(l.bnf	1f)
)
	l.sub		r11,r0,r11
1:
	l.lwz		r9,0(r1)
	l.lwz		r14,4(r1)
OR1K_DELAYED(
	OR1K_INST(l.addi	r1,r1,8),
	OR1K_INST(l.jr	r9)
)
.size __modsi3,.-__modsi3
#endif