1/* Copyright (C) 2005, 2007 Free Software Foundation, Inc.
2   Contributed by Sunnorth
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published
8   by the Free Software Foundation; either version 3, or (at your
9   option) any later version.
10
11   GCC is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14   License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with GCC; see the file COPYING3.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#define ra r3
21#define a0 r4
22#define a1 r5
23#define a2 r6
24#define a3 r7
25#define v0 r23
26
27#define t0 r8
28#define t1 r9
29#define t2 r10
30#define t3 r11
31#define t4 r22
32
33#ifndef __pic__
34#if !defined(L_mulsi3) && !defined(L_divsi3)
35       .text
36       .global _flush_cache
37#ifdef __score3__
38_flush_cache:
39        br      r3
40#else
41_flush_cache:
42        srli    r9, r5, 4
43        mv      r8, r4
44        mtsr    r9, sr0
451:
46        cache   0xe, [r8, 0]            # write back invalid dcache
47        addi    r8, 16
48        bcnz    1b
49        mfcr    r8, cr4
50        bittst! r8, 0x3                 # if LDM is enable, write back LDM
51        beq!    6f
52        ldi     r10, 0
53        cache   0xc, [r10, 0]
546:
55        bittst! r8, 0x2                 # if LIM is enable, refill it
56        beq!    7f
57        cache   0x4, [r10, 0]
587:
59        #nop!
60        #nop!
61        #nop!
62        #nop!
63        #nop!
64        mv      r8, r4
65        mtsr    r9, sr0
662:
67        cache   0x2, [r8, 0]            # invalid unlock icache
68        #nop!
69        #nop!
70        #nop!
71        #nop!
72        #nop!
73        addi    r8, 16
74        bcnz    2b
75        br      r3
76#endif
77#endif
78
79/* FUNCTION
80   (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);
81   REGISTERS:
82        use     t0
83        modify  a0
84        a1      -> become 0
85   NOTE:
86   this seems to give better performance to just rotate and add.  */
87
88#ifdef L_mulsi3
89        .text
90        .global __umulsi3
91        .global __mulsi3
92        /* signed multiplication (32x32)  */
93        .ent    __mulsi3
94__umulsi3:
95__mulsi3:
96        li      t1, 0
97__mulsi3_loop:
98        andri.c t0, a1, 1               # t0 = multiplier[0]
99        srli    a1, a1, 1               # a1 /= 2
100        beq     __mulsi3_loop2          # skip if (t0 == 0)
101        add     t1, t1, a0              # add multiplicand
102__mulsi3_loop2:
103        slli    a0, a0, 1               # multiplicand mul 2
104        cmpi.c  a1, 0
105        bne     __mulsi3_loop
106        mv      r4, t1
107        br      ra
108        .end    __mulsi3
109#endif /* L_mulsi3 */
110
111/* FUNCTION
112   UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
113   INT32 (v0) = __divsi3 (INT32 (a0),  INT32 (a1));
114   UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
115   INT32 (v0) = __modsi3 (INT32 (a0),  INT32 (a1));
116   DESCRIPTION
117   performs 32-bit division/modulo.
118   REGISTERS
119   used t0      bit-index
120        t1
121   modify a0    becomes remainer  */
122#ifdef L_divsi3
123        .text
124        .global __udivsi3
125        .global __umodsi3
126        .global __divsi3
127        .global __modsi3
128
129        /* unsigned division  */
130        .ent    __udivsi3
131__udivsi3:
132        li      t4, 0
133        cmpi.c  a1, 0
134        beq     __uds_exit
135        li      t0, 1
136        blt     __uds_ok
137__uds_normalize:
138        cmp.c   a0, a1
139        bcc     __uds_ok
140        slli    a1, a1, 1
141        slli    t0, t0, 1
142        cmpi.c  a1, 0
143        bge     __uds_normalize
144__uds_ok:
145__uds_loop2:
146        cmp.c   a0, a1
147        bcc     __uds_loop3
148        sub     a0, a0, a1
149        or      t4, t4, t0
150__uds_loop3:
151        srli    t0, t0, 1
152        srli    a1, a1, 1
153        cmpi.c  t0, 0
154        bne     __uds_loop2
155__uds_exit:
156        mv      a1, a0
157        mv      r4, t4
158        br      ra
159        .end    __udivsi3
160
161        /* unsigned modulus  */
162        .ent    __umodsi3
163__umodsi3:
164        mv      t3, ra
165        jl      __udivsi3
166        mv      r4, a1
167        br      t3
168        .end    __umodsi3
169
170        /* abs and div  */
171        .ent    __orgsi3
172__orgsi3:
173        cmpi.c  a0, 0
174        bge     __orgsi3_a0p
175        neg     a0, a0
176__orgsi3_a0p:
177        cmpi.c  a1, 0
178        bge     __udivsi3
179        neg     a1, a1
180        b       __udivsi3               # goto udivsi3
181        .end    __orgsi3
182
183        /* signed division  */
184        .ent    __divsi3
185__divsi3:
186        mv      t3, ra
187        xor     t2, a0, a1
188        jl      __orgsi3
189__divsi3_adjust:
190        cmpi.c  t2, 0
191        bge     __divsi3_exit
192        neg     r4, r4
193__divsi3_exit:
194        br      t3
195        .end    __divsi3
196
197        /* signed modulus  */
198        .ent    __modsi3
199__modsi3:
200        mv      t3, ra
201        mv      t2, a0
202        jl      __orgsi3
203        mv      r4, a1
204        b       __divsi3_adjust
205        .end    __modsi3
206
207#endif /* L_divsi3 */
208#else /* -fPIC */
209#if !defined(L_mulsi3) && !defined(L_divsi3)
210        .set pic
211        .text
212        .global _flush_cache
213#ifdef __score3__
214_flush_cache:
215        br      r3
216#else
217_flush_cache:
218        addi    r0, -8                  # pic used
219        .cpload r29                     # pic used
220        srli    r9, r5, 4
221        mv      r8, r4
222        mtsr    r9, sr0
2231:
224        cache   0xe, [r8, 0]            # write back invalid dcache
225        addi    r8, 16
226        bcnz    1b
227        mfcr    r8, cr4
228        bittst! r8, 0x3                 # if LDM is enable, write back LDM
229        beq!    6f
230        ldi     r10, 0
231        cache   0xc, [r10, 0]
2326:
233        bittst! r8, 0x2                 # if LIM is enable, refill it
234        beq!    7f
235        cache   0x4, [r10, 0]
2367:
237        #nop!
238        #nop!
239        #nop!
240        #nop!
241        #nop!
242        mv      r8, r4
243        mtsr    r9, sr0
2442:
245        cache   0x2, [r8, 0]            # invalid unlock icache
246        #nop!
247        #nop!
248        #nop!
249        #nop!
250        #nop!
251        addi    r8, 16
252        bcnz    2b
253        .cprestore r0, 12               # pic used
254        addi    r0, 8                   # pic used
255        br      r3
256#endif
257#endif
258
259/* FUNCTION
260   (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);
261   REGISTERS:
262        use     t0
263        modify  a0
264        a1      -> become 0
265   NOTE:
266   this seems to give better performance to just rotate and add.  */
267
268#ifdef L_mulsi3
269        .set pic
270        .text
271        .global __umulsi3
272        .global __mulsi3
273        /* signed multiplication (32x32)  */
274        .ent    __mulsi3
275__umulsi3:
276__mulsi3:
277        addi    r0, -8                  # pic used
278        .cpload r29                     # pic used
279        li      t1, 0
280__mulsi3_loop:
281        andri.c t0, a1, 1               # t0 = multiplier[0]
282        srli    a1, a1, 1               # a1 /= 2
283        beq     __mulsi3_loop2          # skip if (t0 == 0)
284        add     t1, t1, a0              # add multiplicand
285__mulsi3_loop2:
286        slli    a0, a0, 1               # multiplicand mul 2
287        cmpi.c  a1, 0
288        bne     __mulsi3_loop
289        mv      r4, t1
290        .cprestore r0, 12               # pic used
291        addi    r0, 8                   # pic used
292        br      ra
293        .end    __mulsi3
294#endif /* L_mulsi3 */
295
296/* FUNCTION
297   UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
298   INT32 (v0) = __divsi3 (INT32 (a0),  INT32 (a1));
299   UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
300   INT32 (v0) = __modsi3 (INT32 (a0),  INT32 (a1));
301   DESCRIPTION
302   performs 32-bit division/modulo.
303   REGISTERS
304   used t0      bit-index
305        t1
306   modify a0    becomes remainer  */
307#ifdef L_divsi3
308        .set pic
309        .text
310        .global __udivsi3
311        .global __umodsi3
312        .global __divsi3
313        .global __modsi3
314
315        /* unsigned division  */
316        .ent    __udivsi3
317__udivsi3:
318        addi    r0, -8                  # pic used
319        .cpload r29                     # pic used
320        li      t4, 0
321        cmpi.c  a1, 0
322        beq     __uds_exit
323        li      t0, 1
324        blt     __uds_ok
325__uds_normalize:
326        cmp.c   a0, a1
327        bcc     __uds_ok
328        slli    a1, a1, 1
329        slli    t0, t0, 1
330        cmpi.c  a1, 0
331        bge     __uds_normalize
332__uds_ok:
333__uds_loop2:
334        cmp.c   a0, a1
335        bcc     __uds_loop3
336        sub     a0, a0, a1
337        or      t4, t4, t0
338__uds_loop3:
339        srli    t0, t0, 1
340        srli    a1, a1, 1
341        cmpi.c  t0, 0
342        bne     __uds_loop2
343__uds_exit:
344        mv      a1, a0
345        mv      r4, t4
346        .cprestore r0, 12               # pic used
347        addi    r0, 8                   # pic used
348        br      ra
349        .end    __udivsi3
350
351        /* unsigned modulus  */
352        .ent    __umodsi3
353__umodsi3:
354        addi    r0, -8                  # pic used
355        .cpload r29                     # pic used
356        li      t1, 0
357        mv      t3, ra
358        la      r29, __udivsi3
359        brl     r29
360        mv      r4, a1
361        .cprestore r0, 12               # pic used
362        addi    r0, 8                   # pic used
363        br      t3
364        .end    __umodsi3
365
366        /* abs and div  */
367        .ent    __orgsi3
368__orgsi3:
369        cmpi.c  a0, 0
370        bge     __orgsi3_a0p
371        neg     a0, a0
372__orgsi3_a0p:
373        cmpi.c  a1, 0
374        bge     __udivsi3
375        neg     a1, a1
376        b       __udivsi3               # goto udivsi3
377        .end    __orgsi3
378
379        /* signed division  */
380        .ent    __divsi3
381__divsi3:
382        addi    r0, -8                  # pic used
383        .cpload r29                     # pic used
384        mv      t3, ra
385        xor     t2, a0, a1
386        la      r29, __orgsi3
387        brl     r29
388__divsi3_adjust:
389        cmpi.c  t2, 0
390        bge     __divsi3_exit
391        neg     r4, r4
392__divsi3_exit:
393        .cprestore r0, 12               # pic used
394        addi    r0, 8                   # pic used
395        br      t3
396        .end    __divsi3
397
398        /* signed modulus  */
399        .ent    __modsi3
400__modsi3:
401        addi    r0, -8                  # pic used
402        .cpload r29                     # pic used
403        mv      t3, ra
404        mv      t2, a0
405        la      r29, __orgsi3
406        brl     r29
407        mv      r4, a1
408        b       __divsi3_adjust
409        .end    __modsi3
410
411#endif /*L_divsi3 */
412#endif
413