/* Copyright (C) 2005, 2007 Free Software Foundation, Inc.
   Contributed by Sunnorth

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* libgcc support routines for the S+core (score) target:
   cache flush, 32-bit multiply, and 32-bit divide/modulo.
   Register aliases follow the Score ABI naming convention:
   ra = return address, a0-a3 = argument registers,
   v0 = return value, t0-t4 = temporaries.  */

#define ra r3
#define a0 r4
#define a1 r5
#define a2 r6
#define a3 r7
#define v0 r23

#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define t4 r22

#ifndef __pic__
#if !defined(L_mulsi3) && !defined(L_divsi3)
	.text
	.global _flush_cache

/* void _flush_cache (start, size)
   Assumes r4 = start address and r5 = size in bytes (r5 >> 4 is
   used as a 16-byte cache-line count) -- TODO confirm against the
   compiler's caller.  Writes back/invalidates the dcache over the
   range, then invalidates the icache over the same range.  On
   score3 the cache operations are not needed and this is a no-op
   return.  */
#ifdef __score3__
_flush_cache:
	br	r3			# score3: nothing to do, just return
#else
_flush_cache:
	srli	r9, r5, 4		# r9 = size / 16 = number of cache lines
	mv	r8, r4			# r8 = current line address
	mtsr	r9, sr0			# load loop counter (sr0 drives bcnz) -- confirm in Score ISA manual
1:
	cache	0xe, [r8, 0]		# write back invalid dcache
	addi	r8, 16			# advance one 16-byte line
	bcnz	1b			# loop until counter reaches zero
	mfcr	r8, cr4
	bittst!	r8, 0x3			# if LDM is enabled, write back LDM
	beq!	6f
	ldi	r10, 0
	cache	0xc, [r10, 0]
6:
	bittst!	r8, 0x2			# if LIM is enabled, refill it
	beq!	7f
	cache	0x4, [r10, 0]
7:
	#nop!
	#nop!
	#nop!
	#nop!
	#nop!
	mv	r8, r4			# restart at the beginning for the icache pass
	mtsr	r9, sr0			# reload the line counter
2:
	cache	0x2, [r8, 0]		# invalidate unlocked icache
	#nop!
	#nop!
	#nop!
	#nop!
	#nop!
	addi	r8, 16
	bcnz	2b
	br	r3			# return (r3 = ra)
#endif
#endif

/* FUNCTION
	(U)INT32 v0 = __mulsi3 ((U)INT32 a0, (U)INT32 a1);
   REGISTERS:
	use t0
	modify a0
	a1 -> becomes 0
   NOTE:
	Simple shift-and-add multiply; per the original author this
	seems to give better performance than alternatives.  The low
	32 bits of the product are the same for signed and unsigned
	operands, so one routine serves both entry points.  */
#ifdef L_mulsi3
	.text
	.global __umulsi3
	.global __mulsi3
	/* signed multiplication (32x32) */
	.ent __mulsi3
__umulsi3:
__mulsi3:
	li	t1, 0			# t1 = product accumulator
__mulsi3_loop:
	andri.c	t0, a1, 1		# t0 = multiplier[0] (sets condition flags)
	srli	a1, a1, 1		# a1 /= 2
	beq	__mulsi3_loop2		# skip the add if (t0 == 0)
	add	t1, t1, a0		# product += multiplicand
__mulsi3_loop2:
	slli	a0, a0, 1		# multiplicand *= 2
	cmpi.c	a1, 0
	bne	__mulsi3_loop		# loop until the multiplier is exhausted
	mv	r4, t1			# return product in r4
	br	ra
	.end __mulsi3
#endif /* L_mulsi3 */

/* FUNCTION
	UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
	INT32  (v0) = __divsi3  (INT32  (a0), INT32  (a1));
	UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
	INT32  (v0) = __modsi3  (INT32  (a0), INT32  (a1));
   DESCRIPTION
	Performs 32-bit division/modulo by shift-and-subtract
	(restoring) division.
   REGISTERS
	used   t0 bit-index
	       t1
	modify a0 becomes the remainder  */
#ifdef L_divsi3
	.text
	.global __udivsi3
	.global __umodsi3
	.global __divsi3
	.global __modsi3

	/* unsigned division; on exit r4 = quotient, a1 = remainder */
	.ent __udivsi3
__udivsi3:
	li	t4, 0			# t4 = quotient accumulator
	cmpi.c	a1, 0
	beq	__uds_exit		# divisor == 0: quotient stays 0 (no trap)
	li	t0, 1			# t0 = quotient bit for current divisor position
	blt	__uds_ok		# divisor MSB already set: skip normalization
__uds_normalize:
	cmp.c	a0, a1
	bcc	__uds_ok		# stop once divisor exceeds dividend
	slli	a1, a1, 1		# divisor <<= 1
	slli	t0, t0, 1		# quotient bit <<= 1, in step
	cmpi.c	a1, 0
	bge	__uds_normalize		# stop when the divisor's MSB is reached
__uds_ok:
__uds_loop2:
	cmp.c	a0, a1
	bcc	__uds_loop3		# skip subtract if dividend < shifted divisor
	sub	a0, a0, a1		# subtract and ...
	or	t4, t4, t0		# ... record this quotient bit
__uds_loop3:
	srli	t0, t0, 1		# shift divisor and quotient bit back down
	srli	a1, a1, 1
	cmpi.c	t0, 0
	bne	__uds_loop2		# until every quotient bit has been tried
__uds_exit:
	mv	a1, a0			# a1 = remainder (consumed by __umodsi3)
	mv	r4, t4			# return quotient
	br	ra
	.end __udivsi3

	/* unsigned modulus: divide, then return the remainder */
	.ent __umodsi3
__umodsi3:
	mv	t3, ra			# save return address (jl clobbers ra)
	jl	__udivsi3
	mv	r4, a1			# return the remainder instead of the quotient
	br	t3
	.end __umodsi3

	/* abs and div: take |a0| and |a1|, then fall into the
	   unsigned divide.  Internal helper for __divsi3/__modsi3.  */
	.ent __orgsi3
__orgsi3:
	cmpi.c	a0, 0
	bge	__orgsi3_a0p
	neg	a0, a0			# a0 = |a0|
__orgsi3_a0p:
	cmpi.c	a1, 0
	bge	__udivsi3		# both operands non-negative: divide directly
	neg	a1, a1			# a1 = |a1|
	b	__udivsi3		# goto udivsi3
	.end __orgsi3

	/* signed division: |a0|/|a1|, negated if operand signs differ */
	.ent __divsi3
__divsi3:
	mv	t3, ra			# save caller's return address
	xor	t2, a0, a1		# t2 sign bit = sign of the quotient
	jl	__orgsi3
__divsi3_adjust:
	cmpi.c	t2, 0
	bge	__divsi3_exit
	neg	r4, r4			# negate result when signs differed
__divsi3_exit:
	br	t3
	.end __divsi3

	/* signed modulus: remainder takes the sign of the dividend */
	.ent __modsi3
__modsi3:
	mv	t3, ra			# save caller's return address
	mv	t2, a0			# t2 sign bit = dividend's sign
	jl	__orgsi3
	mv	r4, a1			# r4 = remainder from __udivsi3
	b	__divsi3_adjust		# negate if the dividend was negative
	.end __modsi3

#endif /* L_divsi3 */
#else /* -fPIC */
/* PIC variants of the routines above.  Each entry reserves an
   8-byte frame on r0 (the Score stack pointer -- TODO confirm) and
   uses .cpload/.cprestore for GP setup, with cross-routine calls
   made indirectly through r29.
   NOTE(review): `.cprestore r0, 12` names offset 12 inside an
   8-byte frame -- looks suspicious; confirm against the Score ABI
   before changing.  */
#if !defined(L_mulsi3) && !defined(L_divsi3)
	.set pic
	.text
	.global _flush_cache
#ifdef __score3__
_flush_cache:
	br	r3			# score3: nothing to do, just return
#else
_flush_cache:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	srli	r9, r5, 4		# r9 = size / 16 = number of cache lines
	mv	r8, r4			# r8 = current line address
	mtsr	r9, sr0			# load loop counter (sr0 drives bcnz)
1:
	cache	0xe, [r8, 0]		# write back invalid dcache
	addi	r8, 16
	bcnz	1b
	mfcr	r8, cr4
	bittst!	r8, 0x3			# if LDM is enabled, write back LDM
	beq!	6f
	ldi	r10, 0
	cache	0xc, [r10, 0]
6:
	bittst!	r8, 0x2			# if LIM is enabled, refill it
	beq!	7f
	cache	0x4, [r10, 0]
7:
	#nop!
	#nop!
	#nop!
	#nop!
	#nop!
	mv	r8, r4			# restart at the beginning for the icache pass
	mtsr	r9, sr0
2:
	cache	0x2, [r8, 0]		# invalidate unlocked icache
	#nop!
	#nop!
	#nop!
	#nop!
	#nop!
	addi	r8, 16
	bcnz	2b
	.cprestore r0, 12		# pic used
	addi	r0, 8			# pic used
	br	r3
#endif
#endif

/* FUNCTION
	(U)INT32 v0 = __mulsi3 ((U)INT32 a0, (U)INT32 a1);
   REGISTERS:
	use t0
	modify a0
	a1 -> becomes 0
   NOTE:
	Simple shift-and-add multiply; per the original author this
	seems to give better performance than alternatives.  */
#ifdef L_mulsi3
	.set pic
	.text
	.global __umulsi3
	.global __mulsi3
	/* signed multiplication (32x32) */
	.ent __mulsi3
__umulsi3:
__mulsi3:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	li	t1, 0			# t1 = product accumulator
__mulsi3_loop:
	andri.c	t0, a1, 1		# t0 = multiplier[0] (sets condition flags)
	srli	a1, a1, 1		# a1 /= 2
	beq	__mulsi3_loop2		# skip the add if (t0 == 0)
	add	t1, t1, a0		# product += multiplicand
__mulsi3_loop2:
	slli	a0, a0, 1		# multiplicand *= 2
	cmpi.c	a1, 0
	bne	__mulsi3_loop
	mv	r4, t1			# return product in r4
	.cprestore r0, 12		# pic used
	addi	r0, 8			# pic used
	br	ra
	.end __mulsi3
#endif /* L_mulsi3 */

/* FUNCTION
	UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
	INT32  (v0) = __divsi3  (INT32  (a0), INT32  (a1));
	UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
	INT32  (v0) = __modsi3  (INT32  (a0), INT32  (a1));
   DESCRIPTION
	Performs 32-bit division/modulo by shift-and-subtract
	(restoring) division.
   REGISTERS
	used   t0 bit-index
	       t1
	modify a0 becomes the remainder  */
#ifdef L_divsi3
	.set pic
	.text
	.global __udivsi3
	.global __umodsi3
	.global __divsi3
	.global __modsi3

	/* unsigned division; on exit r4 = quotient, a1 = remainder */
	.ent __udivsi3
__udivsi3:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	li	t4, 0			# t4 = quotient accumulator
	cmpi.c	a1, 0
	beq	__uds_exit		# divisor == 0: quotient stays 0 (no trap)
	li	t0, 1			# t0 = quotient bit for current divisor position
	blt	__uds_ok		# divisor MSB already set: skip normalization
__uds_normalize:
	cmp.c	a0, a1
	bcc	__uds_ok		# stop once divisor exceeds dividend
	slli	a1, a1, 1		# divisor <<= 1
	slli	t0, t0, 1		# quotient bit <<= 1, in step
	cmpi.c	a1, 0
	bge	__uds_normalize		# stop when the divisor's MSB is reached
__uds_ok:
__uds_loop2:
	cmp.c	a0, a1
	bcc	__uds_loop3		# skip subtract if dividend < shifted divisor
	sub	a0, a0, a1		# subtract and ...
	or	t4, t4, t0		# ... record this quotient bit
__uds_loop3:
	srli	t0, t0, 1		# shift divisor and quotient bit back down
	srli	a1, a1, 1
	cmpi.c	t0, 0
	bne	__uds_loop2		# until every quotient bit has been tried
__uds_exit:
	mv	a1, a0			# a1 = remainder (consumed by __umodsi3)
	mv	r4, t4			# return quotient
	.cprestore r0, 12		# pic used
	addi	r0, 8			# pic used
	br	ra
	.end __udivsi3

	/* unsigned modulus: divide, then return the remainder */
	.ent __umodsi3
__umodsi3:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	li	t1, 0			# NOTE(review): t1 appears unused below; likely leftover
	mv	t3, ra			# save return address (brl clobbers ra)
	la	r29, __udivsi3
	brl	r29			# indirect PIC call to __udivsi3
	mv	r4, a1			# return the remainder instead of the quotient
	.cprestore r0, 12		# pic used
	addi	r0, 8			# pic used
	br	t3
	.end __umodsi3

	/* abs and div: take |a0| and |a1|, then fall into the
	   unsigned divide.  Internal helper for __divsi3/__modsi3
	   (no PIC prologue here: __udivsi3 sets up its own).  */
	.ent __orgsi3
__orgsi3:
	cmpi.c	a0, 0
	bge	__orgsi3_a0p
	neg	a0, a0			# a0 = |a0|
__orgsi3_a0p:
	cmpi.c	a1, 0
	bge	__udivsi3		# both operands non-negative: divide directly
	neg	a1, a1			# a1 = |a1|
	b	__udivsi3		# goto udivsi3
	.end __orgsi3

	/* signed division: |a0|/|a1|, negated if operand signs differ */
	.ent __divsi3
__divsi3:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	mv	t3, ra			# save caller's return address
	xor	t2, a0, a1		# t2 sign bit = sign of the quotient
	la	r29, __orgsi3
	brl	r29			# indirect PIC call to __orgsi3
__divsi3_adjust:
	cmpi.c	t2, 0
	bge	__divsi3_exit
	neg	r4, r4			# negate result when signs differed
__divsi3_exit:
	.cprestore r0, 12		# pic used
	addi	r0, 8			# pic used
	br	t3
	.end __divsi3

	/* signed modulus: remainder takes the sign of the dividend */
	.ent __modsi3
__modsi3:
	addi	r0, -8			# pic used
	.cpload	r29			# pic used
	mv	t3, ra			# save caller's return address
	mv	t2, a0			# t2 sign bit = dividend's sign
	la	r29, __orgsi3
	brl	r29			# indirect PIC call to __orgsi3
	mv	r4, a1			# r4 = remainder from __udivsi3
	b	__divsi3_adjust		# negate if the dividend was negative
	.end __modsi3

#endif /* L_divsi3 */
#endif