/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002,2003, 2005 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "xtensa-config.h"

# Note: These functions use a minimum stack frame size of 32.  This is
# necessary for Xtensa configurations that only support a fixed register
# window size of 8, where even leaf functions (such as these) need to
# allocate space for a 4-word "extra save area".

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.
40 41 .macro do_abs dst, src, tmp 42#if XCHAL_HAVE_ABS 43 abs \dst, \src 44#else 45 neg \tmp, \src 46 movgez \tmp, \src, \src 47 mov \dst, \tmp 48#endif 49 .endm 50 51 .macro do_addx2 dst, as, at, tmp 52#if XCHAL_HAVE_ADDX 53 addx2 \dst, \as, \at 54#else 55 slli \tmp, \as, 1 56 add \dst, \tmp, \at 57#endif 58 .endm 59 60 .macro do_addx4 dst, as, at, tmp 61#if XCHAL_HAVE_ADDX 62 addx4 \dst, \as, \at 63#else 64 slli \tmp, \as, 2 65 add \dst, \tmp, \at 66#endif 67 .endm 68 69 .macro do_addx8 dst, as, at, tmp 70#if XCHAL_HAVE_ADDX 71 addx8 \dst, \as, \at 72#else 73 slli \tmp, \as, 3 74 add \dst, \tmp, \at 75#endif 76 .endm 77 78# Define macros for function entry and return, supporting either the 79# standard register windowed ABI or the non-windowed call0 ABI. These 80# macros do not allocate any extra stack space, so they only work for 81# leaf functions that do not need to spill anything to the stack. 82 83 .macro abi_entry reg, size 84#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ 85 entry \reg, \size 86#else 87 /* do nothing */ 88#endif 89 .endm 90 91 .macro abi_return 92#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ 93 retw 94#else 95 ret 96#endif 97 .endm 98 99 100#ifdef L_mulsi3 101 .align 4 102 .global __mulsi3 103 .type __mulsi3,@function 104__mulsi3: 105 abi_entry sp, 32 106 107#if XCHAL_HAVE_MUL16 108 or a4, a2, a3 109 srai a4, a4, 16 110 bnez a4, .LMUL16 111 mul16u a2, a2, a3 112 abi_return 113.LMUL16: 114 srai a4, a2, 16 115 srai a5, a3, 16 116 mul16u a7, a4, a3 117 mul16u a6, a5, a2 118 mul16u a4, a2, a3 119 add a7, a7, a6 120 slli a7, a7, 16 121 add a2, a7, a4 122 123#elif XCHAL_HAVE_MAC16 124 mul.aa.hl a2, a3 125 mula.aa.lh a2, a3 126 rsr a5, ACCLO 127 umul.aa.ll a2, a3 128 rsr a4, ACCLO 129 slli a5, a5, 16 130 add a2, a4, a5 131 132#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */ 133 134 # Multiply one bit at a time, but unroll the loop 4x to better 135 # exploit the addx instructions and avoid overhead. 
136 # Peel the first iteration to save a cycle on init. 137 138 # Avoid negative numbers. 139 xor a5, a2, a3 # top bit is 1 iff one of the inputs is negative 140 do_abs a3, a3, a6 141 do_abs a2, a2, a6 142 143 # Swap so the second argument is smaller. 144 sub a7, a2, a3 145 mov a4, a3 146 movgez a4, a2, a7 # a4 = max(a2, a3) 147 movltz a3, a2, a7 # a3 = min(a2, a3) 148 149 movi a2, 0 150 extui a6, a3, 0, 1 151 movnez a2, a4, a6 152 153 do_addx2 a7, a4, a2, a7 154 extui a6, a3, 1, 1 155 movnez a2, a7, a6 156 157 do_addx4 a7, a4, a2, a7 158 extui a6, a3, 2, 1 159 movnez a2, a7, a6 160 161 do_addx8 a7, a4, a2, a7 162 extui a6, a3, 3, 1 163 movnez a2, a7, a6 164 165 bgeui a3, 16, .Lmult_main_loop 166 neg a3, a2 167 movltz a2, a3, a5 168 abi_return 169 170 .align 4 171.Lmult_main_loop: 172 srli a3, a3, 4 173 slli a4, a4, 4 174 175 add a7, a4, a2 176 extui a6, a3, 0, 1 177 movnez a2, a7, a6 178 179 do_addx2 a7, a4, a2, a7 180 extui a6, a3, 1, 1 181 movnez a2, a7, a6 182 183 do_addx4 a7, a4, a2, a7 184 extui a6, a3, 2, 1 185 movnez a2, a7, a6 186 187 do_addx8 a7, a4, a2, a7 188 extui a6, a3, 3, 1 189 movnez a2, a7, a6 190 191 bgeui a3, 16, .Lmult_main_loop 192 193 neg a3, a2 194 movltz a2, a3, a5 195 196#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */ 197 198 abi_return 199 .size __mulsi3,.-__mulsi3 200 201#endif /* L_mulsi3 */ 202 203 204# Define a macro for the NSAU (unsigned normalize shift amount) 205# instruction, which computes the number of leading zero bits, 206# to handle cases where it is not included in the Xtensa processor 207# configuration. 
208 209 .macro do_nsau cnt, val, tmp, a 210#if XCHAL_HAVE_NSA 211 nsau \cnt, \val 212#else 213 mov \a, \val 214 movi \cnt, 0 215 extui \tmp, \a, 16, 16 216 bnez \tmp, 0f 217 movi \cnt, 16 218 slli \a, \a, 16 2190: 220 extui \tmp, \a, 24, 8 221 bnez \tmp, 1f 222 addi \cnt, \cnt, 8 223 slli \a, \a, 8 2241: 225 movi \tmp, __nsau_data 226 extui \a, \a, 24, 8 227 add \tmp, \tmp, \a 228 l8ui \tmp, \tmp, 0 229 add \cnt, \cnt, \tmp 230#endif /* !XCHAL_HAVE_NSA */ 231 .endm 232 233#ifdef L_nsau 234 .section .rodata 235 .align 4 236 .global __nsau_data 237 .type __nsau_data,@object 238__nsau_data: 239#if !XCHAL_HAVE_NSA 240 .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 241 .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 242 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 243 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 244 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 245 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 246 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 247 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 248 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 249 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 250 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 251 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 252 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 253 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 254 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 255 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 256#endif /* !XCHAL_HAVE_NSA */ 257 .size __nsau_data,.-__nsau_data 258 .hidden __nsau_data 259#endif /* L_nsau */ 260 261 262#ifdef L_udivsi3 263 .align 4 264 .global __udivsi3 265 .type __udivsi3,@function 266__udivsi3: 267 abi_entry sp, 32 268 bltui a3, 2, .Lle_one # check if the divisor <= 1 269 270 mov a6, a2 # keep dividend in a6 271 do_nsau a5, a6, a2, a7 # dividend_shift = nsau(dividend) 272 do_nsau a4, a3, a2, a7 # divisor_shift = nsau(divisor) 273 bgeu 
a5, a4, .Lspecial 274 275 sub a4, a4, a5 # count = divisor_shift - dividend_shift 276 ssl a4 277 sll a3, a3 # divisor <<= count 278 movi a2, 0 # quotient = 0 279 280 # test-subtract-and-shift loop; one quotient bit on each iteration 281#if XCHAL_HAVE_LOOPS 282 loopnez a4, .Lloopend 283#endif /* XCHAL_HAVE_LOOPS */ 284.Lloop: 285 bltu a6, a3, .Lzerobit 286 sub a6, a6, a3 287 addi a2, a2, 1 288.Lzerobit: 289 slli a2, a2, 1 290 srli a3, a3, 1 291#if !XCHAL_HAVE_LOOPS 292 addi a4, a4, -1 293 bnez a4, .Lloop 294#endif /* !XCHAL_HAVE_LOOPS */ 295.Lloopend: 296 297 bltu a6, a3, .Lreturn 298 addi a2, a2, 1 # increment quotient if dividend >= divisor 299.Lreturn: 300 abi_return 301 302.Lle_one: 303 beqz a3, .Lerror # if divisor == 1, return the dividend 304 abi_return 305 306.Lspecial: 307 # return dividend >= divisor 308 bltu a6, a3, .Lreturn0 309 movi a2, 1 310 abi_return 311 312.Lerror: 313 # just return 0; could throw an exception 314 315.Lreturn0: 316 movi a2, 0 317 abi_return 318 .size __udivsi3,.-__udivsi3 319 320#endif /* L_udivsi3 */ 321 322 323#ifdef L_divsi3 324 .align 4 325 .global __divsi3 326 .type __divsi3,@function 327__divsi3: 328 abi_entry sp, 32 329 xor a7, a2, a3 # sign = dividend ^ divisor 330 do_abs a6, a2, a4 # udividend = abs(dividend) 331 do_abs a3, a3, a4 # udivisor = abs(divisor) 332 bltui a3, 2, .Lle_one # check if udivisor <= 1 333 do_nsau a5, a6, a2, a8 # udividend_shift = nsau(udividend) 334 do_nsau a4, a3, a2, a8 # udivisor_shift = nsau(udivisor) 335 bgeu a5, a4, .Lspecial 336 337 sub a4, a4, a5 # count = udivisor_shift - udividend_shift 338 ssl a4 339 sll a3, a3 # udivisor <<= count 340 movi a2, 0 # quotient = 0 341 342 # test-subtract-and-shift loop; one quotient bit on each iteration 343#if XCHAL_HAVE_LOOPS 344 loopnez a4, .Lloopend 345#endif /* XCHAL_HAVE_LOOPS */ 346.Lloop: 347 bltu a6, a3, .Lzerobit 348 sub a6, a6, a3 349 addi a2, a2, 1 350.Lzerobit: 351 slli a2, a2, 1 352 srli a3, a3, 1 353#if !XCHAL_HAVE_LOOPS 354 addi a4, a4, -1 355 
bnez a4, .Lloop 356#endif /* !XCHAL_HAVE_LOOPS */ 357.Lloopend: 358 359 bltu a6, a3, .Lreturn 360 addi a2, a2, 1 # increment quotient if udividend >= udivisor 361.Lreturn: 362 neg a5, a2 363 movltz a2, a5, a7 # return (sign < 0) ? -quotient : quotient 364 abi_return 365 366.Lle_one: 367 beqz a3, .Lerror 368 neg a2, a6 # if udivisor == 1, then return... 369 movgez a2, a6, a7 # (sign < 0) ? -udividend : udividend 370 abi_return 371 372.Lspecial: 373 bltu a6, a3, .Lreturn0 # if dividend < divisor, return 0 374 movi a2, 1 375 movi a4, -1 376 movltz a2, a4, a7 # else return (sign < 0) ? -1 : 1 377 abi_return 378 379.Lerror: 380 # just return 0; could throw an exception 381 382.Lreturn0: 383 movi a2, 0 384 abi_return 385 .size __divsi3,.-__divsi3 386 387#endif /* L_divsi3 */ 388 389 390#ifdef L_umodsi3 391 .align 4 392 .global __umodsi3 393 .type __umodsi3,@function 394__umodsi3: 395 abi_entry sp, 32 396 bltui a3, 2, .Lle_one # check if the divisor is <= 1 397 398 do_nsau a5, a2, a6, a7 # dividend_shift = nsau(dividend) 399 do_nsau a4, a3, a6, a7 # divisor_shift = nsau(divisor) 400 bgeu a5, a4, .Lspecial 401 402 sub a4, a4, a5 # count = divisor_shift - dividend_shift 403 ssl a4 404 sll a3, a3 # divisor <<= count 405 406 # test-subtract-and-shift loop 407#if XCHAL_HAVE_LOOPS 408 loopnez a4, .Lloopend 409#endif /* XCHAL_HAVE_LOOPS */ 410.Lloop: 411 bltu a2, a3, .Lzerobit 412 sub a2, a2, a3 413.Lzerobit: 414 srli a3, a3, 1 415#if !XCHAL_HAVE_LOOPS 416 addi a4, a4, -1 417 bnez a4, .Lloop 418#endif /* !XCHAL_HAVE_LOOPS */ 419.Lloopend: 420 421.Lspecial: 422 bltu a2, a3, .Lreturn 423 sub a2, a2, a3 # subtract once more if dividend >= divisor 424.Lreturn: 425 abi_return 426 427.Lle_one: 428 # the divisor is either 0 or 1, so just return 0. 429 # someday we may want to throw an exception if the divisor is 0. 
430 movi a2, 0 431 abi_return 432 .size __umodsi3,.-__umodsi3 433 434#endif /* L_umodsi3 */ 435 436 437#ifdef L_modsi3 438 .align 4 439 .global __modsi3 440 .type __modsi3,@function 441__modsi3: 442 abi_entry sp, 32 443 mov a7, a2 # save original (signed) dividend 444 do_abs a2, a2, a4 # udividend = abs(dividend) 445 do_abs a3, a3, a4 # udivisor = abs(divisor) 446 bltui a3, 2, .Lle_one # check if udivisor <= 1 447 do_nsau a5, a2, a6, a8 # udividend_shift = nsau(udividend) 448 do_nsau a4, a3, a6, a8 # udivisor_shift = nsau(udivisor) 449 bgeu a5, a4, .Lspecial 450 451 sub a4, a4, a5 # count = udivisor_shift - udividend_shift 452 ssl a4 453 sll a3, a3 # udivisor <<= count 454 455 # test-subtract-and-shift loop 456#if XCHAL_HAVE_LOOPS 457 loopnez a4, .Lloopend 458#endif /* XCHAL_HAVE_LOOPS */ 459.Lloop: 460 bltu a2, a3, .Lzerobit 461 sub a2, a2, a3 462.Lzerobit: 463 srli a3, a3, 1 464#if !XCHAL_HAVE_LOOPS 465 addi a4, a4, -1 466 bnez a4, .Lloop 467#endif /* !XCHAL_HAVE_LOOPS */ 468.Lloopend: 469 470.Lspecial: 471 bltu a2, a3, .Lreturn 472 sub a2, a2, a3 # subtract once more if udividend >= udivisor 473.Lreturn: 474 bgez a7, .Lpositive 475 neg a2, a2 # if (dividend < 0), return -udividend 476.Lpositive: 477 abi_return 478 479.Lle_one: 480 # udivisor is either 0 or 1, so just return 0. 481 # someday we may want to throw an exception if udivisor is 0. 482 movi a2, 0 483 abi_return 484 .size __modsi3,.-__modsi3 485 486#endif /* L_modsi3 */ 487