1/* Copyright (C) 2008-2015 Free Software Foundation, Inc. 2 Contributor: Joern Rennecke <joern.rennecke@embecosm.com> 3 on behalf of Synopsys Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17Under Section 7 of GPL version 3, you are granted additional 18permissions described in the GCC Runtime Library Exception, version 193.1, as published by the Free Software Foundation. 20 21You should have received a copy of the GNU General Public License and 22a copy of the GCC Runtime Library Exception along with this program; 23see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24<http://www.gnu.org/licenses/>. */ 25 26#include "arc-ieee-754.h" 27#if 0 /* DEBUG */ 28 .global __adddf3 29 .balign 4 30__adddf3: 31 push_s blink 32 push_s r2 33 push_s r3 34 push_s r0 35 bl.d __adddf3_c 36 push_s r1 37 ld_s r2,[sp,12] 38 ld_s r3,[sp,8] 39 st_s r0,[sp,12] 40 st_s r1,[sp,8] 41 pop_s r1 42 bl.d __adddf3_asm 43 pop_s r0 44 pop_s r3 45 pop_s r2 46 pop_s blink 47 cmp r0,r2 48 cmp.eq r1,r3 49 jeq_s [blink] 50 bl abort 51 .global __subdf3 52 .balign 4 53__subdf3: 54 push_s blink 55 push_s r2 56 push_s r3 57 push_s r0 58 bl.d __subdf3_c 59 push_s r1 60 ld_s r2,[sp,12] 61 ld_s r3,[sp,8] 62 st_s r0,[sp,12] 63 st_s r1,[sp,8] 64 pop_s r1 65 bl.d __subdf3_asm 66 pop_s r0 67 pop_s r3 68 pop_s r2 69 pop_s blink 70 cmp r0,r2 71 cmp.eq r1,r3 72 jeq_s [blink] 73 bl abort 74#define __adddf3 __adddf3_asm 75#define __subdf3 __subdf3_asm 76#endif /* DEBUG */ 77/* N.B. This is optimized for ARC700. 78 ARC600 has very different scheduling / instruction selection criteria. */ 79 80/* inputs: DBL0, DBL1 (r0-r3) 81 output: DBL0 (r0, r1) 82 clobber: r2-r10, r12, flags 83 All NaN highword bits must be 1. NaN low word is random. */ 84 85 .balign 4 86 .global __adddf3 87 .global __subdf3 88 .long 0x7ff00000 ; exponent mask 89 FUNC(__adddf3) 90 FUNC(__subdf3) 91__subdf3: 92 bxor_l DBL1H,DBL1H,31 93__adddf3: 94 ld r9,[pcl,-8] 95 bmsk r4,DBL0H,30 96 xor r10,DBL0H,DBL1H 97 and r6,DBL1H,r9 98 sub.f r12,r4,r6 99 asr_s r12,r12,20 100 blo .Ldbl1_gt 101 brhs r4,r9,.Linf_nan 102 brhs r12,32,.Large_shift 103 brne r12,0,.Lsmall_shift 104 brge r10,0,.Ladd_same_exp ; r12 == 0 105 106/* After subtracting, we need to normalize; when shifting to place the 107 leading 1 into position for the implicit 1 and adding that to DBL0H, 108 we increment the exponent. Thus, we have to subtract one more than 109 the shift count from the exponent beforehand. Iff the exponent drops thus 110 below zero (before adding in the fraction with the leading one), we have 111 generated a denormal number. Denormal handling is basicallly reducing the 112 shift count so that we produce a zero exponent instead; however, this way 113 the shift count can become zero (if we started out with exponent 1). 114 Therefore, a simple min operation is not good enough, since we don't 115 want to handle a zero normalizing shift in the main path. 116 On the plus side, we don't need to check for denorm input, the result 117 of subtracing these looks just the same as denormals generated during 118 subtraction. */ 119 bmsk r7,DBL1H,30 120 cmp r4,r7 121 cmp.eq DBL0L,DBL1L 122 blo .L_rsub_same_exp 123 sub.f DBL0L,DBL0L,DBL1L 124 bmsk r12,DBL0H,19 125 bic DBL1H,DBL0H,r12 126 sbc.f r4,r4,r7 127 beq_l .Large_cancel 128 norm DBL1L,r4 129 b.d .Lsub_done_same_exp 130 sub r12,DBL1L,9 131 132 .balign 4 133.Linf_nan: 134 ; If both inputs are inf, but with different signs, the result is NaN. 135 asr r12,r10,31 136 or_s DBL1H,DBL1H,r12 137 j_s.d [blink] 138 or.eq DBL0H,DBL0H,DBL1H 139 140 .balign 4 141.L_rsub_same_exp: 142 rsub.f DBL0L,DBL0L,DBL1L 143 bmsk r12,DBL1H,19 144 bic_s DBL1H,DBL1H,r12 145 sbc.f r4,r7,r4 146 beq_l .Large_cancel 147 norm DBL1L,r4 148 149 sub r12,DBL1L,9 150.Lsub_done_same_exp: 151 asl_s r12,r12,20 152 sub_s DBL1L,DBL1L,10 153 sub DBL0H,DBL1H,r12 154 xor.f 0,DBL0H,DBL1H 155 bmi .Ldenorm 156.Lpast_denorm: 157 neg_s r12,DBL1L 158 lsr r7,DBL0L,r12 159 asl r12,r4,DBL1L 160 asl_s DBL0L,DBL0L,DBL1L 161 add_s r12,r12,r7 162 j_s.d [blink] 163 add_l DBL0H,DBL0H,r12 164 .balign 4 165.Ladd_same_exp: 166 /* This is a special case because we can't test for need to shift 167 down by checking if bit 20 of DBL0H changes. OTOH, here we know 168 that we always need to shift down. */ 169 ; The implicit 1 of DBL0 is not shifted together with the 170 ; fraction, thus effectively doubled, compensating for not setting 171 ; implicit1 for DBL1 172 add_s r12,DBL0L,DBL1L 173 lsr.f 0,r12,2 ; round to even 174 breq r6,0,.Ldenorm_add 175 adc.f DBL0L,DBL0L,DBL1L 176 sub r7,DBL1H,DBL0H 177 sub1 r7,r7,r9 ; boost exponent by 2/2 178 rrc DBL0L,DBL0L 179 asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++ 180 add.cs.f DBL0L,DBL0L,0x80000000 181 add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1 182 add.cs DBL0H,DBL0H,1 183 bic.f 0,r9,DBL0H ; check for overflow -> infinity. 184 jne_l [blink] 185 and DBL0H,DBL0H,0xfff00000 186 j_s.d [blink] 187 mov_s DBL0L,0 188 .balign 4 189.Large_shift: 190 brhs r12,55,.Lret_dbl0 191 bmsk_s DBL1H,DBL1H,19 192 brne r6,0,.Lno_denorm_large_shift 193 brhi.d r12,33,.Lfixed_denorm_large_shift 194 sub_s r12,r12,1 195 breq r12,31, .Lfixed_denorm_small_shift 196.Lshift32: 197 mov_s r12,DBL1L 198 mov_s DBL1L,DBL1H 199 brlt.d r10,0,.Lsub 200 mov_s DBL1H,0 201 b_s .Ladd 202.Ldenorm_add: 203 cmp_s r12,DBL1L 204 mov_s DBL0L,r12 205 j_s.d [blink] 206 adc DBL0H,r4,DBL1H 207 208.Lret_dbl0: 209 j_s [blink] 210 .balign 4 211.Lsmall_shift: 212 breq.d r6,0,.Ldenorm_small_shift 213 bmsk_s DBL1H,DBL1H,19 214 bset_s DBL1H,DBL1H,20 215.Lfixed_denorm_small_shift: 216 neg r8,r12 217 asl r4,DBL1H,r8 218 lsr_l DBL1H,DBL1H,r12 219 lsr r5,DBL1L,r12 220 asl r12,DBL1L,r8 221 brge.d r10,0,.Ladd 222 or DBL1L,r4,r5 223/* subtract, abs(DBL0) > abs(DBL1) */ 224/* DBL0H, DBL0L: original values 225 DBL1H, DBL1L: fraction with explicit leading 1, shifted into place 226 r4: orig. DBL0H & 0x7fffffff 227 r6: orig. DBL1H & 0x7ff00000 228 r9: 0x7ff00000 229 r10: orig. DBL0H ^ DBL1H 230 r12: guard bits */ 231 .balign 4 232.Lsub: 233 neg.f r12,r12 234 mov_s r7,DBL1H 235 bmsk r5,DBL0H,19 236 sbc.f DBL0L,DBL0L,DBL1L 237 bic DBL1H,DBL0H,r5 238 bset r5,r5,20 239 sbc.f r4,r5,r7 240 beq_l .Large_cancel_sub 241 norm DBL1L,r4 242 bmsk r6,DBL1H,30 243.Lsub_done: 244 sub_s DBL1L,DBL1L,9 245 breq DBL1L,1,.Lsub_done_noshift 246 asl r5,DBL1L,20 247 sub_s DBL1L,DBL1L,1 248 brlo r6,r5,.Ldenorm_sub 249 sub DBL0H,DBL1H,r5 250.Lpast_denorm_sub: 251 neg_s DBL1H,DBL1L 252 lsr r6,r12,DBL1H 253 asl_s r12,r12,DBL1L 254 and r8,r6,1 255 add1.f 0,r8,r12 256 add.ne.f r12,r12,r12 257 asl r8,DBL0L,DBL1L 258 lsr r12,DBL0L,DBL1H 259 adc.f DBL0L,r8,r6 260 asl r5,r4,DBL1L 261 add_s DBL0H,DBL0H,r12 262 j_s.d [blink] 263 adc DBL0H,DBL0H,r5 264 265 .balign 4 266.Lno_denorm_large_shift: 267 breq.d r12,32,.Lshift32 268 bset_l DBL1H,DBL1H,20 269.Lfixed_denorm_large_shift: 270 neg r8,r12 271 asl r4,DBL1H,r8 272 lsr r5,DBL1L,r12 273 asl.f 0,DBL1L,r8 274 lsr DBL1L,DBL1H,r12 275 or r12,r4,r5 276 tst.eq r12,1 277 or.ne r12,r12,2 278 brlt.d r10,0,.Lsub 279 mov_s DBL1H,0 280 b_l .Ladd 281 282 ; If a denorm is produced without shifting, we have an exact result - 283 ; no need for rounding. 284 .balign 4 285.Ldenorm_sub: 286 lsr DBL1L,r6,20 287 xor DBL0H,r6,DBL1H 288 brne.d DBL1L,1,.Lpast_denorm_sub 289 sub_s DBL1L,DBL1L,1 290.Lsub_done_noshift: 291 add.f 0,r12,r12 292 btst.eq DBL0L,0 293 cmp.eq r12,r12 294 add.cs.f DBL0L,DBL0L,1 295 bclr r4,r4,20 296 j_s.d [blink] 297 adc DBL0H,DBL1H,r4 298 299 .balign 4 300.Ldenorm_small_shift: 301 brne.d r12,1,.Lfixed_denorm_small_shift 302 sub_l r12,r12,1 303 brlt r10,0,.Lsub 304.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter 305 add.f DBL0L,DBL0L,DBL1L 306 add_s DBL1H,DBL1H,DBL0H 307 add.cs DBL1H,DBL1H,1 308 xor_l DBL0H,DBL0H,DBL1H 309 bbit0 DBL0H,20,.Lno_shiftdown 310 lsr.f DBL0H,DBL1H 311 and r4,DBL0L,2 312 bmsk DBL0H,DBL0H,18 313 sbc DBL0H,DBL1H,DBL0H 314 rrc.f DBL0L,DBL0L 315 or.f r12,r12,r4 316 cmp.eq r12,r12 317 add.cs.f DBL0L,DBL0L,1 318 bic.f 0,r9,DBL0H ; check for generating infinity with possible ... 319 jne.d [blink] ; ... non-zero fraction 320 add.cs DBL0H,DBL0H,1 321 mov_s DBL0L,0 322 bmsk DBL1H,DBL0H,19 323 j_s.d [blink] 324 bic_s DBL0H,DBL0H,DBL1H 325.Lno_shiftdown: 326 mov_s DBL0H,DBL1H 327 add.f 0,r12,r12 328 btst.eq DBL0L,0 329 cmp.eq r12,r12 330 add.cs.f DBL0L,DBL0L,1 331 j_s.d [blink] 332 add.cs DBL0H,DBL0H,1 333 .balign 4 334.Ldenorm: 335 bmsk DBL0H,DBL1H,30 336 lsr r12,DBL0H,20 337 xor_s DBL0H,DBL0H,DBL1H 338 sub_l DBL1L,r12,1 339 bgt .Lpast_denorm 340 j_s.d [blink] 341 add_l DBL0H,DBL0H,r4 342 343 .balign 4 344.Large_cancel: 345 ;DBL0L: mantissa DBL1H: sign & exponent 346 norm.f DBL1L,DBL0L 347 bmsk DBL0H,DBL1H,30 348 add_s DBL1L,DBL1L,22 349 mov.mi DBL1L,21 350 add_s r12,DBL1L,1 351 asl_s r12,r12,20 352 beq_s .Lret0 353 brhs.d DBL0H,r12,.Lpast_denorm_large_cancel 354 sub DBL0H,DBL1H,r12 355 bmsk DBL0H,DBL1H,30 356 lsr r12,DBL0H,20 357 xor_s DBL0H,DBL0H,DBL1H 358 sub.f DBL1L,r12,1 359 jle [blink] 360.Lpast_denorm_large_cancel: 361 rsub.f r7,DBL1L,32 362 lsr r7,DBL0L,r7 363 asl_s DBL0L,DBL0L,DBL1L 364 mov.ls r7,DBL0L 365 add_s DBL0H,DBL0H,r7 366 j_s.d [blink] 367 mov.ls DBL0L,0 368.Lret0: 369 j_s.d [blink] 370 mov_l DBL0H,0 371 372/* r4:DBL0L:r12 : unnormalized result fraction 373 DBL1H: result sign and exponent */ 374/* When seeing large cancellation, only the topmost guard bit might be set. */ 375 .balign 4 376.Large_cancel_sub: 377 norm.f DBL1L,DBL0L 378 bpnz.d 0f 379 bmsk DBL0H,DBL1H,30 380 mov r5,22<<20 381 bne.d 1f 382 mov_s DBL1L,21 383 bset r5,r5,5+20 384 add_s DBL1L,DBL1L,32 385 brne r12,0,1f 386 j_s.d [blink] 387 mov_l DBL0H,0 388 .balign 4 3890: add r5,DBL1L,23 390 asl r5,r5,20 391 add_s DBL1L,DBL1L,22 3921: brlo DBL0H,r5,.Ldenorm_large_cancel_sub 393 sub DBL0H,DBL1H,r5 394.Lpast_denorm_large_cancel_sub: 395 rsub.f r7,DBL1L,32 396 lsr r12,r12,r7 397 lsr r7,DBL0L,r7 398 asl_s DBL0L,DBL0L,DBL1L 399 add.ge DBL0H,DBL0H,r7 400 add_s DBL0L,DBL0L,r12 401 add.lt DBL0H,DBL0H,DBL0L 402 mov.eq DBL0L,r12 403 j_s.d [blink] 404 mov.lt DBL0L,0 405 .balign 4 406.Ldenorm_large_cancel_sub: 407 lsr r5,DBL0H,20 408 xor_s DBL0H,DBL0H,DBL1H 409 brgt.d r5,1,.Lpast_denorm_large_cancel_sub 410 sub DBL1L,r5,1 411 j_l [blink] ; denorm, no shift -> no rounding needed. 412 413/* r4: DBL0H & 0x7fffffff 414 r6: DBL1H & 0x7ff00000 415 r9: 0x7ff00000 416 r10: sign difference 417 r12: shift count (negative) */ 418 .balign 4 419.Ldbl1_gt: 420 brhs r6,r9,.Lret_dbl1 ; inf or NaN 421 neg r8,r12 422 brhs r8,32,.Large_shift_dbl0 423.Lsmall_shift_dbl0: 424 breq.d r6,0,.Ldenorm_small_shift_dbl0 425 bmsk_s DBL0H,DBL0H,19 426 bset_s DBL0H,DBL0H,20 427.Lfixed_denorm_small_shift_dbl0: 428 asl r4,DBL0H,r12 429 lsr DBL0H,DBL0H,r8 430 lsr r5,DBL0L,r8 431 asl r12,DBL0L,r12 432 brge.d r10,0,.Ladd_dbl1_gt 433 or DBL0L,r4,r5 434/* subtract, abs(DBL0) < abs(DBL1) */ 435/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place 436 DBL1H, DBL1L: original values 437 r6: orig. DBL1H & 0x7ff00000 438 r9: 0x7ff00000 439 r12: guard bits */ 440 .balign 4 441.Lrsub: 442 neg.f r12,r12 443 bmsk r7,DBL1H,19 444 mov_s r5,DBL0H 445 sbc.f DBL0L,DBL1L,DBL0L 446 bic DBL1H,DBL1H,r7 447 bset r7,r7,20 448 sbc.f r4,r7,r5 449 beq_l .Large_cancel_sub 450 norm DBL1L,r4 451 b_l .Lsub_done ; note: r6 is already set up. 452 453.Lret_dbl1: 454 mov_s DBL0H,DBL1H 455 j_s.d [blink] 456 mov_l DBL0L,DBL1L 457 .balign 4 458.Ldenorm_small_shift_dbl0: 459 sub.f r8,r8,1 460 bne.d .Lfixed_denorm_small_shift_dbl0 461 add_s r12,r12,1 462 brlt r10,0,.Lrsub 463.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter 464 add.f DBL0L,DBL0L,DBL1L 465 add_s DBL0H,DBL0H,DBL1H 466 add.cs DBL0H,DBL0H,1 467 xor DBL1H,DBL0H,DBL1H 468 bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt 469 lsr.f DBL1H,DBL0H 470 and r4,DBL0L,2 471 bmsk DBL1H,DBL1H,18 472 sbc DBL0H,DBL0H,DBL1H 473 rrc.f DBL0L,DBL0L 474 or.f r12,r12,r4 475 cmp.eq r12,r12 476 add.cs.f DBL0L,DBL0L,1 477 bic.f 0,r9,DBL0H ; check for generating infinity with possible ... 478 jne.d [blink] ; ... non-zero fraction 479 add.cs DBL0H,DBL0H,1 480 mov_s DBL0L,0 481 bmsk DBL1H,DBL0H,19 482 j_s.d [blink] 483 bic_s DBL0H,DBL0H,DBL1H 484.Lno_shiftdown_dbl1_gt: 485 add.f 0,r12,r12 486 btst.eq DBL0L,0 487 cmp.eq r12,r12 488 add.cs.f DBL0L,DBL0L,1 489 j_s.d [blink] 490 add.cs DBL0H,DBL0H,1 491 492 .balign 4 493.Large_shift_dbl0: 494 brhs r8,55,.Lret_dbl1 495 bmsk_s DBL0H,DBL0H,19 496 brne r6,0,.Lno_denorm_large_shift_dbl0 497 add_s r12,r12,1 498 brne.d r8,33,.Lfixed_denorm_large_shift_dbl0 499 sub r8,r8,1 500 bset_s DBL0H,DBL0H,20 501.Lshift32_dbl0: 502 mov_s r12,DBL0L 503 mov_s DBL0L,DBL0H 504 brlt.d r10,0,.Lrsub 505 mov_s DBL0H,0 506 b_s .Ladd_dbl1_gt 507 508 .balign 4 509.Lno_denorm_large_shift_dbl0: 510 breq.d r8,32,.Lshift32_dbl0 511 bset_l DBL0H,DBL0H,20 512.Lfixed_denorm_large_shift_dbl0: 513 asl r4,DBL0H,r12 514 lsr r5,DBL0L,r8 515 asl.f 0,DBL0L,r12 516 lsr DBL0L,DBL0H,r8 517 or r12,r4,r5 518 tst.eq r12,1 519 or.ne r12,r12,2 520 brlt.d r10,0,.Lrsub 521 mov_s DBL0H,0 522 b_l .Ladd_dbl1_gt 523 ENDFUNC(__adddf3) 524 ENDFUNC(__subdf3) 525