/* 32 and 64-bit millicode, original author Hewlett-Packard
   adapted for gcc by Paul Bame <bame@debian.org>
   and Alan Modra <alan@linuxcare.com.au>.

   Copyright 2001, 2002, 2003 Free Software Foundation, Inc.

   This file is part of GCC and is released under the terms of
   of the GNU General Public License as published by the Free Software
   Foundation; either version 2, or (at your option) any later version.
   See the file COPYING in the top-level GCC source directory for a copy
   of the license.  */

/* Layout of this file: a shared preamble (register aliases, frame-slot
   offsets, branch/return macros and section-selection macros) followed
   by one millicode routine per L_<name> preprocessor guard.  */

#ifdef pa64
        .level 2.0w
#endif

/* Hardware General Registers.  */
r0:     .reg    %r0
r1:     .reg    %r1
r2:     .reg    %r2
r3:     .reg    %r3
r4:     .reg    %r4
r5:     .reg    %r5
r6:     .reg    %r6
r7:     .reg    %r7
r8:     .reg    %r8
r9:     .reg    %r9
r10:    .reg    %r10
r11:    .reg    %r11
r12:    .reg    %r12
r13:    .reg    %r13
r14:    .reg    %r14
r15:    .reg    %r15
r16:    .reg    %r16
r17:    .reg    %r17
r18:    .reg    %r18
r19:    .reg    %r19
r20:    .reg    %r20
r21:    .reg    %r21
r22:    .reg    %r22
r23:    .reg    %r23
r24:    .reg    %r24
r25:    .reg    %r25
r26:    .reg    %r26
r27:    .reg    %r27
r28:    .reg    %r28
r29:    .reg    %r29
r30:    .reg    %r30
r31:    .reg    %r31

/* Hardware Space Registers.  */
sr0:    .reg    %sr0
sr1:    .reg    %sr1
sr2:    .reg    %sr2
sr3:    .reg    %sr3
sr4:    .reg    %sr4
sr5:    .reg    %sr5
sr6:    .reg    %sr6
sr7:    .reg    %sr7

/* Hardware Floating Point Registers.  */
fr0:    .reg    %fr0
fr1:    .reg    %fr1
fr2:    .reg    %fr2
fr3:    .reg    %fr3
fr4:    .reg    %fr4
fr5:    .reg    %fr5
fr6:    .reg    %fr6
fr7:    .reg    %fr7
fr8:    .reg    %fr8
fr9:    .reg    %fr9
fr10:   .reg    %fr10
fr11:   .reg    %fr11
fr12:   .reg    %fr12
fr13:   .reg    %fr13
fr14:   .reg    %fr14
fr15:   .reg    %fr15

/* Hardware Control Registers.  */
cr11:   .reg    %cr11
sar:    .reg    %cr11           /* Shift Amount Register */

/* Software Architecture General Registers.  */
rp:     .reg    r2              /* return pointer */
#ifdef pa64
mrp:    .reg    r2              /* millicode return pointer */
#else
mrp:    .reg    r31             /* millicode return pointer */
#endif
ret0:   .reg    r28             /* return value */
ret1:   .reg    r29             /* return value (high part of double) */
sp:     .reg    r30             /* stack pointer */
dp:     .reg    r27             /* data pointer */
arg0:   .reg    r26             /* argument */
arg1:   .reg    r25             /* argument or high part of double argument */
arg2:   .reg    r24             /* argument */
arg3:   .reg    r23             /* argument or high part of double argument */

/* Software Architecture Space Registers.  */
/*              sr0     ; return link from BLE */
sret:   .reg    sr1             /* return value */
sarg:   .reg    sr1             /* argument */
/*              sr4     ; PC SPACE tracker */
/*              sr5     ; process private data */

/* Frame Offsets (millicode convention!)  Used when calling other
   millicode routines.  Stack unwinding is dependent upon these
   definitions.  */
r31_slot:       .equ    -20     /* "current RP" slot */
sr0_slot:       .equ    -16     /* "static link" slot */
#if defined(pa64)
mrp_slot:       .equ    -16     /* "current RP" slot */
psp_slot:       .equ    -8      /* "previous SP" slot */
#else
mrp_slot:       .equ    -20     /* "current RP" slot (replacing "r31_slot") */
#endif


/* Helpers that emit a named constant / a named register alias.  */
#define DEFINE(name,value)name: .EQU value
#define RDEFINE(name,value)name: .REG value

/* Branch and return macros.  With "milliext" defined the millicode is
   reached and left with inter-space branches (BE/BLE through sr7/sr0);
   otherwise plain local branches and BV returns through mrp are used.
   The "N" variants nullify the delay-slot instruction.  */
#ifdef milliext
#define MILLI_BE(lbl) BE lbl(sr7,r0)
#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
#define MILLIRETN BE,n 0(sr0,mrp)
#define MILLIRET BE 0(sr0,mrp)
#define MILLI_RETN BE,n 0(sr0,mrp)
#define MILLI_RET BE 0(sr0,mrp)
#else
#define MILLI_BE(lbl) B lbl
#define MILLI_BEN(lbl) B,n lbl
#define MILLI_BLE(lbl) BL lbl,mrp
#define MILLI_BLEN(lbl) BL,n lbl,mrp
#define MILLIRETN BV,n 0(mrp)
#define MILLIRET BV 0(mrp)
#define MILLI_RETN BV,n 0(mrp)
#define MILLI_RET BV 0(mrp)
#endif

/* Token pasting, for both ISO and traditional preprocessors.  */
#ifdef __STDC__
#define CAT(a,b) a##b
#else
#define CAT(a,b) a/**/b
#endif

/* Section selection and symbol-definition macros, per object format.
   GSYM defines a global label, LSYM a local label and LREF refers to a
   local label.  The leading "!" starts a new assembler statement so the
   label lands at the beginning of a statement.  */
#ifdef ELF
#define SUBSPA_MILLI .section .text
#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
#define ATTR_MILLI
#define SUBSPA_DATA .section .data
#define ATTR_DATA
#define GLOBAL $global$
#define GSYM(sym) !sym:
#define LSYM(sym) !CAT(.L,sym:)
#define LREF(sym) CAT(.L,sym)

#else

#ifdef coff
/* This used to be .milli but since link32 places different named
   sections in different segments millicode ends up a long ways away
   from .text (1meg?).  This way they will be a lot closer.

   The SUBSPA_MILLI_* specify locality sets for certain millicode
   modules in order to ensure that modules that call one another are
   placed close together.  Without locality sets this is unlikely to
   happen because of the Dynamite linker library search algorithm.  We
   want these modules close together so that short calls always reach
   (we don't want to require long calls or use long call stubs).  */

#define SUBSPA_MILLI .subspa .text
#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
#define ATTR_MILLI .attr code,read,execute
#define SUBSPA_DATA .subspa .data
#define ATTR_DATA .attr init_data,read,write
#define GLOBAL _gp
#else
#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
#define SUBSPA_MILLI_DIV SUBSPA_MILLI
#define SUBSPA_MILLI_MUL SUBSPA_MILLI
#define ATTR_MILLI
#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
#define ATTR_DATA
#define GLOBAL $global$
#endif
#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16

#define GSYM(sym) !sym
#define LSYM(sym) !CAT(L$,sym)
#define LREF(sym) CAT(L$,sym)
#endif

#ifdef L_dyncall
/* ROUTINE:     $$dyncall

   Indirect call through %r22.  If %r22 is a plabel (function
   descriptor) address, the descriptor is dereferenced first: the new
   LTP value is loaded into %r19 and the real target address into %r22.
   The caller's return address (%r2) is stored at -24(%r30) in the
   delay slot of the final branch.  */
        SUBSPA_MILLI
        /* This routine is code, so it takes the millicode attributes like
           every other routine in this file.  It used to say ATTR_DATA,
           which under "coff" tagged the code subspace as
           init_data,read,write.  Both macros are empty for ELF and SOM.  */
        ATTR_MILLI
GSYM($$dyncall)
        .export $$dyncall,millicode
        .proc
        .callinfo millicode
        .entry
        bb,>=,n %r22,30,LREF(1)         ; branch if not plabel address
        depi    0,31,2,%r22             ; clear the two least significant bits
        ldw     4(%r22),%r19            ; load new LTP value
        ldw     0(%r22),%r22            ; load address of target
LSYM(1)
#if defined(LINUX) || defined(NETBSD)
        bv      %r0(%r22)               ; branch to the real target
#else
        ldsid   (%sr0,%r22),%r1         ; get the "space ident" selected by r22
        mtsp    %r1,%sr0                ; move that space identifier into sr0
        be      0(%sr0,%r22)            ; branch to the real target
#endif
        stw     %r2,-24(%r30)           ; delay slot: save return address into frame marker
        .exit
        .procend
#endif

#ifdef L_divI
/* ROUTINES:    $$divI, $$divoI

   Single precision divide for signed binary integers.

   The quotient is truncated towards zero.
   The sign of the quotient is the XOR of the signs of the dividend and
   divisor.
   Divide by zero is trapped.
   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.

   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .
sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 = undefined
   .    arg1 = undefined
   .    ret1 = quotient

   OTHER REGISTERS AFFECTED:
   .    r1   = undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:
   .            divisor is zero  (traps with ADDIT,=  0,25,0)
   .            dividend==-2**31  and divisor==-1 and routine is $$divoI
   .                             (traps with ADDO  26,25,0)
   .    Changes memory at the following places:
   .            NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable.
   .    Suitable for internal or external millicode.
   .    Assumes the special millicode register conventions.

   DISCUSSION:
   .    Branches to other millicode routines (MILLI_BEN):
   .            $$divI_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
   .
   .    For selected divisors, calls a divide by constant routine written by
   .    Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
   .
   .    The only overflow case is -2**31 divided by -1.
   .    Both routines return -2**31 but only $$divoI traps.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)                    /* r29 */
RDEFINE(temp1,arg0)
        SUBSPA_MILLI_DIV
        ATTR_MILLI
        .import $$divI_2,millicode
        .import $$divI_3,millicode
        .import $$divI_4,millicode
        .import $$divI_5,millicode
        .import $$divI_6,millicode
        .import $$divI_7,millicode
        .import $$divI_8,millicode
        .import $$divI_9,millicode
        .import $$divI_10,millicode
        .import $$divI_12,millicode
        .import $$divI_14,millicode
        .import $$divI_15,millicode
        .export $$divI,millicode
        .export $$divoI,millicode
        .proc
        .callinfo millicode
        .entry

/* $$divoI differs from $$divI only in this first test: a divisor of -1
   is sent to the overflow-trapping negate at "negative1".  */
GSYM($$divoI)
        comib,=,n -1,arg1,LREF(negative1)       /* divisor == -1 ? */

GSYM($$divI)
        ldo     -1(arg1),temp           /* temp = divisor - 1 */
        and,<>  arg1,temp,r0            /* more than one bit set: not a power of 2 */
        addi,>  0,arg1,r0               /* single bit and divisor > 0: shift path */
        b,n     LREF(neg_denom)

/* Positive power-of-two divisor.  temp holds divisor - 1 on entry.  */
LSYM(pow2)
        addi,>= 0,arg0,retreg           /* retreg = dividend; if it is negative, */
        add     arg0,temp,retreg        /*   add (divisor - 1) to correct for shifts */
        extru,= arg1,15,16,temp         /* test divisor with 0xffff0000 */
        extrs   retreg,15,16,retreg     /* retreg = retreg >> 16 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
        ldi     0xcc,temp1              /* mask for the ">> 2" test */
        extru,= arg1,23,8,temp          /* test divisor with 0xff00 */
        extrs   retreg,23,24,retreg     /* retreg = retreg >> 8 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
        ldi     0xaa,temp               /* mask for the ">> 1" test */
        extru,= arg1,27,4,r0            /* test divisor with 0xf0 */
        extrs   retreg,27,28,retreg     /* retreg = retreg >> 4 */
        and,=   arg1,temp1,r0           /* test divisor with 0xcc */
        extrs   retreg,29,30,retreg     /* retreg = retreg >> 2 */
        and,=   arg1,temp,r0            /* test divisor with 0xaa */
        extrs   retreg,30,31,retreg     /* retreg = retreg >> 1 */
        MILLIRETN

/* Divisor is not a positive power of two.  If it is the negative of a
   power of two, negate both operands and reuse the shift path.  */
LSYM(neg_denom)
        addi,<  0,arg1,r0               /* divisor >= 0: it is not a power of 2 */
        b,n     LREF(regular_seq)
        sub     r0,arg1,temp            /* temp = -divisor */
        comb,=,n arg1,temp,LREF(regular_seq)    /* test against 0x80000000 and 0 */
        ldo     -1(temp),retreg         /* is there at most one bit set ? */
        and,=   temp,retreg,r0          /* if so, -divisor is a power of 2 */
        b,n     LREF(regular_seq)
        sub     r0,arg0,retreg          /* negate the dividend */
        comb,=,n arg0,retreg,LREF(regular_seq)  /* test against 0x80000000 */
        copy    retreg,arg0             /* set up arg0, arg1 and temp */
        copy    temp,arg1               /*   before branching to pow2 */
        b       LREF(pow2)
        ldo     -1(arg1),temp           /* delay slot: temp = divisor - 1 */

/* General case.  Divisors 0..15 (unsigned compare) go to the table.  */
LSYM(regular_seq)
        comib,>>=,n 15,arg1,LREF(small_divisor)
        add,>=  0,arg0,retreg           /* move dividend; if retreg < 0, */
LSYM(normal)
        subi    0,retreg,retreg         /*   make it positive */
        sub     0,arg1,temp             /* clear carry, negate the divisor */
        ds      0,temp,0                /* set V-bit to the complement of */
                                        /*   the divisor sign */
        add     retreg,retreg,retreg    /* shift msb bit into carry */
        /* 32 divide steps; each ADDC shifts retreg with/into carry.  */
        ds      r0,arg1,temp            /* step  1, if no carry */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  2 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  3 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  4 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  5 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  6 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  7 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  8 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  9 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 10 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 11 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 12 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 13 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 14 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 15 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 16 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 17 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 18 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 19 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 20 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 21 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 22 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 23 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 24 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 25 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 26 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 27 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 28 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 29 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 30 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 31 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 32 */
        addc    retreg,retreg,retreg    /* shift last retreg bit into retreg */
        xor,>=  arg0,arg1,0             /* get correct sign of quotient */
        sub     0,retreg,retreg         /*   based on operand signs */
        MILLIRETN
        nop

LSYM(small_divisor)

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32 bit integers).  We must not be misled
   by "1" bits left in the upper 32 bits.  */
        depd    %r0,31,32,%r25
#endif
        blr,n   arg1,r0                 /* index into the table below */
        nop
/* table for divisor == 0,1, ... ,15; two instructions per entry */
        addit,= 0,arg1,r0               /* trap if divisor == 0 */
        nop
        MILLIRET                        /* divisor == 1 */
        copy    arg0,retreg
        MILLI_BEN($$divI_2)             /* divisor == 2 */
        nop
        MILLI_BEN($$divI_3)             /* divisor == 3 */
        nop
        MILLI_BEN($$divI_4)             /* divisor == 4 */
        nop
        MILLI_BEN($$divI_5)             /* divisor == 5 */
        nop
        MILLI_BEN($$divI_6)             /* divisor == 6 */
        nop
        MILLI_BEN($$divI_7)             /* divisor == 7 */
        nop
        MILLI_BEN($$divI_8)             /* divisor == 8 */
        nop
        MILLI_BEN($$divI_9)             /* divisor == 9 */
        nop
        MILLI_BEN($$divI_10)            /* divisor == 10 */
        nop
        b       LREF(normal)            /* divisor == 11 */
        add,>=  0,arg0,retreg
        MILLI_BEN($$divI_12)            /* divisor == 12 */
        nop
        b       LREF(normal)            /* divisor == 13 */
        add,>=  0,arg0,retreg
        MILLI_BEN($$divI_14)            /* divisor == 14 */
        nop
        MILLI_BEN($$divI_15)            /* divisor == 15 */
        nop

/* $$divoI with divisor == -1.  */
LSYM(negative1)
        sub     0,arg0,retreg           /* result is negation of dividend */
        MILLIRET
        addo    arg0,arg1,r0            /* trap iff dividend==0x80000000 && divisor==-1 */
        .exit
        .procend
        .end
#endif

#ifdef L_divU
/* ROUTINE:     $$divU
   .
   .    Single precision divide for unsigned integers.
   .
   .    Quotient is truncated towards zero.
   .
Traps on divide by zero.

   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 = undefined
   .    arg1 = undefined
   .    ret1 = quotient

   OTHER REGISTERS AFFECTED:
   .    r1   = undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:
   .            divisor is zero
   .    Changes memory at the following places:
   .            NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable.
   .    Does not create a stack frame.
   .    Suitable for internal or external millicode.
   .    Assumes the special millicode register conventions.

   DISCUSSION:
   .    Branches to other millicode routines (MILLI_BEN):
   .            $$divU_# for 3,5,6,7,9,10,12,14,15
   .
   .    For selected small divisors calls the special divide by constant
   .    routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)                    /* r29 */
RDEFINE(temp1,arg0)
        SUBSPA_MILLI_DIV
        ATTR_MILLI
        .export $$divU,millicode
        .import $$divU_3,millicode
        .import $$divU_5,millicode
        .import $$divU_6,millicode
        .import $$divU_7,millicode
        .import $$divU_9,millicode
        .import $$divU_10,millicode
        .import $$divU_12,millicode
        .import $$divU_14,millicode
        .import $$divU_15,millicode
        .proc
        .callinfo millicode
        .entry
GSYM($$divU)
        ldo     -1(arg1),temp           /* temp = divisor - 1 */
        and,=   arg1,temp,r0            /* at most one bit set: power of 2 */
        b       LREF(regular_seq)
        addit,= 0,arg1,0                /* trap for zero dvr */
        /* Power-of-two divisor: shift the dividend right by log2(divisor).  */
        copy    arg0,retreg
        extru,= arg1,15,16,temp         /* test divisor with 0xffff0000 */
        extru   retreg,15,16,retreg     /* retreg = retreg >> 16 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
        ldi     0xcc,temp1              /* mask for the ">> 2" test */
        extru,= arg1,23,8,temp          /* test divisor with 0xff00 */
        extru   retreg,23,24,retreg     /* retreg = retreg >> 8 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
        ldi     0xaa,temp               /* mask for the ">> 1" test */
        extru,= arg1,27,4,r0            /* test divisor with 0xf0 */
        extru   retreg,27,28,retreg     /* retreg = retreg >> 4 */
        and,=   arg1,temp1,r0           /* test divisor with 0xcc */
        extru   retreg,29,30,retreg     /* retreg = retreg >> 2 */
        and,=   arg1,temp,r0            /* test divisor with 0xaa */
        extru   retreg,30,31,retreg     /* retreg = retreg >> 1 */
        MILLIRETN
        nop

LSYM(regular_seq)
        comib,>= 15,arg1,LREF(special_divisor)
        /* The subtract is not nullified since it does no harm and can be
           used by the two cases that branch back to "normal".  */
        subi    0,arg1,temp             /* clear carry, negate the divisor */
        ds      r0,temp,r0              /* set V-bit to 1 */
LSYM(normal)
        add     arg0,arg0,retreg        /* shift msb bit into carry */
        /* 32 divide steps; each ADDC shifts retreg with/into carry.  */
        ds      r0,arg1,temp            /* step  1, if no carry */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  2 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  3 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  4 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  5 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  6 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  7 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  8 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step  9 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 10 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 11 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 12 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 13 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 14 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 15 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 16 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 17 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 18 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 19 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 20 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 21 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 22 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 23 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 24 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 25 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 26 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 27 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 28 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 29 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 30 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 31 */
        addc    retreg,retreg,retreg
        ds      temp,arg1,temp          /* step 32 */
        MILLIRET
        addc    retreg,retreg,retreg    /* delay slot: shift last retreg bit into retreg */

/* Handle the cases where divisor is a small constant or has high bit on.  */
LSYM(special_divisor)
/*      blr     arg1,r0 */
/*      comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */

/* Pratap 8/13/90.  The 815 Stirling chip set has a bug that prevents us from
   generating such a blr, comib sequence.  A problem in nullification.  So I
   rewrote this code.  */

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32 bit unsigned integers).  We must not be misled
   by "1" bits left in the upper 32 bits.  */
        depd    %r0,31,32,%r25
#endif
        comib,> 0,arg1,LREF(big_divisor)
        nop
        blr     arg1,r0                 /* index into the table below */
        nop

/* Table for divisor == 0,1, ... ,15; two instructions per entry.  */
LSYM(zero_divisor)      /* this label is here to provide external visibility */
        addit,= 0,arg1,0                /* trap for zero dvr */
        nop
        MILLIRET                        /* divisor == 1 */
        copy    arg0,retreg
        MILLIRET                        /* divisor == 2 */
        extru   arg0,30,31,retreg
        MILLI_BEN($$divU_3)             /* divisor == 3 */
        nop
        MILLIRET                        /* divisor == 4 */
        extru   arg0,29,30,retreg
        MILLI_BEN($$divU_5)             /* divisor == 5 */
        nop
        MILLI_BEN($$divU_6)             /* divisor == 6 */
        nop
        MILLI_BEN($$divU_7)             /* divisor == 7 */
        nop
        MILLIRET                        /* divisor == 8 */
        extru   arg0,28,29,retreg
        MILLI_BEN($$divU_9)             /* divisor == 9 */
        nop
        MILLI_BEN($$divU_10)            /* divisor == 10 */
        nop
        b       LREF(normal)            /* divisor == 11 */
        ds      r0,temp,r0              /* set V-bit to 1 */
        MILLI_BEN($$divU_12)            /* divisor == 12 */
        nop
        b       LREF(normal)            /* divisor == 13 */
        ds      r0,temp,r0              /* set V-bit to 1 */
        MILLI_BEN($$divU_14)            /* divisor == 14 */
        nop
        MILLI_BEN($$divU_15)            /* divisor == 15 */
        nop

/* Handle the case where the high bit is on in the divisor.
   Compute:     if( dividend>=divisor) quotient=1; else quotient=0;
   Note:        dividend>=divisor iff dividend-divisor does not borrow
   and          not borrow iff carry.  */
LSYM(big_divisor)
        sub     arg0,arg1,r0
        MILLIRET
        addc    r0,r0,retreg            /* delay slot: quotient = carry */
        .exit
        .procend
        .end
#endif

#ifdef L_remI
/* ROUTINE:     $$remI

   DESCRIPTION:
   .    $$remI returns the remainder of the division of two signed 32-bit
   .    integers.  The sign of the remainder is the same as the sign of
   .    the dividend.


   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 = destroyed
   .    arg1 = destroyed
   .    ret1 = remainder

   OTHER REGISTERS AFFECTED:
   .    r1   = undefined

   SIDE EFFECTS:
   .
Causes a trap under the following conditions:  DIVIDE BY ZERO
   .    Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable
   .    Does not create a stack frame
   .    Is usable for internal or external millicode

   DISCUSSION:
   .    Calls other millicode routines via mrp:  NONE
   .    Calls other millicode routines:  NONE  */

RDEFINE(tmp,r1)
RDEFINE(retreg,ret1)

        SUBSPA_MILLI
        ATTR_MILLI
        .proc
        .callinfo millicode
        .entry
/* $$remI and $$remoI share one entry point.  */
GSYM($$remI)
GSYM($$remoI)
        .export $$remI,MILLICODE
        .export $$remoI,MILLICODE
        ldo     -1(arg1),tmp            /* tmp = divisor - 1 */
        and,<>  arg1,tmp,r0             /* more than one bit set: not a power of 2 */
        addi,>  0,arg1,r0               /* single bit and divisor > 0: */
                                        /*   use the power of 2 path */
        b,n     LREF(neg_denom)

/* Positive power-of-two divisor: remainder = |dividend| & (divisor - 1),
   given the sign of the dividend.  */
LSYM(pow2)
        comb,>,n 0,arg0,LREF(neg_num)   /* is numerator < 0 ? */
        and     arg0,tmp,retreg         /* get the result */
        MILLIRETN
LSYM(neg_num)
        subi    0,arg0,arg0             /* negate numerator */
        and     arg0,tmp,retreg         /* get the result */
        subi    0,retreg,retreg         /* negate result */
        MILLIRETN

/* Divisor is not a positive power of two; check for the negative of a
   power of two, where the same mask (retreg = -divisor - 1) applies.  */
LSYM(neg_denom)
        addi,<  0,arg1,r0               /* divisor >= 0: it is not a power */
                                        /*   of 2 */
        b,n     LREF(regular_seq)
        sub     r0,arg1,tmp             /* tmp = -divisor */
        comb,=,n arg1,tmp,LREF(regular_seq)     /* test against 0x80000000 and 0 */
        ldo     -1(tmp),retreg          /* is there at most one bit set ? */
        and,=   tmp,retreg,r0           /* if not, go to regular_seq */
        b,n     LREF(regular_seq)
        comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
        and     arg0,retreg,retreg
        MILLIRETN
LSYM(neg_num_2)
        subi    0,arg0,tmp              /* test against 0x80000000 */
        and     tmp,retreg,retreg
        subi    0,retreg,retreg
        MILLIRETN

/* General case: 32 divide steps, then remainder correction.  */
LSYM(regular_seq)
        addit,= 0,arg1,0                /* trap if div by zero */
        add,>=  0,arg0,retreg           /* move dividend; if retreg < 0, */
        sub     0,retreg,retreg         /*   make it positive */
        sub     0,arg1, tmp             /* clear carry, negate the divisor */
        ds      0, tmp,0                /* set V-bit to the complement of */
                                        /*   the divisor sign */
        or      0,0, tmp                /* clear tmp */
        add     retreg,retreg,retreg    /* shift msb bit into carry */
        /* Each ADDC below shifts retreg with/into carry.  */
        ds      tmp,arg1, tmp           /* step  1; if no carry out, */
                                        /*   msb of quotient = 0 */
        addc    retreg,retreg,retreg
LSYM(t1)
        ds      tmp,arg1, tmp           /* step  2 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  3 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  4 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  5 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  6 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  7 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  8 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step  9 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 10 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 11 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 12 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 13 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 14 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 15 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 16 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 17 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 18 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 19 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 20 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 21 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 22 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 23 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 24 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 25 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 26 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 27 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 28 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 29 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 30 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 31 */
        addc    retreg,retreg,retreg
        ds      tmp,arg1, tmp           /* step 32 */
        addc    retreg,retreg,retreg    /* shift last bit into retreg */
        movb,>=,n tmp,retreg,LREF(finish)       /* branch if pos. tmp */
        add,<   arg1,0,0                /* if arg1 > 0, add arg1 */
        add,tr  tmp,arg1,retreg         /*   for correcting remainder tmp */
        sub     tmp,arg1,retreg         /* else add absolute value arg1 */
LSYM(finish)
        add,>=  arg0,0,0                /* set sign of remainder */
        sub     0,retreg,retreg         /*   to sign of dividend */
        MILLIRET
        nop
        .exit
        .procend
#ifdef milliext
        .origin 0x00000200
#endif
        .end
#endif

#ifdef L_remU
/* ROUTINE:     $$remU
   .    Single precision divide for remainder with unsigned binary integers.
   .
   .    The remainder must be dividend-(dividend/divisor)*divisor.
   .    Divide by zero is trapped.

   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 = undefined
   .    arg1 = undefined
   .    ret1 = remainder

   OTHER REGISTERS AFFECTED:
   .    r1   = undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:  DIVIDE BY ZERO
   .    Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable.
   .    Does not create a stack frame.
   .    Suitable for internal or external millicode.
   .    Assumes the special millicode register conventions.

   DISCUSSION:
   .    Calls other millicode routines using mrp: NONE
   .    Calls other millicode routines: NONE  */


RDEFINE(temp,r1)
RDEFINE(rmndr,ret1)                     /* r29 */
        SUBSPA_MILLI
        ATTR_MILLI
        .export $$remU,millicode
        .proc
        .callinfo millicode
        .entry
GSYM($$remU)
        ldo     -1(arg1),temp           /* is there at most one bit set ?
*/ 903 and,= arg1,temp,r0 /* if not, don't use power of 2 */ 904 b LREF(regular_seq) 905 addit,= 0,arg1,r0 /* trap on div by zero */ 906 and arg0,temp,rmndr /* get the result for power of 2 */ 907 MILLIRETN 908LSYM(regular_seq) 909 comib,>=,n 0,arg1,LREF(special_case) 910 subi 0,arg1,rmndr /* clear carry, negate the divisor */ 911 ds r0,rmndr,r0 /* set V-bit to 1 */ 912 add arg0,arg0,temp /* shift msb bit into carry */ 913 ds r0,arg1,rmndr /* 1st divide step, if no carry */ 914 addc temp,temp,temp /* shift temp with/into carry */ 915 ds rmndr,arg1,rmndr /* 2nd divide step */ 916 addc temp,temp,temp /* shift temp with/into carry */ 917 ds rmndr,arg1,rmndr /* 3rd divide step */ 918 addc temp,temp,temp /* shift temp with/into carry */ 919 ds rmndr,arg1,rmndr /* 4th divide step */ 920 addc temp,temp,temp /* shift temp with/into carry */ 921 ds rmndr,arg1,rmndr /* 5th divide step */ 922 addc temp,temp,temp /* shift temp with/into carry */ 923 ds rmndr,arg1,rmndr /* 6th divide step */ 924 addc temp,temp,temp /* shift temp with/into carry */ 925 ds rmndr,arg1,rmndr /* 7th divide step */ 926 addc temp,temp,temp /* shift temp with/into carry */ 927 ds rmndr,arg1,rmndr /* 8th divide step */ 928 addc temp,temp,temp /* shift temp with/into carry */ 929 ds rmndr,arg1,rmndr /* 9th divide step */ 930 addc temp,temp,temp /* shift temp with/into carry */ 931 ds rmndr,arg1,rmndr /* 10th divide step */ 932 addc temp,temp,temp /* shift temp with/into carry */ 933 ds rmndr,arg1,rmndr /* 11th divide step */ 934 addc temp,temp,temp /* shift temp with/into carry */ 935 ds rmndr,arg1,rmndr /* 12th divide step */ 936 addc temp,temp,temp /* shift temp with/into carry */ 937 ds rmndr,arg1,rmndr /* 13th divide step */ 938 addc temp,temp,temp /* shift temp with/into carry */ 939 ds rmndr,arg1,rmndr /* 14th divide step */ 940 addc temp,temp,temp /* shift temp with/into carry */ 941 ds rmndr,arg1,rmndr /* 15th divide step */ 942 addc temp,temp,temp /* shift temp with/into carry */ 943 ds 
rmndr,arg1,rmndr /* 16th divide step */ 944 addc temp,temp,temp /* shift temp with/into carry */ 945 ds rmndr,arg1,rmndr /* 17th divide step */ 946 addc temp,temp,temp /* shift temp with/into carry */ 947 ds rmndr,arg1,rmndr /* 18th divide step */ 948 addc temp,temp,temp /* shift temp with/into carry */ 949 ds rmndr,arg1,rmndr /* 19th divide step */ 950 addc temp,temp,temp /* shift temp with/into carry */ 951 ds rmndr,arg1,rmndr /* 20th divide step */ 952 addc temp,temp,temp /* shift temp with/into carry */ 953 ds rmndr,arg1,rmndr /* 21st divide step */ 954 addc temp,temp,temp /* shift temp with/into carry */ 955 ds rmndr,arg1,rmndr /* 22nd divide step */ 956 addc temp,temp,temp /* shift temp with/into carry */ 957 ds rmndr,arg1,rmndr /* 23rd divide step */ 958 addc temp,temp,temp /* shift temp with/into carry */ 959 ds rmndr,arg1,rmndr /* 24th divide step */ 960 addc temp,temp,temp /* shift temp with/into carry */ 961 ds rmndr,arg1,rmndr /* 25th divide step */ 962 addc temp,temp,temp /* shift temp with/into carry */ 963 ds rmndr,arg1,rmndr /* 26th divide step */ 964 addc temp,temp,temp /* shift temp with/into carry */ 965 ds rmndr,arg1,rmndr /* 27th divide step */ 966 addc temp,temp,temp /* shift temp with/into carry */ 967 ds rmndr,arg1,rmndr /* 28th divide step */ 968 addc temp,temp,temp /* shift temp with/into carry */ 969 ds rmndr,arg1,rmndr /* 29th divide step */ 970 addc temp,temp,temp /* shift temp with/into carry */ 971 ds rmndr,arg1,rmndr /* 30th divide step */ 972 addc temp,temp,temp /* shift temp with/into carry */ 973 ds rmndr,arg1,rmndr /* 31st divide step */ 974 addc temp,temp,temp /* shift temp with/into carry */ 975 ds rmndr,arg1,rmndr /* 32nd divide step, */ 976 comiclr,<= 0,rmndr,r0 977 add rmndr,arg1,rmndr /* correction */ 978 MILLIRETN 979 nop 980 981/* Putting >= on the last DS and deleting COMICLR does not work! 
*/ 982LSYM(special_case) 983 sub,>>= arg0,arg1,rmndr 984 copy arg0,rmndr 985 MILLIRETN 986 nop 987 .exit 988 .procend 989 .end 990#endif 991 992#ifdef L_div_const 993/* ROUTINE: $$divI_2 994 . $$divI_3 $$divU_3 995 . $$divI_4 996 . $$divI_5 $$divU_5 997 . $$divI_6 $$divU_6 998 . $$divI_7 $$divU_7 999 . $$divI_8 1000 . $$divI_9 $$divU_9 1001 . $$divI_10 $$divU_10 1002 . 1003 . $$divI_12 $$divU_12 1004 . 1005 . $$divI_14 $$divU_14 1006 . $$divI_15 $$divU_15 1007 . $$divI_16 1008 . $$divI_17 $$divU_17 1009 . 1010 . Divide by selected constants for single precision binary integers. 1011 1012 INPUT REGISTERS: 1013 . arg0 == dividend 1014 . mrp == return pc 1015 . sr0 == return space when called externally 1016 1017 OUTPUT REGISTERS: 1018 . arg0 = undefined 1019 . arg1 = undefined 1020 . ret1 = quotient 1021 1022 OTHER REGISTERS AFFECTED: 1023 . r1 = undefined 1024 1025 SIDE EFFECTS: 1026 . Causes a trap under the following conditions: NONE 1027 . Changes memory at the following places: NONE 1028 1029 PERMISSIBLE CONTEXT: 1030 . Unwindable. 1031 . Does not create a stack frame. 1032 . Suitable for internal or external millicode. 1033 . Assumes the special millicode register conventions. 1034 1035 DISCUSSION: 1036 . Calls other millicode routines using mrp: NONE 1037 . Calls other millicode routines: NONE */ 1038 1039 1040/* TRUNCATED DIVISION BY SMALL INTEGERS 1041 1042 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 1043 (with y fixed). 1044 1045 Let a = floor(z/y), for some choice of z. Note that z will be 1046 chosen so that division by z is cheap. 1047 1048 Let r be the remainder(z/y). In other words, r = z - ay. 1049 1050 Now, our method is to choose a value for b such that 1051 1052 q'(x) = floor((ax+b)/z) 1053 1054 is equal to q(x) over as large a range of x as possible. 
   If the two are equal over a sufficiently large range, and if it is
   easy to form the product (ax), and it is easy to divide by z, then
   we can perform the division much faster than the general division
   algorithm.

   So, we want the following to be true:

   .	For x in the following range:
   .
   .	    ky <= x < (k+1)y
   .
   .	implies that
   .
   .	    k <= (ax+b)/z < (k+1)

   We want to determine b such that this is true for all k in the
   range {0..K} for some maximum K.

   Since (ax+b) is an increasing function of x, we can take each
   bound separately to determine the "best" value for b.

   For the upper bound, evaluate at the largest x in the range,
   x = (k+1)y-1:

   (ax+b)/z < (k+1)            implies

   a((k+1)y-1)+b < (k+1)z      implies

   b < a + (k+1)(z-ay)         implies

   b < a + (k+1)r

   This needs to be true for all k in the range {0..K}.  In
   particular, it is true for k = 0 and this leads to a maximum
   acceptable value for b.

   b < a+r   or   b <= a+r-1

   Taking the other bound (evaluated at the smallest x in the range,
   x = ky), we have

   k <= (ax+b)/z               implies

   k <= (aky+b)/z              implies

   k(z-ay) <= b                implies

   kr <= b

   Clearly, the largest range for k will be achieved by maximizing b,
   when r is not zero.  When r is zero, then the simplest choice for b
   is 0.  When r is not 0, set

   .	b = a+r-1

   Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
   for all x in the range:

   .	0 <= x < (K+1)y

   We need to determine what K is.  Of our two bounds,

   .	b < a+(k+1)r	is satisfied for all k >= 0, by construction.

   The other bound is

   .	kr <= b

   This is always true if r = 0.  If r is not 0 (the usual case), then
   K = floor((a+r-1)/r), is the maximum value for k.
   Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
   answer for q(x) = floor(x/y) when x is in the range

   [0,(K+1)y-1]        K = floor((a+r-1)/r)

   To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
   the formula for q'(x) yields the correct value of q(x) for all x
   representable by a single word in HPPA.

   We are also constrained in that computing the product (ax), adding
   b, and dividing by z must all be done quickly, otherwise we will be
   better off going through the general algorithm using the DS
   instruction, which uses approximately 70 cycles.

   For each y, there is a choice of z which satisfies the constraints
   for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
   timing constraints for arbitrary y.  It seems that z being equal to
   a power of 2 or a power of 2 minus 1 is as good as we can do, since
   it minimizes the time to do division by z.  We want the choice of z
   to also result in a value for (a) that minimizes the computation of
   the product (ax).  This is best achieved if (a) has a regular bit
   pattern (so the multiplication can be done with shifts and adds).
   The value of (a) also needs to be less than 2**32 so the product is
   always guaranteed to fit in 2 words.

   In actual practice, the following should be done:

   1) For negative x, you should take the absolute value and remember
   .  the fact so that the result can be negated.  This obviously does
   .  not apply in the unsigned case.
   2) For even y, you should factor out the power of 2 that divides y
   .  and divide x by it.  You can then proceed by dividing by the
   .  odd factor of y.

   Here is a table of some odd values of y, and corresponding choices
   for z which are "good".
        y     z       r      a (hex)     max x (hex)

        3   2**32     1     55555555      100000001
        5   2**32     1     33333333      100000003
        7  2**24-1    0       249249     (infinite)
        9  2**24-1    0       1c71c7     (infinite)
       11  2**20-1    0        1745d     (infinite)
       13  2**24-1    0       13b13b     (infinite)
       15   2**32     1     11111111      10000000d
       17   2**32     1      f0f0f0f      10000000f

   If r is 1, then b = a+r-1 = a.  This simplifies the computation
   of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
   then b = 0 is ok to use which simplifies (ax+b).

   The bit patterns for 55555555, 33333333, and 11111111 are obviously
   very regular.  The bit patterns for the other values of a above are:

        y      (hex)          (binary)

        7     249249  001001001001001001001001  << regular >>
        9     1c71c7  000111000111000111000111  << regular >>
       11      1745d  000000010111010001011101  << irregular >>
       13     13b13b  000100111011000100111011  << irregular >>

   The bit patterns for (a) corresponding to (y) of 11 and 13 may be
   too irregular to warrant using this method.

   When z is a power of 2 minus 1, then the division by z is slightly
   more complicated, involving an iterative solution.

   The code presented here solves division by 1 through 17, except for
   11 and 13.  There are algorithms for both signed and unsigned
   quantities given.

   TIMINGS (cycles)

   divisor  positive  negative  unsigned

   .   1        2         2         2
   .   2        4         4         2
   .   3       19        21        19
   .   4        4         4         2
   .   5       18        22        19
   .   6       19        22        19
   .   8        4         4         2
   .  10       18        19        17
   .  12       18        20        18
   .  15       16        18        16
   .  16        4         4         2
   .  17       16        18        16

   Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
   a loop body is executed until the tentative quotient is 0.  The
   number of times the loop body is executed varies depending on the
   dividend, but is never more than two times.  If the dividend is
   less than the divisor, then the loop body is not executed at all.
Each iteration adds 4 cycles to the timings.

   divisor  positive  negative  unsigned

   .   7      19+4n     20+4n     20+4n    n = number of iterations
   .   9      21+4n     22+4n     21+4n
   .  14      21+4n     22+4n     20+4n

   To give an idea of how the number of iterations varies, here is a
   table of dividend versus number of iterations when dividing by 7.

   smallest      largest       required
   dividend     dividend      iterations

   .    0            6              0
   .    7        0x6ffffff          1
   0x1000006    0xffffffff          2

   There is some overlap in the range of numbers requiring 1 and 2
   iterations.  */

/* Register roles shared by every entry point below:
   x2 (arg0) holds the dividend on entry and the low word of the
   running product, x1 (ret1) holds the high word and the quotient on
   return, t1 (arg1) and t2 (r1) are scratch.  */
RDEFINE(t2,r1)
RDEFINE(x2,arg0)	/* r26 */
RDEFINE(t1,arg1)	/* r25 */
RDEFINE(x1,ret1)	/* r29 */

	SUBSPA_MILLI_DIV
	ATTR_MILLI

	.proc
	.callinfo	millicode
	.entry
/* NONE of these routines require a stack frame
   ALL of these routines are unwindable from millicode	*/

GSYM($$divide_by_constant)
	.export $$divide_by_constant,millicode
/*  Provides a "nice" label for the code covered by the unwind descriptor
    for things like gprof.  */

/* DIVISION BY 2 (shift by 1) */
/* The power-of-two entry points share one shape: COMCLR nullifies the
   bias for a non-negative dividend, ADDI adds divisor-1 to a negative
   one so that the shift truncates toward zero, and EXTRS performs the
   arithmetic right shift in the delay slot of the return.  */
GSYM($$divI_2)
	.export		$$divI_2,millicode
	comclr,>=	arg0,0,0
	addi		1,arg0,arg0
	MILLIRET
	extrs		arg0,30,31,ret1


/* DIVISION BY 4 (shift by 2) */
GSYM($$divI_4)
	.export		$$divI_4,millicode
	comclr,>=	arg0,0,0
	addi		3,arg0,arg0
	MILLIRET
	extrs		arg0,29,30,ret1


/* DIVISION BY 8 (shift by 3) */
GSYM($$divI_8)
	.export		$$divI_8,millicode
	comclr,>=	arg0,0,0
	addi		7,arg0,arg0
	MILLIRET
	extrs		arg0,28,29,ret1

/* DIVISION BY 16 (shift by 4) */
GSYM($$divI_16)
	.export		$$divI_16,millicode
	comclr,>=	arg0,0,0
	addi		15,arg0,arg0
	MILLIRET
	extrs		arg0,27,28,ret1

/****************************************************************************
*
*	DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
*
*	includes 3,5,15,17 and also 6,10,12
*
****************************************************************************/

/* DIVISION BY 3 (use z = 2**32; a = 55555555) */

GSYM($$divI_3)
	.export		$$divI_3,millicode
	comb,<,N	x2,0,LREF(neg3)

	addi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1

LSYM(neg3)
	subi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_3)
	.export		$$divU_3,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,30,t1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,t1,x1

/* DIVISION BY 5 (use z = 2**32; a = 33333333) */

GSYM($$divI_5)
	.export		$$divI_5,millicode
	comb,<,N	x2,0,LREF(neg5)

	addi		3,x2,t1		/* this cannot overflow	*/
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg5)
	sub		0,x2,x2		/* negate x2			*/
	addi		1,x2,x2		/* this cannot overflow	*/
	shd		0,x2,31,x1	/* get top bit (can be 1)	*/
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_5)
	.export		$$divU_5,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,31,t1	/* multiply by 3 to get started */
	sh1add		x2,x2,x2
	b		LREF(pos)
	addc		t1,x1,x1

/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
GSYM($$divI_6)
	.export		$$divI_6,millicode
	comb,<,N	x2,0,LREF(neg6)
	extru		x2,30,31,x2	/* divide by 2			*/
	addi		5,x2,t1		/* compute 5*(x2+1) = 5*x2+5	*/
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg6)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,30,31,x2
	shd		0,x2,30,x1
	sh2add		x2,x2,x2	/* multiply by 5 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_6)
	.export		$$divU_6,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		1,x2,x2		/* cannot carry */
	shd		0,x2,30,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1

/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
GSYM($$divU_10)
	.export		$$divU_10,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		3,x2,t1		/* compute 3*(x2+1) = (3*x2)+3	*/
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	addc		0,0,x1
/* Shared tail for non-negative dividends: <x1,x2> is multiplied by
   0x11, 0x101 and 0x10001 in turn; the quotient is left in x1.  */
LSYM(pos)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(pos_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	MILLIRET
	addc		x1,t1,x1

GSYM($$divI_10)
	.export		$$divI_10,millicode
	comb,<		x2,0,LREF(neg10)
	copy		0,x1
	extru		x2,30,31,x2	/* divide by 2 */
	addib,TR	1,x2,LREF(pos)	/* add 1 (cannot overflow)     */
	sh1add		x2,x2,x2	/* multiply by 3 to get started */

LSYM(neg10)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,30,31,x2
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
/* Shared tail for negated dividends: same multiplies as at pos, with
   the quotient negated in the delay slot of the return.  */
LSYM(neg)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(neg_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	addc		x1,t1,x1
	MILLIRET
	sub		0,x1,x1

/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
GSYM($$divI_12)
	.export		$$divI_12,millicode
	comb,<		x2,0,LREF(neg12)
	copy		0,x1
	extru		x2,29,30,x2	/* divide by 4			*/
	addib,tr	1,x2,LREF(pos)	/* compute 5*(x2+1) = 5*x2+5    */
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

LSYM(neg12)
	subi		4,x2,x2		/* negate, divide by 4, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,29,30,x2
	b		LREF(neg)
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

GSYM($$divU_12)
	.export		$$divU_12,millicode
	extru		x2,29,30,x2	/* divide by 4   */
	addi		5,x2,t1		/* cannot carry */
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1

/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
GSYM($$divI_15)
	.export		$$divI_15,millicode
	comb,<		x2,0,LREF(neg15)
	copy		0,x1
	/* Enter pos one instruction late: the delay slot below repeats
	   the first instruction found at pos.  */
	addib,tr	1,x2,LREF(pos)+4
	shd		x1,x2,28,t1

LSYM(neg15)
	b		LREF(neg)
	subi		1,x2,x2

GSYM($$divU_15)
	.export		$$divU_15,millicode
	addi		1,x2,x2		/* this CAN overflow */
	b		LREF(pos)
	addc		0,0,x1

/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
GSYM($$divI_17)
	.export		$$divI_17,millicode
	comb,<,n	x2,0,LREF(neg17)
	addi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,0,x1

LSYM(neg17)
	subi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(neg_for_17)
	subb		t1,0,x1

GSYM($$divU_17)
	.export		$$divU_17,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,28,t1	/* multiply by 0xf to get started */
LSYM(u17)
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,x1,x1


/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
   includes 7,9 and also 14


   z = 2**24-1
   r = z mod y = 0

   so choose b = 0

   Also, in order to divide by z = 2**24-1, we approximate by dividing
   by (z+1) = 2**24 (which is easy), and then correcting.

   (ax) = (z+1)q' + r
   .	= zq' + (q'+r)

   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
   Then the true remainder of (ax)/z is (q'+r).  Repeat the process
   with this new remainder, adding the tentative quotients together,
   until a tentative quotient is 0 (and then we are done).  There is
   one last correction to be done.  It is possible that (q'+r) = z.
   If so, then (q'+r)/(z+1) = 0 and it looks like we are done.  But,
   in fact, we need to add 1 more to the quotient.  Now, it turns
   out that this happens if and only if the original value x is
   an exact multiple of y.  So, to avoid a three instruction test at
   the end, instead use 1 instruction to add 1 to x at the beginning.  */

/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
GSYM($$divI_7)
	.export		$$divI_7,millicode
	comb,<,n	x2,0,LREF(neg7)
LSYM(7)
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1
LSYM(pos7)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient  */
LSYM(1)
	addb,tr		t1,x1,LREF(2)	/* add to previous quotient   */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRETN

LSYM(2)
	addb,tr		t1,x2,LREF(1)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient     */

LSYM(neg7)
	subi		1,x2,x2		/* negate x2 and add 1 */
LSYM(8)
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1

LSYM(neg7_shift)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient  */
LSYM(3)
	addb,tr		t1,x1,LREF(4)	/* add to previous quotient   */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRET
	sub		0,x1,x1		/* negate result    */

LSYM(4)
	addb,tr		t1,x2,LREF(3)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient     */

GSYM($$divU_7)
	.export		$$divU_7,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1
	shd		x1,x2,29,t1
	sh3add		x2,x2,x2
	b		LREF(pos7)
	addc		t1,x1,x1

/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
GSYM($$divI_9)
	.export		$$divI_9,millicode
	comb,<,n	x2,0,LREF(neg9)
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(pos7)
	subb		t1,0,x1

LSYM(neg9)
	subi		1,x2,x2		/* negate and add 1 */
	shd		0,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(neg7_shift)
	subb		t1,0,x1

GSYM($$divU_9)
	.export		$$divU_9,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1
	shd		x1,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(pos7)
	subb		t1,x1,x1

/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
GSYM($$divI_14)
	.export		$$divI_14,millicode
	comb,<,n	x2,0,LREF(neg14)
/* $$divI_14 falls through to here for a non-negative dividend, so the
   signed and unsigned entry points share the halving step.  */
GSYM($$divU_14)
	.export		$$divU_14,millicode
	b		LREF(7)		/* go to 7 case */
	extru		x2,30,31,x2	/* divide by 2  */

LSYM(neg14)
	subi		2,x2,x2		/* negate (and add 2) */
	b		LREF(8)
	extru		x2,30,31,x2	/* divide by 2	      */
	.exit
	.procend
	.end
#endif

#ifdef L_mulI
/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
/******************************************************************************
This routine is used on PA2.0 processors when gcc -mno-fpregs is used

ROUTINE:	$$mulI


DESCRIPTION:

	$$mulI
multiplies two single word integers, giving a single 1672 word result. 1673 1674 1675INPUT REGISTERS: 1676 1677 arg0 = Operand 1 1678 arg1 = Operand 2 1679 r31 == return pc 1680 sr0 == return space when called externally 1681 1682 1683OUTPUT REGISTERS: 1684 1685 arg0 = undefined 1686 arg1 = undefined 1687 ret1 = result 1688 1689OTHER REGISTERS AFFECTED: 1690 1691 r1 = undefined 1692 1693SIDE EFFECTS: 1694 1695 Causes a trap under the following conditions: NONE 1696 Changes memory at the following places: NONE 1697 1698PERMISSIBLE CONTEXT: 1699 1700 Unwindable 1701 Does not create a stack frame 1702 Is usable for internal or external microcode 1703 1704DISCUSSION: 1705 1706 Calls other millicode routines via mrp: NONE 1707 Calls other millicode routines: NONE 1708 1709***************************************************************************/ 1710 1711 1712#define a0 %arg0 1713#define a1 %arg1 1714#define t0 %r1 1715#define r %ret1 1716 1717#define a0__128a0 zdep a0,24,25,a0 1718#define a0__256a0 zdep a0,23,24,a0 1719#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) 1720#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) 1721#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) 1722#define b_n_ret_t0 b,n LREF(ret_t0) 1723#define b_e_shift b LREF(e_shift) 1724#define b_e_t0ma0 b LREF(e_t0ma0) 1725#define b_e_t0 b LREF(e_t0) 1726#define b_e_t0a0 b LREF(e_t0a0) 1727#define b_e_t02a0 b LREF(e_t02a0) 1728#define b_e_t04a0 b LREF(e_t04a0) 1729#define b_e_2t0 b LREF(e_2t0) 1730#define b_e_2t0a0 b LREF(e_2t0a0) 1731#define b_e_2t04a0 b LREF(e2t04a0) 1732#define b_e_3t0 b LREF(e_3t0) 1733#define b_e_4t0 b LREF(e_4t0) 1734#define b_e_4t0a0 b LREF(e_4t0a0) 1735#define b_e_4t08a0 b LREF(e4t08a0) 1736#define b_e_5t0 b LREF(e_5t0) 1737#define b_e_8t0 b LREF(e_8t0) 1738#define b_e_8t0a0 b LREF(e_8t0a0) 1739#define r__r_a0 add r,a0,r 1740#define r__r_2a0 sh1add a0,r,r 1741#define r__r_4a0 sh2add a0,r,r 1742#define r__r_8a0 sh3add a0,r,r 1743#define r__r_t0 add r,t0,r 1744#define r__r_2t0 sh1add t0,r,r 
1745#define r__r_4t0 sh2add t0,r,r 1746#define r__r_8t0 sh3add t0,r,r 1747#define t0__3a0 sh1add a0,a0,t0 1748#define t0__4a0 sh2add a0,0,t0 1749#define t0__5a0 sh2add a0,a0,t0 1750#define t0__8a0 sh3add a0,0,t0 1751#define t0__9a0 sh3add a0,a0,t0 1752#define t0__16a0 zdep a0,27,28,t0 1753#define t0__32a0 zdep a0,26,27,t0 1754#define t0__64a0 zdep a0,25,26,t0 1755#define t0__128a0 zdep a0,24,25,t0 1756#define t0__t0ma0 sub t0,a0,t0 1757#define t0__t0_a0 add t0,a0,t0 1758#define t0__t0_2a0 sh1add a0,t0,t0 1759#define t0__t0_4a0 sh2add a0,t0,t0 1760#define t0__t0_8a0 sh3add a0,t0,t0 1761#define t0__2t0_a0 sh1add t0,a0,t0 1762#define t0__3t0 sh1add t0,t0,t0 1763#define t0__4t0 sh2add t0,0,t0 1764#define t0__4t0_a0 sh2add t0,a0,t0 1765#define t0__5t0 sh2add t0,t0,t0 1766#define t0__8t0 sh3add t0,0,t0 1767#define t0__8t0_a0 sh3add t0,a0,t0 1768#define t0__9t0 sh3add t0,t0,t0 1769#define t0__16t0 zdep t0,27,28,t0 1770#define t0__32t0 zdep t0,26,27,t0 1771#define t0__256a0 zdep a0,23,24,t0 1772 1773 1774 SUBSPA_MILLI 1775 ATTR_MILLI 1776 .align 16 1777 .proc 1778 .callinfo millicode 1779 .export $$mulI,millicode 1780GSYM($$mulI) 1781 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ 1782 copy 0,r /* zero out the result */ 1783 xor a0,a1,a0 /* swap a0 & a1 using the */ 1784 xor a0,a1,a1 /* old xor trick */ 1785 xor a0,a1,a0 1786LSYM(l4) 1787 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ 1788 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ 1789 sub,> 0,a1,t0 /* otherwise negate both and */ 1790 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ 1791 sub 0,a0,a1 1792 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ 1793 1794LSYM(l0) r__r_t0 /* add in this partial product */ 1795LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ 1796LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ 1797LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ 1798 extru a1,23,24,a1 /* a1 >>= 8 ****************** */ 1799 1800/*16 insts before this. 
*/ 1801/* a0 <<= 8 ************************** */ 1802LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop 1803LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop 1804LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop 1805LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 1806LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop 1807LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 1808LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1809LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 1810LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop 1811LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 1812LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1813LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 1814LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN 1815LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 1816LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 1817LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 1818LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN 1819LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 1820LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1821LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 1822LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN 1823LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 1824LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 1825LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 1826LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN 1827LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 1828LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 1829LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 1830LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 1831LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 1832LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 1833LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 1834LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! 
MILLIRETN 1835LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 1836LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 1837LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 1838LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN 1839LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 1840LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 1841LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 1842LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN 1843LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 1844LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 1845LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 1846LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 1847LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 1848LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 1849LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 1850LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 1851LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 1852LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 1853LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 1854LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 1855LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 1856LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 1857LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 1858LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 1859LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 1860LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 1861LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 1862LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 1863LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 1864LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 1865LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 1866LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN 1867LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 1868LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 1869LSYM(x67) t0__8a0 ! 
t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 1870LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 1871LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 1872LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 1873LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 1874LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN 1875LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 1876LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 1877LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 1878LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 1879LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 1880LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 1881LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 1882LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 1883LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 1884LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 1885LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 1886LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 1887LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 1888LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 1889LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 1890LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 1891LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 1892LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 1893LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 1894LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 1895LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 1896LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 1897LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 1898LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 1899LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 1900LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 1901LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 1902LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 1903LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 1904LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! 
t0__3t0 1905LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 1906LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 1907LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 1908LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 1909LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 1910LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 1911LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 1912LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 1913LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 1914LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 1915LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 1916LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 1917LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 1918LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 1919LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 1920LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 1921LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 1922LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 1923LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 1924LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 1925LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 1926LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 1927LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 1928LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 1929LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 1930LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN 1931LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 1932LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 1933LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 1934LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 1935LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 1936LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 1937LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 1938LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 1939LSYM(x137) t0__8a0 ! 
t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 1940LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 1941LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 1942LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 1943LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 1944LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 1945LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 1946LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 1947LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 1948LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 1949LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 1950LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 1951LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 1952LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 1953LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 1954LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 1955LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 1956LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 1957LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 1958LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 1959LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 1960LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 1961LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 1962LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 1963LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 1964LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 1965LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 1966LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 1967LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 1968LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 1969LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 1970LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 1971LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 1972LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 1973LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! 
t0__9t0 1974LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 1975LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 1976LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 1977LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 1978LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 1979LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 1980LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 1981LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 1982LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 1983LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 1984LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 1985LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 1986LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 1987LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 1988LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 1989LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 1990LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 1991LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 1992LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 1993LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 1994LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 1995LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 1996LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 1997LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 1998LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 1999LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 2000LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 2001LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 2002LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 2003LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 2004LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 2005LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 2006LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 2007LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 2008LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! 
t0__3t0 2009LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 2010LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 2011LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 2012LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 2013LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 2014LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 2015LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 2016LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 2017LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 2018LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 2019LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 2020LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 2021LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 2022LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 2023LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 2024LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 2025LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 2026LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 2027LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 2028LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 2029LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 2030LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 2031LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 2032LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 2033LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 2034LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 2035LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 2036LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 2037LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 2038LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 2039LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 2040LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 2041LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 2042LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 2043LSYM(x241) t0__9a0 ! t0__t0_a0 ! 
b_e_8t0a0 ! t0__3t0 2044LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 2045LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 2046LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 2047LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 2048LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 2049LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 2050LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 2051LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 2052LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 2053LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 2054LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 2055LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 2056LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 2057LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 2058/* 1040 insts before this. What follows are the shared exit stubs that the xN table entries above branch to. The macros used here (r__r_t0, t0__2t0_a0, a1_ne_0_b_l0, b_e_t0, ...) are defined elsewhere in the file; these notes read their names literally -- t0__2t0_a0 as "t0 = 2*t0 + a0", t0__t0ma0 as "t0 = t0 - a0", r__r_4t0 as "r = r + 4*t0", b_e_FOO as "branch to label e_FOO" -- TODO confirm against the definitions. Under that reading, and assuming the fourth macro of a table entry runs before the stub it branches to, each entry xN from x33 to x255 appears to form N*a0. Every stub that ends in MILLIRET (the return without the ,n completer) is followed by one more macro, r__r_t0, presumably so that it runs in the delay slot of the return; every stub that ends in MILLIRETN (the ,n form) has nothing after it. ret_t0 relies on the same layout: its MILLIRET is followed directly by the r__r_t0 that belongs to e_t0. The labels e2t04a0 and e4t08a0 are spelled without an underscore after the "e", while the table uses b_e_2t04a0 and b_e_4t08a0; presumably the macro definitions map one spelling to the other -- verify before renaming either. */ 2059LSYM(ret_t0) MILLIRET 2060LSYM(e_t0) r__r_t0 2061LSYM(e_shift) a1_ne_0_b_l2 2062 a0__256a0 /* a0 <<= 8, i.e. a0 = 256*a0 as the macro name says */ 2063 MILLIRETN 2064LSYM(e_t0ma0) a1_ne_0_b_l0 2065 t0__t0ma0 2066 MILLIRET 2067 r__r_t0 2068LSYM(e_t0a0) a1_ne_0_b_l0 2069 t0__t0_a0 2070 MILLIRET 2071 r__r_t0 2072LSYM(e_t02a0) a1_ne_0_b_l0 2073 t0__t0_2a0 2074 MILLIRET 2075 r__r_t0 2076LSYM(e_t04a0) a1_ne_0_b_l0 2077 t0__t0_4a0 2078 MILLIRET 2079 r__r_t0 2080LSYM(e_2t0) a1_ne_0_b_l1 2081 r__r_2t0 2082 MILLIRETN 2083LSYM(e_2t0a0) a1_ne_0_b_l0 2084 t0__2t0_a0 2085 MILLIRET 2086 r__r_t0 2087LSYM(e2t04a0) t0__t0_2a0 2088 a1_ne_0_b_l1 2089 r__r_2t0 2090 MILLIRETN 2091LSYM(e_3t0) a1_ne_0_b_l0 2092 t0__3t0 2093 MILLIRET 2094 r__r_t0 2095LSYM(e_4t0) a1_ne_0_b_l1 2096 r__r_4t0 2097 MILLIRETN 2098LSYM(e_4t0a0) a1_ne_0_b_l0 2099 t0__4t0_a0 2100 MILLIRET 2101 r__r_t0 2102LSYM(e4t08a0) t0__t0_2a0 2103 a1_ne_0_b_l1 2104 r__r_4t0 2105 MILLIRETN 2106LSYM(e_5t0) a1_ne_0_b_l0 2107 t0__5t0 2108 MILLIRET 2109 r__r_t0 2110LSYM(e_8t0) a1_ne_0_b_l1 2111 r__r_8t0 2112 MILLIRETN 2113LSYM(e_8t0a0) a1_ne_0_b_l0 2114 t0__8t0_a0 2115 MILLIRET 2116 r__r_t0 2117 2118 .procend 2119 .end 
2120#endif 2121