; mc88100 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.

; Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 3 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


; INPUT PARAMETERS
; res_ptr	r2
; s1_ptr	r3
; s2_ptr	r4
; size		r5

; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention.  As a result, the instruction
; order is not always natural.

; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
; ___gmpn_sub_n -- subtract the limb vector at s2_ptr from the limb vector
; at s1_ptr (size limbs each, 4 bytes per limb), store the difference at
; res_ptr, and return the borrow out of the most significant limb.
;
; In:	   r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size (> 0)
; Out:	   r2 = final carry bit inverted (see the xor in the epilogue)
; Scratch: r5-r10, r12.  Returns through r1.
;
; Layout of the loop: one pass handles 8 limbs (pointers advance by 32).
; Between Lbase and the closing bcnd.n every limb "slot" is exactly four
; instructions:
;	ld	next s1 limb
;	subu.cio current pair
;	ld	next s2 limb
;	st	current result
; The slots alternate between the register pairs r6/r7 and r8/r9 so that
; the loads for the next limb overlap the subtract/store of the current one.
;
; Instead of a separate clean-up loop, the routine jumps into the middle of
; the first pass: entry address = Lbase + ((-size) & 7) * 16, i.e. it skips
; (-size) & 7 slots (the *16 assumes 4-byte instructions -- 4 per slot).
; The three pointers are moved back by the same number of limbs so that the
; fixed displacements in the entry slot address limb 0.
;
; NOTE: do not add, remove or reorder instructions between Lbase and the
; final bcnd.n; the computed entry address depends on the exact layout.

	text
	align 16
	global ___gmpn_sub_n
___gmpn_sub_n:
	ld r6,r3,0			; read first limb from s1_ptr
	extu r10,r5,3			; r10 = size >> 3, the loop counter
	ld r7,r4,0			; read first limb from s2_ptr

	subu r5,r0,r5			; r5 = -size
	mak r5,r5,3<4>			; r5 = ((-size) & 7) << 4 = byte offset of entry slot
	bcnd.n eq0,r5,Lzero		; size is a multiple of 8: start at the top of a pass
	 subu.co r0,r0,r0		; initialize carry (delay slot, runs on both paths)

	; Form the absolute address of Lbase, then add the slot offset.
	or r12,r0,lo16(Lbase)
	or.u r12,r12,hi16(Lbase)
	addu r12,r12,r5			; r12 is address for entering in loop

	; r5/4 = 4 * (number of skipped slots) = bytes of limbs skipped.
	extu r5,r5,2			; divide by 4
	subu r2,r2,r5			; adjust res_ptr
	subu r3,r3,r5			; adjust s1_ptr
	subu r4,r4,r5			; adjust s2_ptr

	; The entry slot may use either register pair, so mirror the first
	; limbs from r6/r7 into r8/r9 as well.
	or r8,r6,r0

	jmp.n r12
	 or r9,r7,r0			; delay slot: executed before the jump lands

; Top of a full 8-limb pass.  The store below writes the result of the
; last subtract of the previous pass (done in the bcnd.n delay slot).
Loop:	addu r3,r3,32
	st r8,r2,28
	addu r4,r4,32
	ld r6,r3,0
	addu r2,r2,32
	ld r7,r4,0
Lzero:	subu r10,r10,1			; subtract 0 + 8r limbs (adj loop cnt)
Lbase:	ld r8,r3,4
	subu.cio r6,r6,r7
	ld r9,r4,4
	st r6,r2,0
	ld r6,r3,8			; subtract 7 + 8r limbs
	subu.cio r8,r8,r9
	ld r7,r4,8
	st r8,r2,4
	ld r8,r3,12			; subtract 6 + 8r limbs
	subu.cio r6,r6,r7
	ld r9,r4,12
	st r6,r2,8
	ld r6,r3,16			; subtract 5 + 8r limbs
	subu.cio r8,r8,r9
	ld r7,r4,16
	st r8,r2,12
	ld r8,r3,20			; subtract 4 + 8r limbs
	subu.cio r6,r6,r7
	ld r9,r4,20
	st r6,r2,16
	ld r6,r3,24			; subtract 3 + 8r limbs
	subu.cio r8,r8,r9
	ld r7,r4,24
	st r8,r2,20
	ld r8,r3,28			; subtract 2 + 8r limbs
	subu.cio r6,r6,r7
	ld r9,r4,28
	st r6,r2,24
	bcnd.n ne0,r10,Loop		; subtract 1 + 8r limbs
	 subu.cio r8,r8,r9		; delay slot: last subtract of this pass

	st r8,r2,28			; store most significant limb

	; r2 = carry bit; the xor flips it so the caller gets the borrow.
	; (Presumably the 88100 carry is "no borrow" after subu -- confirm
	; against the MC88100 manual.)
	addu.ci r2,r0,r0		; return carry-out from most sign. limb
	jmp.n r1
	 xor r2,r2,1			; delay slot: invert carry before returning