; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of either:
;
;   * the GNU Lesser General Public License as published by the Free
;     Software Foundation; either version 3 of the License, or (at your
;     option) any later version.
;
; or
;
;   * the GNU General Public License as published by the Free Software
;     Foundation; either version 2 of the License, or (at your option) any
;     later version.
;
; or both in parallel, as here.
;
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; You should have received copies of the GNU General Public License and the
; GNU Lesser General Public License along with the GNU MP Library.  If not,
; see https://www.gnu.org/licenses/.


; C equivalent:
;   mp_limb_t __gmpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
;                           mp_srcptr s2_ptr, mp_size_t size)
; In:   r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size (assumed > 0)
; Out:  r2 = carry out of the most significant limb (0 or 1)
;
; The carry is threaded through the whole vector with addu.co (carry out),
; addu.cio (carry in and out), and finally addu.ci, which materializes the
; last carry as the return value.
;
; Three paths are selected by testing bit 2 of xor'ed pointers, i.e. whether
; two pointers share the same alignment mod 8, so that 64-bit ld.d/st.d
; accesses can be used on the operands that allow them:
;   V1a: s2_ptr and res_ptr alike  -> ld.d from s2, st.d to res
;   V1b: s1_ptr and res_ptr alike  -> swap s1/s2 (addition commutes), use V1a
;   V2:  s1_ptr and s2_ptr alike   -> ld.d from both sources, word stores

; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"

	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_add_n)
C_SYMBOL_NAME(__gmpn_add_n):
	addu.co	 r0,r0,r0		; clear cy flag
	xor	 r12,s2_ptr,res_ptr	; compare alignment of s2_ptr/res_ptr
	bb1	 2,r12,L1		; differ mod 8 -> try V1b/V2
; **  V1a  **
L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned?
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add starts the carry chain
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2		; < 2 limbs left: shared tail in V2

/* Software pipeline: two limbs are pre-loaded here, ahead of the unrolled
   loop; size is biased by 10 = 8 (unroll factor) + 2 (limbs in flight).  */
	ld	 r10,s1_ptr,0
	ld	 r12,s1_ptr,4
	ld.d	 r8,s2_ptr,0		; r8:r9 = two s2 limbs
	subu	 size,size,10
	bcnd	 lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop1:	subu	 size,size,8
	addu.cio r6,r10,r8		; adds interleaved with the loads
	ld	 r10,s1_ptr,8		; for the following pair of limbs
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,32		; pre-load for next iteration (or tail)
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

Lfin1:	addu	 size,size,8-2		; undo unroll bias, keep 2-limb bias
	bcnd	 lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
Lend1:	addu.cio r6,r10,r8		; drain the two limbs still in flight
	addu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1		; bit 0 of size: one odd limb left?
/* Add last limb */
	ld	 r10,s1_ptr,8
	ld	 r8,s2_ptr,8
	addu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	jmp.n	 r1			; .n: next insn runs in the delay slot
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb

L1:	xor	 r12,s1_ptr,res_ptr	; compare alignment of s1_ptr/res_ptr
	bb1	 2,r12,L2		; differ mod 8 too -> V2
; **  V1b  **
/* s1_ptr and res_ptr share alignment; addition is commutative, so swap
   s1_ptr and s2_ptr and reuse the V1a code.  */
	or	 r12,r0,s2_ptr
	or	 s2_ptr,r0,s1_ptr
	or	 s1_ptr,r0,r12
	br	 L0

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone		; single limb: no alignment games
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add starts the carry chain
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8		; bias size by the unroll factor
	bcnd	 lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0		; r8:r9 = two s1 limbs
	ld.d	 r6,s2_ptr,0		; r6:r7 = two s2 limbs
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0		; res_ptr is unaligned: word stores
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	addu.cio r8,r8,r6
	st	 r8,res_ptr,8
	addu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	addu.cio r8,r8,r6
	st	 r8,res_ptr,16
	addu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	addu.cio r8,r8,r6
	st	 r8,res_ptr,24
	addu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

Lfin2:	addu	 size,size,8-2		; undo unroll bias, leave 2-limb step
	bcnd	 lt0,size,Lend2
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
Lend2:	bb0	 0,size,Lret2		; bit 0 of size: one odd limb left?
/* Add last limb */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	addu.cio r6,r10,r8
	st	 r6,res_ptr,0

Lret2:	jmp.n	 r1			; .n: next insn runs in the delay slot
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb