/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
 *		   sum in a third limb vector.
 *
 * Copyright (C) 1995, 1996, 1998,
 *		 2001, 2002 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */


/*******************
 *  mpi_limb_t
 *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,
 *		      mpi_ptr_t s1_ptr,
 *		      mpi_ptr_t s2_ptr,
 *		      mpi_size_t size)
 */

! INPUT PARAMETERS
#define res_ptr	%o0
#define s1_ptr	%o1
#define s2_ptr	%o2
#define size	%o3

#include "sysdep.h"

	.text
	.align	4
	.global	C_SYMBOL_NAME(_gcry_mpih_add_n)
C_SYMBOL_NAME(_gcry_mpih_add_n):
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L1			! branch if alignment differs
	nop
! **  V1a  **
L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
	be	L_v1			! if no, branch
	nop
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L_v1:	addx	%g0,%g0,%o4		! save cy in register
	cmp	size,2			! if size < 2 ...
	bl	Lend2			! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[s1_ptr+0],%g4
	addcc	size,-10,size
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	Lfin1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop1
	subcc	%g0,%o4,%g0		! restore cy

Lfin1:	addcc	size,8-2,size
	blt	Lend1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1: addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope1
	subcc	%g0,%o4,%g0		! restore cy
Lend1:	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
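/* Note on the "save cy" / "restore cy" pairs used throughout: the loop
   bookkeeping (the addcc that decrements size) overwrites the carry bit,
   so the running carry is first copied into %o4 with addx %g0,%g0,%o4
   (computing 0 + 0 + C) and later turned back into the carry bit with
   subcc %g0,%o4,%g0, since subtracting a non-zero %o4 from zero borrows
   and thereby sets C again for the next addxcc.  Each restore sits in a
   branch delay slot, so it executes on both the taken and fall-through
   paths.  */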

	andcc	size,1,%g0
	be	Lret1
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

Lret1:	retl
	addx	%g0,%g0,%o0		! return carry-out from most significant limb

L1:	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L2
	nop
! **  V1b  **
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L0
	mov	%g1,s1_ptr

! **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the
   alignment of s1_ptr and s2_ptr is the same.  */

L2:	cmp	size,1
	be	Ljone
	nop
	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
	be	L_v2			! if no, branch
	nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L_v2:	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	blt	Lfin2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop2
	subcc	%g0,%o4,%g0		! restore cy

Lfin2:	addcc	size,8-2,size
	blt	Lend2
	subcc	%g0,%o4,%g0		! restore cy
Loope2: ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope2
	subcc	%g0,%o4,%g0		! restore cy
Lend2:	andcc	size,1,%g0
	be	Lret2
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
Ljone:	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

Lret2:	retl
	addx	%g0,%g0,%o0		! return carry-out from most significant limb
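/* For reference, a portable C sketch of what the routine above computes;
 * this comment is illustrative only and is not assembled.  It assumes the
 * mpi_limb_t/mpi_ptr_t/mpi_size_t types from the surrounding MPI headers
 * and, like the assembly, requires size > 0:
 *
 *	mpi_limb_t
 *	_gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 *			  mpi_ptr_t s2_ptr, mpi_size_t size)
 *	{
 *	  mpi_limb_t x, y, cy = 0;
 *
 *	  do
 *	    {
 *	      y = *s1_ptr++ + cy;	// add carry from previous limb
 *	      cy = y < cy;		// it wrapped iff result is below cy
 *	      x = *s2_ptr++;
 *	      y += x;			// add the other limb
 *	      cy += y < x;		// cy stays 0 or 1
 *	      *res_ptr++ = y;
 *	    }
 *	  while (--size);
 *	  return cy;			// carry-out, returned in %o0 above
 *	}
 */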