#include "sparc_arch.h"

! GHASH primitives for SPARC v9: two table-driven routines that consume the
! input four bits at a time (gcm_ghash_4bit, gcm_gmult_4bit) and three routines
! built on the VIS3 XOR-multiply instructions (gcm_init_vis3, gcm_gmult_vis3,
! gcm_ghash_vis3).  Stack frame size, the condition-code selector used with
! be,pn and the pointer-width shift used in gcm_ghash_vis3 all come from the
! included header and are not visible in this file.

#ifdef __arch64__
! The 64-bit ABI requires an explicit declaration before %g2/%g3 may be used
! as scratch registers; the VIS3 routines below use both.
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.section	".text",#alloc,#execinstr

! rem_4bit: 16 entries of 64 bits each, emitted as pairs of 32-bit words.
! The second word of every pair is zero, so on a big-endian 64-bit load only
! the upper half of the value is populated.  The 4-bit routines index this
! table with (Zlo & 0xf) * 8 and XOR the entry into the high half of the
! accumulator after each 4-bit right shift.
.align	64
rem_4bit:
	.long	0,0,471859200,0,943718400,0,610271232,0
	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
.type	rem_4bit,#object
.size	rem_4bit,(.-rem_4bit)

! gcm_ghash_4bit
!   %i0  16-byte accumulator: read one byte at a time, rewritten as two
!        64-bit words at +0 (high) and +8 (low) after every input block
!   %i1  lookup table of 16 entries x 16 bytes, indexed by nibble * 16;
!        high halves at %i1+idx, low halves at %i1+8+idx
!   %i2  input pointer, advanced by 16 per outer iteration
!   %i3  input length in bytes, converted to an end pointer on entry
!
!   The outer loop always processes one block before testing and leaves only
!   when %i2 equals %i3 exactly, so the length is assumed to be a non-zero
!   multiple of 16.  NOTE(review): confirm against the callers.
!   Presumably the C view is (Xi, Htable, inp, len) - the prototype is not
!   visible in this file.
!
!   Working registers:
!     %o0/%o1  running result, high/low half
!     %o2/%o3  table entry being folded in, high/low half
!     %o4      rem_4bit entry for the nibble just shifted out
!     %o5      low 4 bits of %o0 moved to the top of the low half
!     %l0/%l1  table offsets for the high/low nibble of the current byte
!     %l4      address of rem_4bit
!     %l5      rem_4bit byte offset
!     %l6      %i1+8 (base for low halves)
!     %l7      index of the next byte to fetch, counts 13 down to 0
.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
	save	%sp,-STACK_FRAME,%sp
	ldub	[%i2+15],%l1		! last byte of the first input block
	ldub	[%i0+15],%l2		! last two bytes of the accumulator
	ldub	[%i0+14],%l3
	add	%i3,%i2,%i3		! length -> end pointer
	add	%i1,8,%l6

	! Position-independent address of rem_4bit: call leaves the address of
	! label 1 in %o7 and the delay-slot add applies the assembly-time
	! distance from that label to the table.
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

.Louter:
	! Byte 15: accumulator byte XOR input byte, split into two nibbles.
	xor	%l2,%l1,%l1
	and	%l1,0xf0,%l0		! high nibble is already nibble * 16
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1		! low nibble * 16
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i2+14],%l1

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5		! 8 bytes per rem_4bit entry
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	xor	%l3,%l1,%l1		! byte 14: accumulator XOR input
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lghash_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lghash_inner:
	! One pass handles two nibbles and fetches the next accumulator and
	! input bytes (index %l7) for the following pass.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+%l7],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	ldub	[%i0+%l7],%l3
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%l3,%l1,%l1
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was already 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lghash_inner		! loop until byte 0 has been fetched
	and	%o1,0xf,%l5		! delay slot

	! Low nibble of the final byte.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0

	add	%i2,16,%i2		! next input block
	cmp	%i2,%i3
	be,pn	SIZE_T_CC,.Ldone	! input exhausted
	and	%o1,0xf,%l5		! delay slot, needed on both paths

	! High nibble of the final byte, overlapped with the set-up for the
	! next block: fetch its byte 15 and keep the two low result bytes in
	! registers so .Louter need not reload them from memory.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+15],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]
	srl	%o1,8,%l3
	and	%o1,0xff,%l2		! new accumulator byte 15
	ba	.Louter
	and	%l3,0xff,%l3		! delay slot: new accumulator byte 14
.align	32
.Ldone:
	! High nibble of the final byte of the last block, then store.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_ghash_4bit,#function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

! gcm_gmult_4bit
!   %i0  16-byte accumulator, read bytewise and overwritten with the result
!   %i1  lookup table of 16 entries x 16 bytes (same layout as used by
!        gcm_ghash_4bit)
!
!   Same nibble-by-nibble scheme and register roles as gcm_ghash_4bit, but
!   there is no input stream: the bytes driving the table lookups come from
!   the accumulator alone and exactly one 16-byte value is processed.
.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
	save	%sp,-STACK_FRAME,%sp
	ldub	[%i0+15],%l1
	add	%i1,8,%l6

	! Position-independent address of rem_4bit (see the call/add pair:
	! %o7 receives the address of label 1).
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

	and	%l1,0xf0,%l0		! high nibble is already nibble * 16
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1		! low nibble * 16
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i0+14],%l1

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5		! 8 bytes per rem_4bit entry
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lgmult_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lgmult_inner:
	! One pass handles two nibbles and fetches accumulator byte %l7 for
	! the following pass.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i0+%l7],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was already 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lgmult_inner		! loop until byte 0 has been fetched
	and	%o1,0xf,%l5		! delay slot

	! Low nibble of the final byte.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! High nibble of the final byte, then store the result.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_gmult_4bit,#function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

! gcm_init_vis3
!   %i0  output, three 64-bit words: +0/+8 receive the "twisted" H
!        (high/low), +16 receives a byte-lookup constant
!   %i1  input H as two 64-bit words, high at +0 and low at +8
!
!   Twisting shifts the 128-bit H left by one bit and, when the bit shifted
!   out was set, XORs 1 into the low word and the shifted 0xE1 constant into
!   the high word.  In the comments of this file "(x)" stands for carry-less
!   (XOR) multiplication.
.globl	gcm_init_vis3
.align	32
gcm_init_vis3:
	save	%sp,-STACK_FRAME,%sp

	ldx	[%i1+0],%o2
	ldx	[%i1+8],%o1
	mov	0xE1,%o4
	mov	1,%o3
	sllx	%o4,57,%o4		! top bit of 0xE1 is shifted out here
	srax	%o2,63,%g1		! broadcast carry
	addcc	%o1,%o1,%o1		! H<<=1
	.word	0x95b2822a !addxc	%o2,%o2,%o2
	and	%g1,%o3,%o3		! keep the correction terms only if
	and	%g1,%o4,%o4		! the carry mask is all ones
	xor	%o3,%o1,%o1
	xor	%o4,%o2,%o2
	stx	%o1,[%i0+8]		! save twisted H
	stx	%o2,[%i0+0]

	! Assemble the 64-bit constant from two 32-bit halves.
	sethi	%hi(0xA0406080),%g5
	sethi	%hi(0x20C0E000),%l0
	or	%g5,%lo(0xA0406080),%g5
	or	%l0,%lo(0x20C0E000),%l0
	sllx	%g5,32,%g5
	or	%l0,%g5,%g5		! (0xE0 (x) i)&0xff=0xA040608020C0E000
	stx	%g5,[%i0+16]

	ret
	restore
.type	gcm_init_vis3,#function
.size	gcm_init_vis3,.-gcm_init_vis3

! gcm_gmult_vis3
!   %i0  16-byte accumulator Xi (high at +0, low at +8), overwritten with
!        the result
!   %i1  table laid out as gcm_init_vis3 stores it: twisted H at +0/+8 and
!        the byte-lookup constant at +16
!
!   The xmulx/xmulxhi instructions are emitted as raw .word encodings with
!   the intended mnemonic in the trailing comment, presumably so the file
!   still assembles with tools that lack VIS3 support.  They yield the low
!   and high 64 bits of a 64x64 XOR multiply.  "(x)" in the comments below
!   stands for carry-less (XOR) multiplication.
.globl	gcm_gmult_vis3
.align	32
gcm_gmult_vis3:
	save	%sp,-STACK_FRAME,%sp

	ldx	[%i0+8],%o3		! load Xi
	ldx	[%i0+0],%o4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0 (x) i)&0xff=0xA040608020C0E000

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7		! low 3 bits of %g1 select a byte of %g5
	srlx	%g5,%o7,%o7		! (x)0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1 (x) 0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3	! (x)0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	xor	%g2,%g4,%g4

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_gmult_vis3,#function
.size	gcm_gmult_vis3,.-gcm_gmult_vis3

! gcm_ghash_vis3
!   %i0  16-byte accumulator Xi (high at +0, low at +8), loaded once and
!        stored back after the last block
!   %i1  table laid out as gcm_init_vis3 stores it: twisted H at +0/+8 and
!        the byte-lookup constant at +16
!   %i2  input pointer; any byte alignment is accepted
!   %i3  input length in bytes
!
!   The loop subtracts 16 from %i3 per block and repeats while the result is
!   non-zero, with the test placed after the first block, so the length is
!   assumed to be a non-zero multiple of 16.  NOTE(review): confirm against
!   the callers.
!
!   Input is always read with aligned 64-bit loads from the pointer rounded
!   down to 8 bytes.  When the original pointer was misaligned a third word
!   is loaded and the block is reassembled with shifts:
!     %l0  left shift in bits, (pointer & 7) * 8
!     %l1  negated %l0; srlx uses only the low 6 bits of the count, giving
!          the complementary right shift
!   "(x)" in the comments below stands for carry-less (XOR) multiplication.
.globl	gcm_ghash_vis3
.align	32
gcm_ghash_vis3:
	save	%sp,-STACK_FRAME,%sp
	nop
	srln	%i3,0,%i3		! needed on v8+, "nop" on v9

	ldx	[%i0+8],%g3		! load Xi
	ldx	[%i0+0],%g4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0 (x) i)&0xff=0xA040608020C0E000

	and	%i2,7,%l0		! byte misalignment of the input
	andn	%i2,7,%i2		! round the pointer down to 8 bytes
	sll	%l0,3,%l0		! misalignment in bits
	prefetch [%i2+63], 20
	sub	%g0,%l0,%l1

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
.Loop:
	ldx	[%i2+8],%o3
	brz,pt	%l0,1f			! aligned input needs no merging
	ldx	[%i2+0],%o4		! delay slot, executed on both paths

	ldx	[%i2+16],%g2		! align data
	srlx	%o3,%l1,%g1
	sllx	%o3,%l0,%o3
	sllx	%o4,%l0,%o4
	srlx	%g2,%l1,%g2
	or	%g1,%o4,%o4
	or	%g2,%o3,%o3
1:
	add	%i2,16,%i2
	sub	%i3,16,%i3
	xor	%g3,%o3,%o3		! fold the input block into Xi
	xor	%g4,%o4,%o4
	prefetch [%i2+63], 20

	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7		! low 3 bits of %g1 select a byte of %g5
	srlx	%g5,%o7,%o7		! (x)0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1 (x) 0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3	! (x)0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	brnz,pt	%i3,.Loop		! more input remains
	xor	%g2,%g4,%g4		! delay slot: finishes the high half

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_ghash_vis3,#function
.size	gcm_ghash_vis3,.-gcm_ghash_vis3
.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
.align	4