#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

!=======================================================================
! GHASH for SPARCv9 and SPARCv9/VIS3.
!
! Syntax: SPARC assembler, passed through the C preprocessor first.
! The frame size, the condition-code selector used by be,pn and the
! srln mnemonic are not defined in this file; presumably they come from
! the included header - confirm there.
!
! Two implementations live here:
!   gcm_ghash_4bit / gcm_gmult_4bit   table driven, 4 bits per step
!   gcm_init_vis3 / gcm_gmult_vis3 / gcm_ghash_vis3
!                                     based on xmulx/xmulxhi, which are
!                                     emitted as raw .word encodings
!
! Argument names used in the comments (Xi, H, input, length) follow the
! comments already present in the VIS3 routines.  The C prototypes are
! not visible here - confirm against the callers.
!
! Every branch, call and ret below is followed by a delay-slot
! instruction that is executed as well (no annul bits are used), so the
! instruction order must not be changed.
!=======================================================================

.section	".text",#alloc,#execinstr

!-----------------------------------------------------------------------
! rem_4bit: 16 entries of 8 bytes each, indexed by
! (low 4 bits of the accumulator) * 8 and XORed into the upper
! accumulator word after each 4-bit right shift.
! The non-zero .long of entry i is, in hex, <value> << 16 with <value> =
!   0000 1C20 3840 2460  7080 6CA0 48C0 54E0
!   E100 FD20 D940 C560  9180 8DA0 A9C0 B5E0
!-----------------------------------------------------------------------
.align	64
rem_4bit:
	.long	0,0,471859200,0,943718400,0,610271232,0
	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
.type	rem_4bit,#object
.size	rem_4bit,(.-rem_4bit)

!-----------------------------------------------------------------------
! gcm_ghash_4bit
!
! In:	%i0 = 16-byte hash state Xi.  Read byte-wise, written back as
!	      two 64-bit words: upper word at [%i0], lower at [%i0+8].
!	%i1 = table of 16-byte entries addressed by nibble*16; the
!	      upper word of an entry is at +0, the lower word at +8.
!	%i2 = input pointer, advanced by 16 per outer iteration.
!	%i3 = input length in bytes (converted to an end pointer).
! Out:	Xi updated in place; no value is returned in a register.
!
! NOTE(review): the outer loop is bottom-tested with an equality
! compare against the end pointer, so the length is assumed to be a
! non-zero multiple of 16 - confirm against the callers.
!
! Register use:
!	%l1 = (low nibble of Xi byte ^ input byte) * 16
!	%l0 = (high nibble of Xi byte ^ input byte) * 16
!	%l2,%l3 = Xi bytes 15 and 14 carried over between blocks
!	%l4 = address of rem_4bit	%l6 = %i1 + 8
!	%l5 = rem_4bit offset		%l7 = byte index, 13 down to 0
!	%o1:%o0 = accumulator, lower:upper word
!	%o3:%o2 = current table entry, lower:upper word
!	%o4 = current rem_4bit entry	%o5 = bits moving from %o0 to %o1
!
! One step is: remember the low 4 bits of the accumulator, shift the
! 128-bit accumulator right by 4, XOR in the table entry, XOR the
! rem_4bit entry into the upper word.  The steps are software
! pipelined: the XORs into %o0 for one step are issued while the loads
! for the next step are already in flight.
!-----------------------------------------------------------------------
.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
	save	%sp,-STACK_FRAME,%sp
	ldub	[%i2+15],%l1		! last byte of first input block
	ldub	[%i0+15],%l2		! Xi[15]
	ldub	[%i0+14],%l3		! Xi[14]
	add	%i3,%i2,%i3		! length -> end pointer
	add	%i1,8,%l6		! base of the lower table words

	! PC-relative address of rem_4bit: call leaves its own address
	! in %o7 and lands right after the delay slot.
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

.Louter:
	! byte 15, low nibble: the accumulator starts as the table entry
	xor	%l2,%l1,%l1
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i2+14],%l1		! next input byte

	! byte 15, high nibble
	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! bytes 13..0 are done in the inner loop
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	! set up the nibble offsets for byte 14
	xor	%l3,%l1,%l1
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lghash_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lghash_inner:
	! low nibble of the current byte; also fetch the next byte pair
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+%l7],%l1		! next input byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	ldub	[%i0+%l7],%l3		! matching Xi byte
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! high nibble of the current byte; derive offsets for the next
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%l3,%l1,%l1
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lghash_inner		! taken while the carry is set
	and	%o1,0xf,%l5		! delay slot

	! byte 0, low nibble
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0

	add	%i2,16,%i2		! advance to the next input block
	cmp	%i2,%i3
	be,pn	SIZE_T_CC,.Ldone	! end pointer reached
	and	%o1,0xf,%l5		! delay slot

	! byte 0, high nibble, more input follows: store Xi and keep its
	! two lowest bytes in registers for the next block
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+15],%l1		! last byte of the next input block
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]
	srl	%o1,8,%l3
	and	%o1,0xff,%l2		! new Xi[15]
	ba	.Louter
	and	%l3,0xff,%l3		! delay slot: new Xi[14]
.align	32
.Ldone:
	! byte 0, high nibble, last block: store Xi and return
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_ghash_4bit,#function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

!-----------------------------------------------------------------------
! gcm_gmult_4bit
!
! In:	%i0 = 16-byte hash state Xi, read byte-wise and overwritten
!	      with the result (upper word at [%i0], lower at [%i0+8]).
!	%i1 = table of 16-byte entries, laid out as for gcm_ghash_4bit.
! Out:	Xi updated in place; no value is returned in a register.
!
! Same step sequence and register use as gcm_ghash_4bit, except that
! the nibbles come from Xi alone (no input is XORed in) and exactly one
! 16-byte block is processed.
!-----------------------------------------------------------------------
.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
	save	%sp,-STACK_FRAME,%sp
	ldub	[%i0+15],%l1		! Xi[15]
	add	%i1,8,%l6		! base of the lower table words

	! PC-relative address of rem_4bit, see gcm_ghash_4bit
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

	! byte 15, low nibble: the accumulator starts as the table entry
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i0+14],%l1		! Xi[14]

	! byte 15, high nibble
	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! bytes 13..0 are done in the inner loop
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	! set up the nibble offsets for byte 14
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lgmult_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lgmult_inner:
	! low nibble of the current byte; also fetch the next Xi byte
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i0+%l7],%l1		! next Xi byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! high nibble of the current byte; derive offsets for the next
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lgmult_inner		! taken while the carry is set
	and	%o1,0xf,%l5		! delay slot

	! byte 0, low nibble
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! byte 0, high nibble, then store Xi
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_gmult_4bit,#function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

!-----------------------------------------------------------------------
! gcm_init_vis3
!
! In:	%i0 = output area, 24 bytes are written
!	%i1 = H as two 64-bit words: upper at [%i1+0], lower at [%i1+8]
! Out:	[%i0+0],[%i0+8] = twisted H (upper, lower word)
!	[%i0+16]        = constant 0xA040608020C0E000
!
! Twisted H is H shifted left by one bit as a 128-bit value.  If the
! bit shifted out was set, 1 is XORed into the lower word and
! 0xE1<<57 into the upper word.  In a 64-bit register the top bit of
! 0xE1 falls off in that shift, which leaves 0xC2 in the top byte.
!-----------------------------------------------------------------------
.globl	gcm_init_vis3
.align	32
gcm_init_vis3:
	save	%sp,-STACK_FRAME,%sp

	ldx	[%i1+0],%o2
	ldx	[%i1+8],%o1
	mov	0xE1,%o4
	mov	1,%o3
	sllx	%o4,57,%o4
	srax	%o2,63,%g1		! broadcast carry
	addcc	%o1,%o1,%o1		! H<<=1
	.word	0x95b2822a		!addxc	%o2,%o2,%o2
	and	%g1,%o3,%o3
	and	%g1,%o4,%o4
	xor	%o3,%o1,%o1
	xor	%o4,%o2,%o2
	stx	%o1,[%i0+8]		! save twisted H
	stx	%o2,[%i0+0]

	sethi	%hi(0xA0406080),%g5
	sethi	%hi(0x20C0E000),%l0
	or	%g5,%lo(0xA0406080),%g5
	or	%l0,%lo(0x20C0E000),%l0
	sllx	%g5,32,%g5
	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
	stx	%g5,[%i0+16]

	ret
	restore
.type	gcm_init_vis3,#function
.size	gcm_init_vis3,.-gcm_init_vis3

!-----------------------------------------------------------------------
! gcm_gmult_vis3
!
! In:	%i0 = Xi, two 64-bit words (upper at +0, lower at +8),
!	      overwritten with the result
!	%i1 = area written by gcm_init_vis3: twisted H at +0 and +8,
!	      byte table constant at +16
! Out:	Xi updated in place; no value is returned in a register.
!
! Register use:
!	%o3:%o4 = Xi lower:upper	%o1:%o2 = H lower:upper
!	%o0 = %o2 ^ %o1			%o5 = 0xE1<<57
!	%g5 = byte table constant	%g1..%g4, %o7 = work registers
!
! xmulx and xmulxhi are emitted as .word because they are VIS3
! instructions; the intended mnemonic follows each encoding.
!-----------------------------------------------------------------------
.globl	gcm_gmult_vis3
.align	32
gcm_gmult_vis3:
	save	%sp,-STACK_FRAME,%sp

	ldx	[%i0+8],%o3		! load Xi
	ldx	[%i0+0],%o4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
	.word	0x83b2e2a9		!xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8		!xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9		!xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8		!xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca		!xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa		!xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd		!xmulxhi	%g1,%o5,%o3	! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad		!xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd		!xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	xor	%g2,%g4,%g4

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_gmult_vis3,#function
.size	gcm_gmult_vis3,.-gcm_gmult_vis3

!-----------------------------------------------------------------------
! gcm_ghash_vis3
!
! In:	%i0 = Xi, two 64-bit words (upper at +0, lower at +8),
!	      overwritten with the result
!	%i1 = area written by gcm_init_vis3 (twisted H, table constant)
!	%i2 = input pointer, any byte alignment
!	%i3 = input length in bytes
! Out:	Xi updated in place; no value is returned in a register.
!
! NOTE(review): the loop subtracts 16 from the length and repeats while
! the result is non-zero, so the length is assumed to be a non-zero
! multiple of 16 - confirm against the callers.
!
! Unaligned input: %i2 is rounded down to a multiple of 8 and each
! block is assembled from three aligned 64-bit loads, shifted by
! %l0 = misalignment in bits and %l1 = -%l0 (shift counts use only the
! low 6 bits).  The third load is skipped when %l0 is zero.
!
! Register use inside the loop:
!	%g3:%g4 = running Xi lower:upper
!	%o3:%o4 = current block lower:upper, XORed with Xi before the
!	          multiplication
!	all others as in gcm_gmult_vis3
!-----------------------------------------------------------------------
.globl	gcm_ghash_vis3
.align	32
gcm_ghash_vis3:
	save	%sp,-STACK_FRAME,%sp
	nop
	srln	%i3,0,%i3		! needed on v8+, "nop" on v9

	ldx	[%i0+8],%g3		! load Xi
	ldx	[%i0+0],%g4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	and	%i2,7,%l0		! misalignment in bytes
	andn	%i2,7,%i2		! round the pointer down to 8
	sll	%l0,3,%l0		! misalignment in bits
	prefetch	[%i2+63], 20
	sub	%g0,%l0,%l1		! complementary shift count

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
.Loop:
	ldx	[%i2+8],%o3
	brz,pt	%l0,1f			! aligned input: skip the merge
	ldx	[%i2+0],%o4		! delay slot

	ldx	[%i2+16],%g2		! align data
	srlx	%o3,%l1,%g1
	sllx	%o3,%l0,%o3
	sllx	%o4,%l0,%o4
	srlx	%g2,%l1,%g2
	or	%g1,%o4,%o4
	or	%g2,%o3,%o3
1:
	add	%i2,16,%i2
	sub	%i3,16,%i3
	xor	%g3,%o3,%o3		! Xi ^= input block
	xor	%g4,%o4,%o4
	prefetch	[%i2+63], 20

	.word	0x83b2e2a9		!xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8		!xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9		!xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8		!xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca		!xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa		!xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd		!xmulxhi	%g1,%o5,%o3	! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad		!xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd		!xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	brnz,pt	%i3,.Loop		! more input left
	xor	%g2,%g4,%g4		! delay slot

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_ghash_vis3,#function
.size	gcm_ghash_vis3,.-gcm_ghash_vis3
.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
.align	4