/* poly1305-mips64.S revision 1.1 */
/*
 * Poly1305 for MIPS64 (GNU as syntax, passed through the C preprocessor).
 *
 * Entry points and the argument registers each one reads:
 *
 *   poly1305_init    $4 = ctx, $5 = key pointer (may be NULL)
 *                    returns 0 in $2
 *   poly1305_blocks  $4 = ctx, $5 = input, $6 = length in bytes,
 *                    $7 = value added to h2 for every block
 *   poly1305_emit    $4 = ctx, $5 = 16-byte output buffer,
 *                    $6 = nonce as four 32-bit words
 *
 * Context layout (six 64-bit words, 48 bytes):
 *
 *    0  h0   accumulator, bits   0..63
 *    8  h1   accumulator, bits  64..127
 *   16  h2   accumulator, bits 128 and up
 *   24  r0   clamped key, low word
 *   32  r1   clamped key, high word
 *   40  s1   r1 + (r1 >> 2)
 *
 * Key and message words are consumed as little-endian 64-bit values; on
 * MIPSEB builds every loaded word is byte-reversed first.  MSB and LSB are
 * the byte offsets handed to ldl/ldr so that the pair performs one
 * unaligned 64-bit load on either endianness.
 *
 * $1 (at) is used as an ordinary scratch register throughout, which is
 * why ".set noat" is in force for the whole file.
 */
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

/*
 * poly1305_init
 *
 * In:    $4 = ctx, $5 = key pointer or NULL
 * Out:   $2 = 0
 * Uses:  $1, $2, $8-$11, $24 ($1, $2, $11 and $24 only on the MIPSEB
 *        path without MIPS64R2)
 *
 * Always zeroes h0..h2.  When a key is supplied, its first 16 bytes are
 * loaded, clamped and stored as r0/r1 together with s1.  With a NULL key
 * the r0/r1/s1 slots are left untouched.
 */
.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$29,0,$31
	.set	reorder

	sd	$0,0($4)		# h0 = 0
	sd	$0,8($4)		# h1 = 0
	sd	$0,16($4)		# h2 = 0

	beqz	$5,.Lno_key		# NULL key: only reset the accumulator

	ldl	$8,0+MSB($5)		# $8 = key bytes 0..7  (unaligned load)
	ldl	$9,8+MSB($5)		# $9 = key bytes 8..15
	ldr	$8,0+LSB($5)
	ldr	$9,8+LSB($5)
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8			# byte swap
	dsbh	$9,$9
	dshd	$8,$8
	dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1			# 0x000000FF000000FF

	and	$11,$8,$10		# byte swap
	and	$2,$9,$10
	dsrl	$1,$8,24
	dsrl	$24,$9,24
	dsll	$11,24
	dsll	$2,24
	and	$1,$10
	and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	or	$2,$24
	and	$1,$8,$10
	and	$24,$9,$10
	dsrl	$8,8
	dsrl	$9,8
	dsll	$1,8
	dsll	$24,8
	and	$8,$10
	and	$9,$10
	or	$11,$1
	or	$2,$24
	or	$8,$11			# each 32-bit half is now byte-reversed
	or	$9,$2
	dsrl	$11,$8,32		# exchange the two halves
	dsrl	$2,$9,32
	dsll	$8,32
	dsll	$9,32
	or	$8,$11
	or	$9,$2
# endif
#endif
	li	$10,1			# build the clamp mask:
	dsll	$10,32			#   0x0000000100000000
	daddiu	$10,-63			#   0x00000000ffffffc1
	dsll	$10,28			#   0x0ffffffc10000000
	daddiu	$10,-1			# 0ffffffc0fffffff

	and	$8,$10			# r0 = key word 0, clamped
	daddiu	$10,-3			# 0ffffffc0ffffffc
	and	$9,$10			# r1 = key word 1, clamped

	sd	$8,24($4)		# store r0
	dsrl	$10,$9,2
	sd	$9,32($4)		# store r1
	daddu	$10,$9			# s1 = r1 + (r1 >> 2)
	sd	$10,40($4)		# store s1

.Lno_key:
	li	$2,0			# return 0
	jr	$31
.end	poly1305_init

/*
 * poly1305_blocks
 *
 * In:    $4 = ctx, $5 = input, $6 = length in bytes, $7 = per-block
 *        addend for h2
 *
 * Converts the byte length into a count of complete 16-byte blocks and
 * returns at once when that count is zero.  Otherwise control is handed
 * to poly1305_blocks_internal with a plain branch, so the worker returns
 * straight to the original caller.  Trailing bytes beyond the last
 * complete block are ignored.
 */
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$6,4			# number of complete blocks
	bnez	$6,poly1305_blocks_internal
	nop				# branch delay slot
	jr	$31
	nop				# jump delay slot
.end	poly1305_blocks

/*
 * poly1305_blocks_internal
 *
 * In:    $4 = ctx, $5 = input, $6 = block count (must be non-zero: the
 *        loop decrements before it tests), $7 = per-block addend for h2
 *        (presumably the caller-supplied pad bit - confirm against the
 *        C caller)
 * Frame: 48 bytes; $17 saved at 40($29), $16 at 32($29)
 *
 * Register roles inside the loop:
 *   $12,$13,$14  h0, h1, h2   accumulator
 *   $15,$16,$17  r0, r1, s1   key material
 *   $8, $9       input words, then d0, d1 (low product words)
 *   $25          d2 (top product word)
 *   $1,$2,$10,$11,$24  scratch ($24 only in the MIPSEB byte swap)
 *
 * Per block: h += input, h2 += $7, then h = h * r reduced modulo
 * 2^130 - 5.  The reduction is partial; the final compare against the
 * modulus is done in poly1305_emit.
 */
.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.frame	$29,6*8,$31
	.mask	0x00030000,-8
	.set	noreorder
	dsubu	$29,6*8			# open frame (non-trapping pointer math)
	sd	$17,40($29)
	sd	$16,32($29)
	.set	reorder

	ld	$12,0($4)		# load hash value
	ld	$13,8($4)
	ld	$14,16($4)

	ld	$15,24($4)		# load key
	ld	$16,32($4)
	ld	$17,40($4)

.Loop:
	ldl	$8,0+MSB($5)		# load input
	ldl	$9,8+MSB($5)
	ldr	$8,0+LSB($5)
	daddiu	$6,-1			# one block fewer to go
	ldr	$9,8+LSB($5)
	daddiu	$5,16			# advance input pointer
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8			# byte swap
	dsbh	$9,$9
	dshd	$8,$8
	dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1			# 0x000000FF000000FF

	and	$11,$8,$10		# byte swap
	and	$2,$9,$10
	dsrl	$1,$8,24
	dsrl	$24,$9,24
	dsll	$11,24
	dsll	$2,24
	and	$1,$10
	and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	or	$2,$24
	and	$1,$8,$10
	and	$24,$9,$10
	dsrl	$8,8
	dsrl	$9,8
	dsll	$1,8
	dsll	$24,8
	and	$8,$10
	and	$9,$10
	or	$11,$1
	or	$2,$24
	or	$8,$11			# each 32-bit half is now byte-reversed
	or	$9,$2
	dsrl	$11,$8,32		# exchange the two halves
	dsrl	$2,$9,32
	dsll	$8,32
	dsll	$9,32
	or	$8,$11
	or	$9,$2
# endif
#endif
	daddu	$12,$8			# accumulate input
	daddu	$13,$9
	sltu	$10,$12,$8		# carry out of h0
	sltu	$11,$13,$9		# carry out of h1 + input word 1
	daddu	$13,$10			# h1 += carry from h0

	dmultu	$15,$12			# h0*r0
	daddu	$14,$7			# h2 += per-block addend
	sltu	$10,$13,$10		# carry out of h1 + carry
	mflo	$8			# d0
	mfhi	$9			# d1

	dmultu	$17,$13			# h1*5*r1
	daddu	$10,$11			# merge both carries out of h1
	daddu	$14,$10			# h2 += carries
	mflo	$10
	mfhi	$11

	dmultu	$16,$12			# h0*r1
	daddu	$8,$10			# d0 += lo(h1*s1)
	daddu	$9,$11			# d1 += hi(h1*s1)
	mflo	$1
	mfhi	$25			# d2
	sltu	$10,$8,$10		# carry out of d0
	daddu	$9,$10

	dmultu	$15,$13			# h1*r0
	daddu	$9,$1			# d1 += lo(h0*r1)
	sltu	$1,$9,$1		# carry out of d1
	mflo	$10
	mfhi	$11
	daddu	$25,$1

	dmultu	$17,$14			# h2*5*r1
	daddu	$9,$10			# d1 += lo(h1*r0)
	daddu	$25,$11			# d2 += hi(h1*r0)
	mflo	$1

	dmultu	$15,$14			# h2*r0
	sltu	$10,$9,$10		# carry out of d1
	daddu	$25,$10
	mflo	$2

	daddu	$9,$1			# d1 += lo(h2*s1)
	daddu	$25,$2			# d2 += lo(h2*r0)
	sltu	$1,$9,$1		# carry out of d1
	daddu	$25,$1

	li	$10,-4			# final reduction
	and	$10,$25			# d2 with the low two bits cleared
	dsrl	$11,$25,2		# d2 >> 2
	andi	$14,$25,3		# h2 = low two bits of d2
	daddu	$10,$11			# 5 * (d2 >> 2)
	daddu	$12,$8,$10		# h0 = d0 + 5 * (d2 >> 2)
	sltu	$10,$12,$10
	daddu	$13,$9,$10		# h1 = d1 + carry
	sltu	$10,$13,$10
	daddu	$14,$14,$10		# h2 += carry

	bnez	$6,.Loop

	sd	$12,0($4)		# store hash value
	sd	$13,8($4)
	sd	$14,16($4)

	.set	noreorder
	ld	$17,40($29)		# epilogue
	ld	$16,32($29)
	jr	$31
	daddu	$29,6*8			# delay slot: release frame
.end	poly1305_blocks_internal

/*
 * poly1305_emit
 *
 * In:    $4 = ctx, $5 = 16-byte output buffer, $6 = nonce (four 32-bit
 *        words, read with lwu)
 * Uses:  $1, $2, $8-$11
 *
 * Computes h + 5 alongside h and picks one of the two with a mask rather
 * than a branch: h + 5 is taken when the addition carries into bit 2 of
 * h2, otherwise h is kept.  Only the low 128 bits of the result are used.
 * The mask is all ones only when (h2 + carry) >> 2 equals 1, so this
 * relies on h2 having been left small by the block function.
 *
 * The 128-bit nonce is then added (the carry out of bit 127 is dropped)
 * and the sum is written to the output one byte at a time, least
 * significant byte first, so the output pointer needs no alignment.
 */
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$29,0,$31
	.set	reorder

	ld	$10,0($4)		# h0
	ld	$11,8($4)		# h1
	ld	$1,16($4)		# h2

	daddiu	$8,$10,5		# compare to modulus
	sltiu	$2,$8,5			# carry out of h0 + 5
	daddu	$9,$11,$2
	sltu	$2,$9,$2		# carry out of h1 + carry
	daddu	$1,$1,$2

	dsrl	$1,2			# see if it carried/borrowed
	dsubu	$1,$0,$1		# $1 = mask selecting h + 5
	nor	$2,$0,$1		# $2 = complement, selecting h

	and	$8,$1
	and	$10,$2
	and	$9,$1
	and	$11,$2
	or	$8,$10			# $8 = selected low word
	or	$9,$11			# $9 = selected high word

	lwu	$10,0($6)		# load nonce
	lwu	$11,4($6)
	lwu	$1,8($6)
	lwu	$2,12($6)
	dsll	$11,32
	dsll	$2,32
	or	$10,$11			# nonce bits  0..63
	or	$1,$2			# nonce bits 64..127

	daddu	$8,$10			# accumulate nonce
	daddu	$9,$1
	sltu	$10,$8,$10		# carry out of the low word
	daddu	$9,$10

	dsrl	$10,$8,8		# write mac value
	dsrl	$11,$8,16
	dsrl	$1,$8,24
	sb	$8,0($5)
	dsrl	$2,$8,32
	sb	$10,1($5)
	dsrl	$10,$8,40
	sb	$11,2($5)
	dsrl	$11,$8,48
	sb	$1,3($5)
	dsrl	$1,$8,56
	sb	$2,4($5)
	dsrl	$2,$9,8
	sb	$10,5($5)
	dsrl	$10,$9,16
	sb	$11,6($5)
	dsrl	$11,$9,24
	sb	$1,7($5)

	sb	$9,8($5)
	dsrl	$1,$9,32
	sb	$2,9($5)
	dsrl	$2,$9,40
	sb	$10,10($5)
	dsrl	$10,$9,48
	sb	$11,11($5)
	dsrl	$11,$9,56
	sb	$1,12($5)
	sb	$2,13($5)
	sb	$10,14($5)
	sb	$11,15($5)

	jr	$31
.end	poly1305_emit

.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by <appro@openssl.org>"
.align	2