poly1305-x86.S revision 1.2
#include <machine/asm.h>

/*
 * Poly1305 for 32-bit x86, integer-only code (GAS / AT&T syntax).
 *
 * All three entry points take their arguments from the stack (first
 * argument at 4(%esp) on entry) and save/restore %ebp, %ebx, %esi and
 * %edi.  %eax, %ecx and %edx are used as scratch.
 *
 * Layout of the state block (ctx) as used by the code in this file:
 *      0..19   h0..h4   five 32-bit accumulator words
 *     20..23            cleared by poly1305_init, not used elsewhere here
 *     24..39   r0..r3   four key words after masking in poly1305_init
 */
.text
.align	64

/*
 * poly1305_init(ctx, key, arg3)
 *
 *   ctx   state block; bytes 0..23 are cleared
 *   key   16 key bytes, or NULL to only clear the state
 *   arg3  loaded into %ebp but not used by this routine
 *
 * Returns 0 in %eax on both paths.
 */
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* edi = ctx */
	movl	24(%esp),%esi		/* esi = key */
	movl	28(%esp),%ebp		/* third argument (unused below) */

	xorl	%eax,%eax		/* clear h0..h4 and the word after them */
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)

	testl	%esi,%esi		/* no key: return with eax still 0 */
	je	.L000nokey

	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	/*
	 * Mask the key words: top four bits of every word cleared, low two
	 * bits of words 1..3 cleared.  poly1305_blocks relies on those low
	 * two bits being zero when it forms r + (r >> 2).
	 */
	andl	$0x0fffffff,%eax
	andl	$0x0ffffffc,%ebx
	andl	$0x0ffffffc,%ecx
	andl	$0x0ffffffc,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	xorl	%eax,%eax		/* return 0 */
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin

/*
 * poly1305_blocks(ctx, inp, len, arg4)
 *
 *   ctx   state block (h0..h4 read and written back, r0..r3 read)
 *   inp   input bytes
 *   len   byte count; rounded DOWN to a multiple of 16, nothing is done
 *         if fewer than 16 bytes remain after rounding
 *   arg4  value added (with carry) into h4 for every 16-byte block;
 *         presumably the Poly1305 pad bit -- confirm against the caller
 *
 * For every 16-byte block: h += block (+ arg4 in the top word), then
 * h = h * r with the part above bit 130 folded back in as 5 * (top >> 2).
 *
 * Stack frame after "subl $64,%esp":
 *      0(%esp)           h0 of the current iteration
 *     12(%esp),16(%esp)  h3, h4 of the current iteration
 *     20..28(%esp)       product words d0, d1, d2
 *     36..48(%esp)       r0, r1, r2, r3
 *     52..60(%esp)       s1, s2, s3 where s = r + (r >> 2)
 *     84(%esp)           ctx argument
 *     92(%esp)           len argument slot, reused to hold inp + len
 *     96(%esp)           arg4
 *
 * In the loop: %eax,%ebx,%ecx,%esi,%edi = h0..h4 at the top and bottom
 * of each iteration, %ebp = current input pointer.
 */
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* edi = ctx */
	movl	24(%esp),%esi		/* esi = inp */
	movl	28(%esp),%ecx		/* ecx = len */
.L001enter_blocks:
	/*
	 * Round len down to whole 16-byte blocks.  The mask has to be -16
	 * (0xfffffff0): a mask of -15 keeps bit 0, and with an odd length
	 * the end pointer inp + len is never hit by the 16-byte stride, so
	 * the loop below would run off the end of the input.
	 */
	andl	$-16,%ecx
	jz	.L002nodata
	subl	$64,%esp

	movl	24(%edi),%eax		/* r0 */
	movl	28(%edi),%ebx		/* r1 */
	leal	(%esi,%ecx,1),%ebp	/* ebp = inp + len (end pointer) */
	movl	32(%edi),%ecx		/* r2 */
	movl	36(%edi),%edx		/* r3 */
	movl	%ebp,92(%esp)		/* end pointer replaces len on the stack */
	movl	%esi,%ebp		/* ebp = running input pointer */

	movl	%eax,36(%esp)		/* r0 */
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)		/* r1 */
	addl	%ebx,%eax		/* s1 = r1 + (r1 >> 2) */
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)		/* r2 */
	addl	%ecx,%ebx		/* s2 = r2 + (r2 >> 2) */
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)		/* r3 */
	addl	%edx,%ecx		/* s3 = r3 + (r3 >> 2) */
	movl	%eax,52(%esp)		/* s1 */
	movl	%ebx,56(%esp)		/* s2 */
	movl	%ecx,60(%esp)		/* s3 */

	movl	(%edi),%eax		/* h0 */
	movl	4(%edi),%ebx		/* h1 */
	movl	8(%edi),%ecx		/* h2 */
	movl	12(%edi),%esi		/* h3 */
	movl	16(%edi),%edi		/* h4 (ctx pointer is reloaded from the stack later) */
	jmp	.L003loop
.align	32
.L003loop:
	/* h += next 16 input bytes, arg4 goes into the top word */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp		/* advance input; lea leaves CF intact */
	adcl	96(%esp),%edi
	movl	%eax,(%esp)		/* spill h0 */
	movl	%esi,12(%esp)		/* spill h3 */

	/* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1, accumulated in esi:edi */
	mull	36(%esp)
	movl	%edi,16(%esp)		/* spill h4 */
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi

	/* d1 = carry + h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1, in edi:esi */
	mull	40(%esp)
	movl	%edi,20(%esp)		/* store d0 */
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax		/* h4*s1: only the low 32 bits are kept */
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi

	/* d2 = carry + h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2, in esi:edi */
	mull	44(%esp)
	movl	%esi,24(%esp)		/* store d1 */
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax		/* h4*s2: only the low 32 bits are kept */
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi

	/* d3 = carry + h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3, in edi:esi */
	mull	48(%esp)
	movl	%edi,28(%esp)		/* store d2 */
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx		/* ecx = h4 */
	adcl	%edx,%edi
	movl	%ecx,%edx		/* edx = h4 */
	imull	60(%esp),%ecx		/* h4*s3 */
	addl	%ecx,%esi
	movl	20(%esp),%eax		/* eax = d0 */
	adcl	$0,%edi

	/* d4 = carry + h4*r0 */
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx		/* ebx = d1 */
	movl	28(%esp),%ecx		/* ecx = d2 */

	/*
	 * Fold the top word: keep its low two bits as the new h4 and add
	 * 5 * (d4 >> 2) back into the bottom of the accumulator.
	 */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx	/* edx = 5 * (d4 >> 2) */
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi

	cmpl	92(%esp),%ebp		/* reached inp + len? */
	jne	.L003loop

	movl	84(%esp),%edx		/* edx = ctx (fetched before the frame is dropped) */
	addl	$64,%esp
	movl	%eax,(%edx)		/* write h0..h4 back */
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L002nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin

/*
 * poly1305_emit(ctx, mac, nonce)
 *
 *   ctx    state block; h0..h4 are read, nothing is written to it
 *   mac    receives 16 output bytes
 *   nonce  four 32-bit words added (with carry between words) to the
 *          selected value before it is stored
 *
 * Computes g = h + 5 across all five words and builds an all-ones or
 * all-zero mask from the top word of g shifted right by two.  The low
 * four words of g are selected when the mask is ones, those of h
 * otherwise; the choice is made with and/or masking rather than a branch.
 * The mac buffer is used as scratch for the masked g before the final
 * store.
 */
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp		/* ebp = ctx */
.L004enter_emit:
	movl	24(%esp),%edi		/* edi = mac */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi

	addl	$5,%eax			/* g = h + 5 */
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi			/* bits of g from bit 130 upwards */
	negl	%esi			/* mask = -(g >> 130) */

	andl	%esi,%eax		/* g & mask, parked in the mac buffer */
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)

	notl	%esi			/* ~mask selects the original h */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp		/* ebp = nonce (ctx no longer needed) */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax		/* merge: (h & ~mask) | (g & mask) */
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx

	addl	(%ebp),%eax		/* add the nonce words, carry out of the top is dropped */
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin

/* ASCII: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4