/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */

/*
 * csum_partial, generic (non-PPro) variant.
 *
 * Arguments are read from the stack; the offsets below are valid once
 * %esi and %ebx have been pushed:
 *	12(%esp)  buff
 *	16(%esp)  len
 *	20(%esp)  sum
 *
 * The 32-bit running sum is returned in %eax.  %esi and %ebx are saved
 * and restored; %ecx and %edx are used as scratch.
 *
 * Register roles in the body:
 *	%eax  running sum
 *	%ecx  remaining length / loop counter
 *	%edx  copy of the length kept across the 32-byte loop
 *	%esi  read pointer
 *	%ebx  data just loaded
 *
 * If buff is odd, one byte is consumed first and the sum is rotated by
 * 8 bits; the rotation is applied a second time just before returning
 * (see label 7).
 */
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2bytes.

	# buf is odd
	dec %ecx
	jl 8f			# len was 0: return sum untouched
	movzbl (%esi), %ebx
	/*
	 * Add the leading byte and fold the carry back in before rotating.
	 * A bare add followed directly by the rotate would throw the carry
	 * away (sum 0xffffffff plus byte 1 would give 0, not 1).  This is
	 * the same add / add-carry pair the PPro variant of this routine
	 * uses for its odd-address byte.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# keep the byte count for the tail code
	shrl $5, %ecx		# number of 32-byte chunks
	jz 2f
	testl %esi, %esi	# clears CF for the first adcl below
	/*
	 * 32 bytes per pass.  movl, lea and dec leave CF alone, so the
	 * carry chain runs unbroken through the whole loop and is folded
	 * in once after it.
	 */
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# exactly one byte
	movw (%esi),%cx
	leal 2(%esi),%esi	# lea keeps the flags from cmpl for je
	je 6f			# exactly two bytes
	shll $16,%ecx		# three bytes: word goes to the upper half
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testl $1, 12(%esp)	# was the original buff odd?
	jz 8f
	roll $8, %eax		# second rotation for the odd-address case
8:
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */
/*
 * csum_partial, PentiumII/PPro variant.
 *
 * Arguments are read from the stack; the offsets below are valid once
 * %esi and %ebx have been pushed:
 *	12(%esp)  buf
 *	16(%esp)  len
 *	20(%esp)  sum
 *
 * The 32-bit running sum is returned in %eax.  %esi and %ebx are saved
 * and restored; %ecx and %edx are used as scratch.
 *
 * The main loop at 40: handles 128 bytes per pass.  The first pass is
 * entered through a computed jump so that it only covers the
 * (len & 0x7c) leading bytes.
 */
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# keep len for the 1-3 byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes handled by the partial first pass
	shrl $7, %ecx		# number of full 128-byte passes
	addl %ebx,%esi		# table entries use negative offsets
	shrl $2, %ebx
	negl %ebx		# ebx = -(dwords in the partial pass)
	/*
	 * Target = 45f + 3 * ebx.  The scale of 3 means every instruction
	 * in the table at 40: has to assemble to exactly 3 bytes.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF before entering the adcl chain
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f			# len was 0: return sum untouched
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b			# more than two bytes left
	je 32f			# exactly two bytes left
	addl $2, %ecx
	jz 80f			# nothing left
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# lea keeps CF for the adcl below
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	/*
	 * esi was advanced by 128 at 45:, so -128(%esi) is the first
	 * unprocessed byte.  A whole dword is loaded and masked down to
	 * the 1-3 bytes that belong to the buffer.
	 */
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testl $1, 12(%esp)	# was the original buf odd?
	jz 90f
	roll $8, %eax		# second rotation for the odd-address case
90:
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 *
 * SRC() wraps an instruction that reads the source and DST() one that
 * writes the destination.  Each emits an __ex_table entry pairing the
 * wrapped instruction with a fixup label: 6001 for source accesses and
 * 6002 for destination accesses.
 *
 * In both variants below:
 *  - the 6001 fixup stores -EFAULT through src_err_ptr, zeroes len bytes
 *    at dst, leaves 0 in %eax and rejoins the normal exit path;
 *  - the 6002 fixup stores -EFAULT through dst_err_ptr and rejoins the
 *    normal exit path with %eax as it was at the faulting instruction.
 */

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * ARGBASE is the distance from %esp to the return address once the
 * scratch slot and the three saved registers are on the stack, so
 * argument n (counting from 1) is at ARGBASE + 4*n.  FP is the offset
 * of the scratch slot reserved by the subl at function entry; it holds
 * the byte count across the 32-byte loop.
 */
#define ARGBASE 16
#define FP		12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	subl  $4,%esp
	CFI_ADJUST_CFA_OFFSET 4
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
	/*
	 * 32 bytes per pass.  The testl above clears CF; movl, lea and
	 * dec do not modify it, so the carry chain runs through the whole
	 * loop and is folded in once after it.
	 */
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	/* NOTE(review): stosb direction relies on DF being clear here. */
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	popl %edi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edi
	popl %ecx			# equivalent to addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/*
 * ROUND1 and ROUND load one dword from x(%esi), add it to the sum and
 * store it to x(%edi).  ROUND1 starts a carry chain with addl; ROUND
 * continues it with adcl.
 */
#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

/*
 * Distance from %esp to the return address once the three saved
 * registers are on the stack; argument n (counting from 1) is at
 * ARGBASE + 4*n.
 */
#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	/*
	 * The value written to edx on the next line is never read: edx is
	 * overwritten by the lea further down before anything uses it.
	 */
	movl %esi, %edx
	shrl $6, %ecx			# number of full 64-byte passes
	andl $0x3c, %ebx		# bytes handled by the partial first pass
	negl %ebx
	subl %ebx, %esi			# ebx is negative: advances src
	subl %ebx, %edi			# ... and dst by the same amount
	lea  -1(%esi),%edx
	andl $-32,%edx
	/*
	 * Target = 3f + 2 * ebx, i.e. 8 bytes of code are skipped back for
	 * every 4 bytes of data, so each ROUND expansion has to assemble to
	 * exactly 8 bytes.
	 */
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi		# clears CF before entering the adcl chain
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	/*
	 * NOTE(review): the two byte loads below are overwritten by the
	 * first ROUND; presumably they only touch the source ahead of the
	 * copy - confirm.
	 */
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx         )
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl         )
DST(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl	ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	/* NOTE(review): stosb direction relies on DF being clear here. */
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp  7b
.previous

	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	popl %edi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edi
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif