/* $Id: checksum.S,v 1.1.1.1 2007/08/03 18:52:16 Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 *
 * In:	r4 = buf, r5 = len, r6 = sum (initial value, folded in)
 * Out:	r0 = 32-bit ones-complement partial sum
 *
 * The running sum is kept in r6; carries out of each addc are folded
 * back in via the T bit (movt / addc of a known-zero register).
 */

.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0	! Consume the leading 16-bit word ...
	extu.w	r0, r0
	addc	r0, r6		! ... so the main loop below is 4-byte aligned.
	bf	2f
	add	#1, r6		! Fold the carry back into the sum.
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = len >> 5: number of 32-byte chunks.
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align	2
3:
	! Unrolled loop: sum eight longwords (32 bytes) per iteration,
	! carrying between addc's via the T bit.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! Preserve the carry across the dt below ...
	dt	r5
	bf/s	3b
	cmp/eq	#1, r0		! ... and restore it into T for the next addc.
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! Residual longwords (len & 0x1c bytes).
	tst	r0, r0
	bt/s	6f
	mov	r0, r5
	shlr2	r5		! r5 = residual longword count.
	mov	#0, r2
5:
	! Sum one longword per iteration, same movt/cmp carry dance.
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	! Handle the trailing 0-3 bytes (len & 3).
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! Only one byte left.
	mov.w	@r4+, r0	! Two or three bytes: add a 16-bit word first.
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	clrt
	shll16	r0		! Three bytes: word goes in the high half.
	addc	r0, r6
7:
	mov.b	@r4+, r0	! Final odd byte.
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! Big-endian: odd byte occupies the high lane.
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Fold in the last carry.
9:
	rts
	mov	r6, r0		! Return sum (delay slot).

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * The SRC/DST macros wrap each faulting load/store with an __ex_table
 * entry so a user-space fault jumps to the matching fixup handler:
 * 6001 (source fault) or 6002 (destination fault) in .fixup below.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
! Returns the partial checksum in r0; copies LEN bytes from SRC to DST
! while summing them into SUM (r7).  On a fault, -EFAULT is stored via
! the corresponding error pointer by the .fixup code below.
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15	! Save dst and len for the fixup handlers.
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = byte pairs to copy.
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5	)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Combine the two bytes into one 16-bit value in memory order
	! before summing, so the result matches a word-wise checksum.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! Carry survives the dt via r0/cmp below.
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7		! Fold final carry.

	mov	r2, r0
	tst	#1, r0		! Odd total length: one trailing byte left.
	bt	7f
	bra	5f
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2		! Save len; residual passes re-derive from r2.
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5: 32-byte chunks.
	tst	r6,r6
	bt/s	2f
	clrt
	.align	2
1:
	! Unrolled copy+checksum: 32 bytes per iteration.  Loads go
	! through SRC(), stores through DST(), so either side may fault.
SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! Preserve carry across dt, as in csum_partial.
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! Residual longword bytes (len & 0x1c).
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! r6 = residual longword count.
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! Trailing 0-3 bytes.
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! Only one byte left.
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	clrt
	shll16	r0		! Three bytes: word occupies the high half.
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! Big-endian: odd byte in the high lane.
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:
	! Common exit point (also the resume target after a fixup).

# Exception handler:
.section .fixup, "ax"

6001:
	! Source fault: report -EFAULT through *src_err_ptr, then zero
	! the whole destination rather than track how much was copied.
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst (saved at function entry)
	mov.l	@r15,r6		! len (saved at function entry)
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0	! Resume at the common exit (5000).
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination fault: report -EFAULT through *dst_err_ptr.
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0	! Resume at the common exit (5000).
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! Pop the saved dst/len.
	rts
	mov	r7,r0		! Return sum (delay slot).