/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * In:	r4 = buf, r5 = len (bytes), r6 = 32-bit accumulated sum so far
 * Out:	r0 = updated 32-bit ones'-complement sum (NOT folded to 16 bits;
 *	the caller folds it)
 * Clobbers: r0-r3, r7, T bit
 *
 * Strategy: peel leading bytes until r4 is 4-byte aligned, sum 32 bytes
 * per iteration of the unrolled loop, then mop up the remaining words
 * and trailing 1-3 bytes.  If buf started on an odd address, the whole
 * sum is byte-rotated at the end so the result matches what a properly
 * aligned sum would have produced (ones'-complement addition is
 * byte-order/rotation friendly).
 */

.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	mov	r4, r7		! Keep a copy to check for alignment
	!			! (r7 bit0 decides the final byte-rotate at 9:)
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5		! len == 0?  (also forces T=0 for the addc)
	add	#-1, r5
	bt	9f		! nothing to sum, go straight to the exit path
	mov.b	@r4+, r0	! consume one byte to reach even alignment
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Byte-rotate the sum left by 8 so subsequent aligned adds line up
	! with the odd starting offset; the rotate is undone at 9:.
	! NOTE(review): the carry produced by the addc above is left in T
	! and appears to be discarded when T is clobbered by the tst below
	! (it is not folded into r6 first) — confirm against a reference
	! C implementation for sums near 0xffffffff.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0		! now 2-byte aligned; already 4-byte aligned?
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt			! delay slot: T=0 for the addc at 1:
	bra	6f
	add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0	! sum one 16-bit word to reach 4-byte alignment
	extu.w	r0, r0
	addc	r0, r6
	bf	2f		! no carry out
	add	#1, r6		! fold the carry immediately (end-around add)
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	clrt			! delay slot: start the addc chain with T=0
	.align	2
3:
	! Unrolled main loop: 8 longwords (32 bytes) per iteration.
	! Loads are interleaved with addc so the carry chain in T is
	! never broken; the carry across iterations is preserved via
	! movt/cmp/eq below.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T) across the loop test
	dt	r1		! decrement chunk count, sets T on zero
	bf/s	3b
	cmp/eq	#1, r0		! delay slot: restore saved carry into T
	! here, we know r1==0
	addc	r1, r6		! add carry to r6 (r1 is 0, so this adds T only)
4:
	mov	r5, r0
	and	#0x1c, r0	! remaining whole longwords * 4 (len mod 32, /4)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = longword count
	mov	#0, r2
5:
	! One longword per iteration; r2 holds the previous load so the
	! addc/carry chain survives the dt/branch (same movt trick as above).
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	cmp/eq	#1, r0
	addc	r2, r6		! add the final longword
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5		! len >= 2 ?
	bf	7f		! no: single trailing byte
	mov.w	@r4+, r0	! sum a trailing 16-bit word
	extu.w	r0, r0
	cmp/eq	r1, r5		! exactly 2 bytes left?
	bt/s	8f
	clrt			! delay slot: T=0 for the addc at 8:
	shll16	r0		! 3 bytes left: word occupies the high half
	addc	r0, r6
7:
	mov.b	@r4+, r0	! final odd byte
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! big-endian: trailing byte is the high octet
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold any pending carry into the sum
9:
	! Check if the buffer was misaligned, if so realign sum
	! (undoes the byte-rotate performed in the odd-buf path; r7 still
	! holds the original buf pointer saved at entry)
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	mov	r6, r0		! delay slot: return the 32-bit partial sum

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy src -> dst while checksumming, with exception-table protection on
 * every user-space access.  Each SRC()/DST() wrapped instruction gets an
 * __ex_table entry pointing at the 6001/6002 fixup stubs below, so a
 * fault in the middle of the copy lands in the .fixup section instead of
 * oopsing.
 *
 * On a source fault:	*src_err_ptr = -EFAULT and the WHOLE destination
 *			buffer is zeroed (recomputing a partial sum is not
 *			worth the trouble).
 * On a dest fault:	*dst_err_ptr = -EFAULT.
 * Either way the function still returns through the common exit (5000:).
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	! Save dst and len: the fixup code needs the ORIGINAL values to
	! zero the destination after a source fault.  (Popped by the
	! "add #8,r15" at the common exit.)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt			! delay slot: T=0 for the addc chain
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	mov	r6,r2		! delay slot: r2 = remaining len for tail code

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! keep full len; r6 becomes the pair count
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Assemble the two bytes into one 16-bit value in memory order so
	! the checksum matches a straight 16-bit sum of the stream.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! preserve carry across dt (movt/cmp trick,
	dt	r6		! same as in csum_partial's main loop)
	bf/s	5b
	cmp/eq	#1,r0		! delay slot: restore carry into T
	mov	#0,r0
	addc	r0, r7		! fold final carry

	mov	r2, r0
	tst	#1, r0		! was len odd?
	bt	7f		! even: done
	bra	5f		! odd: copy/sum the final single byte
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold carry before entering the main loop
2:
	mov	r6,r2		! keep len for the tail phases (labels 2:/4:)
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = 32-byte chunk count
	tst	r6,r6
	bt/s	2f
	clrt			! delay slot: T=0 to start the addc chain
	.align	2
1:
	! Main copy+sum loop: 8 longwords (32 bytes) per iteration, loads
	! and stores interleaved with addc to keep the carry chain intact.
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! carry-across-dt trick again
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! fold final loop carry

2:	mov	r2,r6		! remaining whole longwords: (len mod 32) / 4
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! delay slot: byte count -> longword count
3:
SRC(	mov.l	@r4+,r0		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! fold carry
4:	mov	r2,r6		! trailing 1-3 bytes
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f		! none left: done
	mov	#2,r1
	cmp/hs	r1,r6		! at least 2 bytes?
	bf	5f		! no: single byte
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6		! exactly 2?
	bt/s	6f
	clrt			! delay slot: T=0 for the addc at 6:
	shll16	r0		! 3 bytes: the word is the high half
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0		)
DST(	mov.b	r0,@r5		)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! big-endian: trailing byte is the high octet
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold final carry
7:
5000:				! common exit; fixup stubs jump back here

# Exception handler:
.section .fixup, "ax"

6001:				! fault on a SRC() load
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst  (original value saved at entry)
	mov.l	@r15,r6		! len  (original value saved at entry)
	mov	#0,r7
1:	mov.b	r7,@r5		! byte-wise memset(dst, 0, len)
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0	! long jump back to the common exit
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:				! fault on a DST() store
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0	! long jump back to the common exit
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! drop the saved dst/len
	rts
	mov	r7,r0		! delay slot: return the 32-bit partial sum