in_cksum.c (54882) | in_cksum.c (58698) |
---|---|
1/*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 18 unchanged lines hidden (view full) --- 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from tahoe: in_cksum.c 1.2 86/01/05 34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 | 1/*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 18 unchanged lines hidden (view full) --- 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from tahoe: in_cksum.c 1.2 86/01/05 34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 |
35 * $FreeBSD: head/sys/i386/i386/in_cksum.c 54882 1999-12-20 12:11:34Z sheldonh $ | 35 * $FreeBSD: head/sys/i386/i386/in_cksum.c 58698 2000-03-27 19:14:27Z jlemon $ |
36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/mbuf.h> 41 42#include <netinet/in.h> 43#include <netinet/in_systm.h> --- 8 unchanged lines hidden (view full) --- 52 * code and should be modified for each CPU to be as fast as possible. 53 * 54 * This implementation is 386 version. 55 */ 56 57#undef ADDCARRY 58#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff 59#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} | 36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/mbuf.h> 41 42#include <netinet/in.h> 43#include <netinet/in_systm.h> --- 8 unchanged lines hidden (view full) --- 52 * code and should be modified for each CPU to be as fast as possible. 53 * 54 * This implementation is 386 version. 55 */ 56 57#undef ADDCARRY 58#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff 59#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} |
/*
 * INVERT: final complement step of the checksum.  Expands in terms of a
 * variable named `sum` that must be in scope at the use site.  Yields sum
 * unchanged when it equals 0xffff (the case where ~sum & 0xffff would be 0),
 * otherwise yields ~sum & 0xffff.  The expansion is not parenthesized, so
 * wrap it at the use site, e.g. return (INVERT).
 */
60#define INVERT sum == 0xffff ? sum : ~sum & 0xffff |
|
60 61/* 62 * Thanks to gcc we don't have to guess 63 * which registers contain sum & w. 64 */ 65#define ADD(n) __asm __volatile \ 66 ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 67#define ADDC(n) __asm __volatile \ --- 173 unchanged lines hidden (view full) --- 241 standard (the odd byte is shifted left by 8 bits) */ 242 su.c[1] = 0; 243 sum += su.s; 244 } 245 REDUCE; 246 return (~sum & 0xffff); 247} 248 | 61 62/* 63 * Thanks to gcc we don't have to guess 64 * which registers contain sum & w. 65 */ 66#define ADD(n) __asm __volatile \ 67 ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 68#define ADDC(n) __asm __volatile \ --- 173 unchanged lines hidden (view full) --- 242 standard (the odd byte is shifted left by 8 bits) */ 243 su.c[1] = 0; 244 sum += su.s; 245 } 246 REDUCE; 247 return (~sum & 0xffff); 248} 249 |
/*
 * in_cksum_skip: checksum part of an mbuf chain.
 *
 * Accumulates 16-bit words from the chain `m` into `sum`, folding carries
 * back in with REDUCE, and returns the result through INVERT.
 *
 *   m    - head of the mbuf chain to read.
 *   len  - byte count measured from the start of the chain; `skip` is
 *          subtracted from it first, so len - skip bytes are summed.
 *   skip - number of leading bytes of the chain that are not summed.
 *
 * If the chain ends before the remaining length reaches 0, the message
 * "cksum: out of data" is printed and the partial sum is still returned.
 *
 * mlen == -1 is used as a flag meaning "the previous mbuf ended in the
 * middle of a 16-bit word; its last byte is saved in su.c[0]".
 *
 * NOTE(review): ADDC, MOP and LOAD are defined outside the visible part of
 * this file; the ADD/ADDC sequences presumably chain through the CPU carry
 * flag, so their order should not be changed -- confirm against the macros.
 */
250u_short 251in_cksum_skip(m, len, skip) 252 struct mbuf *m; 253 int len; 254 int skip; 255{ 256 register u_short *w; 257 register unsigned sum = 0; 258 register int mlen = 0; 259 int byte_swapped = 0; 260 union { char c[2]; u_short s; } su; 261 262 len -= skip; /* len now counts only the bytes to be summed */ 263 /* Walk past mbufs wholly covered by skip; if skip ends inside an mbuf, enter the main loop body at skip_start with w and mlen already set. */ for (; skip && m; m = m->m_next) { 264 if (m->m_len > skip) { 265 mlen = m->m_len - skip; 266 w = (u_short *)(mtod(m, u_char *) + skip); 267 goto skip_start; 268 } else { 269 skip -= m->m_len; 270 } 271 } 272 273 for (;m && len; m = m->m_next) { 274 if (m->m_len == 0) 275 continue; 276 w = mtod(m, u_short *); 277 if (mlen == -1) { 278 /* 279 * The first byte of this mbuf is the continuation 280 * of a word spanning between this mbuf and the 281 * last mbuf. 282 */ 283 284 /* su.c[0] is already saved when scanning previous 285 * mbuf. sum was REDUCEd when we found mlen == -1 286 */ 287 su.c[1] = *(u_char *)w; 288 sum += su.s; 289 w = (u_short *)((char *)w + 1); 290 mlen = m->m_len - 1; 291 len--; 292 } else 293 mlen = m->m_len; 294skip_start: 295 /* Clamp this mbuf's share to the bytes still wanted. */ if (len < mlen) 296 mlen = len; 297 len -= mlen; 298 /* 299 * Force to long boundary so we do longword aligned 300 * memory operations 301 */ 302 if (3 & (int) w) { 303 REDUCE; 304 if ((1 & (int) w) && (mlen > 0)) { 305 sum <<= 8; 306 su.c[0] = *(char *)w; 307 w = (u_short *)((char *)w + 1); 308 mlen--; 309 byte_swapped = 1; 310 } 311 if ((2 & (int) w) && (mlen >= 2)) { 312 sum += *w++; 313 mlen -= 2; 314 } 315 } 316 /* 317 * Advance to a 486 cache line boundary. 318 */ 319 if (4 & (int) w && mlen >= 4) { 320 ADD(0); 321 MOP; 322 w += 2; 323 mlen -= 4; 324 } 325 if (8 & (int) w && mlen >= 8) { 326 ADD(0); 327 ADDC(4); 328 MOP; 329 w += 4; 330 mlen -= 8; 331 } 332 /* 333 * Do as much of the checksum as possible 32 bits at a time. 334 * In fact, this loop is unrolled to make overhead from 335 * branches &c small. 
336 */ 337 mlen -= 1; 338 while ((mlen -= 32) >= 0) { 339 u_char junk; 340 /* 341 * Add with carry 16 words and fold in the last 342 * carry by adding a 0 with carry. 343 * 344 * The early ADD(16) and the LOAD(32) are to load 345 * the next 2 cache lines in advance on 486's. The 346 * 486 has a penalty of 2 clock cycles for loading 347 * a cache line, plus whatever time the external 348 * memory takes to load the first word(s) addressed. 349 * These penalties are unavoidable. Subsequent 350 * accesses to a cache line being loaded (and to 351 * other external memory?) are delayed until the 352 * whole load finishes. These penalties are mostly 353 * avoided by not accessing external memory for 354 * 8 cycles after the ADD(16) and 12 cycles after 355 * the LOAD(32). The loop terminates when mlen 356 * is initially 33 (not 32) to guarantee that 357 * the LOAD(32) is within bounds. 358 */ 359 ADD(16); 360 ADDC(0); 361 ADDC(4); 362 ADDC(8); 363 ADDC(12); 364 LOAD(32); 365 ADDC(20); 366 ADDC(24); 367 ADDC(28); 368 MOP; 369 w += 16; 370 } 371 mlen += 32 + 1; /* undo the loop's final -32 and the earlier -1 */ 372 if (mlen >= 32) { 373 ADD(16); 374 ADDC(0); 375 ADDC(4); 376 ADDC(8); 377 ADDC(12); 378 ADDC(20); 379 ADDC(24); 380 ADDC(28); 381 MOP; 382 w += 16; 383 mlen -= 32; 384 } 385 if (mlen >= 16) { 386 ADD(0); 387 ADDC(4); 388 ADDC(8); 389 ADDC(12); 390 MOP; 391 w += 8; 392 mlen -= 16; 393 } 394 if (mlen >= 8) { 395 ADD(0); 396 ADDC(4); 397 MOP; 398 w += 4; 399 mlen -= 8; 400 } 401 if (mlen == 0 && byte_swapped == 0) 402 continue; /* worth 1% maybe ?? */ 403 REDUCE; 404 /* Sum the remaining whole 16-bit words; mlen ends at -1 if one byte is left over, -2 otherwise. */ while ((mlen -= 2) >= 0) { 405 sum += *w++; 406 } 407 if (byte_swapped) { 408 sum <<= 8; 409 byte_swapped = 0; 410 if (mlen == -1) { 411 su.c[1] = *(char *)w; 412 sum += su.s; 413 mlen = 0; 414 } else 415 mlen = -1; 416 } else if (mlen == -1) 417 /* 418 * This mbuf has odd number of bytes. 419 * There could be a word split between 420 * this mbuf and the next mbuf. 421 * Save the last byte (to prepend to next mbuf). 
422 */ 423 su.c[0] = *(char *)w; 424 } 425 426 if (len) 427 printf("cksum: out of data\n"); 428 if (mlen == -1) { 429 /* The last mbuf has odd # of bytes. Follow the 430 standard (the odd byte is shifted left by 8 bits) */ 431 su.c[1] = 0; 432 sum += su.s; 433 } 434 REDUCE; 435 return (INVERT); 436} 437 |
|
249/* 250 * This is the exact same algorithm as above with a few exceptions: 251 * (1) it is designed to operate on buffers, not mbufs 252 * (2) it returns an intermediate form of the sum which has to be 253 * explicitly finalized (but this can be delayed) 254 * (3) it accepts an intermediate sum 255 * 256 * This is particularly useful when building packets quickly, --- 159 unchanged lines hidden --- | 438/* 439 * This is the exact same algorithm as above with a few exceptions: 440 * (1) it is designed to operate on buffers, not mbufs 441 * (2) it returns an intermediate form of the sum which has to be 442 * explicitly finalized (but this can be delayed) 443 * (3) it accepts an intermediate sum 444 * 445 * This is particularly useful when building packets quickly, --- 159 unchanged lines hidden --- |