in_cksum.c revision 58937
1210311Sjmallett/*- 2210311Sjmallett * Copyright (c) 1990 The Regents of the University of California. 3210311Sjmallett * All rights reserved. 4210311Sjmallett * 5210311Sjmallett * Redistribution and use in source and binary forms, with or without 6210311Sjmallett * modification, are permitted provided that the following conditions 7210311Sjmallett * are met: 8210311Sjmallett * 1. Redistributions of source code must retain the above copyright 9210311Sjmallett * notice, this list of conditions and the following disclaimer. 10210311Sjmallett * 2. Redistributions in binary form must reproduce the above copyright 11210311Sjmallett * notice, this list of conditions and the following disclaimer in the 12210311Sjmallett * documentation and/or other materials provided with the distribution. 13210311Sjmallett * 3. All advertising materials mentioning features or use of this software 14210311Sjmallett * must display the following acknowledgement: 15210311Sjmallett * This product includes software developed by the University of 16210311Sjmallett * California, Berkeley and its contributors. 17210311Sjmallett * 4. Neither the name of the University nor the names of its contributors 18210311Sjmallett * may be used to endorse or promote products derived from this software 19210311Sjmallett * without specific prior written permission. 20210311Sjmallett * 21210311Sjmallett * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22210311Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23210311Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24210311Sjmallett * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25210311Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26210311Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27210311Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28210311Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29210311Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30210311Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31210311Sjmallett * SUCH DAMAGE. 32210311Sjmallett * 33210311Sjmallett * from tahoe: in_cksum.c 1.2 86/01/05 34210311Sjmallett * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 35210311Sjmallett * $FreeBSD: head/sys/i386/i386/in_cksum.c 58937 2000-04-02 16:23:16Z shin $ 36210311Sjmallett */ 37210311Sjmallett 38210311Sjmallett#include <sys/param.h> 39210311Sjmallett#include <sys/systm.h> 40210311Sjmallett#include <sys/mbuf.h> 41210311Sjmallett 42210311Sjmallett#include <netinet/in.h> 43213150Sjmallett#include <netinet/in_systm.h> 44210311Sjmallett#include <netinet/ip.h> 45210311Sjmallett 46210311Sjmallett#include <machine/in_cksum.h> 47257324Sglebius 48210311Sjmallett/* 49210311Sjmallett * Checksum routine for Internet Protocol family headers. 50210311Sjmallett * 51210311Sjmallett * This routine is very heavily used in the network 52210311Sjmallett * code and should be modified for each CPU to be as fast as possible. 53210311Sjmallett * 54210311Sjmallett * This implementation is 386 version. 55210311Sjmallett */ 56210311Sjmallett 57210311Sjmallett#undef ADDCARRY 58210311Sjmallett#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff 59210311Sjmallett#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} 60210311Sjmallett#define INVERT sum == 0xffff ? sum : ~sum & 0xffff 61210311Sjmallett 62210311Sjmallett/* 63210311Sjmallett * Thanks to gcc we don't have to guess 64210311Sjmallett * which registers contain sum & w. 65210311Sjmallett */ 66210311Sjmallett#define ADD(n) __asm __volatile \ 67210311Sjmallett ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 68210311Sjmallett#define ADDC(n) __asm __volatile \ 69210311Sjmallett ("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 70210311Sjmallett#define LOAD(n) __asm __volatile \ 71210311Sjmallett ("movb " #n "(%1), %0" : "=r" (junk) : "r" (w)) 72210311Sjmallett#define MOP __asm __volatile \ 73210311Sjmallett ("adcl $0, %0" : "=r" (sum) : "0" (sum)) 74210311Sjmallett 75210311Sjmallettint 76210311Sjmallettin_cksum(m, len) 77210311Sjmallett register struct mbuf *m; 78210311Sjmallett register int len; 79210311Sjmallett{ 80210311Sjmallett register u_short *w; 81210311Sjmallett register unsigned sum = 0; 82210311Sjmallett register int mlen = 0; 83210311Sjmallett int byte_swapped = 0; 84210311Sjmallett union { char c[2]; u_short s; } su; 85210311Sjmallett 86210311Sjmallett for (;m && len; m = m->m_next) { 87210311Sjmallett if (m->m_len == 0) 88210311Sjmallett continue; 89213150Sjmallett w = mtod(m, u_short *); 90213150Sjmallett if (mlen == -1) { 91213150Sjmallett /* 92213150Sjmallett * The first byte of this mbuf is the continuation 93210311Sjmallett * of a word spanning between this mbuf and the 94219694Sjmallett * last mbuf. 95219694Sjmallett */ 96219694Sjmallett 97219694Sjmallett /* su.c[0] is already saved when scanning previous 98219694Sjmallett * mbuf. sum was REDUCEd when we found mlen == -1 99210311Sjmallett */ 100213150Sjmallett su.c[1] = *(u_char *)w; 101213150Sjmallett sum += su.s; 102213150Sjmallett w = (u_short *)((char *)w + 1); 103213150Sjmallett mlen = m->m_len - 1; 104213150Sjmallett len--; 105213150Sjmallett } else 106213150Sjmallett mlen = m->m_len; 107213150Sjmallett if (len < mlen) 108213150Sjmallett mlen = len; 109213150Sjmallett len -= mlen; 110213150Sjmallett /* 111213150Sjmallett * Force to long boundary so we do longword aligned 112215959Sjmallett * memory operations 113215959Sjmallett */ 114215959Sjmallett if (3 & (int) w) { 115215959Sjmallett REDUCE; 116213150Sjmallett if ((1 & (int) w) && (mlen > 0)) { 117213150Sjmallett sum <<= 8; 118213150Sjmallett su.c[0] = *(char *)w; 119213150Sjmallett w = (u_short *)((char *)w + 1); 120213150Sjmallett mlen--; 121213150Sjmallett byte_swapped = 1; 122213150Sjmallett } 123213150Sjmallett if ((2 & (int) w) && (mlen >= 2)) { 124210311Sjmallett sum += *w++; 125210311Sjmallett mlen -= 2; 126210311Sjmallett } 127210311Sjmallett } 128210311Sjmallett /* 129210311Sjmallett * Advance to a 486 cache line boundary. 130210311Sjmallett */ 131213807Sjmallett if (4 & (int) w && mlen >= 4) { 132210311Sjmallett ADD(0); 133210311Sjmallett MOP; 134210311Sjmallett w += 2; 135210311Sjmallett mlen -= 4; 136210311Sjmallett } 137213150Sjmallett if (8 & (int) w && mlen >= 8) { 138216071Sjmallett ADD(0); 139216071Sjmallett ADDC(4); 140216071Sjmallett MOP; 141216071Sjmallett w += 4; 142210311Sjmallett mlen -= 8; 143210311Sjmallett } 144210311Sjmallett /* 145210311Sjmallett * Do as much of the checksum as possible 32 bits at at time. 146210311Sjmallett * In fact, this loop is unrolled to make overhead from 147210311Sjmallett * branches &c small. 148210311Sjmallett */ 149210311Sjmallett mlen -= 1; 150210311Sjmallett while ((mlen -= 32) >= 0) { 151210311Sjmallett u_char junk; 152210311Sjmallett /* 153210311Sjmallett * Add with carry 16 words and fold in the last 154210311Sjmallett * carry by adding a 0 with carry. 155210311Sjmallett * 156210311Sjmallett * The early ADD(16) and the LOAD(32) are to load 157210311Sjmallett * the next 2 cache lines in advance on 486's. The 158210311Sjmallett * 486 has a penalty of 2 clock cycles for loading 159210311Sjmallett * a cache line, plus whatever time the external 160210311Sjmallett * memory takes to load the first word(s) addressed. 161210311Sjmallett * These penalties are unavoidable. Subsequent 162210311Sjmallett * accesses to a cache line being loaded (and to 163210311Sjmallett * other external memory?) are delayed until the 164210311Sjmallett * whole load finishes. These penalties are mostly 165210311Sjmallett * avoided by not accessing external memory for 166210311Sjmallett * 8 cycles after the ADD(16) and 12 cycles after 167210311Sjmallett * the LOAD(32). The loop terminates when mlen 168210311Sjmallett * is initially 33 (not 32) to guaranteed that 169210311Sjmallett * the LOAD(32) is within bounds. 170213807Sjmallett */ 171213807Sjmallett ADD(16); 172213807Sjmallett ADDC(0); 173213807Sjmallett ADDC(4); 174213807Sjmallett ADDC(8); 175213807Sjmallett ADDC(12); 176213807Sjmallett LOAD(32); 177213807Sjmallett ADDC(20); 178213807Sjmallett ADDC(24); 179213807Sjmallett ADDC(28); 180213807Sjmallett MOP; 181213807Sjmallett w += 16; 182213807Sjmallett } 183210311Sjmallett mlen += 32 + 1; 184210311Sjmallett if (mlen >= 32) { 185210311Sjmallett ADD(16); 186210311Sjmallett ADDC(0); 187210311Sjmallett ADDC(4); 188210311Sjmallett ADDC(8); 189210311Sjmallett ADDC(12); 190210311Sjmallett ADDC(20); 191210311Sjmallett ADDC(24); 192219694Sjmallett ADDC(28); 193210311Sjmallett MOP; 194210311Sjmallett w += 16; 195210311Sjmallett mlen -= 32; 196210311Sjmallett } 197210311Sjmallett if (mlen >= 16) { 198210311Sjmallett ADD(0); 199210311Sjmallett ADDC(4); 200219694Sjmallett ADDC(8); 201219694Sjmallett ADDC(12); 202219694Sjmallett MOP; 203219694Sjmallett w += 8; 204219694Sjmallett mlen -= 16; 205219694Sjmallett } 206219694Sjmallett if (mlen >= 8) { 207219694Sjmallett ADD(0); 208219694Sjmallett ADDC(4); 209219694Sjmallett MOP; 210219694Sjmallett w += 4; 211219694Sjmallett mlen -= 8; 212219694Sjmallett } 213219694Sjmallett if (mlen == 0 && byte_swapped == 0) 214219694Sjmallett continue; /* worth 1% maybe ?? */ 215219694Sjmallett REDUCE; 216219694Sjmallett while ((mlen -= 2) >= 0) { 217219694Sjmallett sum += *w++; 218210311Sjmallett } 219219694Sjmallett if (byte_swapped) { 220219694Sjmallett sum <<= 8; 221219694Sjmallett byte_swapped = 0; 222219694Sjmallett if (mlen == -1) { 223219694Sjmallett su.c[1] = *(char *)w; 224219694Sjmallett sum += su.s; 225210311Sjmallett mlen = 0; 226219694Sjmallett } else 227219694Sjmallett mlen = -1; 228210311Sjmallett } else if (mlen == -1) 229210311Sjmallett /* 230242346Sjmallett * This mbuf has odd number of bytes. 231210311Sjmallett * There could be a word split betwen 232210311Sjmallett * this mbuf and the next mbuf. 233210311Sjmallett * Save the last byte (to prepend to next mbuf). 234210311Sjmallett */ 235210311Sjmallett su.c[0] = *(char *)w; 236232812Sjmallett } 237232812Sjmallett 238210311Sjmallett if (len) 239210311Sjmallett printf("%s: out of data by %d\n", __func__, len); 240210311Sjmallett if (mlen == -1) { 241210311Sjmallett /* The last mbuf has odd # of bytes. Follow the 242210311Sjmallett standard (the odd byte is shifted left by 8 bits) */ 243210311Sjmallett su.c[1] = 0; 244210311Sjmallett sum += su.s; 245210311Sjmallett } 246219695Sjmallett REDUCE; 247210311Sjmallett return (~sum & 0xffff); 248210311Sjmallett} 249210311Sjmallett 250210311Sjmallettu_short 251210311Sjmallettin_cksum_skip(m, len, skip) 252210311Sjmallett struct mbuf *m; 253210311Sjmallett int len; 254217664Sjmallett int skip; 255217664Sjmallett{ 256217664Sjmallett register u_short *w; 257217210Sjmallett register unsigned sum = 0; 258217664Sjmallett register int mlen = 0; 259217664Sjmallett int byte_swapped = 0; 260217664Sjmallett union { char c[2]; u_short s; } su; 261217210Sjmallett 262217664Sjmallett len -= skip; 263217664Sjmallett for (; skip && m; m = m->m_next) { 264217664Sjmallett if (m->m_len > skip) { 265210311Sjmallett mlen = m->m_len - skip; 266210311Sjmallett w = (u_short *)(mtod(m, u_char *) + skip); 267210311Sjmallett goto skip_start; 268210311Sjmallett } else { 269210311Sjmallett skip -= m->m_len; 270210311Sjmallett } 271210311Sjmallett } 272210311Sjmallett 273210311Sjmallett for (;m && len; m = m->m_next) { 274210311Sjmallett if (m->m_len == 0) 275210311Sjmallett continue; 276210311Sjmallett w = mtod(m, u_short *); 277210311Sjmallett if (mlen == -1) { 278210311Sjmallett /* 279210311Sjmallett * The first byte of this mbuf is the continuation 280210311Sjmallett * of a word spanning between this mbuf and the 281210311Sjmallett * last mbuf. 282210311Sjmallett */ 283210311Sjmallett 284210311Sjmallett /* su.c[0] is already saved when scanning previous 285210311Sjmallett * mbuf. sum was REDUCEd when we found mlen == -1 286210311Sjmallett */ 287210311Sjmallett su.c[1] = *(u_char *)w; 288210311Sjmallett sum += su.s; 289210311Sjmallett w = (u_short *)((char *)w + 1); 290210311Sjmallett mlen = m->m_len - 1; 291210311Sjmallett len--; 292210311Sjmallett } else 293210311Sjmallett mlen = m->m_len; 294210311Sjmallettskip_start: 295210311Sjmallett if (len < mlen) 296210311Sjmallett mlen = len; 297210311Sjmallett len -= mlen; 298210311Sjmallett /* 299210311Sjmallett * Force to long boundary so we do longword aligned 300210311Sjmallett * memory operations 301210311Sjmallett */ 302210311Sjmallett if (3 & (int) w) { 303210311Sjmallett REDUCE; 304210311Sjmallett if ((1 & (int) w) && (mlen > 0)) { 305210311Sjmallett sum <<= 8; 306210311Sjmallett su.c[0] = *(char *)w; 307210311Sjmallett w = (u_short *)((char *)w + 1); 308210311Sjmallett mlen--; 309210311Sjmallett byte_swapped = 1; 310210311Sjmallett } 311210311Sjmallett if ((2 & (int) w) && (mlen >= 2)) { 312210311Sjmallett sum += *w++; 313210311Sjmallett mlen -= 2; 314210311Sjmallett } 315210311Sjmallett } 316210311Sjmallett /* 317210311Sjmallett * Advance to a 486 cache line boundary. 318210311Sjmallett */ 319210311Sjmallett if (4 & (int) w && mlen >= 4) { 320210311Sjmallett ADD(0); 321210311Sjmallett MOP; 322210311Sjmallett w += 2; 323219694Sjmallett mlen -= 4; 324210311Sjmallett } 325219694Sjmallett if (8 & (int) w && mlen >= 8) { 326219694Sjmallett ADD(0); 327219694Sjmallett ADDC(4); 328210311Sjmallett MOP; 329219694Sjmallett w += 4; 330210311Sjmallett mlen -= 8; 331210311Sjmallett } 332210311Sjmallett /* 333210311Sjmallett * Do as much of the checksum as possible 32 bits at at time. 334210311Sjmallett * In fact, this loop is unrolled to make overhead from 335210311Sjmallett * branches &c small. 336210311Sjmallett */ 337213150Sjmallett mlen -= 1; 338213150Sjmallett while ((mlen -= 32) >= 0) { 339213150Sjmallett u_char junk; 340213150Sjmallett /* 341213150Sjmallett * Add with carry 16 words and fold in the last 342210311Sjmallett * carry by adding a 0 with carry. 343210311Sjmallett * 344210311Sjmallett * The early ADD(16) and the LOAD(32) are to load 345210311Sjmallett * the next 2 cache lines in advance on 486's. The 346210311Sjmallett * 486 has a penalty of 2 clock cycles for loading 347210311Sjmallett * a cache line, plus whatever time the external 348210311Sjmallett * memory takes to load the first word(s) addressed. 349210311Sjmallett * These penalties are unavoidable. Subsequent 350210311Sjmallett * accesses to a cache line being loaded (and to 351210311Sjmallett * other external memory?) are delayed until the 352231987Sgonzo * whole load finishes. These penalties are mostly 353231987Sgonzo * avoided by not accessing external memory for 354231987Sgonzo * 8 cycles after the ADD(16) and 12 cycles after 355210311Sjmallett * the LOAD(32). The loop terminates when mlen 356210311Sjmallett * is initially 33 (not 32) to guaranteed that 357210311Sjmallett * the LOAD(32) is within bounds. 358231987Sgonzo */ 359210311Sjmallett ADD(16); 360210311Sjmallett ADDC(0); 361210311Sjmallett ADDC(4); 362231987Sgonzo ADDC(8); 363210311Sjmallett ADDC(12); 364210311Sjmallett LOAD(32); 365210311Sjmallett ADDC(20); 366210311Sjmallett ADDC(24); 367210311Sjmallett ADDC(28); 368210311Sjmallett MOP; 369210311Sjmallett w += 16; 370210311Sjmallett } 371210311Sjmallett mlen += 32 + 1; 372210311Sjmallett if (mlen >= 32) { 373210311Sjmallett ADD(16); 374210311Sjmallett ADDC(0); 375210311Sjmallett ADDC(4); 376210311Sjmallett ADDC(8); 377213150Sjmallett ADDC(12); 378210311Sjmallett ADDC(20); 379210311Sjmallett ADDC(24); 380210311Sjmallett ADDC(28); 381210311Sjmallett MOP; 382210311Sjmallett w += 16; 383210311Sjmallett mlen -= 32; 384210311Sjmallett } 385210311Sjmallett if (mlen >= 16) { 386210311Sjmallett ADD(0); 387210311Sjmallett ADDC(4); 388210311Sjmallett ADDC(8); 389210311Sjmallett ADDC(12); 390210311Sjmallett MOP; 391210311Sjmallett w += 8; 392210311Sjmallett mlen -= 16; 393210311Sjmallett } 394210311Sjmallett if (mlen >= 8) { 395215974Sjmallett ADD(0); 396210311Sjmallett ADDC(4); 397210311Sjmallett MOP; 398210311Sjmallett w += 4; 399210311Sjmallett mlen -= 8; 400210311Sjmallett } 401210311Sjmallett if (mlen == 0 && byte_swapped == 0) 402210311Sjmallett continue; /* worth 1% maybe ?? */ 403210311Sjmallett REDUCE; 404210311Sjmallett while ((mlen -= 2) >= 0) { 405210311Sjmallett sum += *w++; 406210311Sjmallett } 407215974Sjmallett if (byte_swapped) { 408210311Sjmallett sum <<= 8; 409210311Sjmallett byte_swapped = 0; 410210311Sjmallett if (mlen == -1) { 411210311Sjmallett su.c[1] = *(char *)w; 412210311Sjmallett sum += su.s; 413210311Sjmallett mlen = 0; 414210311Sjmallett } else 415210311Sjmallett mlen = -1; 416210311Sjmallett } else if (mlen == -1) 417210311Sjmallett /* 418210311Sjmallett * This mbuf has odd number of bytes. 419210311Sjmallett * There could be a word split betwen 420210311Sjmallett * this mbuf and the next mbuf. 421210311Sjmallett * Save the last byte (to prepend to next mbuf). 422210311Sjmallett */ 423210311Sjmallett su.c[0] = *(char *)w; 424210311Sjmallett } 425210311Sjmallett 426210311Sjmallett if (len) 427210311Sjmallett printf("%s: out of data by %d\n", __func__, len); 428210311Sjmallett if (mlen == -1) { 429210311Sjmallett /* The last mbuf has odd # of bytes. Follow the 430210311Sjmallett standard (the odd byte is shifted left by 8 bits) */ 431210311Sjmallett su.c[1] = 0; 432210311Sjmallett sum += su.s; 433231987Sgonzo } 434231987Sgonzo REDUCE; 435231987Sgonzo return (INVERT); 436231987Sgonzo} 437231987Sgonzo 438231987Sgonzo/* 439231987Sgonzo * This is the exact same algorithm as above with a few exceptions: 440210311Sjmallett * (1) it is designed to operate on buffers, not mbufs 441210311Sjmallett * (2) it returns an intermediate form of the sum which has to be 442210311Sjmallett * explicitly finalized (but this can be delayed) 443210311Sjmallett * (3) it accepts an intermediate sum 444210311Sjmallett * 445210311Sjmallett * This is particularly useful when building packets quickly, 446210311Sjmallett * since one can compute the checksum of the pseudoheader ahead of 447210311Sjmallett * time and then use this function to complete the work. That way, 448210311Sjmallett * the pseudoheader never actually has to exist in the packet buffer, 449226024Smarcel * which avoids needless duplication of work. 450210311Sjmallett */ 451210311Sjmallettin_psum_t 452210311Sjmallettin_cksum_partial(psum, w, len) 453210311Sjmallett in_psum_t psum; 454210311Sjmallett const u_short *w; 455210311Sjmallett int len; 456210311Sjmallett{ 457210311Sjmallett register in_psum_t sum = psum; 458283291Sjkim int byte_swapped = 0; 459210311Sjmallett union { char c[2]; u_short s; } su; 460210311Sjmallett 461210311Sjmallett /* 462210311Sjmallett * Force to long boundary so we do longword aligned 463210311Sjmallett * memory operations 464210311Sjmallett */ 465210311Sjmallett if (3 & (int) w) { 466210311Sjmallett REDUCE; 467210311Sjmallett if ((1 & (int) w) && (len > 0)) { 468210311Sjmallett sum <<= 8; 469210311Sjmallett su.c[0] = *(const char *)w; 470219695Sjmallett w = (const u_short *)((const char *)w + 1); 471210311Sjmallett len--; 472210311Sjmallett byte_swapped = 1; 473219695Sjmallett } 474210311Sjmallett if ((2 & (int) w) && (len >= 2)) { 475210311Sjmallett sum += *w++; 476210311Sjmallett len -= 2; 477210311Sjmallett } 478210311Sjmallett } 479219695Sjmallett /* 480210311Sjmallett * Advance to a 486 cache line boundary. 481210311Sjmallett */ 482210311Sjmallett if (4 & (int) w && len >= 4) { 483210311Sjmallett ADD(0); 484215990Sjmallett MOP; 485215990Sjmallett w += 2; 486210311Sjmallett len -= 4; 487210311Sjmallett } 488210311Sjmallett if (8 & (int) w && len >= 8) { 489210311Sjmallett ADD(0); 490210311Sjmallett ADDC(4); 491210311Sjmallett MOP; 492210311Sjmallett w += 4; 493210311Sjmallett len -= 8; 494210311Sjmallett } 495210311Sjmallett /* 496210311Sjmallett * Do as much of the checksum as possible 32 bits at at time. 497210311Sjmallett * In fact, this loop is unrolled to make overhead from 498210311Sjmallett * branches &c small. 499219694Sjmallett */ 500219694Sjmallett len -= 1; 501219694Sjmallett while ((len -= 32) >= 0) { 502219694Sjmallett u_char junk; 503219694Sjmallett /* 504219694Sjmallett * Add with carry 16 words and fold in the last 505219694Sjmallett * carry by adding a 0 with carry. 506219694Sjmallett * 507219694Sjmallett * The early ADD(16) and the LOAD(32) are to load 508210311Sjmallett * the next 2 cache lines in advance on 486's. The 509 * 486 has a penalty of 2 clock cycles for loading 510 * a cache line, plus whatever time the external 511 * memory takes to load the first word(s) addressed. 512 * These penalties are unavoidable. Subsequent 513 * accesses to a cache line being loaded (and to 514 * other external memory?) are delayed until the 515 * whole load finishes. These penalties are mostly 516 * avoided by not accessing external memory for 517 * 8 cycles after the ADD(16) and 12 cycles after 518 * the LOAD(32). The loop terminates when len 519 * is initially 33 (not 32) to guaranteed that 520 * the LOAD(32) is within bounds. 521 */ 522 ADD(16); 523 ADDC(0); 524 ADDC(4); 525 ADDC(8); 526 ADDC(12); 527 LOAD(32); 528 ADDC(20); 529 ADDC(24); 530 ADDC(28); 531 MOP; 532 w += 16; 533 } 534 len += 32 + 1; 535 if (len >= 32) { 536 ADD(16); 537 ADDC(0); 538 ADDC(4); 539 ADDC(8); 540 ADDC(12); 541 ADDC(20); 542 ADDC(24); 543 ADDC(28); 544 MOP; 545 w += 16; 546 len -= 32; 547 } 548 if (len >= 16) { 549 ADD(0); 550 ADDC(4); 551 ADDC(8); 552 ADDC(12); 553 MOP; 554 w += 8; 555 len -= 16; 556 } 557 if (len >= 8) { 558 ADD(0); 559 ADDC(4); 560 MOP; 561 w += 4; 562 len -= 8; 563 } 564 if (len == 0 && byte_swapped == 0) 565 goto out; 566 REDUCE; 567 while ((len -= 2) >= 0) { 568 sum += *w++; 569 } 570 if (byte_swapped) { 571 sum <<= 8; 572 byte_swapped = 0; 573 if (len == -1) { 574 su.c[1] = *(const char *)w; 575 sum += su.s; 576 len = 0; 577 } else 578 len = -1; 579 } else if (len == -1) { 580 /* 581 * This buffer has odd number of bytes. 582 * There could be a word split betwen 583 * this buffer and the next. 584 */ 585 su.c[0] = *(const char *)w; 586 } 587out: 588 if (len == -1) { 589 /* The last buffer has odd # of bytes. Follow the 590 standard (the odd byte is shifted left by 8 bits) */ 591 su.c[1] = 0; 592 sum += su.s; 593 } 594 return sum; 595} 596 597int 598in_cksum_finalize(psum) 599 in_psum_t psum; 600{ 601 in_psum_t sum = psum; 602 REDUCE; 603 return (~sum & 0xffff); 604} 605