in_cksum.c revision 29041
1210311Sjmallett/*- 2210311Sjmallett * Copyright (c) 1990 The Regents of the University of California. 3210311Sjmallett * All rights reserved. 4210311Sjmallett * 5210311Sjmallett * Redistribution and use in source and binary forms, with or without 6210311Sjmallett * modification, are permitted provided that the following conditions 7210311Sjmallett * are met: 8210311Sjmallett * 1. Redistributions of source code must retain the above copyright 9210311Sjmallett * notice, this list of conditions and the following disclaimer. 10210311Sjmallett * 2. Redistributions in binary form must reproduce the above copyright 11210311Sjmallett * notice, this list of conditions and the following disclaimer in the 12210311Sjmallett * documentation and/or other materials provided with the distribution. 13210311Sjmallett * 3. All advertising materials mentioning features or use of this software 14210311Sjmallett * must display the following acknowledgement: 15210311Sjmallett * This product includes software developed by the University of 16210311Sjmallett * California, Berkeley and its contributors. 17210311Sjmallett * 4. Neither the name of the University nor the names of its contributors 18210311Sjmallett * may be used to endorse or promote products derived from this software 19210311Sjmallett * without specific prior written permission. 20210311Sjmallett * 21210311Sjmallett * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22210311Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23210311Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24210311Sjmallett * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25210311Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26210311Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27210311Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28210311Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29210311Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30210311Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31210311Sjmallett * SUCH DAMAGE. 32210311Sjmallett * 33210311Sjmallett * from tahoe: in_cksum.c 1.2 86/01/05 34210311Sjmallett * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 35210311Sjmallett * $Id: in_cksum.c,v 1.11 1997/08/16 19:14:52 wollman Exp $ 36210311Sjmallett */ 37210311Sjmallett 38210311Sjmallett#include <sys/param.h> 39210311Sjmallett#include <sys/systm.h> 40210311Sjmallett#include <sys/mbuf.h> 41210311Sjmallett 42210311Sjmallett#include <netinet/in.h> 43213150Sjmallett#include <netinet/in_systm.h> 44210311Sjmallett#include <netinet/ip.h> 45210311Sjmallett 46210311Sjmallett#include <machine/in_cksum.h> 47257324Sglebius 48210311Sjmallett/* 49210311Sjmallett * Checksum routine for Internet Protocol family headers. 50210311Sjmallett * 51210311Sjmallett * This routine is very heavily used in the network 52210311Sjmallett * code and should be modified for each CPU to be as fast as possible. 53210311Sjmallett * 54210311Sjmallett * This implementation is 386 version. 55210311Sjmallett */ 56210311Sjmallett 57210311Sjmallett#undef ADDCARRY 58210311Sjmallett#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff 59210311Sjmallett#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} 60210311Sjmallett 61210311Sjmallett/* 62210311Sjmallett * Thanks to gcc we don't have to guess 63210311Sjmallett * which registers contain sum & w. 64210311Sjmallett */ 65210311Sjmallett#define ADD(n) asm("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 66210311Sjmallett#define ADDC(n) asm("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 67210311Sjmallett#define LOAD(n) asm volatile("movb " #n "(%1), %0" : "=r" (junk) : "r" (w)) 68210311Sjmallett#define MOP asm("adcl $0, %0" : "=r" (sum) : "0" (sum)) 69210311Sjmallett 70210311Sjmallettint 71210311Sjmallettin_cksum(m, len) 72210311Sjmallett register struct mbuf *m; 73210311Sjmallett register int len; 74210311Sjmallett{ 75210311Sjmallett register u_short *w; 76210311Sjmallett register unsigned sum = 0; 77210311Sjmallett register int mlen = 0; 78210311Sjmallett int byte_swapped = 0; 79210311Sjmallett union { char c[2]; u_short s; } su; 80210311Sjmallett 81210311Sjmallett for (;m && len; m = m->m_next) { 82210311Sjmallett if (m->m_len == 0) 83210311Sjmallett continue; 84210311Sjmallett w = mtod(m, u_short *); 85210311Sjmallett if (mlen == -1) { 86210311Sjmallett /* 87210311Sjmallett * The first byte of this mbuf is the continuation 88210311Sjmallett * of a word spanning between this mbuf and the 89213150Sjmallett * last mbuf. 90213150Sjmallett */ 91213150Sjmallett 92213150Sjmallett /* su.c[0] is already saved when scanning previous 93210311Sjmallett * mbuf. sum was REDUCEd when we found mlen == -1 94219694Sjmallett */ 95219694Sjmallett su.c[1] = *(u_char *)w; 96219694Sjmallett sum += su.s; 97219694Sjmallett w = (u_short *)((char *)w + 1); 98219694Sjmallett mlen = m->m_len - 1; 99210311Sjmallett len--; 100213150Sjmallett } else 101213150Sjmallett mlen = m->m_len; 102213150Sjmallett if (len < mlen) 103213150Sjmallett mlen = len; 104213150Sjmallett len -= mlen; 105213150Sjmallett /* 106213150Sjmallett * Force to long boundary so we do longword aligned 107213150Sjmallett * memory operations 108213150Sjmallett */ 109213150Sjmallett if (3 & (int) w) { 110213150Sjmallett REDUCE; 111213150Sjmallett if ((1 & (int) w) && (mlen > 0)) { 112215959Sjmallett sum <<= 8; 113215959Sjmallett su.c[0] = *(char *)w; 114215959Sjmallett w = (u_short *)((char *)w + 1); 115215959Sjmallett mlen--; 116213150Sjmallett byte_swapped = 1; 117213150Sjmallett } 118213150Sjmallett if ((2 & (int) w) && (mlen >= 2)) { 119213150Sjmallett sum += *w++; 120213150Sjmallett mlen -= 2; 121213150Sjmallett } 122213150Sjmallett } 123213150Sjmallett /* 124210311Sjmallett * Advance to a 486 cache line boundary. 125210311Sjmallett */ 126210311Sjmallett if (4 & (int) w && mlen >= 4) { 127210311Sjmallett ADD(0); 128210311Sjmallett MOP; 129210311Sjmallett w += 2; 130210311Sjmallett mlen -= 4; 131213807Sjmallett } 132210311Sjmallett if (8 & (int) w && mlen >= 8) { 133210311Sjmallett ADD(0); 134210311Sjmallett ADDC(4); 135210311Sjmallett MOP; 136210311Sjmallett w += 4; 137213150Sjmallett mlen -= 8; 138216071Sjmallett } 139216071Sjmallett /* 140216071Sjmallett * Do as much of the checksum as possible 32 bits at at time. 141216071Sjmallett * In fact, this loop is unrolled to make overhead from 142210311Sjmallett * branches &c small. 143210311Sjmallett */ 144210311Sjmallett mlen -= 1; 145210311Sjmallett while ((mlen -= 32) >= 0) { 146210311Sjmallett u_char junk; 147210311Sjmallett /* 148210311Sjmallett * Add with carry 16 words and fold in the last 149210311Sjmallett * carry by adding a 0 with carry. 150210311Sjmallett * 151210311Sjmallett * The early ADD(16) and the LOAD(32) are to load 152210311Sjmallett * the next 2 cache lines in advance on 486's. The 153210311Sjmallett * 486 has a penalty of 2 clock cycles for loading 154210311Sjmallett * a cache line, plus whatever time the external 155210311Sjmallett * memory takes to load the first word(s) addressed. 156210311Sjmallett * These penalties are unavoidable. Subsequent 157210311Sjmallett * accesses to a cache line being loaded (and to 158210311Sjmallett * other external memory?) are delayed until the 159210311Sjmallett * whole load finishes. These penalties are mostly 160210311Sjmallett * avoided by not accessing external memory for 161210311Sjmallett * 8 cycles after the ADD(16) and 12 cycles after 162210311Sjmallett * the LOAD(32). The loop terminates when mlen 163210311Sjmallett * is initially 33 (not 32) to guaranteed that 164210311Sjmallett * the LOAD(32) is within bounds. 165210311Sjmallett */ 166210311Sjmallett ADD(16); 167210311Sjmallett ADDC(0); 168210311Sjmallett ADDC(4); 169210311Sjmallett ADDC(8); 170213807Sjmallett ADDC(12); 171213807Sjmallett LOAD(32); 172213807Sjmallett ADDC(20); 173213807Sjmallett ADDC(24); 174213807Sjmallett ADDC(28); 175213807Sjmallett MOP; 176213807Sjmallett w += 16; 177213807Sjmallett } 178213807Sjmallett mlen += 32 + 1; 179213807Sjmallett if (mlen >= 32) { 180213807Sjmallett ADD(16); 181213807Sjmallett ADDC(0); 182213807Sjmallett ADDC(4); 183210311Sjmallett ADDC(8); 184210311Sjmallett ADDC(12); 185210311Sjmallett ADDC(20); 186210311Sjmallett ADDC(24); 187210311Sjmallett ADDC(28); 188210311Sjmallett MOP; 189210311Sjmallett w += 16; 190210311Sjmallett mlen -= 32; 191210311Sjmallett } 192219694Sjmallett if (mlen >= 16) { 193210311Sjmallett ADD(0); 194210311Sjmallett ADDC(4); 195210311Sjmallett ADDC(8); 196210311Sjmallett ADDC(12); 197210311Sjmallett MOP; 198210311Sjmallett w += 8; 199210311Sjmallett mlen -= 16; 200219694Sjmallett } 201219694Sjmallett if (mlen >= 8) { 202219694Sjmallett ADD(0); 203219694Sjmallett ADDC(4); 204219694Sjmallett MOP; 205219694Sjmallett w += 4; 206219694Sjmallett mlen -= 8; 207219694Sjmallett } 208219694Sjmallett if (mlen == 0 && byte_swapped == 0) 209219694Sjmallett continue; /* worth 1% maybe ?? */ 210219694Sjmallett REDUCE; 211219694Sjmallett while ((mlen -= 2) >= 0) { 212219694Sjmallett sum += *w++; 213219694Sjmallett } 214219694Sjmallett if (byte_swapped) { 215219694Sjmallett sum <<= 8; 216219694Sjmallett byte_swapped = 0; 217219694Sjmallett if (mlen == -1) { 218210311Sjmallett su.c[1] = *(char *)w; 219219694Sjmallett sum += su.s; 220219694Sjmallett mlen = 0; 221219694Sjmallett } else 222219694Sjmallett mlen = -1; 223219694Sjmallett } else if (mlen == -1) 224219694Sjmallett /* 225210311Sjmallett * This mbuf has odd number of bytes. 226219694Sjmallett * There could be a word split betwen 227219694Sjmallett * this mbuf and the next mbuf. 228210311Sjmallett * Save the last byte (to prepend to next mbuf). 229210311Sjmallett */ 230242346Sjmallett su.c[0] = *(char *)w; 231210311Sjmallett } 232210311Sjmallett 233210311Sjmallett if (len) 234210311Sjmallett printf("cksum: out of data\n"); 235210311Sjmallett if (mlen == -1) { 236232812Sjmallett /* The last mbuf has odd # of bytes. Follow the 237232812Sjmallett standard (the odd byte is shifted left by 8 bits) */ 238210311Sjmallett su.c[1] = 0; 239210311Sjmallett sum += su.s; 240210311Sjmallett } 241210311Sjmallett REDUCE; 242210311Sjmallett return (~sum & 0xffff); 243210311Sjmallett} 244210311Sjmallett 245210311Sjmallett/* 246219695Sjmallett * This is the exact same algorithm as above with a few exceptions: 247210311Sjmallett * (1) it is designed to operate on buffers, not mbufs 248210311Sjmallett * (2) it returns an intermediate form of the sum which has to be 249210311Sjmallett * explicitly finalized (but this can be delayed) 250210311Sjmallett * (3) it accepts an intermediate sum 251210311Sjmallett * 252210311Sjmallett * This is particularly useful when building packets quickly, 253210311Sjmallett * since one can compute the checksum of the pseudoheader ahead of 254217664Sjmallett * time and then use this function to complete the work. That way, 255217664Sjmallett * the pseudoheader never actually has to exist in the packet buffer, 256217664Sjmallett * which avoids needless duplication of work. 257217210Sjmallett */ 258217664Sjmallettin_psum_t 259217664Sjmallettin_cksum_partial(psum, w, len) 260217664Sjmallett in_psum_t psum; 261217210Sjmallett const u_short *w; 262217664Sjmallett int len; 263217664Sjmallett{ 264217664Sjmallett register in_psum_t sum = psum; 265210311Sjmallett int byte_swapped = 0; 266210311Sjmallett union { char c[2]; u_short s; } su; 267210311Sjmallett 268210311Sjmallett /* 269210311Sjmallett * Force to long boundary so we do longword aligned 270210311Sjmallett * memory operations 271210311Sjmallett */ 272210311Sjmallett if (3 & (int) w) { 273210311Sjmallett REDUCE; 274210311Sjmallett if ((1 & (int) w) && (len > 0)) { 275210311Sjmallett sum <<= 8; 276210311Sjmallett su.c[0] = *(char *)w; 277210311Sjmallett w = (u_short *)((char *)w + 1); 278210311Sjmallett len--; 279210311Sjmallett byte_swapped = 1; 280210311Sjmallett } 281210311Sjmallett if ((2 & (int) w) && (len >= 2)) { 282210311Sjmallett sum += *w++; 283210311Sjmallett len -= 2; 284210311Sjmallett } 285210311Sjmallett } 286210311Sjmallett /* 287210311Sjmallett * Advance to a 486 cache line boundary. 288210311Sjmallett */ 289210311Sjmallett if (4 & (int) w && len >= 4) { 290210311Sjmallett ADD(0); 291210311Sjmallett MOP; 292210311Sjmallett w += 2; 293210311Sjmallett len -= 4; 294210311Sjmallett } 295210311Sjmallett if (8 & (int) w && len >= 8) { 296210311Sjmallett ADD(0); 297210311Sjmallett ADDC(4); 298210311Sjmallett MOP; 299210311Sjmallett w += 4; 300210311Sjmallett len -= 8; 301210311Sjmallett } 302210311Sjmallett /* 303210311Sjmallett * Do as much of the checksum as possible 32 bits at at time. 304210311Sjmallett * In fact, this loop is unrolled to make overhead from 305210311Sjmallett * branches &c small. 306210311Sjmallett */ 307210311Sjmallett len -= 1; 308210311Sjmallett while ((len -= 32) >= 0) { 309210311Sjmallett u_char junk; 310210311Sjmallett /* 311210311Sjmallett * Add with carry 16 words and fold in the last 312210311Sjmallett * carry by adding a 0 with carry. 313210311Sjmallett * 314210311Sjmallett * The early ADD(16) and the LOAD(32) are to load 315210311Sjmallett * the next 2 cache lines in advance on 486's. The 316210311Sjmallett * 486 has a penalty of 2 clock cycles for loading 317210311Sjmallett * a cache line, plus whatever time the external 318210311Sjmallett * memory takes to load the first word(s) addressed. 319210311Sjmallett * These penalties are unavoidable. Subsequent 320210311Sjmallett * accesses to a cache line being loaded (and to 321210311Sjmallett * other external memory?) are delayed until the 322210311Sjmallett * whole load finishes. These penalties are mostly 323219694Sjmallett * avoided by not accessing external memory for 324210311Sjmallett * 8 cycles after the ADD(16) and 12 cycles after 325219694Sjmallett * the LOAD(32). The loop terminates when len 326219694Sjmallett * is initially 33 (not 32) to guaranteed that 327219694Sjmallett * the LOAD(32) is within bounds. 328210311Sjmallett */ 329219694Sjmallett ADD(16); 330210311Sjmallett ADDC(0); 331210311Sjmallett ADDC(4); 332210311Sjmallett ADDC(8); 333210311Sjmallett ADDC(12); 334210311Sjmallett LOAD(32); 335210311Sjmallett ADDC(20); 336210311Sjmallett ADDC(24); 337213150Sjmallett ADDC(28); 338213150Sjmallett MOP; 339213150Sjmallett w += 16; 340213150Sjmallett } 341213150Sjmallett len += 32 + 1; 342210311Sjmallett if (len >= 32) { 343210311Sjmallett ADD(16); 344210311Sjmallett ADDC(0); 345210311Sjmallett ADDC(4); 346210311Sjmallett ADDC(8); 347210311Sjmallett ADDC(12); 348210311Sjmallett ADDC(20); 349210311Sjmallett ADDC(24); 350210311Sjmallett ADDC(28); 351210311Sjmallett MOP; 352231987Sgonzo w += 16; 353231987Sgonzo len -= 32; 354231987Sgonzo } 355210311Sjmallett if (len >= 16) { 356210311Sjmallett ADD(0); 357210311Sjmallett ADDC(4); 358231987Sgonzo ADDC(8); 359210311Sjmallett ADDC(12); 360210311Sjmallett MOP; 361210311Sjmallett w += 8; 362231987Sgonzo len -= 16; 363210311Sjmallett } 364210311Sjmallett if (len >= 8) { 365210311Sjmallett ADD(0); 366210311Sjmallett ADDC(4); 367210311Sjmallett MOP; 368210311Sjmallett w += 4; 369210311Sjmallett len -= 8; 370210311Sjmallett } 371210311Sjmallett if (len == 0 && byte_swapped == 0) 372210311Sjmallett goto out; 373210311Sjmallett REDUCE; 374210311Sjmallett while ((len -= 2) >= 0) { 375210311Sjmallett sum += *w++; 376210311Sjmallett } 377213150Sjmallett if (byte_swapped) { 378210311Sjmallett sum <<= 8; 379210311Sjmallett byte_swapped = 0; 380210311Sjmallett if (len == -1) { 381210311Sjmallett su.c[1] = *(char *)w; 382210311Sjmallett sum += su.s; 383210311Sjmallett len = 0; 384210311Sjmallett } else 385210311Sjmallett len = -1; 386210311Sjmallett } else if (len == -1) { 387210311Sjmallett /* 388210311Sjmallett * This buffer has odd number of bytes. 389210311Sjmallett * There could be a word split betwen 390210311Sjmallett * this buffer and the next. 391210311Sjmallett */ 392210311Sjmallett su.c[0] = *(char *)w; 393210311Sjmallett } 394210311Sjmallettout: 395215974Sjmallett if (len == -1) { 396210311Sjmallett /* The last buffer has odd # of bytes. Follow the 397210311Sjmallett standard (the odd byte is shifted left by 8 bits) */ 398210311Sjmallett su.c[1] = 0; 399210311Sjmallett sum += su.s; 400210311Sjmallett } 401210311Sjmallett return sum; 402210311Sjmallett} 403210311Sjmallett 404210311Sjmallettint 405210311Sjmallettin_cksum_finalize(psum) 406210311Sjmallett in_psum_t psum; 407215974Sjmallett{ 408210311Sjmallett in_psum_t sum = psum; 409210311Sjmallett REDUCE; 410210311Sjmallett return (sum & 0xffff); 411210311Sjmallett} 412210311Sjmallett