/* in4_cksum.c revision 130613 */
14Srgrimes/* $FreeBSD: head/sys/contrib/pf/netinet/in4_cksum.c 126261 2004-02-26 02:34:12Z mlaier $ */ 24Srgrimes/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */ 34Srgrimes/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */ 44Srgrimes/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */ 54Srgrimes 64Srgrimes/* 74Srgrimes * Copyright (C) 1999 WIDE Project. 84Srgrimes * All rights reserved. 94Srgrimes * 104Srgrimes * Redistribution and use in source and binary forms, with or without 114Srgrimes * modification, are permitted provided that the following conditions 124Srgrimes * are met: 134Srgrimes * 1. Redistributions of source code must retain the above copyright 144Srgrimes * notice, this list of conditions and the following disclaimer. 154Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 164Srgrimes * notice, this list of conditions and the following disclaimer in the 174Srgrimes * documentation and/or other materials provided with the distribution. 184Srgrimes * 3. Neither the name of the project nor the names of its contributors 194Srgrimes * may be used to endorse or promote products derived from this software 204Srgrimes * without specific prior written permission. 214Srgrimes * 224Srgrimes * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 234Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 244Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 254Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 264Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 274Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 284Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 294Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 304Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 314Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 324Srgrimes * SUCH DAMAGE. 334Srgrimes */ 344Srgrimes 354Srgrimes/* 36556Srgrimes * Copyright (c) 1988, 1992, 1993 3750477Speter * The Regents of the University of California. All rights reserved. 3815392Sphk * 39757Sdg * Redistribution and use in source and binary forms, with or without 40757Sdg * modification, are permitted provided that the following conditions 41757Sdg * are met: 4215392Sphk * 1. Redistributions of source code must retain the above copyright 4315392Sphk * notice, this list of conditions and the following disclaimer. 444Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 454Srgrimes * notice, this list of conditions and the following disclaimer in the 4632358Seivind * documentation and/or other materials provided with the distribution. 4790132Sbde * 3. Neither the name of the University nor the names of its contributors 4837272Sjmg * may be used to endorse or promote products derived from this software 4914835Sbde * without specific prior written permission. 5014835Sbde * 515908Sbde * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 524Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 5314835Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 5414835Sbde * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 5514835Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 5615543Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 5714835Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 5814835Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 5914835Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 6014835Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 614Srgrimes * SUCH DAMAGE. 62757Sdg * 63757Sdg * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 644Srgrimes */ 654Srgrimes 664Srgrimes#include <sys/param.h> 674Srgrimes#include <sys/mbuf.h> 684Srgrimes#include <sys/systm.h> 69200Sdg#include <sys/socket.h> 704Srgrimes#include <net/route.h> 714Srgrimes#include <netinet/in.h> 7282307Sjulian#include <netinet/in_systm.h> 7382307Sjulian#include <netinet/ip.h> 7482307Sjulian#include <netinet/ip_var.h> 7582308Speter 7682394Speter#if defined(__FreeBSD__) && defined(__i386__) 7782307Sjulian/* 784Srgrimes * Copied from FreeBSD 5.0 sys/i386/i386/in_cksum.c 7973011Sjake * XXX 8073011Sjake * Currently support I386 processor only. 8173011Sjake * In the long run, we need an optimized cksum routines for each Tier1 8273011Sjake * architecture. Due to the lack of available hardware except I386 I 83592Srgrimes * can't support other processors now. For those users which use Sparc64, 843861Sbde * Alpha processors can use more optimized version in FreeBSD. 854Srgrimes * See sys/$ARCH/$ARCH/in_cksum.c where $ARCH=`uname -p` 864Srgrimes */ 8782307Sjulian 884Srgrimes/* 8973011Sjake * These asm statements require __volatile because they pass information 9073011Sjake * via the condition codes. GCC does not currently provide a way to specify 9173011Sjake * the condition codes as an input or output operand. 
9273011Sjake * 934Srgrimes * The LOAD macro below is effectively a prefetch into cache. GCC will 9470928Sjake * load the value into a register but will not use it. Since modern CPUs 954Srgrimes * reorder operations, this will generally take place in parallel with 9670928Sjake * other calculations. 9770928Sjake */ 9870928Sjake#define ADD(n) __asm __volatile \ 9970928Sjake ("addl %1, %0" : "+r" (sum) : \ 10073011Sjake "g" (((const u_int32_t *)w)[n / 4])) 10173011Sjake#define ADDC(n) __asm __volatile \ 10273011Sjake ("adcl %1, %0" : "+r" (sum) : \ 10370928Sjake "g" (((const u_int32_t *)w)[n / 4])) 10470928Sjake#define LOAD(n) __asm __volatile \ 10570928Sjake ("" : : "r" (((const u_int32_t *)w)[n / 4])) 10682262Speter#define MOP __asm __volatile \ 10782262Speter ("adcl $0, %0" : "+r" (sum)) 10882262Speter#endif 10982262Speter/* 11082262Speter * Checksum routine for Internet Protocol family headers (Portable Version). 11182262Speter * This is only for IPv4 pseudo header checksum. 112556Srgrimes * No need to clear non-pseudo-header fields in IPv4 header. 113556Srgrimes * len is for actual payload size, and does not include IPv4 header and 114556Srgrimes * skipped header chain (off + len should be equal to the whole packet). 11599741Sobrien * 116134Sdg * This routine is very heavily used in the network 11725083Sjdp * code and should be modified for each CPU to be as fast as possible. 11899741Sobrien */ 11925083Sjdp 1203842Sdg#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) 12182957Speter#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} 12299741Sobrien 1234Srgrimes#if defined(__FreeBSD__) 12499862Speterint 12599741Sobrienin4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); 12699741Sobrien#endif 127757Sdg 12826812Speterint 12973011Sjakein4_cksum(m, nxt, off, len) 13099741Sobrien struct mbuf *m; 13199741Sobrien u_int8_t nxt; 13225164Speter int off, len; 13373011Sjake{ 13499741Sobrien u_int16_t *w; 13599741Sobrien int sum = 0; 13626812Speter int mlen = 0; 13725164Speter int byte_swapped = 0; 13873011Sjake union { 13999741Sobrien struct ipovly ipov; 1403861Sbde u_int16_t w[10]; 14126812Speter } u; 14273011Sjake union { 14326812Speter u_int8_t c[2]; 14499741Sobrien u_int16_t s; 1454Srgrimes } s_util; 14683366Sjulian union { 14799741Sobrien u_int16_t s[2]; 14899741Sobrien u_int32_t l; 14999741Sobrien } l_util; 15099741Sobrien 151134Sdg if (nxt != 0) { 15299741Sobrien /* pseudo header */ 15337889Sjlemon if (off < sizeof(struct ipovly)) 15473011Sjake panic("in4_cksum: offset too short"); 15599741Sobrien if (m->m_len < sizeof(struct ip)) 15699741Sobrien panic("in4_cksum: bad mbuf chain"); 15734840Sjlemon bzero(&u.ipov, sizeof(u.ipov)); 15843434Skato u.ipov.ih_len = htons(len); 15973011Sjake u.ipov.ih_pr = nxt; 16073011Sjake u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 16143434Skato u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; 16243434Skato w = u.w; 16315428Sphk /* assumes sizeof(ipov) == 20 */ 16415392Sphk sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; 16515392Sphk sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; 16615392Sphk } 16715392Sphk 168556Srgrimes /* skip unnecessary part */ 169134Sdg while (m && off > 0) { 17015392Sphk if (m->m_len > off) 17115392Sphk break; 17215392Sphk off -= m->m_len; 17315392Sphk m = m->m_next; 17415543Sphk } 17515392Sphk 17615392Sphk for (;m && len; m = m->m_next) { 17715392Sphk if (m->m_len == 0) 17815543Sphk continue; 
17915392Sphk w = (u_int16_t *)(mtod(m, caddr_t) + off); 18015428Sphk if (mlen == -1) { 18115428Sphk /* 18215428Sphk * The first byte of this mbuf is the continuation 18315392Sphk * of a word spanning between this mbuf and the 184134Sdg * last mbuf. 18515392Sphk * 18615565Sphk * s_util.c[0] is already saved when scanning previous 18715565Sphk * mbuf. 18815392Sphk */ 18915565Sphk s_util.c[1] = *(u_int8_t *)w; 19015565Sphk sum += s_util.s; 191134Sdg w = (u_int16_t *)((u_int8_t *)w + 1); 19215565Sphk mlen = m->m_len - off - 1; 193111299Sjake len--; 19419621Sdyson } else 19519621Sdyson mlen = m->m_len - off; 19619621Sdyson off = 0; 19715565Sphk if (len < mlen) 19815565Sphk mlen = len; 199111299Sjake len -= mlen; 20015428Sphk#if defined(__FreeBSD__) && defined(__i386__) 20115392Sphk /* 20215565Sphk * Force to long boundary so we do longword aligned 20315565Sphk * memory operations 20415565Sphk */ 20515565Sphk if (3 & (int) w) { 20615565Sphk REDUCE; 20715565Sphk if ((1 & (int) w) && (mlen > 0)) { 20815565Sphk sum <<= 8; 20915565Sphk s_util.c[0] = *(char *)w; 21015565Sphk w = (u_short *)((char *)w + 1); 21173011Sjake mlen--; 21215565Sphk byte_swapped = 1; 21315392Sphk } 21415392Sphk if ((2 & (int) w) && (mlen >= 2)) { 21515392Sphk sum += *w++; 21615392Sphk mlen -= 2; 21715392Sphk } 21815392Sphk } 2191321Sdg /* 2204Srgrimes * Advance to a 486 cache line boundary. 22124112Skato */ 22224112Skato if (4 & (int) w && mlen >= 4) { 22343434Skato ADD(0); 22473011Sjake MOP; 22543434Skato w += 2; 22624112Skato mlen -= 4; 22724112Skato } 22824112Skato if (8 & (int) w && mlen >= 8) { 22924112Skato ADD(0); 23015392Sphk ADDC(4); 23115392Sphk MOP; 23254128Skato w += 4; 23315392Sphk mlen -= 8; 23415428Sphk } 2353384Srgrimes /* 2363384Srgrimes * Do as much of the checksum as possible 32 bits at at time. 2373384Srgrimes * In fact, this loop is unrolled to make overhead from 23815392Sphk * branches &c small. 
2395603Sbde */ 2402486Sdg mlen -= 1; 24115428Sphk while ((mlen -= 32) >= 0) { 24215428Sphk /* 24315428Sphk * Add with carry 16 words and fold in the last 24415428Sphk * carry by adding a 0 with carry. 24515428Sphk * 24615428Sphk * The early ADD(16) and the LOAD(32) are to load 2474217Sphk * the next 2 cache lines in advance on 486's. The 2484217Sphk * 486 has a penalty of 2 clock cycles for loading 2494217Sphk * a cache line, plus whatever time the external 25015392Sphk * memory takes to load the first word(s) addressed. 25115392Sphk * These penalties are unavoidable. Subsequent 25215428Sphk * accesses to a cache line being loaded (and to 25315428Sphk * other external memory?) are delayed until the 25415428Sphk * whole load finishes. These penalties are mostly 25515428Sphk * avoided by not accessing external memory for 25615428Sphk * 8 cycles after the ADD(16) and 12 cycles after 25715428Sphk * the LOAD(32). The loop terminates when mlen 25825083Sjdp * is initially 33 (not 32) to guaranteed that 25915392Sphk * the LOAD(32) is within bounds. 
26024112Skato */ 26143447Skato ADD(16); 26273011Sjake ADDC(0); 26324112Skato ADDC(4); 26443447Skato ADDC(8); 26573011Sjake ADDC(12); 26624112Skato LOAD(32); 26724112Skato ADDC(20); 26824112Skato ADDC(24); 26924112Skato ADDC(28); 27024112Skato MOP; 27124112Skato w += 16; 27224112Skato } 27324112Skato mlen += 32 + 1; 27424112Skato if (mlen >= 32) { 27524112Skato ADD(16); 27624112Skato ADDC(0); 27724112Skato ADDC(4); 27824112Skato ADDC(8); 27924112Skato ADDC(12); 28073011Sjake ADDC(20); 28124112Skato ADDC(24); 28258786Skato ADDC(28); 28373011Sjake MOP; 28473011Sjake w += 16; 28524112Skato mlen -= 32; 28624112Skato } 28715392Sphk if (mlen >= 16) { 28815392Sphk ADD(0); 28915392Sphk ADDC(4); 29015428Sphk ADDC(8); 29117120Sbde ADDC(12); 29215428Sphk MOP; 29317120Sbde w += 8; 29417120Sbde mlen -= 16; 29515428Sphk } 29615428Sphk if (mlen >= 8) { 29715428Sphk ADD(0); 29815428Sphk ADDC(4); 29915428Sphk MOP; 30015428Sphk w += 4; 30115428Sphk mlen -= 8; 302109994Sjake } 303109994Sjake if (mlen == 0 && byte_swapped == 0) 30415428Sphk continue; /* worth 1% maybe ?? */ 30573011Sjake REDUCE; 30673011Sjake while ((mlen -= 2) >= 0) { 30715392Sphk sum += *w++; 30815392Sphk } 30915428Sphk if (byte_swapped) { 31015428Sphk REDUCE; 31115428Sphk sum <<= 8; 31215392Sphk byte_swapped = 0; 31315392Sphk if (mlen == -1) { 31415392Sphk s_util.c[1] = *(char *)w; 31527993Sdyson sum += s_util.s; 31627993Sdyson mlen = 0; 31727993Sdyson } else 31873011Sjake mlen = -1; 31927993Sdyson } else if (mlen == -1) 32027993Sdyson /* 32127993Sdyson * This mbuf has odd number of bytes. 32227993Sdyson * There could be a word split betwen 32327993Sdyson * this mbuf and the next mbuf. 32427993Sdyson * Save the last byte (to prepend to next mbuf). 32515392Sphk */ 32673011Sjake s_util.c[0] = *(char *)w; 32799741Sobrien#else 32899741Sobrien /* 32999741Sobrien * Force to even boundary. 
33099741Sobrien */ 33115392Sphk if ((1 & (long) w) && (mlen > 0)) { 33299741Sobrien REDUCE; 33315392Sphk sum <<= 8; 33415392Sphk s_util.c[0] = *(u_int8_t *)w; 33515392Sphk w = (u_int16_t *)((int8_t *)w + 1); 33615392Sphk mlen--; 33715392Sphk byte_swapped = 1; 33899741Sobrien } 33983366Sjulian /* 34083366Sjulian * Unroll the loop to make overhead from 34165815Sbde * branches &c small. 34299741Sobrien */ 34365815Sbde while ((mlen -= 32) >= 0) { 34473011Sjake sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; 34583366Sjulian sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; 34615392Sphk sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; 34799741Sobrien sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; 34899741Sobrien w += 16; 34915392Sphk } 35065815Sbde mlen += 32; 35165815Sbde while ((mlen -= 8) >= 0) { 35265815Sbde sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; 35365815Sbde w += 4; 35465815Sbde } 35565815Sbde mlen += 8; 35665815Sbde if (mlen == 0 && byte_swapped == 0) 35799741Sobrien continue; 35865815Sbde REDUCE; 35999741Sobrien while ((mlen -= 2) >= 0) { 36015392Sphk sum += *w++; 36124691Speter } 36215392Sphk if (byte_swapped) { 36315392Sphk REDUCE; 36415392Sphk sum <<= 8; 365107521Sdeischen byte_swapped = 0; 366107521Sdeischen if (mlen == -1) { 36715392Sphk s_util.c[1] = *(u_int8_t *)w; 36852140Sluoqi sum += s_util.s; 369107521Sdeischen mlen = 0; 37099741Sobrien } else 371107521Sdeischen mlen = -1; 37252140Sluoqi } else if (mlen == -1) 37399741Sobrien s_util.c[0] = *(u_int8_t *)w; 37499741Sobrien#endif 375107521Sdeischen } 376107521Sdeischen if (len) 377107521Sdeischen printf("cksum4: out of data\n"); 37852140Sluoqi if (mlen == -1) { 379105950Speter /* The last mbuf has odd # of bytes. 
Follow the 380105950Speter standard (the odd byte may be shifted left by 8 bits 381105950Speter or not as determined by endian-ness of the machine) */ 382107521Sdeischen s_util.c[1] = 0; 383107521Sdeischen sum += s_util.s; 384105950Speter } 385105950Speter REDUCE; 386107521Sdeischen return (~sum & 0xffff); 387105950Speter} 388107521Sdeischen