in4_cksum.c revision 130613
14Srgrimes/*	$FreeBSD: head/sys/contrib/pf/netinet/in4_cksum.c 126261 2004-02-26 02:34:12Z mlaier $	*/
24Srgrimes/*	$OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $	*/
34Srgrimes/*	$KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $	*/
44Srgrimes/*	$NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $	*/
54Srgrimes
64Srgrimes/*
74Srgrimes * Copyright (C) 1999 WIDE Project.
84Srgrimes * All rights reserved.
94Srgrimes *
104Srgrimes * Redistribution and use in source and binary forms, with or without
114Srgrimes * modification, are permitted provided that the following conditions
124Srgrimes * are met:
134Srgrimes * 1. Redistributions of source code must retain the above copyright
144Srgrimes *    notice, this list of conditions and the following disclaimer.
154Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
164Srgrimes *    notice, this list of conditions and the following disclaimer in the
174Srgrimes *    documentation and/or other materials provided with the distribution.
184Srgrimes * 3. Neither the name of the project nor the names of its contributors
194Srgrimes *    may be used to endorse or promote products derived from this software
204Srgrimes *    without specific prior written permission.
214Srgrimes *
224Srgrimes * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
234Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
244Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
254Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
264Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
274Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
284Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
294Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
304Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
314Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
324Srgrimes * SUCH DAMAGE.
334Srgrimes */
344Srgrimes
354Srgrimes/*
36556Srgrimes * Copyright (c) 1988, 1992, 1993
3750477Speter *	The Regents of the University of California.  All rights reserved.
3815392Sphk *
39757Sdg * Redistribution and use in source and binary forms, with or without
40757Sdg * modification, are permitted provided that the following conditions
41757Sdg * are met:
4215392Sphk * 1. Redistributions of source code must retain the above copyright
4315392Sphk *    notice, this list of conditions and the following disclaimer.
444Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
454Srgrimes *    notice, this list of conditions and the following disclaimer in the
4632358Seivind *    documentation and/or other materials provided with the distribution.
4790132Sbde * 3. Neither the name of the University nor the names of its contributors
4837272Sjmg *    may be used to endorse or promote products derived from this software
4914835Sbde *    without specific prior written permission.
5014835Sbde *
515908Sbde * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
524Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
5314835Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
5414835Sbde * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
5514835Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
5615543Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
5714835Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
5814835Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
5914835Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
6014835Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
614Srgrimes * SUCH DAMAGE.
62757Sdg *
63757Sdg *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
644Srgrimes */
654Srgrimes
664Srgrimes#include <sys/param.h>
674Srgrimes#include <sys/mbuf.h>
684Srgrimes#include <sys/systm.h>
69200Sdg#include <sys/socket.h>
704Srgrimes#include <net/route.h>
714Srgrimes#include <netinet/in.h>
7282307Sjulian#include <netinet/in_systm.h>
7382307Sjulian#include <netinet/ip.h>
7482307Sjulian#include <netinet/ip_var.h>
7582308Speter
7682394Speter#if defined(__FreeBSD__) && defined(__i386__)
/*
 * Copied from FreeBSD 5.0 sys/i386/i386/in_cksum.c
 * XXX
 * Currently only the I386 processor is supported.
 * In the long run, we need optimized cksum routines for each Tier1
 * architecture.  Due to the lack of available hardware other than I386, I
 * can't support other processors now.  Users of Sparc64 or Alpha
 * processors can use the more optimized versions in FreeBSD.
 * See sys/$ARCH/$ARCH/in_cksum.c where $ARCH=`uname -p`
 */
8782307Sjulian
/*
 * These asm statements require __volatile because they pass information
 * via the condition codes.  GCC does not currently provide a way to specify
 * the condition codes as an input or output operand.
 *
 * The LOAD macro below is effectively a prefetch into cache.  GCC will
 * load the value into a register but will not use it.  Since modern CPUs
 * reorder operations, this will generally take place in parallel with
 * other calculations.
 *
 * All four macros implicitly operate on the variables `sum' (accumulator)
 * and `w' (current data pointer) that are in scope at the expansion site.
 * `n' is a byte offset from `w'; it is divided by 4 to index 32-bit words.
 *
 *	ADD(n)	- addl: add the 32-bit word at byte offset n to sum.
 *	ADDC(n)	- adcl: same, but also add in the carry flag.
 *	LOAD(n)	- empty asm that only forces the word into a register.
 *	MOP	- adcl $0: fold the outstanding carry flag into sum.
 */
#define ADD(n)	__asm __volatile \
		("addl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
#define ADDC(n)	__asm __volatile \
		("adcl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
#define LOAD(n)	__asm __volatile \
		("" : : "r" (((const u_int32_t *)w)[(n) / 4]))
#define MOP	__asm __volatile \
		("adcl         $0, %0" : "+r" (sum))
10882262Speter#endif
/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 * This is only for IPv4 pseudo header checksum.
 * No need to clear non-pseudo-header fields in IPv4 header.
 * len is for actual payload size, and does not include IPv4 header and
 * skipped header chain (off + len should be equal to the whole packet).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */
11925083Sjdp
/*
 * ADDCARRY(x): if x exceeds 65535, subtract 65535 from it in place
 * (end-around carry for a value that overflowed 16 bits by one carry).
 *
 * REDUCE: fold the accumulator `sum' down to 16 bits by adding its two
 * 16-bit halves (via the `l_util' union) and applying ADDCARRY.  Both
 * `sum' and `l_util' must be in scope at the expansion site.  Wrapped in
 * do { } while (0) so that `REDUCE;' is a single statement and is safe as
 * the body of an unbraced if/else.
 */
#define ADDCARRY(x)  ((x) > 65535 ? ((x) -= 65535) : (x))
#define REDUCE \
	do { \
		l_util.l = sum; \
		sum = l_util.s[0] + l_util.s[1]; \
		ADDCARRY(sum); \
	} while (0)
12299741Sobrien
1234Srgrimes#if defined(__FreeBSD__)
12499862Speterint
12599741Sobrienin4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
12699741Sobrien#endif
127757Sdg
12826812Speterint
12973011Sjakein4_cksum(m, nxt, off, len)
13099741Sobrien	struct mbuf *m;
13199741Sobrien	u_int8_t nxt;
13225164Speter	int off, len;
13373011Sjake{
13499741Sobrien	u_int16_t *w;
13599741Sobrien	int sum = 0;
13626812Speter	int mlen = 0;
13725164Speter	int byte_swapped = 0;
13873011Sjake	union {
13999741Sobrien		struct ipovly ipov;
1403861Sbde		u_int16_t w[10];
14126812Speter	} u;
14273011Sjake	union {
14326812Speter		u_int8_t  c[2];
14499741Sobrien		u_int16_t s;
1454Srgrimes	} s_util;
14683366Sjulian	union {
14799741Sobrien		u_int16_t s[2];
14899741Sobrien		u_int32_t l;
14999741Sobrien	} l_util;
15099741Sobrien
151134Sdg	if (nxt != 0) {
15299741Sobrien		/* pseudo header */
15337889Sjlemon		if (off < sizeof(struct ipovly))
15473011Sjake			panic("in4_cksum: offset too short");
15599741Sobrien		if (m->m_len < sizeof(struct ip))
15699741Sobrien			panic("in4_cksum: bad mbuf chain");
15734840Sjlemon		bzero(&u.ipov, sizeof(u.ipov));
15843434Skato		u.ipov.ih_len = htons(len);
15973011Sjake		u.ipov.ih_pr = nxt;
16073011Sjake		u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
16143434Skato		u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
16243434Skato		w = u.w;
16315428Sphk		/* assumes sizeof(ipov) == 20 */
16415392Sphk		sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
16515392Sphk		sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
16615392Sphk	}
16715392Sphk
168556Srgrimes	/* skip unnecessary part */
169134Sdg	while (m && off > 0) {
17015392Sphk		if (m->m_len > off)
17115392Sphk			break;
17215392Sphk		off -= m->m_len;
17315392Sphk		m = m->m_next;
17415543Sphk	}
17515392Sphk
17615392Sphk	for (;m && len; m = m->m_next) {
17715392Sphk		if (m->m_len == 0)
17815543Sphk			continue;
17915392Sphk		w = (u_int16_t *)(mtod(m, caddr_t) + off);
18015428Sphk		if (mlen == -1) {
18115428Sphk			/*
18215428Sphk			 * The first byte of this mbuf is the continuation
18315392Sphk			 * of a word spanning between this mbuf and the
184134Sdg			 * last mbuf.
18515392Sphk			 *
18615565Sphk			 * s_util.c[0] is already saved when scanning previous
18715565Sphk			 * mbuf.
18815392Sphk			 */
18915565Sphk			s_util.c[1] = *(u_int8_t *)w;
19015565Sphk			sum += s_util.s;
191134Sdg			w = (u_int16_t *)((u_int8_t *)w + 1);
19215565Sphk			mlen = m->m_len - off - 1;
193111299Sjake			len--;
19419621Sdyson		} else
19519621Sdyson			mlen = m->m_len - off;
19619621Sdyson		off = 0;
19715565Sphk		if (len < mlen)
19815565Sphk			mlen = len;
199111299Sjake		len -= mlen;
20015428Sphk#if defined(__FreeBSD__) && defined(__i386__)
20115392Sphk		/*
20215565Sphk		 * Force to long boundary so we do longword aligned
20315565Sphk		 * memory operations
20415565Sphk		 */
20515565Sphk		if (3 & (int) w) {
20615565Sphk			REDUCE;
20715565Sphk			if ((1 & (int) w) && (mlen > 0)) {
20815565Sphk				sum <<= 8;
20915565Sphk				s_util.c[0] = *(char *)w;
21015565Sphk				w = (u_short *)((char *)w + 1);
21173011Sjake				mlen--;
21215565Sphk				byte_swapped = 1;
21315392Sphk			}
21415392Sphk			if ((2 & (int) w) && (mlen >= 2)) {
21515392Sphk				sum += *w++;
21615392Sphk				mlen -= 2;
21715392Sphk			}
21815392Sphk		}
2191321Sdg		/*
2204Srgrimes		 * Advance to a 486 cache line boundary.
22124112Skato		 */
22224112Skato		if (4 & (int) w && mlen >= 4) {
22343434Skato			ADD(0);
22473011Sjake			MOP;
22543434Skato			w += 2;
22624112Skato			mlen -= 4;
22724112Skato		}
22824112Skato		if (8 & (int) w && mlen >= 8) {
22924112Skato			ADD(0);
23015392Sphk			ADDC(4);
23115392Sphk			MOP;
23254128Skato			w += 4;
23315392Sphk			mlen -= 8;
23415428Sphk		}
2353384Srgrimes		/*
2363384Srgrimes		 * Do as much of the checksum as possible 32 bits at at time.
2373384Srgrimes		 * In fact, this loop is unrolled to make overhead from
23815392Sphk		 * branches &c small.
2395603Sbde		 */
2402486Sdg		mlen -= 1;
24115428Sphk		while ((mlen -= 32) >= 0) {
24215428Sphk			/*
24315428Sphk			 * Add with carry 16 words and fold in the last
24415428Sphk			 * carry by adding a 0 with carry.
24515428Sphk			 *
24615428Sphk			 * The early ADD(16) and the LOAD(32) are to load
2474217Sphk			 * the next 2 cache lines in advance on 486's.  The
2484217Sphk			 * 486 has a penalty of 2 clock cycles for loading
2494217Sphk			 * a cache line, plus whatever time the external
25015392Sphk			 * memory takes to load the first word(s) addressed.
25115392Sphk			 * These penalties are unavoidable.  Subsequent
25215428Sphk			 * accesses to a cache line being loaded (and to
25315428Sphk			 * other external memory?) are delayed until the
25415428Sphk			 * whole load finishes.  These penalties are mostly
25515428Sphk			 * avoided by not accessing external memory for
25615428Sphk			 * 8 cycles after the ADD(16) and 12 cycles after
25715428Sphk			 * the LOAD(32).  The loop terminates when mlen
25825083Sjdp			 * is initially 33 (not 32) to guaranteed that
25915392Sphk			 * the LOAD(32) is within bounds.
26024112Skato			 */
26143447Skato			ADD(16);
26273011Sjake			ADDC(0);
26324112Skato			ADDC(4);
26443447Skato			ADDC(8);
26573011Sjake			ADDC(12);
26624112Skato			LOAD(32);
26724112Skato			ADDC(20);
26824112Skato			ADDC(24);
26924112Skato			ADDC(28);
27024112Skato			MOP;
27124112Skato			w += 16;
27224112Skato		}
27324112Skato		mlen += 32 + 1;
27424112Skato		if (mlen >= 32) {
27524112Skato			ADD(16);
27624112Skato			ADDC(0);
27724112Skato			ADDC(4);
27824112Skato			ADDC(8);
27924112Skato			ADDC(12);
28073011Sjake			ADDC(20);
28124112Skato			ADDC(24);
28258786Skato			ADDC(28);
28373011Sjake			MOP;
28473011Sjake			w += 16;
28524112Skato			mlen -= 32;
28624112Skato		}
28715392Sphk		if (mlen >= 16) {
28815392Sphk			ADD(0);
28915392Sphk			ADDC(4);
29015428Sphk			ADDC(8);
29117120Sbde			ADDC(12);
29215428Sphk			MOP;
29317120Sbde			w += 8;
29417120Sbde			mlen -= 16;
29515428Sphk		}
29615428Sphk		if (mlen >= 8) {
29715428Sphk			ADD(0);
29815428Sphk			ADDC(4);
29915428Sphk			MOP;
30015428Sphk			w += 4;
30115428Sphk			mlen -= 8;
302109994Sjake		}
303109994Sjake		if (mlen == 0 && byte_swapped == 0)
30415428Sphk			continue;       /* worth 1% maybe ?? */
30573011Sjake		REDUCE;
30673011Sjake		while ((mlen -= 2) >= 0) {
30715392Sphk			sum += *w++;
30815392Sphk		}
30915428Sphk		if (byte_swapped) {
31015428Sphk			REDUCE;
31115428Sphk			sum <<= 8;
31215392Sphk			byte_swapped = 0;
31315392Sphk			if (mlen == -1) {
31415392Sphk				s_util.c[1] = *(char *)w;
31527993Sdyson				sum += s_util.s;
31627993Sdyson				mlen = 0;
31727993Sdyson			} else
31873011Sjake				mlen = -1;
31927993Sdyson		} else if (mlen == -1)
32027993Sdyson			/*
32127993Sdyson			 * This mbuf has odd number of bytes.
32227993Sdyson			 * There could be a word split betwen
32327993Sdyson			 * this mbuf and the next mbuf.
32427993Sdyson			 * Save the last byte (to prepend to next mbuf).
32515392Sphk			 */
32673011Sjake			s_util.c[0] = *(char *)w;
32799741Sobrien#else
32899741Sobrien		/*
32999741Sobrien		 * Force to even boundary.
33099741Sobrien		 */
33115392Sphk		if ((1 & (long) w) && (mlen > 0)) {
33299741Sobrien			REDUCE;
33315392Sphk			sum <<= 8;
33415392Sphk			s_util.c[0] = *(u_int8_t *)w;
33515392Sphk			w = (u_int16_t *)((int8_t *)w + 1);
33615392Sphk			mlen--;
33715392Sphk			byte_swapped = 1;
33899741Sobrien		}
33983366Sjulian		/*
34083366Sjulian		 * Unroll the loop to make overhead from
34165815Sbde		 * branches &c small.
34299741Sobrien		 */
34365815Sbde		while ((mlen -= 32) >= 0) {
34473011Sjake			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
34583366Sjulian			sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
34615392Sphk			sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
34799741Sobrien			sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
34899741Sobrien			w += 16;
34915392Sphk		}
35065815Sbde		mlen += 32;
35165815Sbde		while ((mlen -= 8) >= 0) {
35265815Sbde			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
35365815Sbde			w += 4;
35465815Sbde		}
35565815Sbde		mlen += 8;
35665815Sbde		if (mlen == 0 && byte_swapped == 0)
35799741Sobrien			continue;
35865815Sbde		REDUCE;
35999741Sobrien		while ((mlen -= 2) >= 0) {
36015392Sphk			sum += *w++;
36124691Speter		}
36215392Sphk		if (byte_swapped) {
36315392Sphk			REDUCE;
36415392Sphk			sum <<= 8;
365107521Sdeischen			byte_swapped = 0;
366107521Sdeischen			if (mlen == -1) {
36715392Sphk				s_util.c[1] = *(u_int8_t *)w;
36852140Sluoqi				sum += s_util.s;
369107521Sdeischen				mlen = 0;
37099741Sobrien			} else
371107521Sdeischen				mlen = -1;
37252140Sluoqi		} else if (mlen == -1)
37399741Sobrien			s_util.c[0] = *(u_int8_t *)w;
37499741Sobrien#endif
375107521Sdeischen	}
376107521Sdeischen	if (len)
377107521Sdeischen		printf("cksum4: out of data\n");
37852140Sluoqi	if (mlen == -1) {
379105950Speter		/* The last mbuf has odd # of bytes. Follow the
380105950Speter		   standard (the odd byte may be shifted left by 8 bits
381105950Speter		   or not as determined by endian-ness of the machine) */
382107521Sdeischen		s_util.c[1] = 0;
383107521Sdeischen		sum += s_util.s;
384105950Speter	}
385105950Speter	REDUCE;
386107521Sdeischen	return (~sum & 0xffff);
387105950Speter}
388107521Sdeischen