in4_cksum.c revision 126261
1/*	$FreeBSD: head/sys/contrib/pf/netinet/in4_cksum.c 126261 2004-02-26 02:34:12Z mlaier $	*/
2/*	$OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $	*/
3/*	$KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $	*/
4/*	$NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $	*/
5
6/*
7 * Copyright (C) 1999 WIDE Project.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the project nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*
36 * Copyright (c) 1988, 1992, 1993
37 *	The Regents of the University of California.  All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 *    notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 *    notice, this list of conditions and the following disclaimer in the
46 *    documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 *    may be used to endorse or promote products derived from this software
49 *    without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
64 */
65
66#include <sys/param.h>
67#include <sys/mbuf.h>
68#include <sys/systm.h>
69#include <sys/socket.h>
70#include <net/route.h>
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74#include <netinet/ip_var.h>
75
76#if defined(__FreeBSD__) && defined(__i386__)
/*
 * Copied from FreeBSD 5.0 sys/i386/i386/in_cksum.c
 * XXX
 * Currently only the i386 processor is supported.
 * In the long run, we need optimized cksum routines for each Tier 1
 * architecture.  Due to the lack of available hardware other than i386,
 * other processors cannot be supported now.  Users of Sparc64 or Alpha
 * processors can use the more optimized versions in FreeBSD.
 * See sys/$ARCH/$ARCH/in_cksum.c where $ARCH=`uname -p`
 */
87
88/*
89 * These asm statements require __volatile because they pass information
90 * via the condition codes.  GCC does not currently provide a way to specify
91 * the condition codes as an input or output operand.
92 *
93 * The LOAD macro below is effectively a prefetch into cache.  GCC will
94 * load the value into a register but will not use it.  Since modern CPUs
95 * reorder operations, this will generally take place in parallel with
96 * other calculations.
97 */
/*
 * Inline-assembly helpers for the i386 checksum loop.
 *
 * All of them expand in a scope that provides an integer accumulator
 * named `sum' and a pointer named `w'.  `n' is a byte offset from `w';
 * it is converted to a 32-bit word index with (n) / 4, so it is
 * parenthesized to keep expression arguments such as ADD(a + b) correct.
 *
 *   ADD(n)   sum += word at w + n            (addl)
 *   ADDC(n)  sum += word at w + n + carry    (adcl)
 *   LOAD(n)  touch the word at w + n without using it
 *   MOP      sum += carry                    (adcl $0)
 */
#define ADD(n)	__asm __volatile \
		("addl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
#define ADDC(n)	__asm __volatile \
		("adcl %1, %0" : "+r" (sum) : \
		"g" (((const u_int32_t *)w)[(n) / 4]))
#define LOAD(n)	__asm __volatile \
		("" : : "r" (((const u_int32_t *)w)[(n) / 4]))
#define MOP	__asm __volatile \
		("adcl         $0, %0" : "+r" (sum))
108#endif
109/*
110 * Checksum routine for Internet Protocol family headers (Portable Version).
111 * This is only for IPv4 pseudo header checksum.
112 * No need to clear non-pseudo-header fields in IPv4 header.
113 * len is for actual payload size, and does not include IPv4 header and
114 * skipped header chain (off + len should be equal to the whole packet).
115 *
116 * This routine is very heavily used in the network
117 * code and should be modified for each CPU to be as fast as possible.
118 */
119
/*
 * ADDCARRY(x): if x exceeds 16 bits' worth (65535), subtract 65535 from it
 * in place, i.e. perform the end-around carry of one's complement addition.
 * x must be a modifiable lvalue.
 *
 * REDUCE: fold the 32-bit accumulator `sum' into 16 bits by adding its two
 * 16-bit halves (via the `l_util' union, which must be in scope) and then
 * applying ADDCARRY.  Wrapped in do { } while (0) so that "REDUCE;" behaves
 * as a single statement, including inside unbraced if/else.
 */
#define ADDCARRY(x)  ((x) > 65535 ? (x) -= 65535 : (x))
#define REDUCE \
	do { \
		l_util.l = sum; \
		sum = l_util.s[0] + l_util.s[1]; \
		ADDCARRY(sum); \
	} while (0)
122
123#if defined(__FreeBSD__)
124int
125in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
126#endif
127
128int
129in4_cksum(m, nxt, off, len)
130	struct mbuf *m;
131	u_int8_t nxt;
132	int off, len;
133{
134	u_int16_t *w;
135	int sum = 0;
136	int mlen = 0;
137	int byte_swapped = 0;
138	union {
139		struct ipovly ipov;
140		u_int16_t w[10];
141	} u;
142	union {
143		u_int8_t  c[2];
144		u_int16_t s;
145	} s_util;
146	union {
147		u_int16_t s[2];
148		u_int32_t l;
149	} l_util;
150
151	if (nxt != 0) {
152		/* pseudo header */
153		if (off < sizeof(struct ipovly))
154			panic("in4_cksum: offset too short");
155		if (m->m_len < sizeof(struct ip))
156			panic("in4_cksum: bad mbuf chain");
157		bzero(&u.ipov, sizeof(u.ipov));
158		u.ipov.ih_len = htons(len);
159		u.ipov.ih_pr = nxt;
160		u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
161		u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
162		w = u.w;
163		/* assumes sizeof(ipov) == 20 */
164		sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
165		sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
166	}
167
168	/* skip unnecessary part */
169	while (m && off > 0) {
170		if (m->m_len > off)
171			break;
172		off -= m->m_len;
173		m = m->m_next;
174	}
175
176	for (;m && len; m = m->m_next) {
177		if (m->m_len == 0)
178			continue;
179		w = (u_int16_t *)(mtod(m, caddr_t) + off);
180		if (mlen == -1) {
181			/*
182			 * The first byte of this mbuf is the continuation
183			 * of a word spanning between this mbuf and the
184			 * last mbuf.
185			 *
186			 * s_util.c[0] is already saved when scanning previous
187			 * mbuf.
188			 */
189			s_util.c[1] = *(u_int8_t *)w;
190			sum += s_util.s;
191			w = (u_int16_t *)((u_int8_t *)w + 1);
192			mlen = m->m_len - off - 1;
193			len--;
194		} else
195			mlen = m->m_len - off;
196		off = 0;
197		if (len < mlen)
198			mlen = len;
199		len -= mlen;
200#if defined(__FreeBSD__) && defined(__i386__)
201		/*
202		 * Force to long boundary so we do longword aligned
203		 * memory operations
204		 */
205		if (3 & (int) w) {
206			REDUCE;
207			if ((1 & (int) w) && (mlen > 0)) {
208				sum <<= 8;
209				s_util.c[0] = *(char *)w;
210				w = (u_short *)((char *)w + 1);
211				mlen--;
212				byte_swapped = 1;
213			}
214			if ((2 & (int) w) && (mlen >= 2)) {
215				sum += *w++;
216				mlen -= 2;
217			}
218		}
219		/*
220		 * Advance to a 486 cache line boundary.
221		 */
222		if (4 & (int) w && mlen >= 4) {
223			ADD(0);
224			MOP;
225			w += 2;
226			mlen -= 4;
227		}
228		if (8 & (int) w && mlen >= 8) {
229			ADD(0);
230			ADDC(4);
231			MOP;
232			w += 4;
233			mlen -= 8;
234		}
235		/*
236		 * Do as much of the checksum as possible 32 bits at at time.
237		 * In fact, this loop is unrolled to make overhead from
238		 * branches &c small.
239		 */
240		mlen -= 1;
241		while ((mlen -= 32) >= 0) {
242			/*
243			 * Add with carry 16 words and fold in the last
244			 * carry by adding a 0 with carry.
245			 *
246			 * The early ADD(16) and the LOAD(32) are to load
247			 * the next 2 cache lines in advance on 486's.  The
248			 * 486 has a penalty of 2 clock cycles for loading
249			 * a cache line, plus whatever time the external
250			 * memory takes to load the first word(s) addressed.
251			 * These penalties are unavoidable.  Subsequent
252			 * accesses to a cache line being loaded (and to
253			 * other external memory?) are delayed until the
254			 * whole load finishes.  These penalties are mostly
255			 * avoided by not accessing external memory for
256			 * 8 cycles after the ADD(16) and 12 cycles after
257			 * the LOAD(32).  The loop terminates when mlen
258			 * is initially 33 (not 32) to guaranteed that
259			 * the LOAD(32) is within bounds.
260			 */
261			ADD(16);
262			ADDC(0);
263			ADDC(4);
264			ADDC(8);
265			ADDC(12);
266			LOAD(32);
267			ADDC(20);
268			ADDC(24);
269			ADDC(28);
270			MOP;
271			w += 16;
272		}
273		mlen += 32 + 1;
274		if (mlen >= 32) {
275			ADD(16);
276			ADDC(0);
277			ADDC(4);
278			ADDC(8);
279			ADDC(12);
280			ADDC(20);
281			ADDC(24);
282			ADDC(28);
283			MOP;
284			w += 16;
285			mlen -= 32;
286		}
287		if (mlen >= 16) {
288			ADD(0);
289			ADDC(4);
290			ADDC(8);
291			ADDC(12);
292			MOP;
293			w += 8;
294			mlen -= 16;
295		}
296		if (mlen >= 8) {
297			ADD(0);
298			ADDC(4);
299			MOP;
300			w += 4;
301			mlen -= 8;
302		}
303		if (mlen == 0 && byte_swapped == 0)
304			continue;       /* worth 1% maybe ?? */
305		REDUCE;
306		while ((mlen -= 2) >= 0) {
307			sum += *w++;
308		}
309		if (byte_swapped) {
310			REDUCE;
311			sum <<= 8;
312			byte_swapped = 0;
313			if (mlen == -1) {
314				s_util.c[1] = *(char *)w;
315				sum += s_util.s;
316				mlen = 0;
317			} else
318				mlen = -1;
319		} else if (mlen == -1)
320			/*
321			 * This mbuf has odd number of bytes.
322			 * There could be a word split betwen
323			 * this mbuf and the next mbuf.
324			 * Save the last byte (to prepend to next mbuf).
325			 */
326			s_util.c[0] = *(char *)w;
327#else
328		/*
329		 * Force to even boundary.
330		 */
331		if ((1 & (long) w) && (mlen > 0)) {
332			REDUCE;
333			sum <<= 8;
334			s_util.c[0] = *(u_int8_t *)w;
335			w = (u_int16_t *)((int8_t *)w + 1);
336			mlen--;
337			byte_swapped = 1;
338		}
339		/*
340		 * Unroll the loop to make overhead from
341		 * branches &c small.
342		 */
343		while ((mlen -= 32) >= 0) {
344			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
345			sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
346			sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
347			sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
348			w += 16;
349		}
350		mlen += 32;
351		while ((mlen -= 8) >= 0) {
352			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
353			w += 4;
354		}
355		mlen += 8;
356		if (mlen == 0 && byte_swapped == 0)
357			continue;
358		REDUCE;
359		while ((mlen -= 2) >= 0) {
360			sum += *w++;
361		}
362		if (byte_swapped) {
363			REDUCE;
364			sum <<= 8;
365			byte_swapped = 0;
366			if (mlen == -1) {
367				s_util.c[1] = *(u_int8_t *)w;
368				sum += s_util.s;
369				mlen = 0;
370			} else
371				mlen = -1;
372		} else if (mlen == -1)
373			s_util.c[0] = *(u_int8_t *)w;
374#endif
375	}
376	if (len)
377		printf("cksum4: out of data\n");
378	if (mlen == -1) {
379		/* The last mbuf has odd # of bytes. Follow the
380		   standard (the odd byte may be shifted left by 8 bits
381		   or not as determined by endian-ness of the machine) */
382		s_util.c[1] = 0;
383		sum += s_util.s;
384	}
385	REDUCE;
386	return (~sum & 0xffff);
387}
388