in_cksum.c revision 29041
1210311Sjmallett/*-
2210311Sjmallett * Copyright (c) 1990 The Regents of the University of California.
3210311Sjmallett * All rights reserved.
4210311Sjmallett *
5210311Sjmallett * Redistribution and use in source and binary forms, with or without
6210311Sjmallett * modification, are permitted provided that the following conditions
7210311Sjmallett * are met:
8210311Sjmallett * 1. Redistributions of source code must retain the above copyright
9210311Sjmallett *    notice, this list of conditions and the following disclaimer.
10210311Sjmallett * 2. Redistributions in binary form must reproduce the above copyright
11210311Sjmallett *    notice, this list of conditions and the following disclaimer in the
12210311Sjmallett *    documentation and/or other materials provided with the distribution.
13210311Sjmallett * 3. All advertising materials mentioning features or use of this software
14210311Sjmallett *    must display the following acknowledgement:
15210311Sjmallett *	This product includes software developed by the University of
16210311Sjmallett *	California, Berkeley and its contributors.
17210311Sjmallett * 4. Neither the name of the University nor the names of its contributors
18210311Sjmallett *    may be used to endorse or promote products derived from this software
19210311Sjmallett *    without specific prior written permission.
20210311Sjmallett *
21210311Sjmallett * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22210311Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23210311Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24210311Sjmallett * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25210311Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26210311Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27210311Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28210311Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29210311Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30210311Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31210311Sjmallett * SUCH DAMAGE.
32210311Sjmallett *
33210311Sjmallett *	from tahoe:	in_cksum.c	1.2	86/01/05
34210311Sjmallett *	from:		@(#)in_cksum.c	1.3 (Berkeley) 1/19/91
35210311Sjmallett *	$Id: in_cksum.c,v 1.11 1997/08/16 19:14:52 wollman Exp $
36210311Sjmallett */
37210311Sjmallett
38210311Sjmallett#include <sys/param.h>
39210311Sjmallett#include <sys/systm.h>
40210311Sjmallett#include <sys/mbuf.h>
41210311Sjmallett
42210311Sjmallett#include <netinet/in.h>
43213150Sjmallett#include <netinet/in_systm.h>
44210311Sjmallett#include <netinet/ip.h>
45210311Sjmallett
46210311Sjmallett#include <machine/in_cksum.h>
47257324Sglebius
/*
 * Checksum routine for Internet Protocol family headers.
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * This is the i386 implementation.
 */
56210311Sjmallett
/*
 * One's-complement folding helpers.
 *
 * ADDCARRY(x)	if x is larger than 0xffff, subtract 0xffff from it
 *		(the end-around carry of one's-complement addition).
 * REDUCE	fold the upper 16 bits of the local variable `sum' into
 *		its lower 16 bits, then apply ADDCARRY.  For a 32-bit sum
 *		the result is at most 0xffff.
 *
 * Both are wrapped in do { } while (0) so that each expands to exactly
 * one statement and can be used safely as the body of an if/else.
 */
#undef	ADDCARRY
#define ADDCARRY(x)	do {						\
	if ((x) > 0xffff)						\
		(x) -= 0xffff;						\
} while (0)
#define REDUCE		do {						\
	sum = (sum & 0xffff) + (sum >> 16);				\
	ADDCARRY(sum);							\
} while (0)

/*
 * Thanks to gcc we don't have to guess
 * which registers contain sum & w.
 *
 * All of these operate on the locals `sum' and `w' of the enclosing
 * function:
 *
 * ADD(n)	sum += the 32-bit word at byte offset n from w (addl).
 * ADDC(n)	as ADD(n), but also adds the carry flag (adcl).
 * LOAD(n)	reads the byte at offset n from w into the local `junk';
 *		the value is not used, the read only touches the memory.
 * MOP		sum += carry flag (adcl $0), folding in the last carry.
 *
 * NOTE(review): the carry flag is handed from one asm statement to the
 * next and none of them declares a "cc" clobber, so a run of
 * ADD/ADDC/MOP only works if the compiler emits the statements
 * back-to-back with nothing in between that changes the flags.
 */
#define ADD(n)	asm("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
#define ADDC(n)	asm("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
#define LOAD(n)	asm volatile("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))
#define MOP	asm("adcl         $0, %0" : "=r" (sum) : "0" (sum))
69210311Sjmallett
70210311Sjmallettint
71210311Sjmallettin_cksum(m, len)
72210311Sjmallett	register struct mbuf *m;
73210311Sjmallett	register int len;
74210311Sjmallett{
75210311Sjmallett	register u_short *w;
76210311Sjmallett	register unsigned sum = 0;
77210311Sjmallett	register int mlen = 0;
78210311Sjmallett	int byte_swapped = 0;
79210311Sjmallett	union { char	c[2]; u_short	s; } su;
80210311Sjmallett
81210311Sjmallett	for (;m && len; m = m->m_next) {
82210311Sjmallett		if (m->m_len == 0)
83210311Sjmallett			continue;
84210311Sjmallett		w = mtod(m, u_short *);
85210311Sjmallett		if (mlen == -1) {
86210311Sjmallett			/*
87210311Sjmallett			 * The first byte of this mbuf is the continuation
88210311Sjmallett			 * of a word spanning between this mbuf and the
89213150Sjmallett			 * last mbuf.
90213150Sjmallett			 */
91213150Sjmallett
92213150Sjmallett			/* su.c[0] is already saved when scanning previous
93210311Sjmallett			 * mbuf.  sum was REDUCEd when we found mlen == -1
94219694Sjmallett			 */
95219694Sjmallett			su.c[1] = *(u_char *)w;
96219694Sjmallett			sum += su.s;
97219694Sjmallett			w = (u_short *)((char *)w + 1);
98219694Sjmallett			mlen = m->m_len - 1;
99210311Sjmallett			len--;
100213150Sjmallett		} else
101213150Sjmallett			mlen = m->m_len;
102213150Sjmallett		if (len < mlen)
103213150Sjmallett			mlen = len;
104213150Sjmallett		len -= mlen;
105213150Sjmallett		/*
106213150Sjmallett		 * Force to long boundary so we do longword aligned
107213150Sjmallett		 * memory operations
108213150Sjmallett		 */
109213150Sjmallett		if (3 & (int) w) {
110213150Sjmallett			REDUCE;
111213150Sjmallett			if ((1 & (int) w) && (mlen > 0)) {
112215959Sjmallett				sum <<= 8;
113215959Sjmallett				su.c[0] = *(char *)w;
114215959Sjmallett				w = (u_short *)((char *)w + 1);
115215959Sjmallett				mlen--;
116213150Sjmallett				byte_swapped = 1;
117213150Sjmallett			}
118213150Sjmallett			if ((2 & (int) w) && (mlen >= 2)) {
119213150Sjmallett				sum += *w++;
120213150Sjmallett				mlen -= 2;
121213150Sjmallett			}
122213150Sjmallett		}
123213150Sjmallett		/*
124210311Sjmallett		 * Advance to a 486 cache line boundary.
125210311Sjmallett		 */
126210311Sjmallett		if (4 & (int) w && mlen >= 4) {
127210311Sjmallett			ADD(0);
128210311Sjmallett			MOP;
129210311Sjmallett			w += 2;
130210311Sjmallett			mlen -= 4;
131213807Sjmallett		}
132210311Sjmallett		if (8 & (int) w && mlen >= 8) {
133210311Sjmallett			ADD(0);
134210311Sjmallett			ADDC(4);
135210311Sjmallett			MOP;
136210311Sjmallett			w += 4;
137213150Sjmallett			mlen -= 8;
138216071Sjmallett		}
139216071Sjmallett		/*
140216071Sjmallett		 * Do as much of the checksum as possible 32 bits at at time.
141216071Sjmallett		 * In fact, this loop is unrolled to make overhead from
142210311Sjmallett		 * branches &c small.
143210311Sjmallett		 */
144210311Sjmallett		mlen -= 1;
145210311Sjmallett		while ((mlen -= 32) >= 0) {
146210311Sjmallett			u_char junk;
147210311Sjmallett			/*
148210311Sjmallett			 * Add with carry 16 words and fold in the last
149210311Sjmallett			 * carry by adding a 0 with carry.
150210311Sjmallett			 *
151210311Sjmallett			 * The early ADD(16) and the LOAD(32) are to load
152210311Sjmallett			 * the next 2 cache lines in advance on 486's.  The
153210311Sjmallett			 * 486 has a penalty of 2 clock cycles for loading
154210311Sjmallett			 * a cache line, plus whatever time the external
155210311Sjmallett			 * memory takes to load the first word(s) addressed.
156210311Sjmallett			 * These penalties are unavoidable.  Subsequent
157210311Sjmallett			 * accesses to a cache line being loaded (and to
158210311Sjmallett			 * other external memory?) are delayed until the
159210311Sjmallett			 * whole load finishes.  These penalties are mostly
160210311Sjmallett			 * avoided by not accessing external memory for
161210311Sjmallett			 * 8 cycles after the ADD(16) and 12 cycles after
162210311Sjmallett			 * the LOAD(32).  The loop terminates when mlen
163210311Sjmallett			 * is initially 33 (not 32) to guaranteed that
164210311Sjmallett			 * the LOAD(32) is within bounds.
165210311Sjmallett			 */
166210311Sjmallett			ADD(16);
167210311Sjmallett			ADDC(0);
168210311Sjmallett			ADDC(4);
169210311Sjmallett			ADDC(8);
170213807Sjmallett			ADDC(12);
171213807Sjmallett			LOAD(32);
172213807Sjmallett			ADDC(20);
173213807Sjmallett			ADDC(24);
174213807Sjmallett			ADDC(28);
175213807Sjmallett			MOP;
176213807Sjmallett			w += 16;
177213807Sjmallett		}
178213807Sjmallett		mlen += 32 + 1;
179213807Sjmallett		if (mlen >= 32) {
180213807Sjmallett			ADD(16);
181213807Sjmallett			ADDC(0);
182213807Sjmallett			ADDC(4);
183210311Sjmallett			ADDC(8);
184210311Sjmallett			ADDC(12);
185210311Sjmallett			ADDC(20);
186210311Sjmallett			ADDC(24);
187210311Sjmallett			ADDC(28);
188210311Sjmallett			MOP;
189210311Sjmallett			w += 16;
190210311Sjmallett			mlen -= 32;
191210311Sjmallett		}
192219694Sjmallett		if (mlen >= 16) {
193210311Sjmallett			ADD(0);
194210311Sjmallett			ADDC(4);
195210311Sjmallett			ADDC(8);
196210311Sjmallett			ADDC(12);
197210311Sjmallett			MOP;
198210311Sjmallett			w += 8;
199210311Sjmallett			mlen -= 16;
200219694Sjmallett		}
201219694Sjmallett		if (mlen >= 8) {
202219694Sjmallett			ADD(0);
203219694Sjmallett			ADDC(4);
204219694Sjmallett			MOP;
205219694Sjmallett			w += 4;
206219694Sjmallett			mlen -= 8;
207219694Sjmallett		}
208219694Sjmallett		if (mlen == 0 && byte_swapped == 0)
209219694Sjmallett			continue;       /* worth 1% maybe ?? */
210219694Sjmallett		REDUCE;
211219694Sjmallett		while ((mlen -= 2) >= 0) {
212219694Sjmallett			sum += *w++;
213219694Sjmallett		}
214219694Sjmallett		if (byte_swapped) {
215219694Sjmallett			sum <<= 8;
216219694Sjmallett			byte_swapped = 0;
217219694Sjmallett			if (mlen == -1) {
218210311Sjmallett				su.c[1] = *(char *)w;
219219694Sjmallett				sum += su.s;
220219694Sjmallett				mlen = 0;
221219694Sjmallett			} else
222219694Sjmallett				mlen = -1;
223219694Sjmallett		} else if (mlen == -1)
224219694Sjmallett			/*
225210311Sjmallett			 * This mbuf has odd number of bytes.
226219694Sjmallett			 * There could be a word split betwen
227219694Sjmallett			 * this mbuf and the next mbuf.
228210311Sjmallett			 * Save the last byte (to prepend to next mbuf).
229210311Sjmallett			 */
230242346Sjmallett			su.c[0] = *(char *)w;
231210311Sjmallett	}
232210311Sjmallett
233210311Sjmallett	if (len)
234210311Sjmallett		printf("cksum: out of data\n");
235210311Sjmallett	if (mlen == -1) {
236232812Sjmallett		/* The last mbuf has odd # of bytes. Follow the
237232812Sjmallett		   standard (the odd byte is shifted left by 8 bits) */
238210311Sjmallett		su.c[1] = 0;
239210311Sjmallett		sum += su.s;
240210311Sjmallett	}
241210311Sjmallett	REDUCE;
242210311Sjmallett	return (~sum & 0xffff);
243210311Sjmallett}
244210311Sjmallett
245210311Sjmallett/*
246219695Sjmallett * This is the exact same algorithm as above with a few exceptions:
247210311Sjmallett * (1) it is designed to operate on buffers, not mbufs
248210311Sjmallett * (2) it returns an intermediate form of the sum which has to be
249210311Sjmallett *     explicitly finalized (but this can be delayed)
250210311Sjmallett * (3) it accepts an intermediate sum
251210311Sjmallett *
252210311Sjmallett * This is particularly useful when building packets quickly,
253210311Sjmallett * since one can compute the checksum of the pseudoheader ahead of
254217664Sjmallett * time and then use this function to complete the work.  That way,
255217664Sjmallett * the pseudoheader never actually has to exist in the packet buffer,
256217664Sjmallett * which avoids needless duplication of work.
257217210Sjmallett */
258217664Sjmallettin_psum_t
259217664Sjmallettin_cksum_partial(psum, w, len)
260217664Sjmallett	in_psum_t psum;
261217210Sjmallett	const u_short *w;
262217664Sjmallett	int len;
263217664Sjmallett{
264217664Sjmallett	register in_psum_t sum = psum;
265210311Sjmallett	int byte_swapped = 0;
266210311Sjmallett	union { char	c[2]; u_short	s; } su;
267210311Sjmallett
268210311Sjmallett	/*
269210311Sjmallett	 * Force to long boundary so we do longword aligned
270210311Sjmallett	 * memory operations
271210311Sjmallett	 */
272210311Sjmallett	if (3 & (int) w) {
273210311Sjmallett		REDUCE;
274210311Sjmallett		if ((1 & (int) w) && (len > 0)) {
275210311Sjmallett			sum <<= 8;
276210311Sjmallett			su.c[0] = *(char *)w;
277210311Sjmallett			w = (u_short *)((char *)w + 1);
278210311Sjmallett			len--;
279210311Sjmallett			byte_swapped = 1;
280210311Sjmallett		}
281210311Sjmallett		if ((2 & (int) w) && (len >= 2)) {
282210311Sjmallett			sum += *w++;
283210311Sjmallett			len -= 2;
284210311Sjmallett		}
285210311Sjmallett	}
286210311Sjmallett	/*
287210311Sjmallett	 * Advance to a 486 cache line boundary.
288210311Sjmallett	 */
289210311Sjmallett	if (4 & (int) w && len >= 4) {
290210311Sjmallett		ADD(0);
291210311Sjmallett		MOP;
292210311Sjmallett		w += 2;
293210311Sjmallett		len -= 4;
294210311Sjmallett	}
295210311Sjmallett	if (8 & (int) w && len >= 8) {
296210311Sjmallett		ADD(0);
297210311Sjmallett		ADDC(4);
298210311Sjmallett		MOP;
299210311Sjmallett		w += 4;
300210311Sjmallett		len -= 8;
301210311Sjmallett	}
302210311Sjmallett	/*
303210311Sjmallett	 * Do as much of the checksum as possible 32 bits at at time.
304210311Sjmallett	 * In fact, this loop is unrolled to make overhead from
305210311Sjmallett	 * branches &c small.
306210311Sjmallett	 */
307210311Sjmallett	len -= 1;
308210311Sjmallett	while ((len -= 32) >= 0) {
309210311Sjmallett		u_char junk;
310210311Sjmallett		/*
311210311Sjmallett		 * Add with carry 16 words and fold in the last
312210311Sjmallett		 * carry by adding a 0 with carry.
313210311Sjmallett		 *
314210311Sjmallett		 * The early ADD(16) and the LOAD(32) are to load
315210311Sjmallett		 * the next 2 cache lines in advance on 486's.  The
316210311Sjmallett		 * 486 has a penalty of 2 clock cycles for loading
317210311Sjmallett		 * a cache line, plus whatever time the external
318210311Sjmallett		 * memory takes to load the first word(s) addressed.
319210311Sjmallett		 * These penalties are unavoidable.  Subsequent
320210311Sjmallett		 * accesses to a cache line being loaded (and to
321210311Sjmallett		 * other external memory?) are delayed until the
322210311Sjmallett		 * whole load finishes.  These penalties are mostly
323219694Sjmallett		 * avoided by not accessing external memory for
324210311Sjmallett		 * 8 cycles after the ADD(16) and 12 cycles after
325219694Sjmallett		 * the LOAD(32).  The loop terminates when len
326219694Sjmallett		 * is initially 33 (not 32) to guaranteed that
327219694Sjmallett		 * the LOAD(32) is within bounds.
328210311Sjmallett		 */
329219694Sjmallett		ADD(16);
330210311Sjmallett		ADDC(0);
331210311Sjmallett		ADDC(4);
332210311Sjmallett		ADDC(8);
333210311Sjmallett		ADDC(12);
334210311Sjmallett		LOAD(32);
335210311Sjmallett		ADDC(20);
336210311Sjmallett		ADDC(24);
337213150Sjmallett		ADDC(28);
338213150Sjmallett		MOP;
339213150Sjmallett		w += 16;
340213150Sjmallett	}
341213150Sjmallett	len += 32 + 1;
342210311Sjmallett	if (len >= 32) {
343210311Sjmallett		ADD(16);
344210311Sjmallett		ADDC(0);
345210311Sjmallett		ADDC(4);
346210311Sjmallett		ADDC(8);
347210311Sjmallett		ADDC(12);
348210311Sjmallett		ADDC(20);
349210311Sjmallett		ADDC(24);
350210311Sjmallett		ADDC(28);
351210311Sjmallett		MOP;
352231987Sgonzo		w += 16;
353231987Sgonzo		len -= 32;
354231987Sgonzo	}
355210311Sjmallett	if (len >= 16) {
356210311Sjmallett		ADD(0);
357210311Sjmallett		ADDC(4);
358231987Sgonzo		ADDC(8);
359210311Sjmallett		ADDC(12);
360210311Sjmallett		MOP;
361210311Sjmallett		w += 8;
362231987Sgonzo		len -= 16;
363210311Sjmallett	}
364210311Sjmallett	if (len >= 8) {
365210311Sjmallett		ADD(0);
366210311Sjmallett		ADDC(4);
367210311Sjmallett		MOP;
368210311Sjmallett		w += 4;
369210311Sjmallett		len -= 8;
370210311Sjmallett	}
371210311Sjmallett	if (len == 0 && byte_swapped == 0)
372210311Sjmallett		goto out;
373210311Sjmallett	REDUCE;
374210311Sjmallett	while ((len -= 2) >= 0) {
375210311Sjmallett		sum += *w++;
376210311Sjmallett	}
377213150Sjmallett	if (byte_swapped) {
378210311Sjmallett		sum <<= 8;
379210311Sjmallett		byte_swapped = 0;
380210311Sjmallett		if (len == -1) {
381210311Sjmallett			su.c[1] = *(char *)w;
382210311Sjmallett			sum += su.s;
383210311Sjmallett			len = 0;
384210311Sjmallett		} else
385210311Sjmallett			len = -1;
386210311Sjmallett	} else if (len == -1) {
387210311Sjmallett		/*
388210311Sjmallett		 * This buffer has odd number of bytes.
389210311Sjmallett		 * There could be a word split betwen
390210311Sjmallett		 * this buffer and the next.
391210311Sjmallett		 */
392210311Sjmallett		su.c[0] = *(char *)w;
393210311Sjmallett	}
394210311Sjmallettout:
395215974Sjmallett	if (len == -1) {
396210311Sjmallett		/* The last buffer has odd # of bytes. Follow the
397210311Sjmallett		   standard (the odd byte is shifted left by 8 bits) */
398210311Sjmallett		su.c[1] = 0;
399210311Sjmallett		sum += su.s;
400210311Sjmallett	}
401210311Sjmallett	return sum;
402210311Sjmallett}
403210311Sjmallett
404210311Sjmallettint
405210311Sjmallettin_cksum_finalize(psum)
406210311Sjmallett	in_psum_t psum;
407215974Sjmallett{
408210311Sjmallett	in_psum_t sum = psum;
409210311Sjmallett	REDUCE;
410210311Sjmallett	return (sum & 0xffff);
411210311Sjmallett}
412210311Sjmallett