Deleted Added
sdiff udiff text old ( 54882 ) new ( 58698 )
full compact
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from tahoe: in_cksum.c 1.2 86/01/05
34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
35 * $FreeBSD: head/sys/i386/i386/in_cksum.c 54882 1999-12-20 12:11:34Z sheldonh $
36 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/mbuf.h>
41
42#include <netinet/in.h>
43#include <netinet/in_systm.h>
44#include <netinet/ip.h>
45
46#include <machine/in_cksum.h>
47
48/*
49 * Checksum routine for Internet Protocol family headers.
50 *
51 * This routine is very heavily used in the network
52 * code and should be modified for each CPU to be as fast as possible.
53 *
54 * This implementation is 386 version.
55 */
56
57#undef ADDCARRY
58#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
59#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
60
61/*
62 * Thanks to gcc we don't have to guess
63 * which registers contain sum & w.
64 */
65#define ADD(n) __asm __volatile \
66 ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
67#define ADDC(n) __asm __volatile \
68 ("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
69#define LOAD(n) __asm __volatile \
70 ("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))
71#define MOP __asm __volatile \
72 ("adcl $0, %0" : "=r" (sum) : "0" (sum))
73
74int
75in_cksum(m, len)
76 register struct mbuf *m;
77 register int len;
78{
79 register u_short *w;
80 register unsigned sum = 0;
81 register int mlen = 0;
82 int byte_swapped = 0;
83 union { char c[2]; u_short s; } su;
84
85 for (;m && len; m = m->m_next) {
86 if (m->m_len == 0)
87 continue;
88 w = mtod(m, u_short *);
89 if (mlen == -1) {
90 /*
91 * The first byte of this mbuf is the continuation
92 * of a word spanning between this mbuf and the
93 * last mbuf.
94 */
95
96 /* su.c[0] is already saved when scanning previous
97 * mbuf. sum was REDUCEd when we found mlen == -1
98 */
99 su.c[1] = *(u_char *)w;
100 sum += su.s;
101 w = (u_short *)((char *)w + 1);
102 mlen = m->m_len - 1;
103 len--;
104 } else
105 mlen = m->m_len;
106 if (len < mlen)
107 mlen = len;
108 len -= mlen;
109 /*
110 * Force to long boundary so we do longword aligned
111 * memory operations
112 */
113 if (3 & (int) w) {
114 REDUCE;
115 if ((1 & (int) w) && (mlen > 0)) {
116 sum <<= 8;
117 su.c[0] = *(char *)w;
118 w = (u_short *)((char *)w + 1);
119 mlen--;
120 byte_swapped = 1;
121 }
122 if ((2 & (int) w) && (mlen >= 2)) {
123 sum += *w++;
124 mlen -= 2;
125 }
126 }
127 /*
128 * Advance to a 486 cache line boundary.
129 */
130 if (4 & (int) w && mlen >= 4) {
131 ADD(0);
132 MOP;
133 w += 2;
134 mlen -= 4;
135 }
136 if (8 & (int) w && mlen >= 8) {
137 ADD(0);
138 ADDC(4);
139 MOP;
140 w += 4;
141 mlen -= 8;
142 }
143 /*
144 * Do as much of the checksum as possible 32 bits at at time.
145 * In fact, this loop is unrolled to make overhead from
146 * branches &c small.
147 */
148 mlen -= 1;
149 while ((mlen -= 32) >= 0) {
150 u_char junk;
151 /*
152 * Add with carry 16 words and fold in the last
153 * carry by adding a 0 with carry.
154 *
155 * The early ADD(16) and the LOAD(32) are to load
156 * the next 2 cache lines in advance on 486's. The
157 * 486 has a penalty of 2 clock cycles for loading
158 * a cache line, plus whatever time the external
159 * memory takes to load the first word(s) addressed.
160 * These penalties are unavoidable. Subsequent
161 * accesses to a cache line being loaded (and to
162 * other external memory?) are delayed until the
163 * whole load finishes. These penalties are mostly
164 * avoided by not accessing external memory for
165 * 8 cycles after the ADD(16) and 12 cycles after
166 * the LOAD(32). The loop terminates when mlen
167 * is initially 33 (not 32) to guaranteed that
168 * the LOAD(32) is within bounds.
169 */
170 ADD(16);
171 ADDC(0);
172 ADDC(4);
173 ADDC(8);
174 ADDC(12);
175 LOAD(32);
176 ADDC(20);
177 ADDC(24);
178 ADDC(28);
179 MOP;
180 w += 16;
181 }
182 mlen += 32 + 1;
183 if (mlen >= 32) {
184 ADD(16);
185 ADDC(0);
186 ADDC(4);
187 ADDC(8);
188 ADDC(12);
189 ADDC(20);
190 ADDC(24);
191 ADDC(28);
192 MOP;
193 w += 16;
194 mlen -= 32;
195 }
196 if (mlen >= 16) {
197 ADD(0);
198 ADDC(4);
199 ADDC(8);
200 ADDC(12);
201 MOP;
202 w += 8;
203 mlen -= 16;
204 }
205 if (mlen >= 8) {
206 ADD(0);
207 ADDC(4);
208 MOP;
209 w += 4;
210 mlen -= 8;
211 }
212 if (mlen == 0 && byte_swapped == 0)
213 continue; /* worth 1% maybe ?? */
214 REDUCE;
215 while ((mlen -= 2) >= 0) {
216 sum += *w++;
217 }
218 if (byte_swapped) {
219 sum <<= 8;
220 byte_swapped = 0;
221 if (mlen == -1) {
222 su.c[1] = *(char *)w;
223 sum += su.s;
224 mlen = 0;
225 } else
226 mlen = -1;
227 } else if (mlen == -1)
228 /*
229 * This mbuf has odd number of bytes.
230 * There could be a word split betwen
231 * this mbuf and the next mbuf.
232 * Save the last byte (to prepend to next mbuf).
233 */
234 su.c[0] = *(char *)w;
235 }
236
237 if (len)
238 printf("cksum: out of data\n");
239 if (mlen == -1) {
240 /* The last mbuf has odd # of bytes. Follow the
241 standard (the odd byte is shifted left by 8 bits) */
242 su.c[1] = 0;
243 sum += su.s;
244 }
245 REDUCE;
246 return (~sum & 0xffff);
247}
248
249/*
250 * This is the exact same algorithm as above with a few exceptions:
251 * (1) it is designed to operate on buffers, not mbufs
252 * (2) it returns an intermediate form of the sum which has to be
253 * explicitly finalized (but this can be delayed)
254 * (3) it accepts an intermediate sum
255 *
256 * This is particularly useful when building packets quickly,
257 * since one can compute the checksum of the pseudoheader ahead of
258 * time and then use this function to complete the work. That way,
259 * the pseudoheader never actually has to exist in the packet buffer,
260 * which avoids needless duplication of work.
261 */
262in_psum_t
263in_cksum_partial(psum, w, len)
264 in_psum_t psum;
265 const u_short *w;
266 int len;
267{
268 register in_psum_t sum = psum;
269 int byte_swapped = 0;
270 union { char c[2]; u_short s; } su;
271
272 /*
273 * Force to long boundary so we do longword aligned
274 * memory operations
275 */
276 if (3 & (int) w) {
277 REDUCE;
278 if ((1 & (int) w) && (len > 0)) {
279 sum <<= 8;
280 su.c[0] = *(const char *)w;
281 w = (const u_short *)((const char *)w + 1);
282 len--;
283 byte_swapped = 1;
284 }
285 if ((2 & (int) w) && (len >= 2)) {
286 sum += *w++;
287 len -= 2;
288 }
289 }
290 /*
291 * Advance to a 486 cache line boundary.
292 */
293 if (4 & (int) w && len >= 4) {
294 ADD(0);
295 MOP;
296 w += 2;
297 len -= 4;
298 }
299 if (8 & (int) w && len >= 8) {
300 ADD(0);
301 ADDC(4);
302 MOP;
303 w += 4;
304 len -= 8;
305 }
306 /*
307 * Do as much of the checksum as possible 32 bits at at time.
308 * In fact, this loop is unrolled to make overhead from
309 * branches &c small.
310 */
311 len -= 1;
312 while ((len -= 32) >= 0) {
313 u_char junk;
314 /*
315 * Add with carry 16 words and fold in the last
316 * carry by adding a 0 with carry.
317 *
318 * The early ADD(16) and the LOAD(32) are to load
319 * the next 2 cache lines in advance on 486's. The
320 * 486 has a penalty of 2 clock cycles for loading
321 * a cache line, plus whatever time the external
322 * memory takes to load the first word(s) addressed.
323 * These penalties are unavoidable. Subsequent
324 * accesses to a cache line being loaded (and to
325 * other external memory?) are delayed until the
326 * whole load finishes. These penalties are mostly
327 * avoided by not accessing external memory for
328 * 8 cycles after the ADD(16) and 12 cycles after
329 * the LOAD(32). The loop terminates when len
330 * is initially 33 (not 32) to guaranteed that
331 * the LOAD(32) is within bounds.
332 */
333 ADD(16);
334 ADDC(0);
335 ADDC(4);
336 ADDC(8);
337 ADDC(12);
338 LOAD(32);
339 ADDC(20);
340 ADDC(24);
341 ADDC(28);
342 MOP;
343 w += 16;
344 }
345 len += 32 + 1;
346 if (len >= 32) {
347 ADD(16);
348 ADDC(0);
349 ADDC(4);
350 ADDC(8);
351 ADDC(12);
352 ADDC(20);
353 ADDC(24);
354 ADDC(28);
355 MOP;
356 w += 16;
357 len -= 32;
358 }
359 if (len >= 16) {
360 ADD(0);
361 ADDC(4);
362 ADDC(8);
363 ADDC(12);
364 MOP;
365 w += 8;
366 len -= 16;
367 }
368 if (len >= 8) {
369 ADD(0);
370 ADDC(4);
371 MOP;
372 w += 4;
373 len -= 8;
374 }
375 if (len == 0 && byte_swapped == 0)
376 goto out;
377 REDUCE;
378 while ((len -= 2) >= 0) {
379 sum += *w++;
380 }
381 if (byte_swapped) {
382 sum <<= 8;
383 byte_swapped = 0;
384 if (len == -1) {
385 su.c[1] = *(const char *)w;
386 sum += su.s;
387 len = 0;
388 } else
389 len = -1;
390 } else if (len == -1) {
391 /*
392 * This buffer has odd number of bytes.
393 * There could be a word split betwen
394 * this buffer and the next.
395 */
396 su.c[0] = *(const char *)w;
397 }
398out:
399 if (len == -1) {
400 /* The last buffer has odd # of bytes. Follow the
401 standard (the odd byte is shifted left by 8 bits) */
402 su.c[1] = 0;
403 sum += su.s;
404 }
405 return sum;
406}
407
408int
409in_cksum_finalize(psum)
410 in_psum_t psum;
411{
412 in_psum_t sum = psum;
413 REDUCE;
414 return (~sum & 0xffff);
415}