/* in4_cksum.c revision 126261 */
/*	$FreeBSD: head/sys/contrib/pf/netinet/in4_cksum.c 126261 2004-02-26 02:34:12Z mlaier $	*/
/*	$OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $	*/
/*	$KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $	*/
/*	$NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $	*/

/*
 * Copyright (C) 1999 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
62 * 63 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 64 */ 65 66#include <sys/param.h> 67#include <sys/mbuf.h> 68#include <sys/systm.h> 69#include <sys/socket.h> 70#include <net/route.h> 71#include <netinet/in.h> 72#include <netinet/in_systm.h> 73#include <netinet/ip.h> 74#include <netinet/ip_var.h> 75 76#if defined(__FreeBSD__) && defined(__i386__) 77/* 78 * Copied from FreeBSD 5.0 sys/i386/i386/in_cksum.c 79 * XXX 80 * Currently support I386 processor only. 81 * In the long run, we need an optimized cksum routines for each Tier1 82 * architecture. Due to the lack of available hardware except I386 I 83 * can't support other processors now. For those users which use Sparc64, 84 * Alpha processors can use more optimized version in FreeBSD. 85 * See sys/$ARCH/$ARCH/in_cksum.c where $ARCH=`uname -p` 86 */ 87 88/* 89 * These asm statements require __volatile because they pass information 90 * via the condition codes. GCC does not currently provide a way to specify 91 * the condition codes as an input or output operand. 92 * 93 * The LOAD macro below is effectively a prefetch into cache. GCC will 94 * load the value into a register but will not use it. Since modern CPUs 95 * reorder operations, this will generally take place in parallel with 96 * other calculations. 97 */ 98#define ADD(n) __asm __volatile \ 99 ("addl %1, %0" : "+r" (sum) : \ 100 "g" (((const u_int32_t *)w)[n / 4])) 101#define ADDC(n) __asm __volatile \ 102 ("adcl %1, %0" : "+r" (sum) : \ 103 "g" (((const u_int32_t *)w)[n / 4])) 104#define LOAD(n) __asm __volatile \ 105 ("" : : "r" (((const u_int32_t *)w)[n / 4])) 106#define MOP __asm __volatile \ 107 ("adcl $0, %0" : "+r" (sum)) 108#endif 109/* 110 * Checksum routine for Internet Protocol family headers (Portable Version). 111 * This is only for IPv4 pseudo header checksum. 112 * No need to clear non-pseudo-header fields in IPv4 header. 
113 * len is for actual payload size, and does not include IPv4 header and 114 * skipped header chain (off + len should be equal to the whole packet). 115 * 116 * This routine is very heavily used in the network 117 * code and should be modified for each CPU to be as fast as possible. 118 */ 119 120#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) 121#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} 122 123#if defined(__FreeBSD__) 124int 125in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); 126#endif 127 128int 129in4_cksum(m, nxt, off, len) 130 struct mbuf *m; 131 u_int8_t nxt; 132 int off, len; 133{ 134 u_int16_t *w; 135 int sum = 0; 136 int mlen = 0; 137 int byte_swapped = 0; 138 union { 139 struct ipovly ipov; 140 u_int16_t w[10]; 141 } u; 142 union { 143 u_int8_t c[2]; 144 u_int16_t s; 145 } s_util; 146 union { 147 u_int16_t s[2]; 148 u_int32_t l; 149 } l_util; 150 151 if (nxt != 0) { 152 /* pseudo header */ 153 if (off < sizeof(struct ipovly)) 154 panic("in4_cksum: offset too short"); 155 if (m->m_len < sizeof(struct ip)) 156 panic("in4_cksum: bad mbuf chain"); 157 bzero(&u.ipov, sizeof(u.ipov)); 158 u.ipov.ih_len = htons(len); 159 u.ipov.ih_pr = nxt; 160 u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 161 u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; 162 w = u.w; 163 /* assumes sizeof(ipov) == 20 */ 164 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; 165 sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; 166 } 167 168 /* skip unnecessary part */ 169 while (m && off > 0) { 170 if (m->m_len > off) 171 break; 172 off -= m->m_len; 173 m = m->m_next; 174 } 175 176 for (;m && len; m = m->m_next) { 177 if (m->m_len == 0) 178 continue; 179 w = (u_int16_t *)(mtod(m, caddr_t) + off); 180 if (mlen == -1) { 181 /* 182 * The first byte of this mbuf is the continuation 183 * of a word spanning between this mbuf and the 184 * last mbuf. 
185 * 186 * s_util.c[0] is already saved when scanning previous 187 * mbuf. 188 */ 189 s_util.c[1] = *(u_int8_t *)w; 190 sum += s_util.s; 191 w = (u_int16_t *)((u_int8_t *)w + 1); 192 mlen = m->m_len - off - 1; 193 len--; 194 } else 195 mlen = m->m_len - off; 196 off = 0; 197 if (len < mlen) 198 mlen = len; 199 len -= mlen; 200#if defined(__FreeBSD__) && defined(__i386__) 201 /* 202 * Force to long boundary so we do longword aligned 203 * memory operations 204 */ 205 if (3 & (int) w) { 206 REDUCE; 207 if ((1 & (int) w) && (mlen > 0)) { 208 sum <<= 8; 209 s_util.c[0] = *(char *)w; 210 w = (u_short *)((char *)w + 1); 211 mlen--; 212 byte_swapped = 1; 213 } 214 if ((2 & (int) w) && (mlen >= 2)) { 215 sum += *w++; 216 mlen -= 2; 217 } 218 } 219 /* 220 * Advance to a 486 cache line boundary. 221 */ 222 if (4 & (int) w && mlen >= 4) { 223 ADD(0); 224 MOP; 225 w += 2; 226 mlen -= 4; 227 } 228 if (8 & (int) w && mlen >= 8) { 229 ADD(0); 230 ADDC(4); 231 MOP; 232 w += 4; 233 mlen -= 8; 234 } 235 /* 236 * Do as much of the checksum as possible 32 bits at at time. 237 * In fact, this loop is unrolled to make overhead from 238 * branches &c small. 239 */ 240 mlen -= 1; 241 while ((mlen -= 32) >= 0) { 242 /* 243 * Add with carry 16 words and fold in the last 244 * carry by adding a 0 with carry. 245 * 246 * The early ADD(16) and the LOAD(32) are to load 247 * the next 2 cache lines in advance on 486's. The 248 * 486 has a penalty of 2 clock cycles for loading 249 * a cache line, plus whatever time the external 250 * memory takes to load the first word(s) addressed. 251 * These penalties are unavoidable. Subsequent 252 * accesses to a cache line being loaded (and to 253 * other external memory?) are delayed until the 254 * whole load finishes. These penalties are mostly 255 * avoided by not accessing external memory for 256 * 8 cycles after the ADD(16) and 12 cycles after 257 * the LOAD(32). 
The loop terminates when mlen 258 * is initially 33 (not 32) to guaranteed that 259 * the LOAD(32) is within bounds. 260 */ 261 ADD(16); 262 ADDC(0); 263 ADDC(4); 264 ADDC(8); 265 ADDC(12); 266 LOAD(32); 267 ADDC(20); 268 ADDC(24); 269 ADDC(28); 270 MOP; 271 w += 16; 272 } 273 mlen += 32 + 1; 274 if (mlen >= 32) { 275 ADD(16); 276 ADDC(0); 277 ADDC(4); 278 ADDC(8); 279 ADDC(12); 280 ADDC(20); 281 ADDC(24); 282 ADDC(28); 283 MOP; 284 w += 16; 285 mlen -= 32; 286 } 287 if (mlen >= 16) { 288 ADD(0); 289 ADDC(4); 290 ADDC(8); 291 ADDC(12); 292 MOP; 293 w += 8; 294 mlen -= 16; 295 } 296 if (mlen >= 8) { 297 ADD(0); 298 ADDC(4); 299 MOP; 300 w += 4; 301 mlen -= 8; 302 } 303 if (mlen == 0 && byte_swapped == 0) 304 continue; /* worth 1% maybe ?? */ 305 REDUCE; 306 while ((mlen -= 2) >= 0) { 307 sum += *w++; 308 } 309 if (byte_swapped) { 310 REDUCE; 311 sum <<= 8; 312 byte_swapped = 0; 313 if (mlen == -1) { 314 s_util.c[1] = *(char *)w; 315 sum += s_util.s; 316 mlen = 0; 317 } else 318 mlen = -1; 319 } else if (mlen == -1) 320 /* 321 * This mbuf has odd number of bytes. 322 * There could be a word split betwen 323 * this mbuf and the next mbuf. 324 * Save the last byte (to prepend to next mbuf). 325 */ 326 s_util.c[0] = *(char *)w; 327#else 328 /* 329 * Force to even boundary. 330 */ 331 if ((1 & (long) w) && (mlen > 0)) { 332 REDUCE; 333 sum <<= 8; 334 s_util.c[0] = *(u_int8_t *)w; 335 w = (u_int16_t *)((int8_t *)w + 1); 336 mlen--; 337 byte_swapped = 1; 338 } 339 /* 340 * Unroll the loop to make overhead from 341 * branches &c small. 
342 */ 343 while ((mlen -= 32) >= 0) { 344 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; 345 sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; 346 sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; 347 sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; 348 w += 16; 349 } 350 mlen += 32; 351 while ((mlen -= 8) >= 0) { 352 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; 353 w += 4; 354 } 355 mlen += 8; 356 if (mlen == 0 && byte_swapped == 0) 357 continue; 358 REDUCE; 359 while ((mlen -= 2) >= 0) { 360 sum += *w++; 361 } 362 if (byte_swapped) { 363 REDUCE; 364 sum <<= 8; 365 byte_swapped = 0; 366 if (mlen == -1) { 367 s_util.c[1] = *(u_int8_t *)w; 368 sum += s_util.s; 369 mlen = 0; 370 } else 371 mlen = -1; 372 } else if (mlen == -1) 373 s_util.c[0] = *(u_int8_t *)w; 374#endif 375 } 376 if (len) 377 printf("cksum4: out of data\n"); 378 if (mlen == -1) { 379 /* The last mbuf has odd # of bytes. Follow the 380 standard (the odd byte may be shifted left by 8 bits 381 or not as determined by endian-ness of the machine) */ 382 s_util.c[1] = 0; 383 sum += s_util.s; 384 } 385 REDUCE; 386 return (~sum & 0xffff); 387} 388