in_cksum.c revision 50477
1/*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from tahoe: in_cksum.c 1.2 86/01/05 34 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 35 * $FreeBSD: head/sys/i386/i386/in_cksum.c 50477 1999-08-28 01:08:13Z peter $ 36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/mbuf.h> 41 42#include <netinet/in.h> 43#include <netinet/in_systm.h> 44#include <netinet/ip.h> 45 46#include <machine/in_cksum.h> 47 48/* 49 * Checksum routine for Internet Protocol family headers. 50 * 51 * This routine is very heavily used in the network 52 * code and should be modified for each CPU to be as fast as possible. 53 * 54 * This implementation is 386 version. 55 */ 56 57#undef ADDCARRY 58#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff 59#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} 60 61/* 62 * Thanks to gcc we don't have to guess 63 * which registers contain sum & w. 64 */ 65#define ADD(n) __asm __volatile \ 66 ("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 67#define ADDC(n) __asm __volatile \ 68 ("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) 69#define LOAD(n) __asm __volatile \ 70 ("movb " #n "(%1), %0" : "=r" (junk) : "r" (w)) 71#define MOP __asm __volatile \ 72 ("adcl $0, %0" : "=r" (sum) : "0" (sum)) 73 74int 75in_cksum(m, len) 76 register struct mbuf *m; 77 register int len; 78{ 79 register u_short *w; 80 register unsigned sum = 0; 81 register int mlen = 0; 82 int byte_swapped = 0; 83 union { char c[2]; u_short s; } su; 84 85 for (;m && len; m = m->m_next) { 86 if (m->m_len == 0) 87 continue; 88 w = mtod(m, u_short *); 89 if (mlen == -1) { 90 /* 91 * The first byte of this mbuf is the continuation 92 * of a word spanning between this mbuf and the 93 * last mbuf. 94 */ 95 96 /* su.c[0] is already saved when scanning previous 97 * mbuf. sum was REDUCEd when we found mlen == -1 98 */ 99 su.c[1] = *(u_char *)w; 100 sum += su.s; 101 w = (u_short *)((char *)w + 1); 102 mlen = m->m_len - 1; 103 len--; 104 } else 105 mlen = m->m_len; 106 if (len < mlen) 107 mlen = len; 108 len -= mlen; 109 /* 110 * Force to long boundary so we do longword aligned 111 * memory operations 112 */ 113 if (3 & (int) w) { 114 REDUCE; 115 if ((1 & (int) w) && (mlen > 0)) { 116 sum <<= 8; 117 su.c[0] = *(char *)w; 118 w = (u_short *)((char *)w + 1); 119 mlen--; 120 byte_swapped = 1; 121 } 122 if ((2 & (int) w) && (mlen >= 2)) { 123 sum += *w++; 124 mlen -= 2; 125 } 126 } 127 /* 128 * Advance to a 486 cache line boundary. 129 */ 130 if (4 & (int) w && mlen >= 4) { 131 ADD(0); 132 MOP; 133 w += 2; 134 mlen -= 4; 135 } 136 if (8 & (int) w && mlen >= 8) { 137 ADD(0); 138 ADDC(4); 139 MOP; 140 w += 4; 141 mlen -= 8; 142 } 143 /* 144 * Do as much of the checksum as possible 32 bits at at time. 145 * In fact, this loop is unrolled to make overhead from 146 * branches &c small. 147 */ 148 mlen -= 1; 149 while ((mlen -= 32) >= 0) { 150 u_char junk; 151 /* 152 * Add with carry 16 words and fold in the last 153 * carry by adding a 0 with carry. 154 * 155 * The early ADD(16) and the LOAD(32) are to load 156 * the next 2 cache lines in advance on 486's. The 157 * 486 has a penalty of 2 clock cycles for loading 158 * a cache line, plus whatever time the external 159 * memory takes to load the first word(s) addressed. 160 * These penalties are unavoidable. Subsequent 161 * accesses to a cache line being loaded (and to 162 * other external memory?) are delayed until the 163 * whole load finishes. These penalties are mostly 164 * avoided by not accessing external memory for 165 * 8 cycles after the ADD(16) and 12 cycles after 166 * the LOAD(32). The loop terminates when mlen 167 * is initially 33 (not 32) to guaranteed that 168 * the LOAD(32) is within bounds. 169 */ 170 ADD(16); 171 ADDC(0); 172 ADDC(4); 173 ADDC(8); 174 ADDC(12); 175 LOAD(32); 176 ADDC(20); 177 ADDC(24); 178 ADDC(28); 179 MOP; 180 w += 16; 181 } 182 mlen += 32 + 1; 183 if (mlen >= 32) { 184 ADD(16); 185 ADDC(0); 186 ADDC(4); 187 ADDC(8); 188 ADDC(12); 189 ADDC(20); 190 ADDC(24); 191 ADDC(28); 192 MOP; 193 w += 16; 194 mlen -= 32; 195 } 196 if (mlen >= 16) { 197 ADD(0); 198 ADDC(4); 199 ADDC(8); 200 ADDC(12); 201 MOP; 202 w += 8; 203 mlen -= 16; 204 } 205 if (mlen >= 8) { 206 ADD(0); 207 ADDC(4); 208 MOP; 209 w += 4; 210 mlen -= 8; 211 } 212 if (mlen == 0 && byte_swapped == 0) 213 continue; /* worth 1% maybe ?? */ 214 REDUCE; 215 while ((mlen -= 2) >= 0) { 216 sum += *w++; 217 } 218 if (byte_swapped) { 219 sum <<= 8; 220 byte_swapped = 0; 221 if (mlen == -1) { 222 su.c[1] = *(char *)w; 223 sum += su.s; 224 mlen = 0; 225 } else 226 mlen = -1; 227 } else if (mlen == -1) 228 /* 229 * This mbuf has odd number of bytes. 230 * There could be a word split betwen 231 * this mbuf and the next mbuf. 232 * Save the last byte (to prepend to next mbuf). 233 */ 234 su.c[0] = *(char *)w; 235 } 236 237 if (len) 238 printf("cksum: out of data\n"); 239 if (mlen == -1) { 240 /* The last mbuf has odd # of bytes. Follow the 241 standard (the odd byte is shifted left by 8 bits) */ 242 su.c[1] = 0; 243 sum += su.s; 244 } 245 REDUCE; 246 return (~sum & 0xffff); 247} 248 249/* 250 * This is the exact same algorithm as above with a few exceptions: 251 * (1) it is designed to operate on buffers, not mbufs 252 * (2) it returns an intermediate form of the sum which has to be 253 * explicitly finalized (but this can be delayed) 254 * (3) it accepts an intermediate sum 255 * 256 * This is particularly useful when building packets quickly, 257 * since one can compute the checksum of the pseudoheader ahead of 258 * time and then use this function to complete the work. That way, 259 * the pseudoheader never actually has to exist in the packet buffer, 260 * which avoids needless duplication of work. 261 */ 262in_psum_t 263in_cksum_partial(psum, w, len) 264 in_psum_t psum; 265 const u_short *w; 266 int len; 267{ 268 register in_psum_t sum = psum; 269 int byte_swapped = 0; 270 union { char c[2]; u_short s; } su; 271 272 /* 273 * Force to long boundary so we do longword aligned 274 * memory operations 275 */ 276 if (3 & (int) w) { 277 REDUCE; 278 if ((1 & (int) w) && (len > 0)) { 279 sum <<= 8; 280 su.c[0] = *(const char *)w; 281 w = (const u_short *)((const char *)w + 1); 282 len--; 283 byte_swapped = 1; 284 } 285 if ((2 & (int) w) && (len >= 2)) { 286 sum += *w++; 287 len -= 2; 288 } 289 } 290 /* 291 * Advance to a 486 cache line boundary. 292 */ 293 if (4 & (int) w && len >= 4) { 294 ADD(0); 295 MOP; 296 w += 2; 297 len -= 4; 298 } 299 if (8 & (int) w && len >= 8) { 300 ADD(0); 301 ADDC(4); 302 MOP; 303 w += 4; 304 len -= 8; 305 } 306 /* 307 * Do as much of the checksum as possible 32 bits at at time. 308 * In fact, this loop is unrolled to make overhead from 309 * branches &c small. 310 */ 311 len -= 1; 312 while ((len -= 32) >= 0) { 313 u_char junk; 314 /* 315 * Add with carry 16 words and fold in the last 316 * carry by adding a 0 with carry. 317 * 318 * The early ADD(16) and the LOAD(32) are to load 319 * the next 2 cache lines in advance on 486's. The 320 * 486 has a penalty of 2 clock cycles for loading 321 * a cache line, plus whatever time the external 322 * memory takes to load the first word(s) addressed. 323 * These penalties are unavoidable. Subsequent 324 * accesses to a cache line being loaded (and to 325 * other external memory?) are delayed until the 326 * whole load finishes. These penalties are mostly 327 * avoided by not accessing external memory for 328 * 8 cycles after the ADD(16) and 12 cycles after 329 * the LOAD(32). The loop terminates when len 330 * is initially 33 (not 32) to guaranteed that 331 * the LOAD(32) is within bounds. 332 */ 333 ADD(16); 334 ADDC(0); 335 ADDC(4); 336 ADDC(8); 337 ADDC(12); 338 LOAD(32); 339 ADDC(20); 340 ADDC(24); 341 ADDC(28); 342 MOP; 343 w += 16; 344 } 345 len += 32 + 1; 346 if (len >= 32) { 347 ADD(16); 348 ADDC(0); 349 ADDC(4); 350 ADDC(8); 351 ADDC(12); 352 ADDC(20); 353 ADDC(24); 354 ADDC(28); 355 MOP; 356 w += 16; 357 len -= 32; 358 } 359 if (len >= 16) { 360 ADD(0); 361 ADDC(4); 362 ADDC(8); 363 ADDC(12); 364 MOP; 365 w += 8; 366 len -= 16; 367 } 368 if (len >= 8) { 369 ADD(0); 370 ADDC(4); 371 MOP; 372 w += 4; 373 len -= 8; 374 } 375 if (len == 0 && byte_swapped == 0) 376 goto out; 377 REDUCE; 378 while ((len -= 2) >= 0) { 379 sum += *w++; 380 } 381 if (byte_swapped) { 382 sum <<= 8; 383 byte_swapped = 0; 384 if (len == -1) { 385 su.c[1] = *(const char *)w; 386 sum += su.s; 387 len = 0; 388 } else 389 len = -1; 390 } else if (len == -1) { 391 /* 392 * This buffer has odd number of bytes. 393 * There could be a word split betwen 394 * this buffer and the next. 395 */ 396 su.c[0] = *(const char *)w; 397 } 398out: 399 if (len == -1) { 400 /* The last buffer has odd # of bytes. Follow the 401 standard (the odd byte is shifted left by 8 bits) */ 402 su.c[1] = 0; 403 sum += su.s; 404 } 405 return sum; 406} 407 408int 409in_cksum_finalize(psum) 410 in_psum_t psum; 411{ 412 in_psum_t sum = psum; 413 REDUCE; 414 return (sum & 0xffff); 415} 416