1/* 2 * Copyright (c) 2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/*- 30 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in 41 * the documentation and/or other materials provided with the 42 * distribution. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <mach/boolean.h>
#include <machine/endian.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <netinet/in.h>
#include <libkern/libkern.h>

/*
 * Forward declaration; one of the two implementations below (32-bit or
 * 64-bit accumulator) is compiled in, selected on ULONG_MAX.
 */
int cpu_in_cksum(struct mbuf *, int, int, uint32_t);

/*
 * Branch-prediction hint: tells the compiler that _exp is expected to
 * evaluate false, so the guarded path is laid out off the hot path.
 */
#define PREDICT_FALSE(_exp) __builtin_expect((_exp), 0)

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * A discussion of different implementation techniques can be found in
 * RFC 1071.
 *
 * The default implementation for 32-bit architectures is using
 * a 32-bit accumulator and operating on 16-bit operands.
 *
 * The default implementation for 64-bit architectures is using
 * a 64-bit accumulator and operating on 32-bit operands.
 *
 * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
 * of the inner loop. After each iteration of the inner loop, a partial
 * reduction is done to avoid carry in long packets.
 */

#if ULONG_MAX == 0xffffffffUL
/*
 * 32-bit version: 32-bit accumulator, 16-bit loads, inner loop unrolled
 * to 32-byte fragments.
 *
 * m:           mbuf chain containing the bytes to checksum
 * len:         number of bytes to checksum
 * off:         byte offset into the chain at which summing starts
 * initial_sum: 32-bit value folded into the sum before any data is read
 *              (presumably a pseudo-header partial sum — callers not
 *              visible here; confirm at call sites)
 *
 * Returns the ones'-complement checksum folded to 16 bits and inverted,
 * or -1 if the chain ends before off+len bytes could be consumed.
 */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* Pre-reduce the seed so the 32-bit accumulator starts carry-free. */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/*
	 * Skip the initial offset: walk the chain until the mbuf holding
	 * byte 'off' is found, then jump into the summing loop at the
	 * right data pointer.
	 */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	/* Sum each mbuf fragment until 'len' bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/*
			 * Align on word boundary; consuming a lone byte
			 * shifts the odd/even phase for this and later
			 * fragments.
			 */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * If the cumulative stream position is odd, every 16-bit
		 * word read in this fragment is byte-shifted relative to
		 * the true checksum lanes; 'partial' must be rotated by
		 * one byte before being folded into 'sum'.
		 */
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * The top two bits are set: fold 'partial' into
			 * 'sum' now, before the next unrolled pass (or the
			 * byte rotation) could overflow it.
			 */
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 24);
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		/* Remaining 16-byte chunk; this one still adjusts mlen. */
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* Trailing odd byte; flips the phase for the next
			 * fragment. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 24);
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* Final fold to 16 bits (two rounds absorb any remaining carry),
	 * then invert per the ones'-complement checksum convention. */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}

#else
/*
 * 64-bit version: 64-bit accumulator, 32-bit loads, inner loop unrolled
 * to 64-byte fragments.
 *
 * m:           mbuf chain containing the bytes to checksum
 * len:         number of bytes to checksum
 * off:         byte offset into the chain at which summing starts
 * initial_sum: 32-bit value folded into the sum before any data is read
 *              (presumably a pseudo-header partial sum — callers not
 *              visible here; confirm at call sites)
 *
 * Returns the ones'-complement checksum folded to 16 bits and inverted,
 * or -1 if the chain ends before off+len bytes could be consumed.
 */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* No pre-reduction needed: the 64-bit accumulator has headroom. */
	sum = initial_sum;

	/*
	 * Skip the initial offset: walk the chain until the mbuf holding
	 * byte 'off' is found, then jump into the summing loop at the
	 * right data pointer.
	 */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	/* Sum each mbuf fragment until 'len' bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/*
			 * Align on word boundary; consuming a lone byte
			 * shifts the odd/even phase for this and later
			 * fragments.
			 */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * If the cumulative stream position is odd, every word
		 * read in this fragment is byte-shifted relative to the
		 * true checksum lanes; 'partial' must be rotated by one
		 * byte before being folded into 'sum'.
		 */
		needs_swap = started_on_odd;
		/* Consume one 16-bit word to reach 4-byte alignment for
		 * the 32-bit loads below. */
		if ((uintptr_t)data & 2) {
			if (mlen < 2)
				goto trailing_bytes;
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * The top two bits are set: fold 'partial' into
			 * 'sum' now, before the next unrolled pass (or the
			 * byte rotation) could overflow it.
			 */
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 56);
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* Trailing odd byte; flips the phase for the next
			 * fragment. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 56);
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* Fold all four 16-bit lanes of the 64-bit accumulator down to 16
	 * bits (two extra rounds absorb any carries), then invert per the
	 * ones'-complement checksum convention. */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}
#endif /* ULONG_MAX != 0xffffffffUL */