1/* $NetBSD$ */ 2 3/* 4 * Copyright (c) 1995 Matthew R. Green. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* 30 * Copyright (c) 1992, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * All advertising materials mentioning features or use of this software 34 * must display the following acknowledgement: 35 * This product includes software developed by the University of 36 * California, and it's contributors. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)in_cksum.c 8.1 (Berkeley) 6/11/93 63 */ 64 65/* 66 * Copyright (c) 1995 Zubin Dittia. 67 * Copyright (c) 1994, 1998 Charles M. Hannum. 68 * 69 * All advertising materials mentioning features or use of this software 70 * must display the following acknowledgement: 71 * This product includes software developed by the University of 72 * California, and it's contributors. 73 * 74 * Redistribution and use in source and binary forms, with or without 75 * modification, are permitted provided that the following conditions 76 * are met: 77 * 1. Redistributions of source code must retain the above copyright 78 * notice, this list of conditions and the following disclaimer. 79 * 2. Redistributions in binary form must reproduce the above copyright 80 * notice, this list of conditions and the following disclaimer in the 81 * documentation and/or other materials provided with the distribution. 82 * 3. All advertising materials mentioning features or use of this software 83 * must display the following acknowledgement: 84 * This product includes software developed by the University of 85 * California, Berkeley and its contributors. 86 * 4. Neither the name of the University nor the names of its contributors 87 * may be used to endorse or promote products derived from this software 88 * without specific prior written permission. 89 * 90 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 91 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 92 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 93 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 94 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 95 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 96 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 97 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 98 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 99 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 100 * SUCH DAMAGE. 101 * 102 * @(#)in_cksum.c 8.1 (Berkeley) 6/11/93 103 */ 104 105#include <sys/cdefs.h> 106__KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.20 2008/05/30 02:29:37 mrg Exp $"); 107 108#include <sys/param.h> 109#include <sys/systm.h> 110#include <sys/mbuf.h> 111#include <netinet/in.h> 112 113/* 114 * Checksum routine for Internet Protocol family headers. 115 * 116 * This routine is very heavily used in the network 117 * code and should be modified for each CPU to be as fast as possible. 118 * 119 * SPARC version. 120 */ 121 122/* 123 * The checksum computation code here is significantly faster than its 124 * vanilla C counterpart (by significantly, I mean 2-3 times faster if 125 * the data is in cache, and 1.5-2 times faster if the data is not in 126 * cache). 127 * We optimize on three fronts: 128 * 1. By using the add-with-carry (addxcc) instruction, we can use 129 * 32-bit operations instead of 16-bit operations. 130 * 2. By unrolling the main loop to reduce branch overheads. 131 * 3. By doing a sequence of load,load,add,add,load,load,add,add, 132 * we can avoid the extra stall cycle which is incurred if the 133 * instruction immediately following a load tries to use the 134 * target register of the load. 135 * Another possible optimization is to replace a pair of 32-bit loads 136 * with a single 64-bit load (ldd) instruction, but I found that although 137 * this improves performance somewhat on Sun4c machines, it actually 138 * reduces performance considerably on Sun4m machines (I don't know why). 139 * So I chose to leave it out. 140 * 141 * Zubin Dittia (zubin@dworkin.wustl.edu) 142 */ 143 144#define Asm asm volatile 145#define ADD64 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \ 146 addcc %0,%1,%0; addxcc %0,%2,%0; \ 147 ld [%4+ 8],%1; ld [%4+12],%2; \ 148 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 149 ld [%4+16],%1; ld [%4+20],%2; \ 150 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 151 ld [%4+24],%1; ld [%4+28],%2; \ 152 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 153 ld [%4+32],%1; ld [%4+36],%2; \ 154 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 155 ld [%4+40],%1; ld [%4+44],%2; \ 156 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 157 ld [%4+48],%1; ld [%4+52],%2; \ 158 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 159 ld [%4+56],%1; ld [%4+60],%2; \ 160 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 161 addxcc %0,0,%0" \ 162 : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\ 163 : "0" (sum), "r" (w)) 164#define ADD32 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \ 165 addcc %0,%1,%0; addxcc %0,%2,%0; \ 166 ld [%4+ 8],%1; ld [%4+12],%2; \ 167 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 168 ld [%4+16],%1; ld [%4+20],%2; \ 169 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 170 ld [%4+24],%1; ld [%4+28],%2; \ 171 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 172 addxcc %0,0,%0" \ 173 : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\ 174 : "0" (sum), "r" (w)) 175#define ADD16 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \ 176 addcc %0,%1,%0; addxcc %0,%2,%0; \ 177 ld [%4+ 8],%1; ld [%4+12],%2; \ 178 addxcc %0,%1,%0; addxcc %0,%2,%0; \ 179 addxcc %0,0,%0" \ 180 : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\ 181 : "0" (sum), "r" (w)) 182#define ADD8 Asm(" ld [%4+ 0],%1; ld [%4+ 4],%2; \ 183 addcc %0,%1,%0; addxcc %0,%2,%0; \ 184 addxcc %0,0,%0" \ 185 : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\ 186 : "0" (sum), "r" (w)) 187#define ADD4 Asm(" ld [%3+ 0],%1; \ 188 addcc %0,%1,%0; \ 189 addxcc %0,0,%0" \ 190 : "=r" (sum), "=&r" (tmp1) \ 191 : "0" (sum), "r" (w)) 192 193#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);} 194#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;} 195#define ROL {sum = sum << 8;} /* depends on recent REDUCE */ 196#define ADDBYTE {ROL; sum += *w; byte_swapped ^= 1;} 197#define ADDSHORT {sum += *(uint16_t *)w;} 198#define ADVANCE(n) {w += n; mlen -= n;} 199 200int 201cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t sum) 202{ 203 uint8_t *w; 204 int mlen = 0; 205 int byte_swapped = 0; 206 207 /* 208 * Declare two temporary registers for use by the asm code. We 209 * allow the compiler to pick which specific machine registers to 210 * use, instead of hard-coding this in the asm code above. 211 */ 212 uint32_t tmp1, tmp2; 213 214 for (; m && len; m = m->m_next) { 215 if (m->m_len == 0) 216 continue; 217 w = mtod(m, uint8_t *) + off; 218 mlen = m->m_len - off; 219 off = 0; 220 if (len < mlen) 221 mlen = len; 222 len -= mlen; 223 224 /* 225 * Ensure that we're aligned on a word boundary here so 226 * that we can do 32 bit operations below. 227 */ 228 if (((uintptr_t)w & 3) != 0) { 229 REDUCE; 230 if (((uintptr_t)w & 1) != 0 && mlen >= 1) { 231 ADDBYTE; 232 ADVANCE(1); 233 } 234 if (((uintptr_t)w & 2) != 0 && mlen >= 2) { 235 ADDSHORT; 236 ADVANCE(2); 237 } 238 } 239 240 /* 241 * Do as many 32 bit operations as possible using the 242 * 64/32/16/8/4 macro's above, using as many as possible of 243 * these. 244 */ 245 while (mlen >= 64) { 246 ADD64; 247 ADVANCE(64); 248 } 249 if (mlen >= 32) { 250 ADD32; 251 ADVANCE(32); 252 } 253 if (mlen >= 16) { 254 ADD16; 255 ADVANCE(16); 256 } 257 if (mlen >= 8) { 258 ADD8; 259 ADVANCE(8); 260 } 261 if (mlen >= 4) { 262 ADD4; 263 ADVANCE(4) 264 } 265 if (mlen == 0) 266 continue; 267 268 REDUCE; 269 if (mlen >= 2) { 270 ADDSHORT; 271 ADVANCE(2); 272 } 273 if (mlen == 1) { 274 ADDBYTE; 275 } 276 } 277 if (byte_swapped) { 278 REDUCE; 279 ROL; 280 } 281 REDUCE; 282 ADDCARRY; 283 284 return 0xffff ^ sum; 285} 286