1/* 2 * arch/ubicom32/lib/checksum.c 3 * Optimized checksum utilities for IP. 4 * 5 * (C) Copyright 2009, Ubicom, Inc. 6 * 7 * This file is part of the Ubicom32 Linux Kernel Port. 8 * 9 * The Ubicom32 Linux Kernel Port is free software: you can redistribute 10 * it and/or modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation, either version 2 of the 12 * License, or (at your option) any later version. 13 * 14 * The Ubicom32 Linux Kernel Port is distributed in the hope that it 15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied 16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 17 * the GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with the Ubicom32 Linux Kernel Port. If not, 21 * see <http://www.gnu.org/licenses/>. 22 * 23 * Ubicom32 implementation derived from (with many thanks): 24 * arch/m68knommu 25 * arch/blackfin 26 * arch/parisc 27 */ 28/* 29 * INET An implementation of the TCP/IP protocol suite for the LINUX 30 * operating system. INET is implemented using the BSD Socket 31 * interface as the means of communication with the user level. 32 * 33 * IP/TCP/UDP checksumming routines 34 * 35 * Authors: Jorge Cwik, <jorge@laser.satlink.net> 36 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 37 * Tom May, <ftom@netcom.com> 38 * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de> 39 * Lots of code moved from tcp.c and ip.c; see those files 40 * for more names. 41 * 42 * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: 43 * Fixed some nasty bugs, causing some horrible crashes. 44 * A: At some points, the sum (%0) was used as 45 * length-counter instead of the length counter 46 * (%1). Thanks to Roman Hodek for pointing this out. 47 * B: GCC seems to mess up if one uses too many 48 * data-registers to hold input values and one tries to 49 * specify d0 and d1 as scratch registers. Letting gcc choose these 50 * registers itself solves the problem. 51 * 52 * This program is free software; you can redistribute it and/or 53 * modify it under the terms of the GNU General Public License 54 * as published by the Free Software Foundation; either version 55 * 2 of the License, or (at your option) any later version. 56 */ 57 58/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most 59 of the assembly has to go. */ 60 61#include <linux/module.h> 62#include <net/checksum.h> 63 64static unsigned long do_csum(const unsigned char * buff, int len) 65{ 66 int count; 67 unsigned long result = 0; 68 69 /* 70 * The following optimized assembly code cannot handle data length less than 7 bytes! 71 */ 72 if (likely(len >= 7)) { 73 len -= (4 - (int)buff) & 3; 74 count = len >> 2; 75 asm ( 76 " sub.4 d15, #0, %2 \n\t" // set up for jump table 77 " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1) 78 79 " bfextu d14, %0, #2 \n\t" // test 2 LSB of buff 80 " jmpne.w.f 100f \n\t" 81 " add.4 %1, #0, %1 \n\t" // clear C 82 " moveai a3, #%%hi(1f) \n\t" // table jump 83 " lea.1 a3, %%lo(1f)(a3) \n\t" 84 " lea.4 a3, (a3,d15) \n\t" 85 " calli a3, 0(a3) \n\t" 86 87 "100: sub.4 %0, %0, d14 \n\t" 88 " sub.4 d14, #4, d14 \n\t" 89 " lsl.4 d14, d14, #3 \n\t" 90 " add.4 %1, #0, %1 \n\t" // clear C 91 " moveai a3, #%%hi(1f) \n\t" // table jump 92 " lea.1 a3, %%lo(1f)(a3) \n\t" 93 " lea.4 a3, (a3,d15) \n\t" 94 " bfextu %1, (%0)4++, d14 \n\t" // read first partial word 95 " calli a3, 0(a3) \n\t" 96#if 1 97 "200: lsl.4 %3, %3, #3 \n\t" 98 " bfrvrs d15, (%0), #0 \n\t" // read last word (partial) 99 " bfextu d15, d15, %3 \n\t" 100 " bfrvrs d15, d15, #0 \n\t" 101 " add.4 %1, d15, %1 \n\t" 102 " addc %1, #0, %1 \n\t" // sample C again 103 " jmpt.w.t 2f \n\t" 104#else 105 "200: move.1 d15, 0(%0) \n\t" 106 " lsl.4 d15, d15, #8 \n\t" 107 " add.4 %1, d15, %1 \n\t" 108 " addc %1, #0, %1 \n\t" // sample C again 109 " add.4 %3, #-1, %3 \n\t" 110 " jmpeq.w.t 2f \n\t" 111 112 " move.1 d15, 1(%0) \n\t" 113 " add.4 %1, d15, %1 \n\t" 114 " addc %1, #0, %1 \n\t" // sample C again 115 " add.4 %3, #-1, %3 \n\t" 116 " jmpeq.w.t 2f \n\t" 117 118 " move.1 d15, 2(%0) \n\t" 119 " lsl.4 d15, d15, #8 \n\t" 120 " add.4 %1, d15, %1 \n\t" 121 " addc %1, #0, %1 \n\t" // sample C again 122 " jmpt.w.t 2f \n\t" 123#endif 124#if defined(IP7000) || defined(IP7000_REV2) 125 "300: swapb.2 %1, %1 \n\t" 126#else 127 "300: shmrg.2 %1, %1, %1 \n\t" 128 " lsr.4 %1, %1, #8 \n\t" 129 " bfextu %1, %1, #16 \n\t" 130#endif 131 " jmpt.w.t 3f \n\t" 132 133 "1: add.4 %1, (%0)4++, %1 \n\t" // first add without C 134 " .rept 31 \n\t" 135 " addc %1, (%0)4++, %1 \n\t" 136 " .endr \n\t" 137 " addc %1, #0, %1 \n\t" // sample C again 138 " add.4 %2, #-32, %2 \n\t" 139 " jmpgt.w.t 1b \n\t" 140 141 " and.4 %3, #3, %3 \n\t" // check n 142 " jmpne.w.f 200b \n\t" 143 144 "2: .rept 2 \n\t" 145 " lsr.4 d15, %1, #16 \n\t" 146 " bfextu %1, %1, #16 \n\t" 147 " add.4 %1, d15, %1 \n\t" 148 " .endr \n\t" 149 " btst d14, #3 \n\t" // start from odd address (<< 3)? 150 " jmpne.w.f 300b \n\t" 151 "3: \n\t" 152 153 : "+a"(buff), "+d"(result), "+d"(count), "+d"(len) 154 : 155 : "d15", "d14", "a3", "cc" 156 ); 157 158 return result; 159 } 160 161 /* 162 * handle a few bytes and fold result into 16-bit 163 */ 164 while (len-- > 0) { 165 result += (*buff++ << 8); 166 if (len) { 167 result += *buff++; 168 len--; 169 } 170 } 171 asm ( 172 " .rept 2 \n\t" 173 " lsr.4 d15, %0, #16 \n\t" 174 " bfextu %0, %0, #16 \n\t" 175 " add.4 %0, d15, %0 \n\t" 176 " .endr \n\t" 177 : "+d" (result) 178 : 179 : "d15", "cc" 180 ); 181 182 return result; 183} 184 185/* 186 * This is a version of ip_compute_csum() optimized for IP headers, 187 * which always checksum on 4 octet boundaries. 188 */ 189__sum16 ip_fast_csum(const void *iph, unsigned int ihl) 190{ 191 return (__force __sum16)~do_csum(iph,ihl*4); 192} 193 194/* 195 * computes the checksum of a memory block at buff, length len, 196 * and adds in "sum" (32-bit) 197 * 198 * returns a 32-bit number suitable for feeding into itself 199 * or csum_tcpudp_magic 200 * 201 * this function must be called with even lengths, except 202 * for the last fragment, which may be odd 203 * 204 * it's best to have buff aligned on a 32-bit boundary 205 */ 206__wsum csum_partial(const void *buff, int len, __wsum sum) 207{ 208 unsigned int result = do_csum(buff, len); 209 210 /* add in old sum, and carry.. */ 211 result += (__force u32)sum; 212 if ((__force u32)sum > result) 213 result += 1; 214 return (__force __wsum)result; 215} 216 217EXPORT_SYMBOL(csum_partial); 218 219/* 220 * this routine is used for miscellaneous IP-like checksums, mainly 221 * in icmp.c 222 */ 223__sum16 ip_compute_csum(const void *buff, int len) 224{ 225 return (__force __sum16)~do_csum(buff,len); 226} 227 228/* 229 * copy from fs while checksumming, otherwise like csum_partial 230 */ 231 232__wsum 233csum_partial_copy_from_user(const void __user *src, void *dst, 234 int len, __wsum sum, int *csum_err) 235{ 236 if (csum_err) *csum_err = 0; 237 memcpy(dst, (__force const void *)src, len); 238 return csum_partial(dst, len, sum); 239} 240 241/* 242 * copy from ds while checksumming, otherwise like csum_partial 243 */ 244 245__wsum 246csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) 247{ 248 memcpy(dst, src, len); 249 return csum_partial(dst, len, sum); 250} 251