1/*
2 * arch/ubicom32/lib/checksum.c
3 *   Optimized checksum utilities for IP.
4 *
5 * (C) Copyright 2009, Ubicom, Inc.
6 *
7 * This file is part of the Ubicom32 Linux Kernel Port.
8 *
9 * The Ubicom32 Linux Kernel Port is free software: you can redistribute
10 * it and/or modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation, either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * The Ubicom32 Linux Kernel Port is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
17 * the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with the Ubicom32 Linux Kernel Port.  If not,
21 * see <http://www.gnu.org/licenses/>.
22 *
23 * Ubicom32 implementation derived from (with many thanks):
24 *   arch/m68knommu
25 *   arch/blackfin
26 *   arch/parisc
27 */
28/*
29 * INET		An implementation of the TCP/IP protocol suite for the LINUX
30 *		operating system.  INET is implemented using the  BSD Socket
31 *		interface as the means of communication with the user level.
32 *
33 *		IP/TCP/UDP checksumming routines
34 *
35 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
36 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
37 *		Tom May, <ftom@netcom.com>
38 *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
39 *		Lots of code moved from tcp.c and ip.c; see those files
40 *		for more names.
41 *
42 * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
43 *		Fixed some nasty bugs, causing some horrible crashes.
44 *		A: At some points, the sum (%0) was used as
45 *		length-counter instead of the length counter
46 *		(%1). Thanks to Roman Hodek for pointing this out.
47 *		B: GCC seems to mess up if one uses too many
48 *		data-registers to hold input values and one tries to
49 *		specify d0 and d1 as scratch registers. Letting gcc choose these
50 *		registers itself solves the problem.
51 *
52 *		This program is free software; you can redistribute it and/or
53 *		modify it under the terms of the GNU General Public License
54 *		as published by the Free Software Foundation; either version
55 *		2 of the License, or (at your option) any later version.
56 */
57
58/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
59   of the assembly has to go. */
60
61#include <linux/module.h>
62#include <net/checksum.h>
63
64static unsigned long do_csum(const unsigned char * buff, int len)
65{
66	int count;
67	unsigned long result = 0;
68
69	/*
70	 * The following optimized assembly code cannot handle data length less than 7 bytes!
71	 */
72	if (likely(len >= 7)) {
73		len -= (4 - (int)buff) & 3;
74		count = len >> 2;
75		asm (
76		"	sub.4		d15, #0, %2		\n\t"	// set up for jump table
77		"	and.4		d15, #(32-1), d15	\n\t"	// d15 = (-m) & (32 - 1)
78
79		"	bfextu		d14, %0, #2		\n\t"	// test 2 LSB of buff
80		"	jmpne.w.f	100f			\n\t"
81		"	add.4		%1, #0, %1		\n\t"	// clear C
82		"	moveai		a3, #%%hi(1f)		\n\t"	// table jump
83		"	lea.1		a3, %%lo(1f)(a3)	\n\t"
84		"	lea.4		a3, (a3,d15)		\n\t"
85		"	calli		a3, 0(a3)		\n\t"
86
87		"100:	sub.4		%0, %0, d14		\n\t"
88		"	sub.4		d14, #4, d14		\n\t"
89		"	lsl.4		d14, d14, #3		\n\t"
90		"	add.4		%1, #0, %1		\n\t"	// clear C
91		"	moveai		a3, #%%hi(1f)		\n\t"	// table jump
92		"	lea.1		a3, %%lo(1f)(a3)	\n\t"
93		"	lea.4		a3, (a3,d15)		\n\t"
94		"	bfextu		%1, (%0)4++, d14	\n\t"	// read first partial word
95		"	calli		a3, 0(a3)		\n\t"
96#if 1
97		"200:	lsl.4		%3, %3, #3		\n\t"
98		"	bfrvrs		d15, (%0), #0		\n\t"	// read last word (partial)
99		"	bfextu		d15, d15, %3		\n\t"
100		"	bfrvrs		d15, d15, #0		\n\t"
101		"	add.4		%1, d15, %1		\n\t"
102		"	addc		%1, #0, %1		\n\t"	// sample C again
103		"	jmpt.w.t	2f			\n\t"
104#else
105		"200:	move.1		d15, 0(%0)		\n\t"
106		"	lsl.4		d15, d15, #8		\n\t"
107		"	add.4		%1, d15, %1		\n\t"
108		"	addc		%1, #0, %1		\n\t"	// sample C again
109		"	add.4		%3, #-1, %3		\n\t"
110		"	jmpeq.w.t	2f			\n\t"
111
112		"	move.1		d15, 1(%0)		\n\t"
113		"	add.4		%1, d15, %1		\n\t"
114		"	addc		%1, #0, %1		\n\t"	// sample C again
115		"	add.4		%3, #-1, %3		\n\t"
116		"	jmpeq.w.t	2f			\n\t"
117
118		"	move.1		d15, 2(%0)		\n\t"
119		"	lsl.4		d15, d15, #8		\n\t"
120		"	add.4		%1, d15, %1		\n\t"
121		"	addc		%1, #0, %1		\n\t"	// sample C again
122		"	jmpt.w.t	2f			\n\t"
123#endif
124#if defined(IP7000) || defined(IP7000_REV2)
125		"300:	swapb.2		%1, %1			\n\t"
126#else
127		"300:	shmrg.2		%1, %1, %1		\n\t"
128		"	lsr.4		%1, %1, #8		\n\t"
129		"	bfextu		%1, %1, #16		\n\t"
130#endif
131		"	jmpt.w.t	3f			\n\t"
132
133		"1:	add.4		%1, (%0)4++, %1		\n\t"	// first add without C
134		"	.rept		31			\n\t"
135		"	addc		%1, (%0)4++, %1		\n\t"
136		"	.endr					\n\t"
137		"	addc		%1, #0, %1		\n\t"	// sample C again
138		"	add.4		%2, #-32, %2		\n\t"
139		"	jmpgt.w.t	1b			\n\t"
140
141		"	and.4		%3, #3, %3		\n\t"	// check n
142		"	jmpne.w.f	200b			\n\t"
143
144		"2:	.rept		2			\n\t"
145		"	lsr.4		d15, %1, #16		\n\t"
146		"	bfextu		%1, %1, #16		\n\t"
147		"	add.4		%1, d15, %1		\n\t"
148		"	.endr					\n\t"
149		"	btst		d14, #3			\n\t"	// start from odd address (<< 3)?
150		"	jmpne.w.f	300b			\n\t"
151		"3:						\n\t"
152
153			: "+a"(buff), "+d"(result), "+d"(count), "+d"(len)
154			:
155			: "d15", "d14", "a3", "cc"
156		);
157
158		return result;
159	}
160
161	/*
162	 * handle a few bytes and fold result into 16-bit
163	 */
164	while (len-- > 0) {
165		result += (*buff++ << 8);
166		if (len) {
167			result += *buff++;
168			len--;
169		}
170	}
171	asm (
172	"	.rept		2			\n\t"
173	"	lsr.4		d15, %0, #16		\n\t"
174	"	bfextu		%0, %0, #16		\n\t"
175	"	add.4		%0, d15, %0		\n\t"
176	"	.endr					\n\t"
177		: "+d" (result)
178		:
179		: "d15", "cc"
180	);
181
182	return result;
183}
184
185/*
186 *	This is a version of ip_compute_csum() optimized for IP headers,
187 *	which always checksum on 4 octet boundaries.
188 */
189__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
190{
191	return (__force __sum16)~do_csum(iph,ihl*4);
192}
193
194/*
195 * computes the checksum of a memory block at buff, length len,
196 * and adds in "sum" (32-bit)
197 *
198 * returns a 32-bit number suitable for feeding into itself
199 * or csum_tcpudp_magic
200 *
201 * this function must be called with even lengths, except
202 * for the last fragment, which may be odd
203 *
204 * it's best to have buff aligned on a 32-bit boundary
205 */
206__wsum csum_partial(const void *buff, int len, __wsum sum)
207{
208	unsigned int result = do_csum(buff, len);
209
210	/* add in old sum, and carry.. */
211	result += (__force u32)sum;
212	if ((__force u32)sum > result)
213		result += 1;
214	return (__force __wsum)result;
215}
216
217EXPORT_SYMBOL(csum_partial);
218
219/*
220 * this routine is used for miscellaneous IP-like checksums, mainly
221 * in icmp.c
222 */
223__sum16 ip_compute_csum(const void *buff, int len)
224{
225	return (__force __sum16)~do_csum(buff,len);
226}
227
228/*
229 * copy from fs while checksumming, otherwise like csum_partial
230 */
231
232__wsum
233csum_partial_copy_from_user(const void __user *src, void *dst,
234			    int len, __wsum sum, int *csum_err)
235{
236	if (csum_err) *csum_err = 0;
237	memcpy(dst, (__force const void *)src, len);
238	return csum_partial(dst, len, sum);
239}
240
241/*
242 * copy from ds while checksumming, otherwise like csum_partial
243 */
244
245__wsum
246csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
247{
248	memcpy(dst, src, len);
249	return csum_partial(dst, len, sum);
250}
251