1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License.  See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1998 Ralf Baechle
7 */
8#include <asm/asm.h>
9#include <asm/regdef.h>
10
/*
 * ADDC(acc, val): acc += val, then add the carry out of that addition
 * back into acc (end-around carry).  Clobbers v1.
 */
#define ADDC(acc, val)			\
	addu	acc, acc, val;		\
	sltu	v1, acc, val;		\
	addu	acc, acc, v1
15
/*
 * CSUM_HALFCHUNK: load the four words at base + off .. base + off + 0x0c
 * into w0..w3 and ADDC each of them into acc, in that order.
 * Clobbers w0..w3 and v1 (through ADDC).
 */
#define CSUM_HALFCHUNK(base, off, acc, w0, w1, w2, w3) \
	lw	w0, (off + 0x00)(base); \
	lw	w1, (off + 0x04)(base); \
	lw	w2, (off + 0x08)(base); \
	lw	w3, (off + 0x0c)(base); \
	ADDC(acc, w0);                  \
	ADDC(acc, w1);                  \
	ADDC(acc, w2);                  \
	ADDC(acc, w3);

/*
 * CSUM_BIGCHUNK: sum the 32 bytes (eight words) starting at base + off
 * into acc, as two back-to-back 16-byte halves.  base is not advanced.
 */
#define CSUM_BIGCHUNK(base, off, acc, w0, w1, w2, w3) \
	CSUM_HALFCHUNK(base, off, acc, w0, w1, w2, w3) \
	CSUM_HALFCHUNK(base, off + 0x10, acc, w0, w1, w2, w3)
33
34/*
35 * a0: source address
36 * a1: length of the area to checksum
37 * a2: partial checksum
38 */
39
/*
 * Register aliases.  There is deliberately no alias for a1: it holds the
 * length, and the code below refers to it by its register name.
 */
#define src a0		/* current read position in the buffer */
#define sum v0		/* running checksum, and the return register */
43
44	.text
45	.set	noreorder
46
/*
 * small_csumcpy: sum the last few bytes (unknown src alignment) and
 * finish the checksum.
 *
 * Entered by branch/jump from csum_partial, not called, with
 *   src (a0): next byte to sum
 *   t2      : remaining byte count; only bits 2..0 are examined
 *   sum (v0): checksum accumulated so far
 *   t7      : non-zero if the folded result has to be byte-swapped
 *   a2      : partial checksum to add at the very end
 * Returns through ra with the result in v0.
 * Uses t0, t1, t2, v1 and a1 (ulw/ulhu are assembler macros and may
 * use further scratch registers).
 */
small_csumcpy:
	move	a1, t2

	andi	t0, a1, 4			/* a whole word left? */
	beqz	t0, small_try_half
	 andi	t0, a1, 2			/* delay slot: halfword left? */

	/* Unaligned word */
	ulw	t1, 0(src)
	addiu	src, src, 4
	ADDC(sum, t1)

small_try_half:
	move	t1, zero			/* t1 collects halfword + byte */
	beqz	t0, small_try_byte
	 andi	t0, a1, 1			/* delay slot: odd byte left? */

	/* Unaligned halfword */
	ulhu	t1, 0(src)
	addiu	src, src, 2

small_try_byte:
	beqz	t0, small_accumulate
	 sll	t1, t1, 16			/* delay slot: halfword to the upper half */

	/* Final single byte; nop sits directly behind the load */
	lbu	t2, 0(src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8			/* big-endian: byte is the high byte of its pair */
#endif
	or	t1, t1, t2

small_accumulate:
	ADDC(sum, t1)

	/*
	 * Fold 32 -> 16 bits: sum + (sum << 16) leaves high + low in the
	 * upper half, v1 catches the carry, which is added after shifting
	 * the result back down.
	 */
	sll	v1, sum, 16
	addu	sum, sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, sum, v1

	/* t7 set: exchange the two bytes of the folded sum */
	beqz	t7, small_no_swap
	 nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, sum, v1
	andi	sum, sum, 0xffff
small_no_swap:
	/* reorder mode: the assembler takes care of the jr delay slot */
	.set	reorder
	/* Finally add in the partial checksum supplied by the caller. */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder
101
/* ------------------------------------------------------------------------- */

/*
 * csum_partial
 *
 * In:   a0 (src) = buffer address
 *       a1       = length in bytes
 *       a2       = partial checksum, added to the result
 * Out:  v0 (sum), produced by the shared tail at small_csumcpy, which
 *       folds the sum, adds a2 and returns to the caller
 * Uses: t0-t4, t7, t8, v1; a0 and a1 are modified
 *
 * Buffers shorter than 8 bytes go straight to small_csumcpy.  Longer
 * ones are first aligned to a word boundary.  If at least 56 bytes
 * remain, src is stepped up to a 32-byte boundary and the data is summed
 * in 128-, 64- and 32-byte chunks.  Remaining whole words are summed one
 * at a time, and the last 0-3 bytes are handed to small_csumcpy.
 *
 * t7 records whether the buffer started on an odd address.  a1 is only
 * decremented by the alignment steps; after that its bits select which
 * chunk sizes still have to be summed.
 *
 * All delay slots are filled by hand (.set noreorder); the instruction
 * indented by one extra space after a branch or jump is its delay slot.
 */
	.align	5
LEAF(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to checksum */
	 move	t2, a1

	/* a1 >= 8 from here on, so no zero-length check is needed. */
	andi	t7, src, 0x1			/* odd buffer? */

hword_align:
	beqz	t7, word_align
	 andi	t8, src, 0x2

	/*
	 * Odd start address: consume one byte.  On little-endian it is
	 * placed in the upper byte of its 16-bit lane; small_csumcpy
	 * swaps the folded result back when t7 is set.
	 */
	lbu	t0, (src)
	subu	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	addu	src, src, 0x1
	andi	t8, src, 0x2			/* re-test with the advanced src */

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56

	/* src is 2 mod 4: consume one halfword */
	lhu	t0, (src)
	subu	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56			/* re-test with the reduced length */
	addu	src, src, 0x2

dword_align:
	/*
	 * Fewer than 56 bytes left: skip the chunked path and sum a1 / 4
	 * words directly (t8 = a1 is turned into a word count at
	 * do_end_words).
	 */
	bnez	t8, do_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8

	/* consume one word to reach an 8-byte boundary */
	lw	t0, 0x00(src)
	subu	a1, a1, 0x4
	ADDC(sum, t0)
	addu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10

	/* consume two words to reach a 16-byte boundary */
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	subu	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
	addu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 srl	t8, a1, 0x7			/* t8 = number of 128-byte chunks */

	/* consume four words to reach a 32-byte boundary */
	lw	t3, 0x08(src)
	lw	t4, 0x0c(src)
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	ADDC(sum, t3)
	ADDC(sum, t4)
	ADDC(sum, t0)
	ADDC(sum, t1)
	subu	a1, a1, 0x10
	addu	src, src, 0x10
	srl	t8, a1, 0x7			/* recount with the reduced length */

begin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40			/* t2 = a 64-byte chunk remains? */

move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	subu	t8, t8, 0x01
	bnez	t8, move_128bytes
	 addu	src, src, 0x80

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20			/* t2 = a 32-byte chunk remains? */

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	addu	src, src, 0x40

1:
	beqz	t2, do_end_words
	 andi	t8, a1, 0x1c			/* t8 = bytes left in whole words */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	addu	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	 srl	t8, t8, 0x2			/* byte count -> word count */

end_words:
	lw	t0, (src)
	subu	t8, t8, 0x1
	ADDC(sum, t0)
	bnez	t8, end_words
	 addu	src, src, 0x4

maybe_end_cruft:
	andi	t2, a1, 0x3			/* 0-3 trailing bytes */

	/*
	 * Hand the trailing bytes to small_csumcpy, which also folds the
	 * sum, applies the odd-address byte swap, adds a2 and returns to
	 * our caller.
	 */
	j	small_csumcpy
	 move	a1, t2
	END(csum_partial)
241