/* xlr_csum_nocopy.S revision 201917 */
#include <machine/asm.h>

/*
 * Register usage on entry to the checksum routine in this file:
 *
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 * a3: dst (aliased below, but not referenced by the code in this file)
 */

#define src a0
#define dst a3
#define sum v0

	.text
	/*
	 * noreorder: the assembler does not fill or rearrange branch delay
	 * slots.  Throughout this file the instruction written directly after
	 * a branch or jump is its delay slot and executes whether or not the
	 * branch is taken.
	 */
	.set	noreorder

/*
 * CSUM_BIGCHUNK_AND_COPY offset
 *
 * Add the 32 bytes at src+offset .. src+offset+0x1f into the running sum
 * in v0.  Despite the name, nothing is stored anywhere: the macro only
 * prefetches, loads four doublewords and accumulates them.
 *
 * daddwc is emitted as a raw instruction word; the mnemonic in each
 * trailing comment is the one given in the original source.  It is used
 * here as an add into v0 that also accounts for a carry.
 * NOTE(review): confirm the exact semantics against the XLR processor
 * documentation.
 *
 * NOTE(review): the .word encodings hard-code register numbers ($2 as
 * destination, $8/$9 as addend).  These match v0/t0/t1 only when t0 and
 * t1 name $8 and $9 (o32-style register names) - confirm for the ABI this
 * file is built with.
 *
 * Clobbers: t0, t1.  src is not advanced; the caller does that.
 */
	.macro CSUM_BIGCHUNK_AND_COPY offset
	pref	0, (\offset + 0x00)(src)
	ld	t0, (\offset + 0x00)(src)
	ld	t1, (\offset + 0x08)(src)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	ld	t0, (\offset + 0x10)(src)
	ld	t1, (\offset + 0x18)(src)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	.endm

/*
 * small_csumcpy - sum the last 0..7 bytes, fold, and return.
 *
 * This is not a standalone function: xlr_csum_partial_nocopy branches
 * here, and the "jr ra" at the bottom returns to that function's caller.
 *
 * On entry:
 *   src (a0) = next byte to sum, any alignment
 *   t2       = bytes left; only bits 2..0 are examined
 *   sum (v0) = running sum
 *   a2       = partial checksum, added in here
 * On exit:
 *   v0       = sum folded from 64 bits to 32 and then to 16
 * Clobbers: a0, a1, t0, t1, t2, v1.  ulw/ulhu are assembler macros and may
 * also use $at.
 *
 * daddwc is emitted as raw instruction words (mnemonic taken from the
 * original comments); it is used as an add into v0 that also accounts for
 * a carry.  NOTE(review): the encodings hard-code register numbers
 * ($2, $3, $6, $9), which match v0/v1/a2/t1 only with o32-style names.
 */
small_csumcpy:
	move	a1, t2			/* a1 = bytes left */

	andi	t0, a1, 4
	beqz	t0, 1f			/* no whole word left */
	andi	t0, a1, 2		/* (delay slot) halfword-left flag */

	ulw	t1, (src)		/* Still a full word to go */
	daddiu	src, src, 4
	.word	0x70491038		/* daddwc v0, v0, t1 */

1:	move	t1, zero		/* t1 gathers the final 0..3 bytes */
	beqz	t0, 1f
	andi	t0, a1, 1		/* (delay slot) odd-byte-left flag */

	ulhu	t1, (src)		/* Still a halfword to go */
	daddiu	src, src, 2

1:	beqz	t0, 1f
	sll	t1, t1, 16		/* (delay slot) halfword, if any, to bits 31..16 */

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t1, t2

1:	.word	0x70491038		/* daddwc v0, v0, t1 */

	.word	0x70461038		/* daddwc v0, v0, a2 */
	.word	0x70401038		/* daddwc v0, v0, $0 */

	/* Ideally at this point of time the status flag must be cleared */

	/* 64 -> 32: add the low word onto the high word, then keep the high word */
	dsll32	v1, sum, 0
	.word	0x70431038		/* daddwc v0, v0, v1 */
	dsrl32	sum, sum, 0
	.word	0x70401038		/* daddwc v0, v0, zero */

	/* 32 -> 16: same trick, with the carry recovered by sltu */
	sll	v1, sum, 16
	addu	sum, sum, v1
	sltu	v1, sum, v1		/* v1 = 1 if the add wrapped */
	srl	sum, sum, 16
	addu	sum, sum, v1

	/* reorder is enabled just for the return so the assembler fills its delay slot */
	.set	reorder
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------ */

/*
 * xlr_csum_partial_nocopy
 *
 * Sum a buffer with the daddwc instruction and return the folded result.
 * Every path ends in small_csumcpy, which handles the last 0..7 bytes,
 * adds in a2, folds the sum and returns to the caller.
 *
 * In:   a0 (src) = buffer address
 *       a1       = length in bytes
 *       a2       = partial checksum to add in (consumed by small_csumcpy)
 * Out:  v0 (sum) = folded checksum
 * Uses: a0, a1, t0-t3, t7, t8, v1
 *
 * Assembled under .set noreorder: the instruction written after each
 * branch or jump is its delay slot and always executes.
 *
 * Flow:
 *   length < 8 : straight to small_csumcpy.
 *   otherwise  : consume 1 and/or 2 bytes to reach 4-byte alignment;
 *                if fewer than 56 bytes remain, sum word by word;
 *                else consume 4, 8, 16 bytes as needed to reach 32-byte
 *                alignment, then sum 128-, 64- and 32-byte chunks chosen
 *                from the bits of the remaining length, then the
 *                remaining whole words.
 *
 * daddwc is emitted as raw instruction words (mnemonic taken from the
 * original comments).  NOTE(review): the encodings hard-code register
 * numbers ($2, $8, $11), which match v0/t0/t3 only with o32-style names.
 *
 * NOTE(review): t7 records an odd start address but is only read by
 * hword_align; the leading odd byte is added unshifted and the result is
 * not byte-swapped afterwards.  Confirm callers never pass odd-aligned
 * buffers, or that this is intended.
 */
	.align	5
LEAF(xlr_csum_partial_nocopy)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy	/* < 8 bytes: the tail code does it all */
	move	t2, a1			/* (delay slot) t2 = length for small_csumcpy */

	/* a1 >= 8 from here on, so no zero-length check is needed */
	andi	t7, src, 0x1		/* odd buffer? */

hword_align:
	beqz	t7, word_align
	andi	t8, src, 0x2		/* (delay slot) */

	lbu	t0, (src)
	dsubu	a1, a1, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x1
	andi	t8, src, 0x2		/* re-test with the advanced pointer */

word_align:
	beqz	t8, dword_align
	sltiu	t8, a1, 56		/* (delay slot) */

	lhu	t0, (src)
	dsubu	a1, a1, 0x2
	.word	0x70481038		/* daddwc v0, v0, t0 */
	sltiu	t8, a1, 56		/* re-test with the reduced length */
	daddu	src, src, 0x2

dword_align:
	bnez	t8, do_end_words	/* < 56 bytes left: word loop only */
	move	t8, a1			/* (delay slot) byte count for do_end_words */

	andi	t8, src, 0x4
	beqz	t8, qword_align
	andi	t8, src, 0x8		/* (delay slot) */

	lw	t0, 0x00(src)
	dsubu	a1, a1, 0x4
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	andi	t8, src, 0x10		/* (delay slot) */

	ld	t0, 0x00(src)
	dsubu	a1, a1, 0x8
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	dsrl	t8, a1, 0x7		/* (delay slot) t8 = number of 128-byte chunks */

	ld	t3, 0x08(src)
	ld	t0, 0x00(src)
	.word	0x704b1038		/* daddwc v0, v0, t3 */
	.word	0x70481038		/* daddwc v0, v0, t0 */
	dsubu	a1, a1, 0x10
	daddu	src, src, 0x10
	dsrl	t8, a1, 0x7		/* recompute with the reduced length */

begin_movement:
	beqz	t8, 1f
	andi	t2, a1, 0x40		/* (delay slot) 64-byte chunk pending? */

	/* a1 is not decremented below; the chunk sizes come from its bits */
move_128bytes:
	pref	0, 0x20(src)
	pref	0, 0x40(src)
	pref	0, 0x60(src)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	CSUM_BIGCHUNK_AND_COPY(0x40)
	CSUM_BIGCHUNK_AND_COPY(0x60)
	dsubu	t8, t8, 0x01
	bnez	t8, move_128bytes	/* flag */
	daddu	src, src, 0x80		/* (delay slot) */

1:
	beqz	t2, 1f
	andi	t2, a1, 0x20		/* (delay slot) 32-byte chunk pending? */

move_64bytes:
	pref	0, 0x20(src)
	pref	0, 0x40(src)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	daddu	src, src, 0x40

1:
	beqz	t2, do_end_words
	andi	t8, a1, 0x1c		/* (delay slot) bytes left in whole words */

move_32bytes:
	pref	0, 0x20(src)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	andi	t8, a1, 0x1c
	daddu	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	dsrl	t8, t8, 0x2		/* (delay slot) bytes -> words */

end_words:
	lw	t0, (src)
	dsubu	t8, t8, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	bnez	t8, end_words
	daddu	src, src, 0x4		/* (delay slot) */

maybe_end_cruft:
	andi	t2, a1, 0x3		/* 0..3 trailing bytes */

	/* The tail code sums those bytes, adds a2, folds and returns. */
	j	small_csumcpy
	move	a1, t2			/* (delay slot) */
	END(xlr_csum_partial_nocopy)
