1/*	$NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $	*/
2
3/*-
4 * Copyright (c) 2017 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anders Magnusson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31/*
32 * Copyright (c) 1988, 1992, 1993
33 *	The Regents of the University of California.  All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 *    may be used to endorse or promote products derived from this software
45 *    without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
60 */
61
/*
 * Assembly version of cpu_in_cksum() for vax, following the structure
 * of the C version of the file but using vax instructions for speed.
 * Increases network traffic speed by almost 50% (NFS tests).
 */
67
68#include <machine/asm.h>
69__KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $");
70
71#include "assym.h"
72
/*
 * Register aliases used by cpu_in_cksum().
 *
 * Only r6 and r7 are listed in the entry mask; r0-r5 are used without
 * being saved (NOTE(review): assumes r0-r5 are caller-saved in this ABI).
 * r0 holds "off" while the initial offset is being skipped and is reused
 * for the return value once "off" is dead.
 */
#define off	%r0
#define mlen	%r1
#define m	%r2
#define data	%r3
#define sum	%r4
#define len	%r5
#define byte_swapped	%r6
#define tmp	%r7

/*
 * int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
 *
 * Sum "len" bytes of the mbuf chain "m", starting "off" bytes into the
 * chain, on top of "initial_sum", and return the complemented 16-bit
 * result.
 *
 * In:	4(%ap) = m, 8(%ap) = len, 12(%ap) = off, 16(%ap) = initial_sum
 * Out:	r0 = (folded sum) ^ 0xffff, or -1 after printing
 *	"in_cksum: out of data" when the chain ends (m == NULL) before
 *	the offset has been skipped or before len bytes have been summed.
 *
 * The accumulator "sum" is 32 bits wide with end-around carry: every
 * add/adwc chain is closed with "adwc $0,sum" to fold the last carry
 * back in.  Whenever a single byte has to be consumed (odd start
 * address or odd trailing byte) the byte is added, the accumulator is
 * rotated left by 8 bits and byte_swapped is toggled; a still pending
 * swap is undone by one more rotate before the final fold.
 */
ENTRY(cpu_in_cksum, R7|R6)

	movl	4(%ap),m
	movl	8(%ap),len
	movl	12(%ap),off
	movl	16(%ap),sum

	clrl	byte_swapped

	/*
	 * First loop: walk the chain until the mbuf containing the
	 * initial offset is found.
	 */
.Lskip_offset:				# for (;;) {
	tstl	m			#	if (m == NULL)
	jeql	.Lout_of_data		#		goto out_of_data;

	movl	M_LEN(m),mlen		#	mlen = m->m_len;
	cmpl	off,mlen		#	if (mlen > off) {
	jgeq	.Lskip_whole_mbuf
	subl2	off,mlen		#		mlen -= off;
	addl3	M_DATA(m),off,data	#		data = mtod(m, uint8_t *) + off;
	jbr	.Lpost_initial_offset	#		goto post_initial_offset;
.Lskip_whole_mbuf:			#	}
	subl2	mlen,off		#	off -= mlen;
	tstl	len			#	if (len == 0)
	jeql	.Lsum_loop		#		break;
	movl	M_NEXT(m),m		#	m = m->m_next;
	jbr	.Lskip_offset		# }

	/*
	 * Second loop: sum one mbuf per iteration until len reaches 0.
	 * .Lnext_mbuf is the loop increment, .Lsum_loop the loop test.
	 */
.Lnext_mbuf:
	movl	M_NEXT(m),m		# m = m->m_next
.Lsum_loop:
	tstl	len			# while (len != 0) {
	jeql	.Lfinish
	tstl	m			#	if (m == NULL)
	jeql	.Lout_of_data		#		goto out_of_data;

	movl	M_LEN(m),mlen		#	mlen = m->m_len;
	movl	M_DATA(m),data		#	data = mtod(m, uint8_t *);
.Lpost_initial_offset:
	tstl	mlen			#	if (mlen == 0)
	jeql	.Lnext_mbuf		#		continue;
	cmpl	len,mlen		#	if (mlen > len)
	jgeq	.Lmlen_clamped
	movl	len,mlen		#		mlen = len;
.Lmlen_clamped:
	subl2	mlen,len		#	len -= mlen;
	cmpl	mlen,$16		#	if (mlen < 16)
	jlss	.Lshort_mbuf		#		goto short_mbuf;

	/*
	 * At least 16 bytes to sum: consume up to 3 leading bytes so
	 * that data is longword aligned for the unrolled adds below.
	 */
	blbc	data,.Leven_address	#	if ((uintptr_t)data & 1) {
	movzbl	(data)+,tmp		#		tmp = *data++;
	addl2	tmp,sum			#		sum += tmp;
	adwc	$0,sum			#		(end-around carry)
	rotl	$8,sum,sum		#		sum = (sum << 8 | sum >> 24);
	xorl2	$1,byte_swapped		#		byte_swapped ^= 1;
	decl	mlen			#		mlen--;
.Leven_address:				#	}
	bbc	$1,data,.Llong_aligned	#	if ((uintptr_t)data & 2) {
	movzwl	(data)+,tmp		#		tmp = *(uint16_t *)data; data += 2;
	addl2	tmp,sum			#		sum += tmp;
	adwc	$0,sum			#		(end-around carry)
	subl2	$2,mlen			#		mlen -= 2;
.Llong_aligned:				#	}

	/*
	 * Main loop: 8 longwords (32 bytes) per pass.  The carry is
	 * chained through the adwc instructions and folded back in once
	 * at the end of each pass.
	 */
.Lchunk32:
	subl2	$32,mlen		#	while ((mlen -= 32) >= 0) {
	jlss	.Lchunk32_done
	addl2	(data)+,sum		#		sum += *(uint32_t *)data; data += 4;
	adwc	(data)+,sum		#		... repeated 8 times in total
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	$0,sum			#		(end-around carry)
	jbr	.Lchunk32		#	}

.Lchunk32_done:
	addl2	$32,mlen		#	mlen += 32;	(now 0..31)
	bbc	$4,mlen,.Lshort_mbuf	#	if (mlen & 16) {
	addl2	(data)+,sum		#		sum += 4 longwords
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	(data)+,sum
	adwc	$0,sum			#		(end-around carry)
	subl2	$16,mlen		#		mlen -= 16;
					#	}

	/*
	 * Fewer than 16 bytes are left in this mbuf: the bits of mlen
	 * select which of the 8, 4, 2 and 1 byte pieces remain.  No
	 * alignment has been done when this is entered directly for a
	 * short mbuf.
	 */
.Lshort_mbuf:
	bbc	$3,mlen,.Ltail4		#	if (mlen & 8) {
	addl2	(data)+,sum		#		sum += 2 longwords
	adwc	(data)+,sum
	adwc	$0,sum			#		(end-around carry)
	subl2	$8,mlen			#		mlen -= 8;
.Ltail4:				#	}
	bbc	$2,mlen,.Ltail2		#	if (mlen & 4) {
	addl2	(data)+,sum		#		sum += 1 longword
	adwc	$0,sum			#		(end-around carry)
	subl2	$4,mlen			#		mlen -= 4;
.Ltail2:				#	}
	bbc	$1,mlen,.Ltail1		#	if (mlen & 2) {
	movzwl	(data)+,tmp		#		tmp = *(uint16_t *)data; data += 2;
	addl2	tmp,sum			#		sum += tmp;
	adwc	$0,sum			#		(mlen is left alone, only bit 0 is read below)
.Ltail1:				#	}
	blbc	mlen,.Lmbuf_done	#	if (mlen & 1) {
	movzbl	(data)+,tmp		#		tmp = *data++;
	addl2	tmp,sum			#		sum += tmp;
	adwc	$0,sum			#		(end-around carry)
	rotl	$8,sum,sum		#		sum = (sum << 8 | sum >> 24);
	xorl2	$1,byte_swapped		#		byte_swapped ^= 1;
.Lmbuf_done:				#	}
	jbr	.Lnext_mbuf		# }

	/*
	 * All len bytes have been summed (this label is only reached
	 * with len == 0).  Undo a pending byte swap, fold the 32-bit
	 * accumulator to 16 bits and complement it.
	 */
.Lfinish:
	tstl	byte_swapped		# if (byte_swapped)
	jeql	.Lfold
	rotl	$8,sum,sum		#	sum = (sum << 8 | sum >> 24);
.Lfold:
	rotl	$16,sum,tmp		# low word of tmp = high word of sum
	addw2	tmp,sum			# low word of sum += high word, sets C
	bicl2	$0xffff0000,sum		# sum &= 0xffff; C from addw2 must survive this
	adwc	$0,sum			# fold the 16-bit carry back in
	xorl3	$0xffff,sum,%r0		# return (sum ^ 0xffff);
	ret

	/*
	 * The chain ended too early: report it and return -1.
	 */
.Lout_of_data:
	pushab	.Lin_cksum
	calls	$1,printf
	mnegl	$1,%r0
	ret

	.section	.rodata
.Lin_cksum:
	.asciz "in_cksum: out of data\n"
222
223