/* $Id: checksumcopy.S,v 1.1.1.1 2007/08/03 18:51:41 Exp $
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *		             int len, unsigned int sum)
 *
 * Copies len bytes from src to dst and adds the copied data to sum.
 *
 * In:	r10 = src, r11 = dst, r12 = len, r13 = sum
 * Out:	r10 = updated partial sum.  The 32-bit sum is folded down to
 *	16 bits before the trailing words and byte are added, so the
 *	returned value is a partial sum, not a finished 16-bit checksum.
 * Clobbers: r9 (scratch), r11, r12, r13, flags.
 *	r0 - r8 are used by the movem loop but saved and restored.
 * Stack: 9*4 bytes, only when the movem loop is taken.
 *
 * Three phases:
 *   1. len >= 80: copy and sum 10 longwords (40 bytes) per iteration
 *      with movem, using 32-bit adds with the carry wrapped around.
 *   2. copy and sum the remaining 16-bit words.
 *   3. copy and sum one trailing odd byte, if any.
 *
 * Branches and ret have a delay slot: the instruction that follows
 * them is executed whether or not the branch is taken.
 */

	.globl	csum_partial_copy_nocheck
csum_partial_copy_nocheck:

	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; check for breakeven length between movem and normal word looping versions
	;; each movem iteration consumes 40 bytes, so we do _NOT_ want to
	;; enter it with less than that or we would compute a checksum over
	;; more than the actual length.  the breakeven threshold used is 80.
	;; note that the compare is unsigned.

	cmpu.w	80, $r12
	blo	_word_loop
	nop

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI

	subq	9*4, $sp
	movem	$r8, [$sp]

	;; do a movem copy and checksum
	;; r12 is kept biased by -40 inside the loop so that the sign of
	;; the subq result at the bottom tells whether another full
	;; 40-byte chunk remains.

	subq	10*4, $r12	; update length for the first loop

_mloop:	movem	[$r10+],$r9	; read 10 longwords
1:	;; A failing userspace access will have this as PC.
	movem	$r9,[$r11+]	; write 10 longwords

	;; perform dword checksumming on the 10 longwords
	;; each ax makes the add that follows it also add in the carry
	;; left by the previous add, so the ten adds form one carry chain.

	add.d	$r0,$r13
	ax
	add.d	$r1,$r13
	ax
	add.d	$r2,$r13
	ax
	add.d	$r3,$r13
	ax
	add.d	$r4,$r13
	ax
	add.d	$r5,$r13
	ax
	add.d	$r6,$r13
	ax
	add.d	$r7,$r13
	ax
	add.d	$r8,$r13
	ax
	add.d	$r9,$r13

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top

	ax
	addq	0,$r13		; checksum += carry from the last add.d
	ax			; do it again, since we might have generated a carry
	addq	0,$r13

	subq	10*4,$r12
	bge	_mloop		; at least 40 more bytes left
	nop

	addq	10*4,$r12	; compensate for last loop underflowing length

	movem	[$sp+],$r8	; restore regs

_word_loop:
	;; only fold if there is anything to fold.
	;; the move.d right after the beq sits in its delay slot, so it is
	;; executed on both paths.  that is harmless: r9 is only a temporary.

	cmpq	0,$r13
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.

	move.d	$r13,$r9
	lsrq	16,$r9		; r9 = checksum >> 16
	and.d	0xffff,$r13	; checksum = checksum & 0xffff
	add.d	$r9,$r13	; checksum += r9
	move.d	$r13,$r9	; do the same again, maybe we got a carry last add
	lsrq	16,$r9
	and.d	0xffff,$r13
	add.d	$r9,$r13

_no_fold:
	cmpq	2,$r12
	blt	_no_words	; less than one whole word left
	nop

	;; copy and checksum the rest of the words
	;; r12 is kept biased by -2 inside the loop, like in the movem loop.

	subq	2,$r12

_wloop:	move.w	[$r10+],$r9
2:	;; A failing userspace access will have this as PC.
	addu.w	$r9,$r13	; add the word zero-extended
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]	; delay slot: store the word on every iteration

	addq	2,$r12		; undo the bias; r12 is now 0 or 1

_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r12
	beq	_do_byte
	nop
	ret
	move.d	$r13, $r10	; delay slot: return the checksum in r10

_do_byte:
	;; copy and checksum the last byte
	move.b	[$r10],$r9
3:	;; A failing userspace access will have this as PC.
	addu.b	$r9,$r13	; add the byte zero-extended
	move.b	$r9,[$r11]
	ret
	move.d	$r13, $r10	; delay slot: return the checksum in r10
