1/*-
2 * Copyright (c) 2000 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <machine/asm.h>
28__FBSDID("$FreeBSD$");
29
/*
 * void bcopy(const void *src, void *dst, size_t len)
 *
 * Not the fastest bcopy in the world.
 *
 * Register usage:
 *	in0 = src, in1 = dst, in2 = len.  All three are modified as the
 *	copy advances.  r14, r15, p6 and p7 are used as scratch.
 *
 * Strategy:
 *   - len <= 0 (signed compare): return without copying anything.
 *   - (dst - src) < len as unsigned values, i.e. dst starts inside the
 *     source range: copy bytewise from the last byte down (labels 5/6).
 *   - src and dst differ in their low three address bits: copy bytewise
 *     going up (label 1).
 *   - otherwise: copy single bytes until src is 8-byte aligned (label 3),
 *     then 8 bytes at a time (label 4), then finish a tail of fewer than
 *     8 bytes with the bytewise loop (label 1).
 */
ENTRY(bcopy, 3)

	cmp.le	p6,p0=in2,r0			// bail if len <= 0 (signed)
(p6)	br.ret.spnt.few rp

	sub	r14=in1,in0 ;;			// check for overlap
	cmp.ltu	p6,p0=r14,in2			// dst-src < len (unsigned)
(p6)	br.cond.spnt.few 5f			// overlapping: copy backwards

	extr.u	r14=in0,0,3			// src & 7
	extr.u	r15=in1,0,3 ;;			// dst & 7
	cmp.eq	p6,p0=r14,r15			// same alignment?
(p6)	br.cond.spnt.few 2f			// branch if same alignment

	// Forward bytewise copy.  Also entered from label 4 to finish a
	// tail of fewer than 8 bytes.  in2 is non-zero on every entry.

1:	ld1	r14=[in0],1 ;;			// copy bytewise
	st1	[in1]=r14,1
	add	in2=-1,in2 ;;			// len--
	cmp.ne	p6,p0=r0,in2
(p6)	br.cond.dptk.few 1b			// loop
	br.ret.sptk.few rp			// done

	// src and dst share the same low three bits; r14 = src & 7.

2:	cmp.eq	p6,p0=r14,r0			// aligned?
(p6)	br.cond.sptk.few 4f

	// Copy single bytes until src reaches an 8-byte boundary or the
	// length is exhausted, whichever comes first.

3:	ld1	r14=[in0],1 ;;			// copy bytewise
	st1	[in1]=r14,1
	extr.u	r15=in0,0,3			// src & 7 (after the increment)
	add	in2=-1,in2 ;;			// len--
	cmp.eq	p6,p0=r0,in2			// done?
	cmp.eq	p7,p0=r0,r15 ;;			// aligned now?
(p6)	br.ret.spnt.few rp			// return if done
(p7)	br.cond.spnt.few 4f			// go to main copy
	br.cond.sptk.few 3b			// more bytes to copy

	// At this point, in2 is non-zero

4:	mov	r14=8 ;;
	cmp.ltu	p6,p0=in2,r14 ;;		// len < 8?
(p6)	br.cond.spnt.few 1b			// byte copy the end
	ld8	r15=[in0],8 ;;			// copy word
	st8	[in1]=r15,8
	add	in2=-8,in2 ;;			// len -= 8
	cmp.ne	p6,p0=r0,in2			// done?
(p6)	br.cond.dptk.few 4b			// again (loop back-edge: hint taken)

	br.ret.sptk.few rp			// return

	// Overlap case: dst starts inside the source range, so walk from
	// the last byte down to read each source byte before it can be
	// overwritten.  No attempt is made to optimise this path.

5:	add	in0=in0,in2
	add	in1=in1,in2 ;;
	add	in0=-1,in0			// in0 = src + len - 1
	add	in1=-1,in1 ;;			// in1 = dst + len - 1

6:	ld1	r14=[in0],-1 ;;
	st1	[in1]=r14,-1
	add	in2=-1,in2 ;;			// len--
	cmp.ne	p6,p0=r0,in2
(p6)	br.cond.dptk.few 6b			// loop back-edge: hint taken

	br.ret.sptk.few rp

END(bcopy)
96