/*	$OpenBSD: memmove.S,v 1.6 2015/08/31 02:53:56 guenther Exp $	*/
/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 *	   added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "SYS.h"

/*
 * Register aliases for the three memmove() arguments:
 *	DSTREG  (a0)	destination pointer, also the value returned in v0
 *	SRCREG  (a1)	source pointer
 *	SIZEREG (a2)	byte count
 * The copy code works on these registers directly and modifies them,
 * so none of them can be relied on to hold its original value once
 * copying has started.
 */
#define	SRCREG		a1
#define	DSTREG		a0

#define	SIZEREG		a2

/*
 * Copy bytes, coping with overlapping source and destination.
 *
 * void *memmove(void *to, const void *from, size_t len);
 *
 * Returns `to' (in v0).
 *
 * No matter how invoked, the source and destination registers are used
 * directly for calculation.  There's no point in copying them to
 * "working" registers, since the code uses their values "in place," and
 * copying them would be slower.
 *
 * Overview of the paths below:
 *   - len == 0: return at once.
 *   - `to' starts inside [from, from + len): copy backwards from the
 *     end (bcopy_overlap) so source bytes are read before they are
 *     overwritten.
 *   - otherwise copy forwards:
 *       . same alignment within a quadword: whole-quadword ldq_u/stq_u
 *         loop with the first and last quadword merged into the
 *         existing destination bytes;
 *       . different alignment, more than 8 bytes: align the
 *         destination, then assemble each destination quadword from
 *         two source quadwords with extql/extqh;
 *       . different alignment, 8 bytes or fewer: bcopy_da_finish.
 *
 * Byte-manipulation notes (offset = low three bits of the second
 * operand): ldq_u/stq_u ignore the low three address bits; mskql keeps
 * the bytes below the offset, mskqh the bytes at and above it; extql
 * and extqh together extract an unaligned quadword from two aligned
 * ones; insql and insqh together spread a quadword over two aligned
 * ones.
 *
 * Scratch registers used: t0-t7, a3, a4.
 */

LEAF(memmove,3)
	/* set up return value, while we still can */
	mov	DSTREG,v0

	/* Check for zero length */
	beq	SIZEREG,bcopy_done

	/*
	 * Check for overlap.  As an unsigned comparison, (to - from) < len
	 * holds only when `to' starts inside [from, from + len); a forward
	 * copy would then overwrite source bytes before reading them.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word */
	ldq_u	t2,0(SRCREG)

	/*
	 * Do they have the same alignment?
	 * t0 != 0 if the low three address bits differ;
	 * t1 = offset of the destination within its quadword.
	 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Shared non-zero offset.  Build the first quadword from the
	 * source bytes at and above the offset plus the existing
	 * destination bytes below it, and add the preserved leading bytes
	 * to the length so that the code below can count whole quadwords.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/*
	 * From here on the pointers are treated as 8-byte aligned: they
	 * are only used with ldq_u/stq_u, which ignore the low three
	 * address bits.
	 */

bcopy_all_aligned:
	/*
	 * If less than 8 bytes,skip loop.
	 * t0 = number of bytes the loop stores (a multiple of 8);
	 * SIZEREG = bytes wanted from the final quadword, with 0 meaning
	 * all eight.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the pending quadword (t2), then fetch the next one */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * t2 holds the final quadword of data.  If all eight bytes of it
	 * are wanted, store it and exit; otherwise merge it below.
	 * (The different-alignment loop also finishes here.)
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Partial final quadword: take the first SIZEREG bytes from the
	 * source data and keep the rest of the destination quadword.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * On entry: t2 = quadword containing the first source byte,
	 * t1 = destination offset within its quadword.
	 */
	/* a3 = end of source */
	addq	SRCREG,SIZEREG,a3
	/* 8 bytes or fewer are handled in one shot */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* Destination already aligned: no leading partial quadword */
	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word
	 * t0 = number of bytes up to the next destination quadword
	 * boundary.  Extract eight unaligned source bytes, shift them to
	 * the destination offset, merge with the destination bytes below
	 * that offset, and store.  Then advance both pointers by t0 so
	 * the destination is aligned, and reload t2 for the new SRCREG.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * Same split as in bcopy_all_aligned: t0 = bytes stored by the
	 * loop, SIZEREG = bytes wanted from the final quadword (0 = all).
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled twice so that t2 and t3 alternate as the "previous"
	 * and "next" source quadword without a register move.  Adding 8
	 * to SRCREG before the extracts is harmless: extql/extqh only
	 * look at its low three bits.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	/* Leaving here, the most recent source quadword is in t3 */
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/*
	 * Do the last new word
	 * The most recent source quadword is in t2 here; bcopy_da_finish1
	 * expects it in t3.
	 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word
	 * Combine t3 with the quadword holding the last source byte to
	 * form the final destination quadword in t2, then let the
	 * same-alignment tail store all or part of it.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Copy 1 to 8 bytes in one shot.
	 * On entry: t2 = quadword containing the first source byte,
	 * a3 = end of source.  Every load is issued before the first
	 * store, which is what allows bcopy_ov_short to use this path for
	 * overlapping buffers.
	 */
	/* Do the last word in the next source word */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	/* Spread the data over the (up to two) destination quadwords */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * t4 = byte mask covering the bytes to copy.  mskql produces 0
	 * when the length is 8, in which case cmovne leaves the all-ones
	 * mask in t4.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	/* Position the mask exactly like the data: t4 low, t5 high */
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	/* a4 = end of destination */
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	/* Keep destination bytes outside the mask, source bytes inside */
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * Store the high quadword first.  When the destination range does
	 * not cross a quadword boundary both stores hit the same quadword
	 * (the high mask is then 0), and the low store, which carries the
	 * data, must be the one that lands last.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 and a4 walk downwards from the end of the source and of the
	 * destination; SRCREG and DSTREG keep the start addresses for the
	 * whole of this path.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* 8 bytes or less - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word
	 * t4 = number of destination bytes beyond the last quadword
	 * boundary.  Step both end pointers back by t4 (a4 becomes
	 * aligned), fetch eight unaligned source bytes, and merge the
	 * first t4 of them with the rest of the destination quadword.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/*
	 * t4 = bytes to copy as whole quadwords, SIZEREG = bytes left
	 * over at the start of the buffers.
	 */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	/* Copy the eight bytes just below a3 to the quadword below a4 */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * Copy the remaining bytes at the start of the buffers.  The
	 * destination bytes still to be written run from DSTREG up to the
	 * next quadword boundary, so shifting the source data to the
	 * destination offset with insql and keeping the destination bytes
	 * below that offset with mskql gives the merged quadword.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or less: load the first source
	 * quadword into t2 as bcopy_da_finish expects (a3 is already the
	 * end of the source) and let it do the work.
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END_STRONG(memmove)