Diff view (toggles: Deleted / Added, full / compact)
Old: memcpy_arm.S (r256281)    New: memcpy_arm.S (r271337)
1/* $NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
1/* $NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33__FBSDID("$FreeBSD: stable/10/lib/libc/arm/string/memcpy_arm.S 204607 2010-03-02 22:16:40Z joel $");
33__FBSDID("$FreeBSD: stable/10/lib/libc/arm/string/memcpy_arm.S 271337 2014-09-09 22:24:01Z ian $");
34/*
35 * This is one fun bit of code ...
36 * Some easy listening music is suggested while trying to understand this
37 * code e.g. Iron Maiden
38 *
39 * For anyone attempting to understand it :
40 *
41 * The core code is implemented here with simple stubs for memcpy().
42 *
43 * All local labels are prefixed with Lmemcpy_
44 * Following the prefix a label starting f is used in the forward copy code
45 * while a label using b is used in the backwards copy code
46 * The source and destination addresses determine whether a forward or
47 * backward copy is performed.
48 * Separate bits of code are used to deal with the following situations
49 * for both the forward and backwards copy.
50 * unaligned source address
51 * unaligned destination address
52 * Separate copy routines are used to produce an optimised result for each
53 * of these cases.
54 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
55 * a time where possible.
56 *
57 * Note: r12 (aka ip) can be trashed during the function along with
58 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
59 * Additional registers are preserved prior to use i.e. r4, r5 & lr
60 *
61 * Apologies for the state of the comments ;-)
62 */
63/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
64ENTRY(memcpy)
65 /* save leaf functions having to store this away */
66 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
67
68 subs r2, r2, #4
69 blt .Lmemcpy_l4 /* less than 4 bytes */
70 ands r12, r0, #3
71 bne .Lmemcpy_destul /* oh unaligned destination addr */
72 ands r12, r1, #3
73 bne .Lmemcpy_srcul /* oh unaligned source addr */
74
75.Lmemcpy_t8:
76 /* We have aligned source and destination */
77 subs r2, r2, #8
78 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
79 subs r2, r2, #0x14
80 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
81 stmdb sp!, {r4} /* borrow r4 */
82
83 /* blat 32 bytes at a time */
84 /* XXX for really big copies perhaps we should use more registers */
85.Lmemcpy_loop32:
86 ldmia r1!, {r3, r4, r12, lr}
87 stmia r0!, {r3, r4, r12, lr}
88 ldmia r1!, {r3, r4, r12, lr}
89 stmia r0!, {r3, r4, r12, lr}
90 subs r2, r2, #0x20
91 bge .Lmemcpy_loop32
92
93 cmn r2, #0x10
94 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
95 stmgeia r0!, {r3, r4, r12, lr}
96 subge r2, r2, #0x10
97 ldmia sp!, {r4} /* return r4 */
98
99.Lmemcpy_l32:
100 adds r2, r2, #0x14
101
102 /* blat 12 bytes at a time */
103.Lmemcpy_loop12:
104 ldmgeia r1!, {r3, r12, lr}
105 stmgeia r0!, {r3, r12, lr}
106 subges r2, r2, #0x0c
107 bge .Lmemcpy_loop12
108
109.Lmemcpy_l12:
110 adds r2, r2, #8
111 blt .Lmemcpy_l4
112
113 subs r2, r2, #4
114 ldrlt r3, [r1], #4
115 strlt r3, [r0], #4
116 ldmgeia r1!, {r3, r12}
117 stmgeia r0!, {r3, r12}
118 subge r2, r2, #4
119
120.Lmemcpy_l4:
121 /* less than 4 bytes to go */
122 adds r2, r2, #4
123#ifdef __APCS_26_
124 ldmeqia sp!, {r0, pc}^ /* done */
125#else
126 ldmeqia sp!, {r0, pc} /* done */
127#endif
128 /* copy the crud byte at a time */
129 cmp r2, #2
130 ldrb r3, [r1], #1
131 strb r3, [r0], #1
132 ldrgeb r3, [r1], #1
133 strgeb r3, [r0], #1
134 ldrgtb r3, [r1], #1
135 strgtb r3, [r0], #1
136 ldmia sp!, {r0, pc}
137
138 /* erg - unaligned destination */
139.Lmemcpy_destul:
140 rsb r12, r12, #4
141 cmp r12, #2
142
143 /* align destination with byte copies */
144 ldrb r3, [r1], #1
145 strb r3, [r0], #1
146 ldrgeb r3, [r1], #1
147 strgeb r3, [r0], #1
148 ldrgtb r3, [r1], #1
149 strgtb r3, [r0], #1
150 subs r2, r2, r12
151 blt .Lmemcpy_l4 /* less the 4 bytes */
152
153 ands r12, r1, #3
154 beq .Lmemcpy_t8 /* we have an aligned source */
155
156 /* erg - unaligned source */
157 /* This is where it gets nasty ... */
158.Lmemcpy_srcul:
159 bic r1, r1, #3
160 ldr lr, [r1], #4
161 cmp r12, #2
162 bgt .Lmemcpy_srcul3
163 beq .Lmemcpy_srcul2
164 cmp r2, #0x0c
165 blt .Lmemcpy_srcul1loop4
166 sub r2, r2, #0x0c
167 stmdb sp!, {r4, r5}
168
169.Lmemcpy_srcul1loop16:
170#ifdef __ARMEB__
171 mov r3, lr, lsl #8
172#else
173 mov r3, lr, lsr #8
174#endif
175 ldmia r1!, {r4, r5, r12, lr}
176#ifdef __ARMEB__
177 orr r3, r3, r4, lsr #24
178 mov r4, r4, lsl #8
179 orr r4, r4, r5, lsr #24
180 mov r5, r5, lsl #8
181 orr r5, r5, r12, lsr #24
182 mov r12, r12, lsl #8
183 orr r12, r12, lr, lsr #24
184#else
185 orr r3, r3, r4, lsl #24
186 mov r4, r4, lsr #8
187 orr r4, r4, r5, lsl #24
188 mov r5, r5, lsr #8
189 orr r5, r5, r12, lsl #24
190 mov r12, r12, lsr #8
191 orr r12, r12, lr, lsl #24
192#endif
193 stmia r0!, {r3-r5, r12}
194 subs r2, r2, #0x10
195 bge .Lmemcpy_srcul1loop16
196 ldmia sp!, {r4, r5}
197 adds r2, r2, #0x0c
198 blt .Lmemcpy_srcul1l4
199
200.Lmemcpy_srcul1loop4:
201#ifdef __ARMEB__
202 mov r12, lr, lsl #8
203#else
204 mov r12, lr, lsr #8
205#endif
206 ldr lr, [r1], #4
207#ifdef __ARMEB__
208 orr r12, r12, lr, lsr #24
209#else
210 orr r12, r12, lr, lsl #24
211#endif
212 str r12, [r0], #4
213 subs r2, r2, #4
214 bge .Lmemcpy_srcul1loop4
215
216.Lmemcpy_srcul1l4:
217 sub r1, r1, #3
218 b .Lmemcpy_l4
219
220.Lmemcpy_srcul2:
221 cmp r2, #0x0c
222 blt .Lmemcpy_srcul2loop4
223 sub r2, r2, #0x0c
224 stmdb sp!, {r4, r5}
225
226.Lmemcpy_srcul2loop16:
227#ifdef __ARMEB__
228 mov r3, lr, lsl #16
229#else
230 mov r3, lr, lsr #16
231#endif
232 ldmia r1!, {r4, r5, r12, lr}
233#ifdef __ARMEB__
234 orr r3, r3, r4, lsr #16
235 mov r4, r4, lsl #16
236 orr r4, r4, r5, lsr #16
237 mov r5, r5, lsl #16
238 orr r5, r5, r12, lsr #16
239 mov r12, r12, lsl #16
240 orr r12, r12, lr, lsr #16
241#else
242 orr r3, r3, r4, lsl #16
243 mov r4, r4, lsr #16
244 orr r4, r4, r5, lsl #16
245 mov r5, r5, lsr #16
246 orr r5, r5, r12, lsl #16
247 mov r12, r12, lsr #16
248 orr r12, r12, lr, lsl #16
249#endif
250 stmia r0!, {r3-r5, r12}
251 subs r2, r2, #0x10
252 bge .Lmemcpy_srcul2loop16
253 ldmia sp!, {r4, r5}
254 adds r2, r2, #0x0c
255 blt .Lmemcpy_srcul2l4
256
257.Lmemcpy_srcul2loop4:
258#ifdef __ARMEB__
259 mov r12, lr, lsl #16
260#else
261 mov r12, lr, lsr #16
262#endif
263 ldr lr, [r1], #4
264#ifdef __ARMEB__
265 orr r12, r12, lr, lsr #16
266#else
267 orr r12, r12, lr, lsl #16
268#endif
269 str r12, [r0], #4
270 subs r2, r2, #4
271 bge .Lmemcpy_srcul2loop4
272
273.Lmemcpy_srcul2l4:
274 sub r1, r1, #2
275 b .Lmemcpy_l4
276
277.Lmemcpy_srcul3:
278 cmp r2, #0x0c
279 blt .Lmemcpy_srcul3loop4
280 sub r2, r2, #0x0c
281 stmdb sp!, {r4, r5}
282
283.Lmemcpy_srcul3loop16:
284#ifdef __ARMEB__
285 mov r3, lr, lsl #24
286#else
287 mov r3, lr, lsr #24
288#endif
289 ldmia r1!, {r4, r5, r12, lr}
290#ifdef __ARMEB__
291 orr r3, r3, r4, lsr #8
292 mov r4, r4, lsl #24
293 orr r4, r4, r5, lsr #8
294 mov r5, r5, lsl #24
295 orr r5, r5, r12, lsr #8
296 mov r12, r12, lsl #24
297 orr r12, r12, lr, lsr #8
298#else
299 orr r3, r3, r4, lsl #8
300 mov r4, r4, lsr #24
301 orr r4, r4, r5, lsl #8
302 mov r5, r5, lsr #24
303 orr r5, r5, r12, lsl #8
304 mov r12, r12, lsr #24
305 orr r12, r12, lr, lsl #8
306#endif
307 stmia r0!, {r3-r5, r12}
308 subs r2, r2, #0x10
309 bge .Lmemcpy_srcul3loop16
310 ldmia sp!, {r4, r5}
311 adds r2, r2, #0x0c
312 blt .Lmemcpy_srcul3l4
313
314.Lmemcpy_srcul3loop4:
315#ifdef __ARMEB__
316 mov r12, lr, lsl #24
317#else
318 mov r12, lr, lsr #24
319#endif
320 ldr lr, [r1], #4
321#ifdef __ARMEB__
322 orr r12, r12, lr, lsr #8
323#else
324 orr r12, r12, lr, lsl #8
325#endif
326 str r12, [r0], #4
327 subs r2, r2, #4
328 bge .Lmemcpy_srcul3loop4
329
330.Lmemcpy_srcul3l4:
331 sub r1, r1, #1
332 b .Lmemcpy_l4
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */
/*-----------------------------------------------------------------------
 * void *memcpy(void *dst, const void *src, size_t len)
 * ABI:   ARM APCS / AAPCS, pre-UAL GAS syntax (ldmgeia etc.)
 * In:    r0 = dst, r1 = src, r2 = len
 * Out:   r0 = dst (original value, restored from the stack on exit)
 * Clobb: r1, r2, r3, r12, flags; r4, r5 and lr are saved/restored
 *        around the spans that borrow them.
 * This r271337 revision terminates the function with END(memcpy) so the
 * assembler emits a correct symbol size.
 *-----------------------------------------------------------------------*/
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
/* NOTE(review): single trailing underscore looks like a typo for
 * __APCS_26__; as written the 26-bit branch can never be selected.
 * Preserved as-is — the #else branch is correct for 32-bit APCS. */
#ifdef __APCS_26_
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)