/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/asm.h>

ENTRY(__memmove)
WEAK(memmove)
	/*
	 * Here we determine if a forward copy is possible. Forward copy is
	 * preferred to backward copy as it is more cache friendly.
	 *
	 * If a0 >= a1, t0 gives their distance; if t0 >= a2 then we can
	 *   copy forward.
	 * If a0 < a1, we can always copy forward. This will make t0 negative,
	 *   so an *unsigned* comparison will always have t0 >= a2.
	 *
	 * For forward copy we just delegate the task to memcpy.
	 */
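	/*
	 * Roughly equivalent C sketch of the check below (illustrative only;
	 * dest/src/count stand for a0/a1/a2 and are not names used anywhere
	 * else in this file):
	 *
	 *	if ((uintptr_t)dest - (uintptr_t)src >= count)
	 *		return memcpy(dest, src, count);
	 *	// otherwise fall through to the backward copy
	 */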
	sub	t0, a0, a1
	bltu	t0, a2, 1f
	tail	__memcpy
1:

	/*
	 * Register allocation for code below:
	 * a0 - end of uncopied dst
	 * a1 - end of uncopied src
	 * t0 - start of uncopied dst
	 */
	mv	t0, a0
	add	a0, a0, a2
	add	a1, a1, a2
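	/*
	 * From here on the copy runs backward: a0 and a1 point one past the
	 * last uncopied byte of dst and src, while t0 keeps the original dst
	 * so it can be returned at the end.
	 */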

	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 to a word boundary.
	 */
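	/*
	 * The remaining count in a2 is no longer needed past the threshold
	 * check, so reuse a2 to hold a0 rounded down to a word boundary; the
	 * loop below copies bytes until a0 reaches it.
	 */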
	andi	a2, a0, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
	 * an aligned word-wise copy. Otherwise we need to perform a
	 * misaligned word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/* Wordwise copy */
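	/*
	 * Bias t0 up by SZREG-1 so the loop exits as soon as fewer than
	 * SZREG bytes remain; the bias is removed again after the loop and
	 * the leftover bytes are handled by the byte-copy tail.
	 */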
	addi	t0, t0, SZREG-1
	bleu	a0, t0, 2f
1:
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	addi	a0, a0, -SZREG
	REG_S	a5, 0(a0)
	bgtu	a0, t0, 1b
2:
	addi	t0, t0, -(SZREG-1)

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, t0, 1b
2:

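	/* memmove returns the original dst pointer, which was kept in t0. */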
	mv	a0, t0
	ret

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we never access more words than necessary.
	 */

	/* Calculate shifts */
	slli	t3, a3, 3
	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
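	/*
	 * Worked example (RV64, SZREG = 8): if src is misaligned by a3 = 3
	 * bytes, then t3 = 24 and the sll below effectively shifts by
	 * 64 - 24 = 40, since only the low bits of t4 are used as the
	 * shift amount.
	 */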

	/* Load the initial value and align a1 */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)
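	/*
	 * a5 now holds the word containing the last uncopied source bytes;
	 * each iteration below pairs it with the next lower-address word.
	 */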

	addi	t0, t0, SZREG-1
	/* At least one iteration will be executed here, no check */
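	/*
	 * Each stored word is assembled as (previous word << t4) | (newly
	 * loaded lower-address word >> t3), i.e. from the bytes straddling
	 * the unaligned source position.
	 */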
1:
	sll	a4, a5, t4
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	srl	a2, a5, t3
	or	a2, a2, a4
	addi	a0, a0, -SZREG
	REG_S	a2, 0(a0)
	bgtu	a0, t0, 1b

	/* Update pointers to correct value */
	addi	t0, t0, -(SZREG-1)
	add	a1, a1, a3
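	/*
	 * a1 was rounded down earlier; adding back the misalignment a3 makes
	 * it point at the true end of uncopied src for the byte-copy tail.
	 */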

	j	.Lbyte_copy_tail

END(__memmove)