/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * void *memmove(void *dest, const void *src, size_t n)
 *
 * Overlap-safe copy for RISC-V.  SZREG is the native word size in bytes
 * and REG_L/REG_S the matching load/store macros (from <asm/asm.h>), so
 * the same code serves RV32 and RV64.
 *
 * In:       a0 = dest, a1 = src, a2 = n
 * Out:      a0 = dest (forward case delegates to __memcpy, which is
 *           expected to return dest as well)
 * Clobbers: t0, t3, t4, a2, a3, a4, a5
 */
ENTRY(__memmove)
WEAK(memmove)
	/*
	 * Here we determine if forward copy is possible. Forward copy is
	 * preferred to backward copy as it is more cache friendly.
	 *
	 * If a0 >= a1, t0 gives their distance, if t0 >= a2 then we can
	 *   copy forward.
	 * If a0 < a1, we can always copy forward. This will make t0 negative,
	 *   so a *unsigned* comparison will always have t0 >= a2.
	 *
	 * For forward copy we just delegate the task to memcpy.
	 */
	sub	t0, a0, a1
	bltu	t0, a2, 1f
	tail	__memcpy
1:

	/*
	 * Backward copy from here on.  Register allocation for code below:
	 * a0 - end of uncopied dst
	 * a1 - end of uncopied src
	 * t0 - start of uncopied dst (also the eventual return value)
	 */
	mv	t0, a0
	add	a0, a0, a2
	add	a1, a1, a2

	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 (the end-of-dst cursor) down to a
	 * word boundary.  The byte count in a2 is dead from here on (the
	 * remaining work is bounded by the a0/a1/t0 cursors), so a2 is
	 * reused to hold the rounded-down target address.
	 */
	andi	a2, a0, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
	 * aligned word-wise copy. Otherwise we need to perform misaligned
	 * word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/*
	 * Wordwise copy.  t0+SZREG-1 is the last dst address from which a
	 * full word may still be stored; loop while a0 stays above it.
	 */
	addi	t0, t0, SZREG-1
	bleu	a0, t0, 2f
1:
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	addi	a0, a0, -SZREG
	REG_S	a5, 0(a0)
	bgtu	a0, t0, 1b
2:
	addi	t0, t0, -(SZREG-1)	/* restore t0 to start of dst */

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, t0, 1b
2:

	mv	a0, t0			/* return original dest */
	ret

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we wouldn't access more words than necessary.
	 */

	/* Calculate shifts: t3 = 8 * (a1 misalignment), t4 = SZREG*8 - t3 */
	slli	t3, a3, 3
	sub	t4, x0, t3	/* negate is okay as shift will only look at LSBs */

	/* Load the initial value and align a1 down to a word boundary */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)

	addi	t0, t0, SZREG-1
	/* At least one iteration will be executed here, no check */
1:
	sll	a4, a5, t4	/* bytes carried over from the previous word */
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	srl	a2, a5, t3	/* bytes contributed by the new word */
	or	a2, a2, a4
	addi	a0, a0, -SZREG
	REG_S	a2, 0(a0)
	bgtu	a0, t0, 1b

	/* Update pointers to correct value */
	addi	t0, t0, -(SZREG-1)	/* restore t0 to start of dst */
	add	a1, a1, a3		/* undo the alignment of a1 */

	j	.Lbyte_copy_tail

END(__memmove)