/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

/*
 * NOTE(review): the seven #include directives below carry no header operand
 * in this copy of the file (the <...> names appear to have been stripped).
 * They must be restored before the file can be preprocessed; the headers
 * presumably supply SYM_FUNC_START/SYM_FUNC_END/SYM_FUNC_ALIAS,
 * EXPORT_SYMBOL, ALTERNATIVE, _ASM_NOKPROBE, CPU_FEATURE_UAL,
 * STACK_FRAME_NON_STANDARD and the a0/t0/ra register names - TODO confirm.
 */
#include
#include
#include
#include
#include
#include
#include

/*
 * fill_to_64 r0
 *
 * Replicate the low byte of \r0 into all eight bytes of \r0 by doubling the
 * filled width at each step:
 *   bits  7:0  -> bits 15:8
 *   bits 15:0  -> bits 31:16
 *   bits 31:0  -> bits 63:32
 * Clobbers nothing except \r0 itself.
 */
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm

	.section .noinstr.text, "ax"

/*
 * memset / __memset entry point.
 *
 * Consists of a single branch: by default to __memset_generic, with
 * "b __memset_fast" registered as the alternative keyed on CPU_FEATURE_UAL.
 * Arguments are passed through untouched in a0/a1/a2.
 */
SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill that only issues st.b, so it never performs a
 * multi-byte (potentially unaligned) store.
 *
 * a0: s
 * a1: c
 * a2: n
 *
 * Returns s in a0. Uses a3 as scratch to remember the original s.
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* keep s for the return value */
	beqz	a2, 2f			/* n == 0: nothing to store */

1:	st.b	a1, a0, 0		/* *p = (char)c */
	addi.d	a0, a0, 1		/* p++ */
	addi.d	a2, a2, -1		/* n-- */
	/* signed compare: keep looping while the remaining count is > 0 */
	bgt	a2, zero, 1b

2:	move	a0, a3			/* return the original s */
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Fill using 8-byte stores. The first and the last st.d are issued at s and
 * at (s + n - 8) regardless of alignment, so this path performs unaligned
 * stores; everything in between is stored at 8-byte aligned addresses.
 * Sizes 0..8 are dispatched through a table of fixed-size handlers.
 *
 * a0: s
 * a1: c
 * a2: n
 *
 * Returns s in a0 (a0 is never modified here).
 * Scratch registers: a1 (widened fill pattern), a2, a3, a4, t0.
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	/* t0 = (n < 9), unsigned: sizes 0..8 take the jump table */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	add.d	a2, a0, a2		/* a2 = end = s + n */
	st.d	a1, a0, 0		/* head: first 8 bytes, s may be unaligned */

	/* align up address */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0	/* a3 = (s + 8) with bits 2:0 cleared */

	/* skip the bulk loop unless a3 < end - 64 */
	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64	/* loop while a3 < end - 64 (unsigned) */

	/* set the remaining bytes */
	/*
	 * Here a3 >= end - 64. Each step below stores a 32/16/8-byte chunk
	 * only when a3 is still strictly below (end - chunk), leaving the
	 * final <= 8 bytes to the tail store at .Llt8.
	 */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
	/* tail: last 8 bytes ending at end; may overlap earlier stores */
	st.d	a1, a2, -8

	/* return */
	jr	ra

	/*
	 * Computed jump for n in 0..8.
	 *
	 * pcaddi t0, 4 yields the address 16 bytes past the pcaddi itself,
	 * i.e. just after the four dispatch instructions, where handler 0
	 * sits. The size is scaled by 16 (slli.d by 4), so every handler
	 * must occupy exactly one 16-byte slot: this layout depends on each
	 * ".align 4" below producing a 16-byte boundary and on no handler
	 * exceeding four instructions. Do not insert instructions here
	 * without revisiting those constants.
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4			/* t0 = address of handler 0 */
	slli.d	a2, a2, 4		/* a2 = n * 16 (slot offset) */
	add.d	t0, t0, a2
	jr	t0			/* jump to handler n */

	/* n == 0: nothing to store */
	.align	4
0:	jr	ra

	/* n == 1 */
	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	/* n == 3: halfword + byte */
	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	/* n == 5: word + byte */
	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6: word + halfword */
	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/*
	 * n == 7: two overlapping word stores, bytes 0..3 and 3..6.
	 * The offset of 3 is intentional; byte 3 is simply written twice.
	 */
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

/*
 * NOTE(review): presumably exempts __memset_fast from stack-frame validation
 * because of the computed "jr t0" dispatch above - confirm against the
 * definition of STACK_FRAME_NON_STANDARD.
 */
STACK_FRAME_NON_STANDARD __memset_fast