/* $NetBSD: memcmp.S,v 1.3 2018/07/09 06:07:06 ryo Exp $ */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: memcmp.S,v 1.3 2018/07/09 06:07:06 ryo Exp $")

/*
 * int memcmp(const void *src1, const void *src2, size_t n)
 *
 * In:	x0 = src1, x1 = src2, x2 = n (byte count)
 * Out:	x0 = 0 when the first n bytes are equal; otherwise a negative or
 *	positive value according to whether the first differing byte of
 *	src1 is lower or higher (as an unsigned byte) than that of src2.
 *
 * Register use (leaf routine, no stack, only caller-saved registers):
 *	x9  = src1 cursor		x10 = src2 cursor
 *	x2  = bytes remaining (biased by -8 once the dword loop has exited)
 *	x3  = misalignment of src1, first in bytes and then in bits
 *	x4  = data from src1		x6  = data from src2
 *	x5, x7 = partial (halfword/byte) loads for src1 / src2
 *	x1  = scratch in the final compare
 *
 * The 8-byte data words are kept in host byte order, so on little-endian
 * the first memory byte is the least significant byte and on big-endian
 * it is the most significant byte.
 */
ENTRY(memcmp)
	mov	x9, x0
	mov	x10, x1
	mov	x0, xzr			/* default result: equal */
	cbz	x2, .Lmemcmp_ret	/* n == 0 compares equal */
#ifdef _KERNEL
	cmp	x2, #6
	b.eq	.Lmemcmp_6bytes		/* dedicated path for n == 6 */
#endif
	cmp	x2, #8
	b.ls	.Lmemcmp_lessthan8	/* n <= 8: plain byte loop */

	ands	x3, x9, #7
	b.eq	.Lmemcmp_dword_loop	/* src1 already dword aligned */

/*
 * The src1 address is not dword aligned.  Compare the 8 - x3 bytes up to
 * the next dword boundary of src1 so that the loop below always reads src1
 * aligned.  The src1 load is rounded down and therefore also fetches the x3
 * bytes in front of the buffer; they are shifted out before the compare.
 */
	add	x2, x2, x3		/* add unalignment to length */
	sub	x2, x2, #8		/* now subtract a dword */

	sub	x9, x9, x3		/* dword align src1 */

	ldr	x6, [x10], #8		/* load dword from src2 */
	sub	x10, x10, x3		/* src2 advanced by 8 - x3 only */
	lsl	x3, x3, #3		/* convert bytes to bits */
	ldr	x4, [x9], #8		/* load dword from src1 */
#ifdef __AARCH64EB__
	lsl	x4, x4, x3		/* discard leading bytes from data1 */
	lsr	x6, x6, x3		/* discard trailing bytes from data2 */
	lsl	x6, x6, x3		/* get back bit position */
#else
	lsr	x4, x4, x3		/* discard leading bytes from data1 */
	lsl	x6, x6, x3		/* discard trailing bytes from data2 */
	lsr	x6, x6, x3		/* get back bit position */
#endif
	subs	x0, x4, x6		/* compare data */
	b.ne	.Lmemcmp_last_compare	/* difference. find it */

/*
 * Main loop: compare 8 bytes per iteration while at least 8 remain.
 */
.Lmemcmp_dword_loop:
	subs	x2, x2, #8
	b.mi	.Lmemcmp_finish_dword	/* fewer than 8 bytes left */
	ldr	x4, [x9], #8
	ldr	x6, [x10], #8
	subs	x0, x4, x6
	b.eq	.Lmemcmp_dword_loop	/* no difference. go to loop */
	b	.Lmemcmp_last_compare	/* go find the difference. */

.Lmemcmp_finish_dword:
	/*
	 * we might have gotten here with nothing left. If so, just bail.
	 * x2 is (remaining - 8) here, so its low three bits still hold the
	 * number of bytes left (0..7); x0 is 0 on every path that gets here.
	 */
	tst	x2, #7
	b.eq	.Lmemcmp_ret
	mov	x4, xzr
	mov	x6, xzr
	/*
	 * Gather the 1..7 remaining bytes into x4/x6 with exact-size loads
	 * (word, halfword, byte as selected by bits 2, 1, 0 of x2) so nothing
	 * past the end of either buffer is read.  Earlier memory bytes are
	 * placed so that the final compare examines them first.
	 */
	tbz	x2, #2, .Lmemcmp_finish_word
	ldr	w4, [x9], #4
	ldr	w6, [x10], #4
#ifdef __AARCH64EB__
	lsl	x4, x4, #32		/* move to MSW */
	lsl	x6, x6, #32		/* move to MSW */
#endif

.Lmemcmp_finish_word:
	tbz	x2, #1, .Lmemcmp_finish_hword
	ldrh	w5, [x9], #2
	ldrh	w7, [x10], #2
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #16
	orr	x6, x6, x7, lsl #16
#else
	orr	x4, x4, x5, lsl #32
	orr	x6, x6, x7, lsl #32
#endif

.Lmemcmp_finish_hword:
	tbz	x2, #0, .Lmemcmp_last_compare0

	ldrb	w5, [x9]
	ldrb	w7, [x10]
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #8
	orr	x6, x6, x7, lsl #8
#else
	orr	x4, x4, x5, lsl #48
	orr	x6, x6, x7, lsl #48
#endif
	b	.Lmemcmp_last_compare0	/* go find the difference. */

/*
 * Short compare (1 <= n <= 8): one byte at a time.  x2 is biased by -1 so
 * that the subs in the loop clears the carry flag on the last byte; ccmp
 * then forces "not equal" and the loop ends even when the bytes match.
 * The result is the difference of the last pair of bytes loaded.
 */
.Lmemcmp_lessthan8:
	sub	x2, x2, #1
1:	ldrb	w4, [x9], #1
	ldrb	w5, [x10], #1
	subs	x2, x2, #1
	ccmp	x4, x5, #0, cs		/* compare only if bytes remain */
	b.eq	1b
	sub	x0, x4, x5

.Lmemcmp_ret:
	ret

#ifdef _KERNEL
/*
 * n == 6: load a word and a halfword from each buffer and fall through to
 * the final compare.  The layout matches the one built at
 * .Lmemcmp_finish_dword: the first memory byte is the least significant
 * byte on little-endian and the most significant byte on big-endian.
 */
.Lmemcmp_6bytes:
	ldr	w4, [x9], #4
	ldrh	w5, [x9]
#ifdef __AARCH64EB__
	lsl	x4, x4, #32		/* bytes 0-3 to MSW */
	orr	x4, x4, x5, lsl #16	/* bytes 4-5 right below them */
#else
	orr	x4, x4, x5, lsl #32
#endif
	ldr	w6, [x10], #4
	ldrh	w7, [x10]
#ifdef __AARCH64EB__
	lsl	x6, x6, #32		/* bytes 0-3 to MSW */
	orr	x6, x6, x7, lsl #16	/* bytes 4-5 right below them */
#else
	orr	x6, x6, x7, lsl #32
#endif
#endif /* _KERNEL */

/*
 * We have loaded the final bytes in x4 and x6 in host-endian. Now we have
 * to figure what the difference is (if any). First we subtract. If the
 * result is zero the data is equal and we return 0.
 *
 * Otherwise (.Lmemcmp_last_compare is also entered directly with
 * x0 = x4 - x6 != 0) we locate the first non-zero byte of the difference in
 * memory order with clz (after a byte reverse on little-endian), round the
 * bit position down to a byte boundary and shift that byte down to the LSB
 * of x1.  The sign comes from an unsigned compare of the two dwords with
 * the first memory byte most significant (hence the byte reverse of x4/x6
 * on little-endian): x0 becomes all-ones if src1 is lower, else zero, and
 * its low 8 bits are then replaced with the non-zero byte from x1.
 */
.Lmemcmp_last_compare0:
	subs	x0, x4, x6
	b.eq	.Lmemcmp_ret
.Lmemcmp_last_compare:
#ifdef __AARCH64EB__
	clz	x1, x0		/* find first non-zero byte */
	rev	x0, x0
#else
	rev	x1, x0
	clz	x1, x1		/* find first non-zero byte */
#endif
	bfi	x1, xzr, #0, #3	/* make it byte aligned */
	lsr	x1, x0, x1	/* shift to LSB */
#ifndef __AARCH64EB__
	rev	x4, x4		/* byte reverse */
	rev	x6, x6		/* byte reverse */
#endif
	subs	x0, x4, x6
	csetm	x0, cc		/* set mask bits as sign */
	bfm	x0, x1, #0, #7	/* extend with sign bit */
	ret
END(memcmp)