/* $NetBSD: memcmp.S,v 1.3 2011/01/15 07:31:12 matt Exp $ */

/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
 * ==========================================================================
 * Optimized memcmp implementation for IBM PowerPC 405/440.
 *
 * Copyright (c) 2003, IBM Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *   * Redistributions of source code must retain the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *   * Neither the name of IBM nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ==========================================================================
 *
 * Function:	Compare two memory buffers (up to n bytes)
 *
 *		int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Input:	r3 - buffer 1 address
 *		r4 - buffer 2 address
 *		r5 - number of bytes to compare (unsigned)
 * Output:	r3 - -1 (buffer 1 less), 0 (equal), 1 (buffer 1 greater)
 *
 * Registers:	r11     - running buffer 1 pointer (r3 is reused for results)
 *		r4      - running buffer 2 pointer
 *		r5      - bytes still to be compared
 *		r6, r7  - current buffer 1 words
 *		r8, r9  - current buffer 2 words
 *		r10,r12 - scratch (boundary distances, loop count, shifts)
 *		ctr     - loop counter
 *
 * Notes:	Whole words are compared with cmplw, so the byte ordering
 *		of the result relies on big-endian word loads.
 *
 *		Word loads may fetch bytes past the end of a buffer.  To
 *		keep such loads from touching a different page, the word
 *		path is only used while both pointers are more than 8 bytes
 *		away from a 4 KB boundary; otherwise bytes are compared one
 *		at a time.
 *
 *		The count is a size_t, so every test on r5 uses an unsigned
 *		compare.  A signed compare would treat counts >= 0x80000000
 *		as "4 bytes or fewer" and report equality unconditionally.
 *
 * ==========================================================================
 */

#include <machine/asm.h>

	.text
	.align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

	/*
	 * Check count passed in r5.  If zero, return 0; otherwise continue.
	 */
	cmplwi	%r5, 0
	beq-	ret_0

	/*
	 * Most of the time the difference is found in the first
	 * several bytes.  The following code minimizes the number
	 * of load operations for short compares.
	 */

	mr	%r11, %r3		/* Save buffer 1 */

again:

	/* (~addr >> 3) & 0x1ff: dwords left before the 4 KB boundary */
	not	%r10, %r4		/* buffer 2: bytes to page bdy */
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy */
	beq-	bytebybyte		/* If <= 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r8, 0(%r4)		/* load 1st buffer 2 word */

	not	%r12, %r11		/* buffer 1: bytes to page bdy */
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy */
	beq-	bytebybyte		/* If <= 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r6, 0(%r11)		/* load 1st buffer 1 word */

	cmplwi	%r5, 4			/* If remaining count <= 4 */
	ble+	first4			/* handle specially (unsigned) */

	cmplw	%r8, %r6		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	lwzu	%r9, 4(%r4)		/* load 2nd buffer 2 word */
	lwzu	%r7, 4(%r11)		/* load 2nd buffer 1 word */

	cmplwi	%r5, 8			/* If remaining count <= 8 */
	ble+	last4			/* handle specially (unsigned) */

	cmplw	%r9, %r7		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	addi	%r5, %r5, -8		/* 8 bytes compared so far */

	/* r4/r11 point at the 2nd word; +4 is the next unread byte */
	addi	%r10, %r4, 0x0004
	not	%r10, %r10		/* buffer 2: bytes to page bdy */
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy */
	addi	%r12, %r11, 0x0004
	not	%r12, %r12		/* buffer 1: bytes to page bdy */
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy */

	/*
	 * Figure out whether buffer 1 or buffer 2 is closer to the page
	 * boundary.  The main loop count is then set to the number of
	 * double words available in the buffer that is closest, further
	 * limited by the number of whole double words left in the count.
	 */

	cmpw	%r10, %r12		/* Find closest */
	blt	lt

	mr	%r10, %r12

lt:

	srwi	%r12, %r5, 3		/* whole dwords left in the count */
	cmpw	%r10, %r12		/* (both values fit in 29 bits) */
	blt	lt2

	mr	%r10, %r12
lt2:
	cmpwi	%r10, 0			/* nothing for the main loop? */
	bne	lt3
	addi	%r4, %r4, 0x0004	/* step both pointers past the */
	addi	%r11, %r11, 0x0004	/* 2nd word and start over */
	b	again
lt3:
	mtctr	%r10			/* dword count for loop */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */

	b	in			/* To the loop */

loop:					/* main loop */

	cmplw	%r8, %r6		/* Compare first buffer 2 word */
	bne-	all_done		/* with first buffer 1 word */
					/* If different, we're done */
	cmplw	%r9, %r7		/* Compare second buffer 2 word */
					/* with second buffer 1 word */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */
					/* (lwzu leaves cr0 untouched) */
	bne-	all_done		/* If different, we're done */

in:

	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word */
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word */
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word */

	bdnz+	loop			/* Do more DW's if cnt > 0 */

	slwi	%r10, %r10, 3		/* bytes handled by the loop */
	subf	%r5, %r10, %r5		/* adjust byte counter */

	cmplw	%r8, %r6		/* compare last dword */
	addi	%r4, %r4, 4		/* r4 -> next unread byte */
	bne-	all_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4		/* r11 -> next unread byte */
	bne-	all_done

bytebybyte:

	/*
	 * We've gotten close to a page boundary: do a byte-by-byte
	 * compare for the following 8 bytes, and then go back to
	 * the full-word compare loop.
	 */

	li	%r3, 8			/* loop count */
	cmplw	%r3, %r5		/* take min(8, counter), unsigned */
	ble	f2

	mr.	%r3, %r5		/* fewer than 8 bytes left */

	beqlr				/* none left: return 0 (r3 == 0) */

f2:

	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust counter */

bbb:

	lbz	%r6, 0(%r11)		/* byte compare loop */

	addi	%r11, %r11, 1

	lbz	%r8, 0(%r4)

	addi	%r4, %r4, 1

	cmplw	%r8, %r6

	bdnzt+	eq, bbb			/* loop while equal and ctr != 0 */

	bne	all_done

	cmplwi	%r5, 0
	bgt	again			/* handle the rest */

	xor	%r3, %r3, %r3

	blr

first4:
	/* Count is 1..4: drop the bytes of the first word beyond it */
	subfic	%r3, %r5, 4		/* unused bytes in the word */
	rlwinm	%r3, %r3, 3, 0, 31	/* count *= 8 */
	srw	%r6, %r6, %r3		/* align 1st buffer 1 word */
	srw	%r8, %r8, %r3		/* align 1st buffer 2 word */

	cmplw	%r8, %r6		/* get result */
	bne	all_done
	xor	%r3, %r3, %r3
	blr

last4:
	/* Count is 5..8: first word matched, trim the second word */
	subfic	%r10, %r5, 8		/* unused bytes in the word */
	rlwinm	%r10, %r10, 3, 0, 31	/* count *= 8 */
	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word */
	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word */

	cmplw	%r9, %r7		/* get result */
	bne	all_done
ret_0:
	xor	%r3, %r3, %r3		/* Equal result */
	blr

all_done:

	/*
	 * Entered with cr0 set by "cmplw buffer2, buffer1" on unequal
	 * data: lt means buffer 2 < buffer 1, i.e. buffer 1 is greater.
	 */
	blt	finish_lt

	addi	%r3, 0, -1		/* Less than result */

	blr

finish_lt:

	addi	%r3, 0, 1		/* Greater than result */

	blr
END(memcmp)