/* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
#endif

/*
 * memset(dst, c, len) / bzero(dst, len)
 *
 * SuperH assembly, run through the C preprocessor.  One source builds
 * both routines: defining BZERO selects the bzero entry point, forces
 * the fill value to zero and drops the return value.
 *
 * Register roles (see the defines below):
 *   REG_DST   start of the region that is still to be filled
 *   REG_LEN   byte count; reused as an end pointer in the tiny-fill path
 *   REG_C     fill value, widened to 16 or 32 bits before wider stores
 *   REG_DST0  memset only: copy of the original dst, returned in r0
 *   REG_PTR   (r0) end pointer in the large path; filling runs backwards
 *   REG_TMP1  scratch
 *
 * Strategy by length:
 *   len <  12   byte loop, unrolled four times
 *   len <  28   computed jump into a table of byte or word stores
 *   len >= 28   align both ends, then 32-byte and 4-byte longword fills
 *
 * bt/s, bf/s, jmp, rts and bra execute the instruction that follows
 * them (the delay slot) before control transfers.  Several delay slots
 * below do real work, and the jump tables depend on every instruction
 * being 2 bytes long, so do not reorder or insert instructions casually.
 */

#define REG_PTR		r0
#define REG_TMP1	r1

#ifdef BZERO
# define REG_C		r2
# define REG_DST	r4
# define REG_LEN	r5
#else
# define REG_DST0	r3
# define REG_DST	r4
# define REG_C		r5
# define REG_LEN	r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of the bt/s above: "mov #0,REG_C" for bzero; for
	 * memset it is the "tst" below, which then is not run a second time.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done
	add	#1,REG_DST		/* REG_DST = dst + 1: "one byte left" mark */

	/*
	 * unroll 4 loops
	 *
	 * Each compare is made before the store it guards: T set means the
	 * following store writes the final byte.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * 12 <= len < 28.  An odd dst is filled with byte stores; an even dst
 * goes to small_aligned and is filled with word stores.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1
	shll	REG_LEN			/* 2 bytes of code per byte stored */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* enter the table len stores before 1f */
	jmp	@r0
	mov	REG_C,r0		/* the stores below take their data from r0 */

	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * The final store sits in the rts delay slot for bzero and in front
	 * of the rts for memset, so label 1 lands 32 instructions after the
	 * .align in both builds.
	 */
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* is len even? */
	bt/s	1f
	add	#-1,r0			/* r0 = len - 1 (delay slot, always run) */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	/*
	 * Each word store is 2 bytes of code and fills 2 bytes, so the
	 * table offset is len itself (no doubling as in the byte table).
	 */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* the stores below take their data from r0 */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
	/* Same layout trick as the byte table: label 1 follows 16 instructions. */
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif



/*
 * len >= 28.  Replicate the fill byte into all four bytes of REG_C,
 * trim unaligned bytes off both ends, then fill longwords from the end
 * of the region back towards dst.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/* The "mov #32" at aligned: is the delay slot of the bf/s above. */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len): leave the loop */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)
9:

	/*
	 * fill the remaining 4*n bytes
	 *
	 * Same compare-before-store scheme as the byte loop at the top,
	 * with REG_DST advanced by 4 as the "one longword left" mark.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) { */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* T = ((dst & 2) == 0) */
	mov.b	REG_C,@REG_DST		/* *dst++ = c; */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* redo the test on the updated dst */
2:					/* } */
					/* if (dst & 2) { */
	bt	4f
	mov.w	REG_C,@REG_DST		/* *(uint16_t *)dst = c; dst += 2; */
	add	#2,REG_DST
4:					/* } */


	tst	#3,REG_PTR		/* if (ptr & 3) { */
	bt/s	4f			/* delay slot: the tst at unaligned_len */
unaligned_len:
	tst	#1,REG_PTR		/* if (ptr & 1) { */
	bt/s	2f
	tst	#2,REG_PTR		/* T = ((ptr & 2) == 0), tested at 2: */
	mov.b	REG_C,@-REG_PTR		/* *--ptr = c; */
2:					/* } */
					/* if (ptr & 2) { */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/* *--(uint16_t *)ptr = c; */
4:					/* } */
					/* } */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* len = ptr - dst (delay slot) */