/*	$NetBSD: bcopy_page.S,v 1.7 2003/10/13 21:03:13 scw Exp $	*/

/*-
 * Copyright (c) 1995 Scott Stevens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Scott Stevens.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * bcopy_page.S
 *
 * page optimised bcopy and bzero routines
 *
 * Created : 08/04/95
 */

#include <machine/asm.h>

__FBSDID("$FreeBSD$");

#include "assym.s"

#ifndef _ARM_ARCH_5E

/* #define BIG_LOOPS */

/*
 * bcopy_page(src, dest)
 *
 * Optimised copy page routine.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   r2                - loop counter (iterations remaining)
 *   r3-r8, ip, lr     - data registers used by COPY_CHUNK
 *   r0, r1            - advanced past the copied data (post-increment)
 *
 * r4-r8 and lr are pushed by SAVE_REGS; RESTORE_REGS pops them again,
 * loading the saved lr straight into pc, which is how the routine returns.
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise.
 */

/* Bytes moved by one COPY_CHUNK: eight registers of four bytes each. */
#define	CHUNK_SIZE	32

#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * COPY_CHUNK loads eight registers from [r0] and stores them to [r1],
 * writing the advanced addresses back to r0 and r1.  It can be overridden
 * by defining COPY_CHUNK before this point.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * SAVE_REGS / RESTORE_REGS bracket the routine.  lr has to be saved
 * because COPY_CHUNK uses it as a data register.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS	stmfd	sp!, {r4-r8, lr}; _SAVE({r4-r8, lr})
#define	RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 chunks = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 chunks = 128 bytes per pass */
#endif

1:
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1
	bne	1b

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)

/*
 * bzero_page(dest)
 *
 * Optimised zero page routine.
 *
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r2                - loop counter (iterations remaining)
 *   r3-r8, ip, lr     - all set to zero and stored eight at a time
 *   r0                - advanced past the zeroed data (post-increment)
 *
 * r4-r8 and lr are pushed on entry; the final ldmfd pops them, loading the
 * saved lr into pc to return.
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise
 */

ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}
	_SAVE({r4-r8, lr})
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 stores = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 stores = 128 bytes per pass */
#endif
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#endif

	subs	r2, r2, #1
	bne	1b

	ldmfd	sp!, {r4-r8, pc}
END(bzero_page)

#else	/* _ARM_ARCH_5E */

/*
 * armv5e version of bcopy_page
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   ip        - loop counter (iterations remaining)
 *   r2/r3     - first data pair, stored with strd
 *   r4/r5     - second data pair, stored with strd
 *   r0, r1    - advanced past the copied data (post-increment)
 *
 * r4 and r5 are pushed on entry and popped before returning.
 *
 * Each pass of the loop stores 16 doublewords (0x80 bytes).  The two
 * register pairs alternate so that one pair is being loaded while the
 * other is being stored.  The first two words are loaded before the loop
 * is entered, and the last two loads of a pass fetch the first two words
 * of the next pass; they are conditional (gt) so nothing is read for a
 * pass that will not run.  The offset comments give the source offset of
 * each load relative to the start of the current pass.
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128.
 *
 * NOTE(review): the last pld of the final pass addresses the first bytes
 * after the source page; presumably harmless for a prefetch - confirm for
 * the target core.
 */
ENTRY(bcopy_page)
	pld	[r0]
	stmfd	sp!, {r4, r5}
	_SAVE({r4, r5})
	/*
	 * One pass copies 128 bytes, so derive the pass count from
	 * PAGE_SIZE (32 for a 4096-byte page) instead of hard-coding it,
	 * matching how bzero_page below is driven by PAGE_SIZE.
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, [r1], #0x08
	bgt	1b
	ldmfd	sp!, {r4, r5}
	RET
END(bcopy_page)

/*
 * armv5e version of bzero_page
 *
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r1        - bytes remaining, starts at PAGE_SIZE
 *   r2/r3     - zero pair stored with strd
 *   r0        - advanced past the zeroed data (post-increment)
 *
 * No registers are pushed.  Each pass issues 16 strd instructions
 * (128 bytes); the numeric comments mark the running byte total at each
 * group of four stores.
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a non-zero multiple of 128,
 *   since the loop only exits when r1 reaches exactly zero.
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE
	mov	r2, #0
	mov	r3, #0
1:	strd	r2, [r0], #8		/* 32 */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8		/* 64 */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8		/* 96 */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8		/* 128 */
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
#endif	/* _ARM_ARCH_5E */