/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * File: mips_bcopy.s
 * Author: Chris Maeda
 * Date: June 1993
 *
 * Fast copy routine. Derived from aligned_block_copy.
*/

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include <machine/endian.h>

#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

#ifdef __ABICALLS__
	.abicalls
#endif

/*
 * bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length in bytes
 *
 * Entry points:
 *	memcpy	- takes (dst, src, len); swaps a0/a1 into bcopy order and
 *		  leaves the original a0 (the destination) in v0, which is
 *		  not written again before the return.
 *	bcopy, ovbcopy - take (src, dst, len) directly; v0 is not set.
 *
 * Overlap handling: when src < dst the copy runs from the end of the
 * buffers towards the start (label 6), otherwise it runs forwards.
 *
 * Registers written: AT, v1, t0-t3, a3, plus the argument registers
 * a0-a2 (and v0 on the memcpy entry).  No stack is used.
 *
 * The whole routine is assembled under ".set noreorder", so the
 * instruction written after each branch is its delay slot and executes
 * whether or not the branch is taken.  Several delay slots below do real
 * work; they are marked.
 *
 * SZREG, REG_L/REG_S, REG_LHI/REG_LLO and PTR_ADDU/PTR_SUBU come from
 * <machine/asm.h>.  NOTE(review): SZREG is used here as the register
 * width in bytes, and REG_LHI/REG_LLO as the two halves of an unaligned
 * register-sized load; confirm against the header.
 */

#define	SRCREG	a0
#define	DSTREG	a1
#define	SIZEREG	a2

LEAF(memcpy)
	.set	noat
	.set	noreorder

	move	v0, a0			# v0 = dst, kept as the return value
	move	a0, a1			# a0 = src
	move	a1, v0			# a1 = dst

ALEAF(bcopy)
ALEAF(ovbcopy)
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable.  eg if src and dest are both
	 * on a halfword boundary.
	 *
	 * The andi below sits in the delay slot of the branch to 6f;
	 * the backward path recomputes t1 after adjusting DSTREG.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Forward aligned->aligned copy, 8 words at a time.
	 * The li is the delay slot of the branch to 5f; that path
	 * does not read AT.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to a multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 * loop body
	 *
	 * Each pass moves 8 words as two groups of 4.  SRCREG and DSTREG
	 * are bumped in the middle, so the second group is addressed with
	 * negative offsets from the already advanced pointers.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move as whole words
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time.  When the destination
	 * is unaligned this loop handles the entire length, so the count
	 * is tested against zero rather than with a signed compare.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b		# count is unsigned: loop until it hits 0
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to move as whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDU SRCREG,SZREG		# same PTR_ADDU form as the other loops
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail in the byte loop
	nop

	/*
	 * Backward copy, used when src < dst.  Mirrors the forward code:
	 * both pointers are first moved one past the end of their buffers
	 * and every access uses offsets below the pointer.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get low alignment bits of dest end
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,SZREG-1	# (delay slot) low bits of src end
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Backward aligned->aligned copy, 8 words at a time.
	 * The li is the delay slot of the branch to 5f; that path
	 * does not read AT.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to a multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) no-op when t0 is 0
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 * loop body
	 *
	 * Top 4 words first, then the pointers drop by 8 words and the
	 * lower 4 words are addressed with non-negative offsets.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move as whole words
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time, highest address first.
	 * As in the forward path, this loop may see the entire length,
	 * so the count is tested against zero.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b		# count is unsigned: loop until it hits 0
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to move as whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
END(memcpy)