/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * File: mips_bcopy.s
 * Author: Chris Maeda
 * Date: June 1993
 *
 * Fast copy routine. Derived from aligned_block_copy.
 */


#include <machine/asm.h>
__FBSDID("$FreeBSD: releng/10.2/lib/libc/mips/string/bcopy.S 209231 2010-06-16 12:55:14Z jchandra $");

#define _LOCORE /* XXX not really, just assembly-code source */
#include <machine/endian.h>

#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

#ifdef __ABICALLS__
	.abicalls
#endif

/*
 * bcopy(src, dst, len)		-- default build
 * memcpy(dst, src, len)	-- when MEMCOPY is defined
 * memmove(dst, src, len)	-- when MEMMOVE is defined
 *
 * Register roles (selected by the SRCREG/DSTREG/SIZEREG macros below):
 *
 *	bcopy:		a0 = src address, a1 = dst address, a2 = length
 *	memcpy/memmove:	a0 = dst address, a1 = src address, a2 = length
 *
 * For memcpy/memmove, v0 is loaded with the original dst address before
 * anything else is modified.
 *
 * Scratch registers written by this routine: AT, v1, a3, t0-t3, and the
 * three argument registers themselves.
 *
 * Strategy:
 *	- If src < dst (unsigned compare) the copy runs backwards from the
 *	  end of both buffers, otherwise forwards.  All three entry points
 *	  share this code, so all of them tolerate overlapping buffers.
 *	- dst not SZREG-aligned:	byte loop for the whole length.
 *	- dst aligned, src unaligned:	REG_LHI/REG_LLO load pair + aligned
 *					store, one register per iteration.
 *	- both aligned:			8 registers per iteration, then one
 *					register per iteration.
 *	- Whatever is left (< SZREG bytes) is finished by the byte loop.
 *
 * The whole body is assembled under ".set noreorder": the instruction that
 * textually follows each branch is its delay slot.  Do not reorder lines
 * without re-checking every delay slot.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define FUNCTION memcpy
#else
#define FUNCTION memmove
#endif
#define SRCREG a1
#define DSTREG a0
#else
#define FUNCTION bcopy
#define SRCREG a0
#define DSTREG a1
#endif

#define SIZEREG a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 * aligned -> aligned			(mostly)
	 * unaligned -> aligned			(sometimes)
	 * aligned,unaligned -> unaligned	(almost never)
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable, e.g. if src and dest are both
	 * on a halfword boundary.
	 */
	andi	t1,DSTREG,(SZREG-1)	# (delay slot) low bits of dest
	bne	t1,zero,3f		# dest unaligned -> byte copy
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) low bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Forward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * at a time.
	 */
98:
	li	AT,-(SZREG*8)		# (delay slot of the bne above; AT is scratch)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 * loop body: the second half uses negative offsets because
	 * SRCREG/DSTREG are advanced in the middle of the iteration.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the iteration

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move in whole registers
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = trailing bytes (< SZREG)
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	/*
	 * The length is unsigned, so test for "not yet zero" rather than
	 * "greater than zero"; SIZEREG is non-zero on loop entry and drops
	 * by one per pass, so it always reaches zero exactly.
	 */
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Forward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer in whole registers
	beq	a3,zero,3b		# less than one register: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	/*
	 * REG_LHI/REG_LLO come from <machine/asm.h>; presumably the
	 * endian-selected partial-load pair that together assemble one
	 * unaligned register -- confirm against that header.
	 */
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	/*
	 * Use PTR_ADDU like every other pointer bump in this file:
	 * address arithmetic must not be able to raise an overflow trap.
	 */
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the trailing bytes
	nop

	/*
	 * Backward copy.  SRCREG/DSTREG are moved to one past the end of
	 * each buffer and every access below uses a negative offset or a
	 * pre-decremented pointer.  Local labels 1-5 are re-used with the
	 * same meaning as in the forward half.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# low bits of (end of) dest
	bne	t1,zero,3f		# dest end unaligned -> byte copy
	andi	t0,SRCREG,SZREG-1	# (delay slot) low bits of (end of) src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 * Backward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 * at a time.
	 */
	li	AT,(-8*SZREG)		# (delay slot of the bne above; AT is scratch)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 * loop body: the second half uses non-negative offsets because
	 * SRCREG/DSTREG are moved down in the middle of the iteration.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the iteration

	/*
	 * Copy one register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = bytes to move in whole registers
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = leading bytes (< SZREG)
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	/* unsigned length: loop until the count is exactly zero */
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 * Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer in whole registers
	beq	a3,zero,3b		# less than one register: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the leading bytes
	nop

	.set	reorder
	.set	at
END(FUNCTION)