bcopy.S revision 1.4
/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 * When MEMCOPY or MEMMOVE is defined the same body is assembled under the
 * name memcpy or memmove instead.  In that case a0 is the destination,
 * a1 is the source, and the original destination pointer is returned in v0.
 *
 * The copy direction is chosen by an unsigned comparison of the two
 * pointers: forwards when src >= dst, backwards (starting from the end
 * of both buffers) when src < dst.
 *
 * Registers written: a0-a3, t0-t3, v1, AT, plus v0 for memcpy/memmove.
 * The routine runs under ".set noreorder", so the instruction that
 * follows every branch or jump is its delay slot and always executes.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	An unaligned destination is copied entirely by the
	 *	byte loop at 3: below.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest (delay slot)
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# get last bits of src (delay slot)
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	The li below doubles as the delay slot of the branch above;
	 *	it only writes AT, which the 5: path does not read.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b		# length is unsigned: test for zero, not sign
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	.set at 	#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 *	REG_LHI/REG_LLO come from the included headers; presumably
	 *	they are the endian-specific partial-word loads that together
	 *	assemble one unaligned word -- confirm against <mips/asm.h>.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes that make up whole words
	beq	a3,zero,3b		# less than one word: byte loop
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDU SRCREG,SZREG		# unsigned add, as in every other loop
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail in the byte loop
	nop

6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG		# point just past the end of src
	PTR_ADDU DSTREG,SIZEREG		# point just past the end of dst
	andi	t1,DSTREG,SZREG-1	# get last bits of dest
	bne	t1,zero,3f		# dest end unaligned: byte loop
	andi	t0,SRCREG,SZREG-1	# get last bits of src (delay slot)
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	The li below doubles as the delay slot of the branch above.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b		# length is unsigned: test for zero, not sign
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	.set at 	#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy backwards from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes that make up whole words
	beq	a3,zero,3b		# less than one word: byte loop
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)