/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * File:	mips_bcopy.s
 * Author:	Chris Maeda
 * Date:	June 1993
 *
 * Fast copy routine.  Derived from aligned_block_copy.
35178580Simp */ 36178580Simp 37178580Simp 38178580Simp#include <machine/asm.h> 39178580Simp__FBSDID("$FreeBSD$"); 40178580Simp 41209231Sjchandra#define _LOCORE /* XXX not really, just assembly-code source */ 42209231Sjchandra#include <machine/endian.h> 43209231Sjchandra 44178580Simp#if defined(LIBC_SCCS) && !defined(lint) 45209231Sjchandra#if 0 46178580Simp ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") 47209231Sjchandra#else 48209231Sjchandra ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $") 49209231Sjchandra#endif 50178580Simp#endif /* LIBC_SCCS and not lint */ 51178580Simp 52178580Simp#ifdef __ABICALLS__ 53178580Simp .abicalls 54178580Simp#endif 55178580Simp 56178580Simp/* 57178580Simp * bcopy(caddr_t src, caddr_t dst, unsigned int len) 58178580Simp * 59178580Simp * a0 src address 60178580Simp * a1 dst address 61178580Simp * a2 length 62178580Simp */ 63178580Simp 64178580Simp#if defined(MEMCOPY) || defined(MEMMOVE) 65178580Simp#ifdef MEMCOPY 66178580Simp#define FUNCTION memcpy 67178580Simp#else 68178580Simp#define FUNCTION memmove 69178580Simp#endif 70178580Simp#define SRCREG a1 71178580Simp#define DSTREG a0 72178580Simp#else 73178580Simp#define FUNCTION bcopy 74178580Simp#define SRCREG a0 75178580Simp#define DSTREG a1 76178580Simp#endif 77178580Simp 78178580Simp#define SIZEREG a2 79178580Simp 80178580SimpLEAF(FUNCTION) 81178580Simp .set noat 82178580Simp .set noreorder 83178580Simp 84178580Simp#if defined(MEMCOPY) || defined(MEMMOVE) 85178580Simp /* set up return value, while we still can */ 86178580Simp move v0,DSTREG 87178580Simp#endif 88178580Simp /* 89178580Simp * Make sure we can copy forwards. 90178580Simp */ 91178580Simp sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG 92178580Simp bne t0,zero,6f # copy backwards 93178580Simp 94178580Simp /* 95178580Simp * There are four alignment cases (with frequency) 96178580Simp * (Based on measurements taken with a DECstation 5000/200 97178580Simp * inside a Mach kernel.) 
98178580Simp * 99178580Simp * aligned -> aligned (mostly) 100178580Simp * unaligned -> aligned (sometimes) 101178580Simp * aligned,unaligned -> unaligned (almost never) 102178580Simp * 103178580Simp * Note that we could add another case that checks if 104178580Simp * the destination and source are unaligned but the 105178580Simp * copy is alignable. eg if src and dest are both 106178580Simp * on a halfword boundary. 107178580Simp */ 108209231Sjchandra andi t1,DSTREG,(SZREG-1) # get last bits of dest 109209231Sjchandra bne t1,zero,3f # dest unaligned 110209231Sjchandra andi t0,SRCREG,(SZREG-1) # get last bits of src 111209231Sjchandra bne t0,zero,5f 112178580Simp 113178580Simp /* 114209231Sjchandra * Forward aligned->aligned copy, 8 words at a time. 115178580Simp */ 116209231Sjchandra98: 117209231Sjchandra li AT,-(SZREG*8) 118209231Sjchandra and t0,SIZEREG,AT # count truncated to multiples 119209231Sjchandra PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr 120209231Sjchandra sltu AT,SRCREG,a3 # any work to do? 
121209231Sjchandra beq AT,zero,2f 122209231Sjchandra PTR_SUBU SIZEREG,t0 123178580Simp 124178580Simp /* 125178580Simp * loop body 126178580Simp */ 127178580Simp1: # cp 128209231Sjchandra REG_L t3,(0*SZREG)(SRCREG) 129209231Sjchandra REG_L v1,(1*SZREG)(SRCREG) 130209231Sjchandra REG_L t0,(2*SZREG)(SRCREG) 131209231Sjchandra REG_L t1,(3*SZREG)(SRCREG) 132209231Sjchandra PTR_ADDU SRCREG,SZREG*8 133209231Sjchandra REG_S t3,(0*SZREG)(DSTREG) 134209231Sjchandra REG_S v1,(1*SZREG)(DSTREG) 135209231Sjchandra REG_S t0,(2*SZREG)(DSTREG) 136209231Sjchandra REG_S t1,(3*SZREG)(DSTREG) 137209231Sjchandra REG_L t1,(-1*SZREG)(SRCREG) 138209231Sjchandra REG_L t0,(-2*SZREG)(SRCREG) 139209231Sjchandra REG_L v1,(-3*SZREG)(SRCREG) 140209231Sjchandra REG_L t3,(-4*SZREG)(SRCREG) 141209231Sjchandra PTR_ADDU DSTREG,SZREG*8 142209231Sjchandra REG_S t1,(-1*SZREG)(DSTREG) 143209231Sjchandra REG_S t0,(-2*SZREG)(DSTREG) 144209231Sjchandra REG_S v1,(-3*SZREG)(DSTREG) 145209231Sjchandra bne SRCREG,a3,1b 146209231Sjchandra REG_S t3,(-4*SZREG)(DSTREG) 147178580Simp 148178580Simp /* 149178580Simp * Copy a word at a time, no loop unrolling. 150178580Simp */ 151178580Simp2: # wordcopy 152209231Sjchandra andi t2,SIZEREG,(SZREG-1) # get byte count / SZREG 153209231Sjchandra PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG 154209231Sjchandra beq t2,zero,3f 155209231Sjchandra PTR_ADDU t0,SRCREG,t2 # stop at t0 156209231Sjchandra PTR_SUBU SIZEREG,SIZEREG,t2 157178580Simp1: 158209231Sjchandra REG_L t3,0(SRCREG) 159209231Sjchandra PTR_ADDU SRCREG,SZREG 160209231Sjchandra REG_S t3,0(DSTREG) 161209231Sjchandra bne SRCREG,t0,1b 162209231Sjchandra PTR_ADDU DSTREG,SZREG 163178580Simp 164178580Simp3: # bytecopy 165209231Sjchandra beq SIZEREG,zero,4f # nothing left to do? 
166178580Simp nop 167178580Simp1: 168209231Sjchandra lb t3,0(SRCREG) 169209231Sjchandra PTR_ADDU SRCREG,1 170209231Sjchandra sb t3,0(DSTREG) 171209231Sjchandra PTR_SUBU SIZEREG,1 172209231Sjchandra bgtz SIZEREG,1b 173209231Sjchandra PTR_ADDU DSTREG,1 174178580Simp 175178580Simp4: # copydone 176178580Simp j ra 177178580Simp nop 178178580Simp 179178580Simp /* 180178580Simp * Copy from unaligned source to aligned dest. 181178580Simp */ 182178580Simp5: # destaligned 183209231Sjchandra andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG 184209231Sjchandra PTR_SUBU a3,SIZEREG,t0 # number of words to transfer 185209231Sjchandra beq a3,zero,3b 186178580Simp nop 187209231Sjchandra move SIZEREG,t0 # this many to do after we are done 188209231Sjchandra PTR_ADDU a3,SRCREG,a3 # stop point 189178580Simp 190178580Simp1: 191209231Sjchandra REG_LHI t3,0(SRCREG) 192209231Sjchandra REG_LLO t3,SZREG-1(SRCREG) 193209231Sjchandra PTR_ADDI SRCREG,SZREG 194209231Sjchandra REG_S t3,0(DSTREG) 195209231Sjchandra bne SRCREG,a3,1b 196209231Sjchandra PTR_ADDI DSTREG,SZREG 197178580Simp 198209231Sjchandra b 3b 199178580Simp nop 200178580Simp 201178580Simp6: # backcopy -- based on above 202209231Sjchandra PTR_ADDU SRCREG,SIZEREG 203209231Sjchandra PTR_ADDU DSTREG,SIZEREG 204209231Sjchandra andi t1,DSTREG,SZREG-1 # get last 3 bits of dest 205209231Sjchandra bne t1,zero,3f 206209231Sjchandra andi t0,SRCREG,SZREG-1 # get last 3 bits of src 207209231Sjchandra bne t0,zero,5f 208178580Simp 209178580Simp /* 210178580Simp * Forward aligned->aligned copy, 8*4 bytes at a time. 211178580Simp */ 212209231Sjchandra li AT,(-8*SZREG) 213209231Sjchandra and t0,SIZEREG,AT # count truncated to multiple of 32 214209231Sjchandra beq t0,zero,2f # any work to do? 
215209231Sjchandra PTR_SUBU SIZEREG,t0 216209231Sjchandra PTR_SUBU a3,SRCREG,t0 217178580Simp 218178580Simp /* 219178580Simp * loop body 220178580Simp */ 221178580Simp1: # cp 222209231Sjchandra REG_L t3,(-4*SZREG)(SRCREG) 223209231Sjchandra REG_L v1,(-3*SZREG)(SRCREG) 224209231Sjchandra REG_L t0,(-2*SZREG)(SRCREG) 225209231Sjchandra REG_L t1,(-1*SZREG)(SRCREG) 226209231Sjchandra PTR_SUBU SRCREG,8*SZREG 227209231Sjchandra REG_S t3,(-4*SZREG)(DSTREG) 228209231Sjchandra REG_S v1,(-3*SZREG)(DSTREG) 229209231Sjchandra REG_S t0,(-2*SZREG)(DSTREG) 230209231Sjchandra REG_S t1,(-1*SZREG)(DSTREG) 231209231Sjchandra REG_L t1,(3*SZREG)(SRCREG) 232209231Sjchandra REG_L t0,(2*SZREG)(SRCREG) 233209231Sjchandra REG_L v1,(1*SZREG)(SRCREG) 234209231Sjchandra REG_L t3,(0*SZREG)(SRCREG) 235209231Sjchandra PTR_SUBU DSTREG,8*SZREG 236209231Sjchandra REG_S t1,(3*SZREG)(DSTREG) 237209231Sjchandra REG_S t0,(2*SZREG)(DSTREG) 238209231Sjchandra REG_S v1,(1*SZREG)(DSTREG) 239209231Sjchandra bne SRCREG,a3,1b 240209231Sjchandra REG_S t3,(0*SZREG)(DSTREG) 241178580Simp 242178580Simp /* 243178580Simp * Copy a word at a time, no loop unrolling. 244178580Simp */ 245178580Simp2: # wordcopy 246209231Sjchandra andi t2,SIZEREG,SZREG-1 # get byte count / 4 247209231Sjchandra PTR_SUBU t2,SIZEREG,t2 # t2 = number of words to copy 248209231Sjchandra beq t2,zero,3f 249209231Sjchandra PTR_SUBU t0,SRCREG,t2 # stop at t0 250209231Sjchandra PTR_SUBU SIZEREG,SIZEREG,t2 251178580Simp1: 252209231Sjchandra REG_L t3,-SZREG(SRCREG) 253209231Sjchandra PTR_SUBU SRCREG,SZREG 254209231Sjchandra REG_S t3,-SZREG(DSTREG) 255209231Sjchandra bne SRCREG,t0,1b 256209231Sjchandra PTR_SUBU DSTREG,SZREG 257178580Simp 258178580Simp3: # bytecopy 259209231Sjchandra beq SIZEREG,zero,4f # nothing left to do? 
260178580Simp nop 261178580Simp1: 262209231Sjchandra lb t3,-1(SRCREG) 263209231Sjchandra PTR_SUBU SRCREG,1 264209231Sjchandra sb t3,-1(DSTREG) 265209231Sjchandra PTR_SUBU SIZEREG,1 266209231Sjchandra bgtz SIZEREG,1b 267209231Sjchandra PTR_SUBU DSTREG,1 268178580Simp 269178580Simp4: # copydone 270178580Simp j ra 271178580Simp nop 272178580Simp 273178580Simp /* 274178580Simp * Copy from unaligned source to aligned dest. 275178580Simp */ 276178580Simp5: # destaligned 277209231Sjchandra andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod 4 278209231Sjchandra PTR_SUBU a3,SIZEREG,t0 # number of words to transfer 279209231Sjchandra beq a3,zero,3b 280178580Simp nop 281209231Sjchandra move SIZEREG,t0 # this many to do after we are done 282209231Sjchandra PTR_SUBU a3,SRCREG,a3 # stop point 283178580Simp 284178580Simp1: 285209231Sjchandra REG_LHI t3,-SZREG(SRCREG) 286209231Sjchandra REG_LLO t3,-1(SRCREG) 287209231Sjchandra PTR_SUBU SRCREG,SZREG 288209231Sjchandra REG_S t3,-SZREG(DSTREG) 289209231Sjchandra bne SRCREG,a3,1b 290209231Sjchandra PTR_SUBU DSTREG,SZREG 291178580Simp 292209231Sjchandra b 3b 293178580Simp nop 294178580Simp 295178580Simp .set reorder 296178580Simp .set at 297178580Simp END(FUNCTION) 298