mips/string/bcopy.S

209231Sjchandra/*	$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $	*/
178580Simp
178580Simp/*
178580Simp * Mach Operating System
178580Simp * Copyright (c) 1993 Carnegie Mellon University
178580Simp * All Rights Reserved.
178580Simp *
178580Simp * Permission to use, copy, modify and distribute this software and its
178580Simp * documentation is hereby granted, provided that both the copyright
178580Simp * notice and this permission notice appear in all copies of the
178580Simp * software, derivative works or modified versions, and any portions
178580Simp * thereof, and that both notices appear in supporting documentation.
178580Simp *
178580Simp * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
178580Simp * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
178580Simp * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
178580Simp *
178580Simp * Carnegie Mellon requests users of this software to return to
178580Simp *
178580Simp *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
178580Simp *  School of Computer Science
178580Simp *  Carnegie Mellon University
178580Simp *  Pittsburgh PA 15213-3890
178580Simp *
178580Simp * any improvements or extensions that they make and grant Carnegie Mellon
178580Simp * the rights to redistribute these changes.
178580Simp */
178580Simp
178580Simp/*
178580Simp *	File:	mips_bcopy.s
178580Simp *	Author:	Chris Maeda
178580Simp *	Date:	June 1993
178580Simp *
178580Simp *	Fast copy routine.  Derived from aligned_block_copy.
178580Simp */
178580Simp
178580Simp
178580Simp#include <machine/asm.h>
178580Simp__FBSDID("$FreeBSD$");
178580Simp
209231Sjchandra#define _LOCORE		/* XXX not really, just assembly-code source */
209231Sjchandra#include <machine/endian.h>
209231Sjchandra
178580Simp#if defined(LIBC_SCCS) && !defined(lint)
209231Sjchandra#if 0
178580Simp	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
209231Sjchandra#else
209231Sjchandra	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
209231Sjchandra#endif
178580Simp#endif /* LIBC_SCCS and not lint */
178580Simp
178580Simp#ifdef __ABICALLS__
178580Simp	.abicalls
178580Simp#endif
178580Simp
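/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined, the entry
 *	point is memcpy or memmove instead.  Those take the pointers in the
 *	opposite order (a0 = dst address, a1 = src address, a2 = length)
 *	and return the original dst address in v0.
 */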
178580Simp
/*
 * Entry-point name and argument registers.
 *
 * bcopy takes (src, dst, len) while memcpy and memmove take
 * (dst, src, len), so the source and destination registers trade
 * places between the two flavours; the length is in a2 for all three.
 * If both MEMCOPY and MEMMOVE are defined, MEMCOPY wins.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

#if defined(MEMCOPY) || defined(MEMMOVE)
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
178580Simp
/*
 * FUNCTION -- common body of bcopy, memcpy and memmove.
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	byte count
 * Out:	v0 = destination address as passed in (MEMCOPY/MEMMOVE builds only)
 * Clobbers: AT, v1, a3, t0-t3 and the three argument registers.
 *
 * The copy runs forwards unless the source lies below the destination, in
 * which case it runs backwards from the end of both buffers so that
 * overlapping regions are handled.  Each direction has three strategies:
 *
 *	dest and src aligned	8 registers per iteration, then one register
 *				at a time, then bytes
 *	dest aligned only	unaligned register loads (REG_LHI/REG_LLO,
 *				defined in <machine/asm.h>), then bytes
 *	dest unaligned		bytes only
 *
 * "Word" below means one register, i.e. SZREG bytes.
 *
 * The code is assembled with .set noreorder: the instruction that follows
 * each branch is its delay slot and executes whether or not the branch is
 * taken.  Do not insert or reorder instructions without checking this.
 */
LEAF(FUNCTION)
	.set	noat			# AT is used explicitly as scratch below
	.set	noreorder		# delay slots are filled by hand

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value before DSTREG is advanced */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards: if src < dst (unsigned) a
	 *	forward copy of overlapping buffers would overwrite source
	 *	bytes before they are read, so go backwards instead.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG)
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	Alignment cases, with the frequency measured by the original
	 *	authors on a DECstation 5000/200 inside a Mach kernel:
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The first andi is also the delay slot of the branch to 6f
	 *	above; that is harmless because the backward path recomputes
	 *	t1 before using it.
	 */
	andi		t1,DSTREG,(SZREG-1)	# t1 = dest offset within a word
	bne		t1,zero,3f		# dest unaligned: bytecopy everything
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) t0 = src offset within a word
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	The li is the delay slot of the branch to 5f; that path
	 *	does not read AT.
	 */
98:						# not referenced within this routine
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to a multiple of 8*SZREG
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after the fast loop

	/*
	 *	loop body: four words are loaded and stored at positive
	 *	offsets, the pointers are bumped by 8*SZREG part-way through,
	 *	and the remaining four words use negative offsets.  The last
	 *	store sits in the delay slot of the loop branch.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = whole words to copy, in bytes
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# SIZEREG = trailing bytes
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Forward copy from unaligned source to aligned dest: each
	 *	word is assembled with an unaligned load pair and written
	 *	with an ordinary aligned store.
	 *
	 *	The pointer bumps use PTR_ADDU, the non-trapping add, like
	 *	every other pointer update in this routine; the trapping
	 *	ADDI form is not appropriate for address arithmetic.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# a3 = whole words to transfer, in bytes
	beq		a3,zero,3b		# less than one word: bytecopy
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail with bytecopy
	nop

	/*
	 *	Backward copy.  Mirrors the forward code above: both
	 *	pointers are first moved one past the end of their buffers,
	 *	every access uses a negative offset (or follows a decrement),
	 *	and alignment is tested on the end addresses.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# t1 = offset of dest end within a word
	bne		t1,zero,3f		# dest end unaligned: bytecopy everything
	andi		t0,SRCREG,SZREG-1	# (delay slot) t0 = offset of src end within a word
	bne		t0,zero,5f		# src end unaligned, dest end aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	The li is the delay slot of the branch to 5f; that path
	 *	does not read AT.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to a multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after the fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body: same shape as the forward loop with the signs
	 *	reversed.  The last store sits in the delay slot of the
	 *	loop branch.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = whole words to copy, in bytes
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# SIZEREG = bytes left for bytecopy
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time, downwards.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# a3 = whole words to transfer, in bytes
	beq		a3,zero,3b		# less than one word: bytecopy
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the remainder with bytecopy
	nop

	.set	reorder
	.set	at
	END(FUNCTION)