strncmp.S revision 5243
/*
 * Copyright (c) 1993,94 Winning Strategies, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Winning Strategies, Inc.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	$Id: strncmp.S,v 1.1 1994/08/05 01:18:36 wollman Exp $
 */
321849Swollman
331849Swollman#if defined(LIBC_RCS) && !defined(lint)
345243Sbde        .asciz "$Id: strncmp.S,v 1.1 1994/08/05 01:18:36 wollman Exp $"
351849Swollman#endif /* LIBC_RCS and not lint */
361849Swollman
371849Swollman#include "DEFS.h"
381849Swollman
/*
 * strncmp(s1, s2, n)
 *	return an integer greater than, equal to, or less than 0,
 *	according as the first n characters of string s1 are greater
 *	than, equal to, or less than the string s2.
 *
 * %eax - pointer to s1
 * %ecx - pointer to s2
 * %edx - length
 *
 * Written by:
 *	J.T. Conklin (jtc@wimsey.com), Winning Strategies, Inc.
 */
521849Swollman
/*
 * The byte-compare loop is unrolled eight times: large enough to make a
 * significant difference, and small enough not to totally trash the cache.
 *
 * Register roles throughout the loop:
 *	%eax = current position in s1
 *	%ecx = current position in s2
 *	%edx = bytes remaining
 *	%bl  = current byte of s1 (%ebx is callee-saved, hence the push/pop)
 *
 * Conditional jumps consistently use the `je' mnemonic (same opcode as
 * `jz'), per the old TODO asking for one spelling throughout.
 */

ENTRY(strncmp)
	pushl	%ebx			/* %ebx is callee-saved */
	movl	8(%esp),%eax		/* s1 (offsets account for the push) */
	movl	12(%esp),%ecx		/* s2 */
	movl	16(%esp),%edx		/* n */
	testl	%edx,%edx		/* set ZF now; L2 tests it on entry */
	jmp	L2			/* jump into the loop! */

	.align 2,0x90
L1:	incl	%eax
	incl	%ecx
	decl	%edx			/* sets ZF when the count runs out */
L2:	je	L4			/* n bytes compared equal: return 0 */
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3			/* *s1 == '\0': settle at L3 */
	cmpb	%bl,(%ecx)
	jne	L3			/* *s1 != *s2: settle at L3 */

/*
 * XXX it might be best to move the next 4 instructions to the end of the
 * unrolled part of the loop.  The unrolled part would then be
 *	movb n(%eax),%bl; testb %bl, %bl; je L3; cmpb n(%ecx); jne L3
 * or maybe better
 *	movb n(%eax),%bl; cmpb n(%ecx); jne L3; testb %bl,%bl; je return_0
 * for n = 0, 1, ..., 8.  The end of the loop would be
 *	L1: addl $8,%eax; addl $8,%ecx; subl $8,%edx; cmpl $8,%edx; jae Lx
 * where residual counts of 0 to 7 are handled at Lx.  However, this would
 * be slower for short strings.  Cache effects are probably not so
 * important because we are only handling a byte at a time.
 */
	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	jne	L3

	incl	%eax
	incl	%ecx
	decl	%edx
	je	L4
	movb	(%eax),%bl
	testb	%bl,%bl
	je	L3
	cmpb	%bl,(%ecx)
	je	L1			/* eighth copy: loop back around */

	.align 2,0x90
L3:	movzbl	(%eax),%eax		/* unsigned comparison, as required */
	movzbl	(%ecx),%ecx		/* of strncmp: zero-extend both bytes */
	subl	%ecx,%eax		/* return (u_char)*s1 - (u_char)*s2 */
	popl	%ebx
	ret
	.align 2,0x90
L4:	xorl	%eax,%eax		/* equal through n bytes: return 0 */
	popl	%ebx
	ret
172