/* strcat.S revision 1.2 */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * i386, AT&T syntax; both arguments are read from the stack.
 *
 * Register roles:
 *	%ecx	cursor into dst (first the scan position, then the
 *		write position)
 *	%eax	cursor into src; reloaded with dst just before returning
 *	%ebx	byte/word currently being examined or copied
 *		(pushed on entry, popped on exit)
 *	%edx	scratch for the zero-byte test
 *
 * Returns dst in %eax.  Modifies %ecx, %edx and the flags.
 *
 * Zero-byte test used by both word loops:
 *	(word - 0x01010101) & 0x80808080
 * is non-zero whenever some byte of the word is 0.  It can also be
 * non-zero when no byte is 0 (for example a byte in 0x81..0xff), so
 * every hit is confirmed one byte at a time before it is acted on.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* dst (offset includes saved %ebx) */
	movl	12(%esp),%eax		/* src */

	/*
	 * Phase 1a: walk dst one byte at a time until the cursor is
	 * 4-byte aligned, stopping early if the terminator shows up.
	 */
.Ldst_align:
	testb	$3,%cl
	je	.Ldst_word_loop
	cmpb	$0,(%ecx)
	je	.Lappend
	incl	%ecx
	jmp	.Ldst_align

	/*
	 * Phase 1b: walk dst one aligned word at a time.  %ecx is
	 * advanced past the word before the word is tested.
	 */
	_ALIGN_TEXT
.Ldst_word_loop:
	movl	(%ecx),%ebx
	addl	$4,%ecx
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Ldst_word_loop

	/*
	 * The word test fired.  Step %ecx back to the start of the word
	 * and move it forward one byte at a time, so that it points at
	 * the first zero byte when we leave.  If none of the four bytes
	 * is zero the hit was spurious: %ecx is back at the next word
	 * and the word loop is resumed.
	 */
	subl	$4,%ecx
	testb	%bl,%bl			/* byte 0 */
	je	.Lappend

	incl	%ecx
	testb	%bh,%bh			/* byte 1 */
	je	.Lappend

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	incl	%ecx
	testb	%bl,%bl			/* byte 2 */
	je	.Lappend

	incl	%ecx
	testb	%bh,%bh			/* byte 3 */
	je	.Lappend

	incl	%ecx			/* spurious hit: keep scanning */
	jmp	.Ldst_word_loop

	/*
	 * Phase 2a: %ecx now addresses dst's terminator.  Copy src one
	 * byte at a time until the src cursor is 4-byte aligned; the
	 * terminator is stored before it is tested, so dst is already
	 * terminated if the copy finishes here.
	 */
.Lappend:
.Lsrc_align:
	testb	$3,%al
	je	.Lload_word
	movb	(%eax),%bl
	movb	%bl,(%ecx)
	incl	%eax
	incl	%ecx
	testb	%bl,%bl
	jne	.Lsrc_align
	jmp	.Lreturn

	/*
	 * Phase 2b: copy one word per iteration.  Only the loads from
	 * src are known to be aligned here; nothing in this phase
	 * aligns the stores through %ecx.  The loop is entered at
	 * .Lload_word, and a word is stored only after the test has
	 * shown that it holds no zero byte.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lload_word:
	movl	(%eax),%ebx
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lstore_word

	/*
	 * The word test fired.  Store the word's bytes one by one and
	 * stop right after storing a zero byte.  If all four bytes turn
	 * out to be non-zero the hit was spurious; the whole word has
	 * been written by then, so go straight to the next load.
	 */
	movb	%bl,(%ecx)		/* byte 0 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lreturn

	movb	%bh,(%ecx)		/* byte 1 */
	incl	%ecx
	testb	%bh,%bh
	je	.Lreturn

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	movb	%bl,(%ecx)		/* byte 2 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lreturn

	movb	%bh,(%ecx)		/* byte 3 */
	incl	%ecx
	testb	%bh,%bh
	jne	.Lload_word

.Lreturn:
	movl	8(%esp),%eax		/* return value: the original dst */
	popl	%ebx
	ret
END(strcat)