/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#if 0
	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Syntax: AT&T (GAS), x86-64.  Arguments are taken from %rdi (dst) and
 * %rsi (src); the original dst pointer is returned in %rax.
 *
 * Register roles:
 *	%rax	original dst; set once on entry and never modified again
 *	%rdi	cursor into dst (scan phase, then store position)
 *	%rsi	cursor into src
 *	%rdx	word (or byte, in %dl) currently being examined/copied
 *	%rcx	scratch for the zero-byte test in the copy loop
 *	%r8	0x0101010101010101
 *	%r9	0x8080808080808080
 *
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags.  No stack is used.
 *
 * Zero-byte heuristic: for a 64-bit word x, (x - %r8) & %r9 is non-zero
 * whenever some byte of x is zero.  It never misses a zero byte, but a
 * byte in the range 0x81..0xff also sets its high bit after the
 * subtraction, so a non-zero result must be confirmed byte by byte.
 *
 * Whole-word loads are only issued from 8-byte-aligned addresses, so a
 * load that reads past the terminator stays inside the aligned word that
 * contains the terminator.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value = dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Phase 1: find the terminating NUL of dst.
	 *
	 * Step one byte at a time until %rdi is 8-byte aligned (or the NUL
	 * is found first, in which case go straight to the copy phase).
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * NOTE(review): how ".align 4" is interpreted (4 bytes vs. 2^4
	 * bytes) depends on the assembler/target; confirm the intended
	 * alignment.  ".p2align 4" would be unambiguous.
	 */
	.align	4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi				/* %rdi now 8 past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop			/* no candidate zero byte */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word just examined lives at -8(%rdi)..-1(%rdi).  On finding
	 * the NUL, rewind %rdi so it points at that byte.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subq	$1,%rdi		/* falls through into the copy phase */

	/*
	 * Phase 2: copy src (including its NUL) to the end of dst.
	 *
	 * Copy byte by byte until the source pointer is 8-byte aligned;
	 * return immediately if the NUL is copied during alignment.
	 * Only the source is aligned; word stores to dst may be unaligned.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/* NOTE(review): see the remark on ".align 4" above. */
	.align	4
.Lcopy_loop:
	movq	%rdx,(%rdi)			/* word had no zero byte */
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx			/* test a copy, keep %rdx */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * Store the word one byte at a time, lowest-order byte first,
	 * stopping right after the NUL has been written.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false positive: all 8 bytes copied */

.Ldone:
	ret
END(strcat)

	.section .note.GNU-stack,"",%progbits