/*
 * arch/alpha/lib/stxncpy.S
 * Contributed by Richard Henderson (rth@tamu.edu)
 *
 * Copy no more than COUNT bytes of the null-terminated string from
 * SRC to DST.
 *
 * This is an internal routine used by strncpy, stpncpy, and strncat.
 * As such, it uses special linkage conventions to make implementation
 * of these public functions more efficient.
 *
 * On input:
 *	t9 = return address
 *	a0 = DST
 *	a1 = SRC
 *	a2 = COUNT
 *
 * Furthermore, COUNT may not be zero.  A COUNT with the top bit set
 * is treated as LONG_MAX, so that biasing it by the destination
 * misalignment cannot wrap around to a tiny value.
 *
 * On output:
 *	t0  = last word written
 *	t10 = bitmask (with one bit set) indicating the byte position of
 *	      the end of the range specified by COUNT
 *	t12 = bitmask (with one bit set) indicating the last byte written
 *	a0  = unaligned address of the last *word* written
 *	a2  = the number of full words left in COUNT
 *
 * Furthermore, v0, a3-a5, t11, and $at are untouched.
 *
 * Scratch registers used internally: t1-t6 and t8.
 */

#include <asm/regdef.h>

	.set noat
	.set noreorder

	.text


/*
 * stxncpy_aligned: copy path used when SRC and DST share the same
 * misalignment within a quadword.  Entered by branch from __stxncpy
 * (not by a call), and returns directly to the caller through t9.
 */
	.ent stxncpy_aligned
	.align 3
stxncpy_aligned:
	.frame sp, 0, t9, 0
	.prologue 0

	/* On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word.  */

	/* Create the 1st output word and detect 0's in the 1st input word.
	   Bytes of the source word that lie below the starting offset are
	   forced to 0xff in t2 so that they cannot be mistaken for the
	   terminator.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    :
	ornot	t1, t2, t2	# .. e1 :
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t8	# .. e1 : bits set iff null found
	or	t0, t3, t0	# e0    :
	beq	a2, $a_eoc	# .. e1 :
	bne	t8, $a_eos	# .. e1 :

	/* On entry to this basic block:
	   t0 == a source word not containing a null.  */

$a_loop:
	stq_u	t0, 0(a0)	# e0    :
	addq	a0, 8, a0	# .. e1 :
	ldq_u	t0, 0(a1)	# e0    :
	addq	a1, 8, a1	# .. e1 :
	subq	a2, 1, a2	# e0    :
	cmpbge	zero, t0, t8	# .. e1 (stall)
	beq	a2, $a_eoc	# e1    :
	beq	t8, $a_loop	# e1    :

	/* Take care of the final (partial) word store.  At this point
	   the end-of-count bit is set in t8 iff it applies.

	   On entry to this basic block we have:
	   t0 == the source word containing the null
	   t8 == the cmpbge mask that found it.  */

$a_eos:
	negq	t8, t12		# e0    : find low bit set
	and	t8, t12, t12	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  (Bit 7 set means the last
	   byte written is the top byte, i.e. a full-word store.)  */
	and	t12, 0x80, t6	# e0    :
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t12, t6, t8	# e0    : t8 = byte mask of bytes <= last
	unop			#
	zapnot	t0, t8, t0	# e0    : clear src bytes > null
	zap	t1, t8, t1	# .. e1 : clear dst bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# e1    :

	/* Add the end-of-count bit to the eos detection bitmask.  */
$a_eoc:
	or	t10, t8, t8
	br	$a_eos

	.end stxncpy_aligned

/*
 * __stxncpy: entry point.  Computes the word loop counter (a2) and the
 * end-of-count byte mask (t10), then dispatches to the co-aligned or
 * the unaligned copy path.
 */
	.align 3
	.ent __stxncpy
	.globl __stxncpy
__stxncpy:
	.frame sp, 0, t9, 0
	.prologue 0

	/* Bound COUNT to LONG_MAX.  Without this, COUNT + (DST & 7) - 1
	   below can wrap for COUNT close to 2^64, which would place the
	   end-of-count byte before the first byte to be written and make
	   the copy store nothing.  t2 is free here; it is recomputed
	   below.  The issue-slot notes on the following instructions
	   predate these three instructions and are advisory only.  */
	lda	t2, -1		#       : t2 = all ones
	srl	t2, 1, t2	#       : t2 = LONG_MAX
	cmovlt	a2, t2, a2	#       : clamp count if top bit is set

	/* Are source and destination co-aligned?  */
	xor	a0, a1, t1	# e0    :
	and	a0, 7, t0	# .. e1 : find dest misalignment
	and	t1, 7, t1	# e0    :
	addq	a2, t0, a2	# .. e1 : bias count by dest misalignment
	subq	a2, 1, a2	# e0    :
	and	a2, 7, t2	# e1    :
	srl	a2, 3, a2	# e0    : a2 = loop counter = (count - 1)/8
	addq	zero, 1, t10	# .. e1 :
	sll	t10, t2, t10	# e0    : t10 = bitmask of last count byte
	bne	t1, $unaligned	# .. e1 :

	/* We are co-aligned; take care of a partial first word.  */

	ldq_u	t1, 0(a1)	# e0    : load first src word
	addq	a1, 8, a1	# .. e1 :

	beq	t0, stxncpy_aligned	# avoid loading dest word if not needed
	ldq_u	t0, 0(a0)	# e0    :
	br	stxncpy_aligned	# .. e1 :


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, unmasked
	   t1 == the shifted low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	ldq_u	t2, 8(a1)	# e0    : load second src word
	addq	a1, 8, a1	# .. e1 :
	mskql	t0, a0, t0	# e0    : mask trailing garbage in dst
	extqh	t2, a1, t4	# e0    :
	or	t1, t4, t1	# e1    : first aligned src word complete
	mskqh	t1, a0, t1	# e0    : mask leading garbage in src
	or	t0, t1, t0	# e0    : first output word complete
	or	t0, t6, t6	# e1    : mask original data for zero test
	cmpbge	zero, t6, t8	# e0    :
	beq	a2, $u_eocfin	# .. e1 :
	lda	t6, -1		# e0    :
	bne	t8, $u_final	# .. e1 :

	mskql	t6, a1, t6	# e0    : mask out bits already seen
	nop			# .. e1 :
	stq_u	t0, 0(a0)	# e0    : store first output word
	or	t6, t2, t2	# .. e1 :
	cmpbge	zero, t2, t8	# e0    : find nulls in second partial
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e0    :
	bne	t8, $u_late_head_exit	# .. e1 :

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	extql	t2, a1, t1	# e0    : position hi-bits of lo word
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : read next high-order source word
	addq	a1, 8, a1	# .. e1 :
	extqh	t2, a1, t0	# e0    : position lo-bits of hi word (stall)
	cmpbge	zero, t2, t8	# .. e1 :
	nop			# e0    :
	bne	t8, $u_eos	# .. e1 :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 3
$u_loop:
	or	t0, t1, t0	# e0    : current dst word now complete
	subq	a2, 1, a2	# .. e1 : decrement word count
	stq_u	t0, 0(a0)	# e0    : save the current word
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t1	# e0    : extract high bits for next time
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : load high word for next time
	addq	a1, 8, a1	# .. e1 :
	nop			# e0    :
	cmpbge	zero, t2, t8	# e1    : test new word for eos (stall)
	extqh	t2, a1, t0	# e0    : extract low bits for current word
	beq	t8, $u_loop	# .. e1 :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	or	t0, t1, t0	# e0    : first (partial) source word complete
	nop			# .. e1 :
	cmpbge	zero, t0, t8	# e0    : is the null in this first bit?
	bne	t8, $u_final	# .. e1 (zdb)

	stq_u	t0, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e1    :

	/* Build the last word from the remaining high bytes of t2.  If the
	   word count has run out (a2 == 0), the end-of-count bit is merged
	   into the terminator mask as well.  */
$u_late_head_exit:
	extql	t2, a1, t0	# .. e0 :
	cmpbge	zero, t0, t8	# e0    :
	or	t8, t10, t6	# e1    :
	cmoveq	a2, t6, t8	# e0    :
	nop			# .. e1 :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t0 == assembled source word
	   t8 == cmpbge mask that found the null.  */
$u_final:
	negq	t8, t6		# e0    : isolate low bit set
	and	t6, t8, t12	# e1    :

	and	t12, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t6, t12, t8	# e0    : t8 = byte mask of bytes <= last
	zapnot	t0, t8, t0	# .. e1 : kill source bytes > null
	zap	t1, t8, t1	# e0    : kill dest bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	/* Got to end-of-count before end of string.
	   On entry to this basic block:
	   t1 == the shifted high-order bits from the previous source word

	   The next source word is loaded only when the end-of-count bit,
	   shifted left by the source misalignment, falls outside the low
	   eight bits; otherwise t1 alone is used for the last word.  */
$u_eoc:
	and	a1, 7, t6	# e1    :
	sll	t10, t6, t6	# e0    :
	and	t6, 0xff, t6	# e0    :
	bne	t6, 1f		# .. e1 :

	ldq_u	t2, 8(a1)	# e0    : load final src word
	nop			# .. e1 :
	extqh	t2, a1, t0	# e0    : extract low bits for last word
	or	t1, t0, t1	# e1    :

1:	cmpbge	zero, t1, t8
	mov	t1, t0

$u_eocfin:			# end-of-count, final word
	or	t10, t8, t8
	br	$u_final

	/* Unaligned copy entry point.  */
	.align 3
$unaligned:

	ldq_u	t1, 0(a1)	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    :
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

1:	cmplt	t4, t5, t12	# e1    :
	extql	t1, a1, t1	# .. e0 : shift src into place
	lda	t2, -1		# e0    : for creating masks later
	beq	t12, $u_head	# .. e1 :

	extql	t2, a1, t2	# e0    :
	cmpbge	zero, t1, t8	# .. e1 : is there a zero?
	andnot	t2, t6, t2	# e0    : dest mask for a single word copy
	or	t8, t10, t5	# .. e1 : test for end-of-count too
	cmpbge	zero, t2, t3	# e0    :
	cmoveq	a2, t5, t8	# .. e1 :
	andnot	t8, t3, t8	# e0    :
	beq	t8, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.)  */

	ldq_u	t0, 0(a0)	# e0    :
	negq	t8, t6		# .. e1 : build bitmask of bytes <= zero
	mskqh	t1, t4, t1	# e0    :
	and	t6, t8, t12	# .. e1 :
	subq	t12, 1, t6	# e0    :
	or	t6, t12, t8	# e1    :

	zapnot	t2, t8, t2	# e0    : prepare source word; mirror changes
	zapnot	t1, t8, t1	# .. e1 : to source validity mask

	andnot	t0, t2, t0	# e0    : zero place for source to reside
	or	t0, t1, t0	# e1    : and put it there
	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	.end __stxncpy