/*
 * arch/alpha/lib/strncpy_from_user.S
 * Contributed by Richard Henderson (rth@tamu.edu)
 *
 * Just like strncpy except in the return value:
 *
 * -EFAULT       if an exception occurs before the terminator is copied.
 * N             if the buffer filled.
 *
 * Otherwise the length of the string is returned.
 */


#include <asm/errno.h>
#include <asm/regdef.h>


/*
 * Allow an exception for an insn; exit if we get one.
 *
 * EX(insn) emits `insn` at numeric local label 99 and then appends one
 * record to the __ex_table section before switching back to the previous
 * section.  The record consists of:
 *
 *   .long 99b - .                  32-bit offset from the record back to
 *                                  the instruction that may fault
 *   lda $31, $exception-99b($0)    an instruction word used purely as data:
 *                                  its displacement is the distance from
 *                                  the guarded instruction to $exception,
 *                                  and its register fields name $31 and $0
 *
 * NOTE(review): presumably the fault handler decodes that lda word to
 * resume at $exception with -EFAULT placed in $0 (v0) -- confirm against
 * the architecture's exception-table definition.
 *
 * The variadic form (x,y...) lets the guarded instruction contain commas.
 * Because the macro always uses label 99, the surrounding code must not
 * define its own local label 99.
 */
#define EX(x,y...)			\
	99: x,##y;			\
	.section __ex_table,"a";	\
	.long 99b - .;			\
	lda $31, $exception-99b($0); 	\
	.previous


27	.set noat
28	.set noreorder
29	.text
30
31	.globl __strncpy_from_user
32	.ent __strncpy_from_user
33	.frame $30, 0, $26
34	.prologue 0
35
36	.align 3
37$aligned:
38	/* On entry to this basic block:
39	   t0 == the first destination word for masking back in
40	   t1 == the first source word.  */
41
42	/* Create the 1st output word and detect 0's in the 1st input word.  */
43	lda	t2, -1		# e1    : build a mask against false zero
44	mskqh	t2, a1, t2	# e0    :   detection in the src word
45	mskqh	t1, a1, t3	# e0    :
46	ornot	t1, t2, t2	# .. e1 :
47	mskql	t0, a1, t0	# e0    : assemble the first output word
48	cmpbge	zero, t2, t8	# .. e1 : bits set iff null found
49	or	t0, t3, t0	# e0    :
50	beq	a2, $a_eoc	# .. e1 :
51	bne	t8, $a_eos	# .. e1 :
52
53	/* On entry to this basic block:
54	   t0 == a source word not containing a null.  */
55
56$a_loop:
57	stq_u	t0, 0(a0)	# e0    :
58	addq	a0, 8, a0	# .. e1 :
59	EX( ldq_u t0, 0(a1) )	# e0    :
60	addq	a1, 8, a1	# .. e1 :
61	subq	a2, 1, a2	# e0    :
62	cmpbge	zero, t0, t8	# .. e1 (stall)
63	beq	a2, $a_eoc      # e1    :
64	beq	t8, $a_loop	# e1    :
65
66	/* Take care of the final (partial) word store.  At this point
67	   the end-of-count bit is set in t8 iff it applies.
68
69	   On entry to this basic block we have:
70	   t0 == the source word containing the null
71	   t8 == the cmpbge mask that found it.  */
72
73$a_eos:
74	negq	t8, t12		# e0    : find low bit set
75	and	t8, t12, t12	# e1 (stall)
76
77	/* For the sake of the cache, don't read a destination word
78	   if we're not going to need it.  */
79	and	t12, 0x80, t6	# e0    :
80	bne	t6, 1f		# .. e1 (zdb)
81
82	/* We're doing a partial word store and so need to combine
83	   our source and original destination words.  */
84	ldq_u	t1, 0(a0)	# e0    :
85	subq	t12, 1, t6	# .. e1 :
86	or	t12, t6, t8	# e0    :
87	unop			#
88	zapnot	t0, t8, t0	# e0    : clear src bytes > null
89	zap	t1, t8, t1	# .. e1 : clear dst bytes <= null
90	or	t0, t1, t0	# e1    :
91
921:	stq_u	t0, 0(a0)
93	br	$finish_up
94
95	/* Add the end-of-count bit to the eos detection bitmask.  */
96$a_eoc:
97	or	t10, t8, t8
98	br	$a_eos
99
100	/*** The Function Entry Point ***/
101	.align 3
102__strncpy_from_user:
103	mov	a0, v0		# save the string start
104	beq	a2, $zerolength
105
106	/* Are source and destination co-aligned?  */
107	xor	a0, a1, t1	# e0    :
108	and	a0, 7, t0	# .. e1 : find dest misalignment
109	and	t1, 7, t1	# e0    :
110	addq	a2, t0, a2	# .. e1 : bias count by dest misalignment
111	subq	a2, 1, a2	# e0    :
112	and	a2, 7, t2	# e1    :
113	srl	a2, 3, a2	# e0    : a2 = loop counter = (count - 1)/8
114	addq	zero, 1, t10	# .. e1 :
115	sll	t10, t2, t10	# e0    : t10 = bitmask of last count byte
116	bne	t1, $unaligned	# .. e1 :
117
118	/* We are co-aligned; take care of a partial first word.  */
119
120	EX( ldq_u t1, 0(a1) )	# e0    : load first src word
121	addq	a1, 8, a1	# .. e1 :
122
123	beq	t0, $aligned	# avoid loading dest word if not needed
124	ldq_u	t0, 0(a0)	# e0    :
125	br	$aligned	# .. e1 :
126
127
128/* The source and destination are not co-aligned.  Align the destination
129   and cope.  We have to be very careful about not reading too much and
130   causing a SEGV.  */
131
132	.align 3
133$u_head:
134	/* We know just enough now to be able to assemble the first
135	   full source word.  We can still find a zero at the end of it
136	   that prevents us from outputting the whole thing.
137
138	   On entry to this basic block:
139	   t0 == the first dest word, unmasked
140	   t1 == the shifted low bits of the first source word
141	   t6 == bytemask that is -1 in dest word bytes */
142
143	EX( ldq_u t2, 8(a1) )	# e0    : load second src word
144	addq	a1, 8, a1	# .. e1 :
145	mskql	t0, a0, t0	# e0    : mask trailing garbage in dst
146	extqh	t2, a1, t4	# e0    :
147	or	t1, t4, t1	# e1    : first aligned src word complete
148	mskqh	t1, a0, t1	# e0    : mask leading garbage in src
149	or	t0, t1, t0	# e0    : first output word complete
150	or	t0, t6, t6	# e1    : mask original data for zero test
151	cmpbge	zero, t6, t8	# e0    :
152	beq	a2, $u_eocfin	# .. e1 :
153	bne	t8, $u_final	# e1    :
154
155	lda	t6, -1			# e1    : mask out the bits we have
156	mskql	t6, a1, t6		# e0    :   already seen
157	stq_u	t0, 0(a0)		# e0    : store first output word
158	or      t6, t2, t2		# .. e1 :
159	cmpbge	zero, t2, t8		# e0    : find nulls in second partial
160	addq	a0, 8, a0		# .. e1 :
161	subq	a2, 1, a2		# e0    :
162	bne	t8, $u_late_head_exit	# .. e1 :
163
164	/* Finally, we've got all the stupid leading edge cases taken care
165	   of and we can set up to enter the main loop.  */
166
167	extql	t2, a1, t1	# e0    : position hi-bits of lo word
168	EX( ldq_u t2, 8(a1) )	# .. e1 : read next high-order source word
169	addq	a1, 8, a1	# e0    :
170	cmpbge	zero, t2, t8	# e1 (stall)
171	beq	a2, $u_eoc	# e1    :
172	bne	t8, $u_eos	# e1    :
173
174	/* Unaligned copy main loop.  In order to avoid reading too much,
175	   the loop is structured to detect zeros in aligned source words.
176	   This has, unfortunately, effectively pulled half of a loop
177	   iteration out into the head and half into the tail, but it does
178	   prevent nastiness from accumulating in the very thing we want
179	   to run as fast as possible.
180
181	   On entry to this basic block:
182	   t1 == the shifted high-order bits from the previous source word
183	   t2 == the unshifted current source word
184
185	   We further know that t2 does not contain a null terminator.  */
186
187	.align 3
188$u_loop:
189	extqh	t2, a1, t0	# e0    : extract high bits for current word
190	addq	a1, 8, a1	# .. e1 :
191	extql	t2, a1, t3	# e0    : extract low bits for next time
192	addq	a0, 8, a0	# .. e1 :
193	or	t0, t1, t0	# e0    : current dst word now complete
194	EX( ldq_u t2, 0(a1) )	# .. e1 : load high word for next time
195	stq_u	t0, -8(a0)	# e0    : save the current word
196	mov	t3, t1		# .. e1 :
197	subq	a2, 1, a2	# e0    :
198	cmpbge	zero, t2, t8	# .. e1 : test new word for eos
199	beq	a2, $u_eoc	# e1    :
200	beq	t8, $u_loop	# e1    :
201
202	/* We've found a zero somewhere in the source word we just read.
203	   If it resides in the lower half, we have one (probably partial)
204	   word to write out, and if it resides in the upper half, we
205	   have one full and one partial word left to write out.
206
207	   On entry to this basic block:
208	   t1 == the shifted high-order bits from the previous source word
209	   t2 == the unshifted current source word.  */
210$u_eos:
211	extqh	t2, a1, t0	# e0    :
212	or	t0, t1, t0	# e1    : first (partial) source word complete
213
214	cmpbge	zero, t0, t8	# e0    : is the null in this first bit?
215	bne	t8, $u_final	# .. e1 (zdb)
216
217	stq_u	t0, 0(a0)	# e0    : the null was in the high-order bits
218	addq	a0, 8, a0	# .. e1 :
219	subq	a2, 1, a2	# e1    :
220
221$u_late_head_exit:
222	extql	t2, a1, t0	# .. e0 :
223	cmpbge	zero, t0, t8	# e0    :
224	or	t8, t10, t6	# e1    :
225	cmoveq	a2, t6, t8	# e0    :
226	nop			# .. e1 :
227
228	/* Take care of a final (probably partial) result word.
229	   On entry to this basic block:
230	   t0 == assembled source word
231	   t8 == cmpbge mask that found the null.  */
232$u_final:
233	negq	t8, t6		# e0    : isolate low bit set
234	and	t6, t8, t12	# e1    :
235
236	and	t12, 0x80, t6	# e0    : avoid dest word load if we can
237	bne	t6, 1f		# .. e1 (zdb)
238
239	ldq_u	t1, 0(a0)	# e0    :
240	subq	t12, 1, t6	# .. e1 :
241	or	t6, t12, t8	# e0    :
242	zapnot	t0, t8, t0	# .. e1 : kill source bytes > null
243	zap	t1, t8, t1	# e0    : kill dest bytes <= null
244	or	t0, t1, t0	# e1    :
245
2461:	stq_u	t0, 0(a0)	# e0    :
247	br	$finish_up
248
249$u_eoc:				# end-of-count
250	extqh	t2, a1, t0
251	or	t0, t1, t0
252	cmpbge	zero, t0, t8
253
254$u_eocfin:			# end-of-count, final word
255	or	t10, t8, t8
256	br	$u_final
257
258	/* Unaligned copy entry point.  */
259	.align 3
260$unaligned:
261
262	EX( ldq_u t1, 0(a1) )	# e0    : load first source word
263
264	and	a0, 7, t4	# .. e1 : find dest misalignment
265	and	a1, 7, t5	# e0    : find src misalignment
266
267	/* Conditionally load the first destination word and a bytemask
268	   with 0xff indicating that the destination byte is sacrosanct.  */
269
270	mov	zero, t0	# .. e1 :
271	mov	zero, t6	# e0    :
272	beq	t4, 1f		# .. e1 :
273	ldq_u	t0, 0(a0)	# e0    :
274	lda	t6, -1		# .. e1 :
275	mskql	t6, a0, t6	# e0    :
2761:
277	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr
278
279	/* If source misalignment is larger than dest misalignment, we need
280	   extra startup checks to avoid SEGV.  */
281
282	cmplt	t4, t5, t12	# e1    :
283	extql	t1, a1, t1	# .. e0 : shift src into place
284	lda	t2, -1		# e0    : for creating masks later
285	beq	t12, $u_head	# e1    :
286
287	mskqh	t2, t5, t2	# e0    : begin src byte validity mask
288	cmpbge	zero, t1, t8	# .. e1 : is there a zero?
289	extql	t2, a1, t2	# e0    :
290	or	t8, t10, t5	# .. e1 : test for end-of-count too
291	cmpbge	zero, t2, t3	# e0    :
292	cmoveq	a2, t5, t8	# .. e1 :
293	andnot	t8, t3, t8	# e0    :
294	beq	t8, $u_head	# .. e1 (zdb)
295
296	/* At this point we've found a zero in the first partial word of
297	   the source.  We need to isolate the valid source data and mask
298	   it into the original destination data.  (Incidentally, we know
299	   that we'll need at least one byte of that original dest word.) */
300
301	ldq_u	t0, 0(a0)	# e0    :
302	negq	t8, t6		# .. e1 : build bitmask of bytes <= zero
303	mskqh	t1, t4, t1	# e0    :
304	and	t6, t8, t12	# .. e1 :
305	subq	t12, 1, t6	# e0    :
306	or	t6, t12, t8	# e1    :
307
308	zapnot	t2, t8, t2	# e0    : prepare source word; mirror changes
309	zapnot	t1, t8, t1	# .. e1 : to source validity mask
310
311	andnot	t0, t2, t0	# e0    : zero place for source to reside
312	or	t0, t1, t0	# e1    : and put it there
313	stq_u	t0, 0(a0)	# e0    :
314
315$finish_up:
316	zapnot	t0, t12, t4	# was last byte written null?
317	cmovne	t4, 1, t4
318
319	and	t12, 0xf0, t3	# binary search for the address of the
320	and	t12, 0xcc, t2	# last byte written
321	and	t12, 0xaa, t1
322	bic	a0, 7, t0
323	cmovne	t3, 4, t3
324	cmovne	t2, 2, t2
325	cmovne	t1, 1, t1
326	addq	t0, t3, t0
327	addq	t1, t2, t1
328	addq	t0, t1, t0
329	addq	t0, t4, t0	# add one if we filled the buffer
330
331	subq	t0, v0, v0	# find string length
332	ret
333
334$zerolength:
335	clr	v0
336$exception:
337	ret
338
339	.end __strncpy_from_user
340