/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "xtensa-config.h"

# Note: These functions use a minimum stack frame size of 32.  This is
# necessary for Xtensa configurations that only support a fixed register
# window size of 8, where even leaf functions (such as these) need to
# allocate space for a 4-word "extra save area".

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src	# tmp = -src
	movgez	\tmp, \src, \src # if src >= 0, keep src instead
	mov	\dst, \tmp	# dst = abs(src)
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

# Define macros for function entry and return, supporting either the
# standard register windowed ABI or the non-windowed call0 ABI.  These
# macros do not allocate any extra stack space, so they only work for
# leaf functions that do not need to spill anything to the stack.

	.macro abi_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry \reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro abi_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	abi_entry sp, 32

#if XCHAL_HAVE_MUL16
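	# Fast path: if both operands fit in 16 unsigned bits, a single
	# MUL16U produces the exact 32-bit product.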
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_return
.LMUL16:
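	# Build the low 32 bits of the product from 16-bit halves:
	# a2 * a3 == ((ah*bl + al*bh) << 16) + al*bl  (mod 2^32),
	# where ah/al and bh/bl are the high/low halves of a2 and a3.
	# The ah*bh term only affects bits 32 and up, so it is dropped.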
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
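	# Accumulate the two 16x16 cross products in the MAC16 accumulator
	# (their signedness does not matter modulo 2^32), shift the sum
	# into the upper halfword, and add the unsigned low*low product.
	# The high*high term would only affect bits 32 and up.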
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Multiply one bit at a time, but unroll the loop 4x to better
	# exploit the addx instructions and avoid overhead.
	# Peel the first iteration to save a cycle on init.

	# Avoid negative numbers.
	xor	a5, a2, a3  # top bit is 1 iff one of the inputs is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Swap so the second argument is smaller.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7  # a4 = max(a2, a3)
	movltz	a3, a2, a7  # a3 = min(a2, a3)
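
	# Peeled first iteration: handle the four low-order bits of the
	# multiplier (a3), conditionally adding 1x, 2x, 4x, and 8x the
	# multiplicand (a4) into the product (a2).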
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop  # more multiplier bits remain?
	neg	a3, a2
	movltz	a2, a3, a5  # negate the product if the input signs differed
	abi_return

	.align	4
.Lmult_main_loop:
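	# Consume four more multiplier bits per iteration: drop the bits
	# just handled from a3, scale the multiplicand by 16, and repeat
	# the conditional addx steps.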
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5  # negate the product if the input signs differed

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	abi_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */


# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
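	# No NSA instruction: narrow the search for the leading one bit in
	# two steps (upper halfword, then upper byte), counting 16 or 8
	# skipped zero bits as we go, and finish with a 256-entry table
	# lookup (__nsau_data) on the remaining byte.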
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm

#ifdef L_nsau
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
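	# __nsau_data[b] is the number of leading zero bits in the 8-bit
	# value b: 8 for b == 0, down to 0 for b >= 128.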
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */


#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	abi_entry sp, 32
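
	# Unsigned shift-and-subtract division: left-shift the divisor so
	# its leading one bit lines up with the dividend's, then produce
	# one quotient bit per iteration by comparing, conditionally
	# subtracting, and shifting the divisor right.  A final compare
	# after the loop supplies the last quotient bit.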
	bltui	a3, 2, .Lle_one	# check if the divisor <= 1

	mov	a6, a2		# keep dividend in a6
	do_nsau	a5, a6, a2, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a2, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if dividend >= divisor
.Lreturn:
	abi_return

.Lle_one:
	beqz	a3, .Lerror	# if divisor == 0, go return 0
	abi_return		# otherwise divisor == 1: return the dividend

.Lspecial:
	# the dividend has at least as many leading zeros as the divisor,
	# so the quotient is either 0 or 1: return (dividend >= divisor)
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	abi_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	abi_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	abi_entry sp, 32
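
	# Signed division: record the sign of the result, divide the
	# absolute values with the same shift-and-subtract loop as
	# __udivsi3, and negate the quotient at the end if the operands'
	# signs differed.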
	xor	a7, a2, a3	# sign = dividend ^ divisor
	do_abs	a6, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a6, a2, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a2, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if udividend >= udivisor
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	# return (sign < 0) ? -quotient : quotient
	abi_return

.Lle_one:
	beqz	a3, .Lerror	# if udivisor == 0, return 0
	neg	a2, a6		# if udivisor == 1, then return...
	movgez	a2, a6, a7	# (sign < 0) ? -udividend : udividend
	abi_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 # if udividend < udivisor, return 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	# else return (sign < 0) ? -1 : 1
	abi_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	abi_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	abi_entry sp, 32
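
	# Unsigned remainder: same shift-and-subtract loop as __udivsi3,
	# but only the running remainder (in a2) is kept; no quotient is
	# accumulated.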
	bltui	a3, 2, .Lle_one	# check if the divisor is <= 1

	do_nsau	a5, a2, a6, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a6, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if dividend >= divisor
.Lreturn:
	abi_return

.Lle_one:
	# the divisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if the divisor is 0.
	movi	a2, 0
	abi_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	abi_entry sp, 32
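
	# Signed remainder: reduce the absolute values with the same loop
	# as __umodsi3, then give the result the sign of the dividend.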
	mov	a7, a2		# save original (signed) dividend
	do_abs	a2, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a2, a6, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a6, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if udividend >= udivisor
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		# if (dividend < 0), return -udividend
.Lpositive:
	abi_return

.Lle_one:
	# udivisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if udivisor is 0.
	movi	a2, 0
	abi_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */