/* clzdi2.S revision 341825 */
/* ===-- clzdi2.S - Implement __clzdi2 -------------------------------------===
 *
 *               The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 * ===----------------------------------------------------------------------===
 *
 * This file implements count leading zeros for 64-bit arguments.
 *
 * ===----------------------------------------------------------------------===
 */
#include "../assembly.h"

	/*
	 * Assembler setup: unified ARM/Thumb syntax, code emitted into .text.
	 * DEFINE_CODE_STATE is a macro from assembly.h; presumably it selects
	 * the ARM or Thumb instruction set for this file -- confirm in the
	 * header.
	 */
	.syntax unified
	.text
	DEFINE_CODE_STATE

	/* Align the function entry to a 2^2 = 4 byte boundary. */
	.p2align	2
/*
 * __clzdi2: count the leading zero bits of a 64-bit argument.
 *
 * In:     the two 32-bit halves of n in r0 and r1. With __ARMEB__ defined
 *         r0 is treated as the upper half and r1 as the lower half;
 *         otherwise r1 is the upper half and r0 the lower half.
 * Out:    r0 = number of leading zero bits in n.
 * Clobb:  condition flags; the fallback path also overwrites r1 and r2.
 *
 * The fallback path (no CLZ instruction) assumes n != 0.
 * JMP() is a macro from assembly.h; it is used here to return through lr.
 */
DEFINE_COMPILERRT_FUNCTION(__clzdi2)
#ifdef __ARM_FEATURE_CLZ
	/*
	 * CLZ instruction available: if the upper half is nonzero the result
	 * is clz(upper), otherwise it is clz(lower) + 32. "itee ne" opens an
	 * IT block so that the first following instruction runs on NE and the
	 * next two run on EQ.
	 */
#ifdef __ARMEB__
	cmp	r0, 0
	itee ne
	clzne	r0, r0
	clzeq	r0, r1
	addeq	r0, r0, 32
#else
	cmp	r1, 0
	itee ne
	clzne	r0, r1
	clzeq	r0, r0
	addeq	r0, r0, 32
#endif
	JMP(lr)
#else
	/* Assumption: n != 0 */

	/*
	 * Register roles in the fallback path:
	 * r0: after the check below, the 32-bit word to search: the upper
	 *     half of n if it is nonzero, otherwise the lower half
	 * r1: the other half of n on entry, overwritten after the check with
	 *     1 + the number of leading zeros accounted for so far
	 *     (1 if the upper half is searched, 33 if the lower half is)
	 * r2: scratch register for shifted r0
	 */
#ifdef __ARMEB__
	cmp	r0, 0
	moveq	r0, r1
#else
	cmp	r1, 0
	movne	r0, r1
#endif
	/* The flags from the cmp above are still valid: mov does not set them. */
	movne	r1, 1
	moveq	r1, 33

	/*
	 * Basic block:
	 * if ((r0 >> SHIFT) == 0)
	 *   r1 += SHIFT;
	 * else
	 *   r0 >>= SHIFT;
	 * for descending powers of two as SHIFT.
	 */
#define BLOCK(shift) \
	lsrs	r2, r0, shift; \
	movne	r0, r2; \
	addeq	r1, shift

	BLOCK(16)
	BLOCK(8)
	BLOCK(4)
	BLOCK(2)

	/*
	 * The basic block invariants at this point are (r0 >> 2) == 0 and
	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
	 *
	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
	 * ---+----------------+----------------+------------+--------------
	 * 1  | 1              | 0              | 0          | 1
	 * 2  | 0              | 1              | -1         | 0
	 * 3  | 0              | 1              | -1         | 0
	 *
	 * The r1's initial value of 1 compensates for the 1 here.
	 */
	sub	r0, r1, r0, lsr #1

	JMP(lr)
#endif // __ARM_FEATURE_CLZ
END_COMPILERRT_FUNCTION(__clzdi2)

/*
 * NO_EXEC_STACK_DIRECTIVE is a macro from assembly.h; presumably it marks
 * the object as not needing an executable stack -- confirm in the header.
 */
NO_EXEC_STACK_DIRECTIVE