lshiftc.asm revision 1.1.1.1
1dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
2
3dnl  Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C	     cycles/limb
24C K8,K9:	 2.75
25C K10:		 2.75
26C P4:		 ?
27C P6-15 (Core2): ?
28C P6-28 (Atom):	 ?
29
30
31C INPUT PARAMETERS
C The four names below map to %rdi, %rsi, %rdx and %rcx, which are the first
C four integer argument registers of the System V AMD64 calling convention.
32define(`rp',	`%rdi')		C base address for every result store
33define(`up',	`%rsi')		C base address for every source load
34define(`n',	`%rdx')		C limb count, also used as a downward-running index
35define(`cnt',	`%rcx')		C shift count, consumed by the shifts through cl
36
C ASM_START, TEXT and ALIGN are macros that are not defined in this file
C (presumably they come from the included config.m4).
C NOTE(review): ALIGN(32) presumably requests 32-byte alignment of the
C function entry that follows - confirm against the macro definition.
37ASM_START()
38	TEXT
39	ALIGN(32)
C mpn_lshiftc (rp, up, n, cnt)
C
C Shift the n-limb operand at up left by cnt bits, complement every result
C limb, and store the n complemented limbs at rp.  Limbs are 64 bits wide
C (all addressing uses a scale of 8) and are produced from the most
C significant limb down to limb 0.
C
C Result limb i is  not((up[i] << cnt) | (up[i-1] >> (64-cnt)))  for i > 0,
C and  not(up[0] << cnt)  for i = 0.
C
C Returns in %rax the bits shifted out of the top limb, that is
C up[n-1] >> (64-cnt).  The return value is not complemented.
C
C Registers written: %rax, %rcx, %rdx, %r8-%r11.  No stack slots are used.
C
C Variable shifts take their count from cl, so cl is flipped between cnt
C (for shl) and -cnt (for shr) with neg.  A 64-bit shift uses only the low
C 6 bits of cl, so -cnt acts as 64-cnt.
C
C NOTE(review): the code reads up[n-1] unconditionally, so it assumes n >= 1,
C and presumably 1 <= cnt <= 63 (with cnt = 0 the shr by -cnt would shift by
C 0 and return the whole top limb) - confirm against the callers.
40PROLOGUE(mpn_lshiftc)
41	neg	R32(%rcx)		C put rsh count in cl
42	mov	-8(up,n,8), %rax	C most significant source limb, up[n-1]
43	shr	R8(%rcx), %rax		C function return value
44
C Dispatch on n mod 4.  The unrolled code at L(rlx) expects n = 3 mod 4, so
C the other residues first peel off one, two or three of the top limbs.
45	neg	R32(%rcx)		C put lsh count in cl
46	lea	1(n), R32(%r8)		C r8d = n + 1
47	and	$3, R32(%r8)		C r8d = (n + 1) mod 4
48	je	L(rlx)			C jump for n = 3, 7, 11, ...
49
50	dec	R32(%r8)
51	jne	L(1)
52C	n = 4, 8, 12, ...
C Peel the top limb, leaving n = 3 mod 4.
53	mov	-8(up,n,8), %r10
54	shl	R8(%rcx), %r10
55	neg	R32(%rcx)		C put rsh count in cl
56	mov	-16(up,n,8), %r8
57	shr	R8(%rcx), %r8
58	or	%r8, %r10
59	not	%r10
60	mov	%r10, -8(rp,n,8)
61	dec	n
62	jmp	L(rll)			C cl holds the rsh count, L(rll) flips it back
63
64L(1):	dec	R32(%r8)
65	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
66C	n = 2, 6, 10, 14, ...
C Peel the top limb, leaving n = 1 mod 4, then fall into L(1x).
67	mov	-8(up,n,8), %r10
68	shl	R8(%rcx), %r10
69	neg	R32(%rcx)		C put rsh count in cl
70	mov	-16(up,n,8), %r8
71	shr	R8(%rcx), %r8
72	or	%r8, %r10
73	not	%r10
74	mov	%r10, -8(rp,n,8)
75	dec	n
76	neg	R32(%rcx)		C put lsh count in cl
C Here n = 1 mod 4 and cl holds the lsh count.  If n is 1 only limb 0 is
C left and L(ast) handles it.  Otherwise peel two limbs, leaving n = 3 mod 4.
77L(1x):
78	cmp	$1, n
79	je	L(ast)
80	mov	-8(up,n,8), %r10
81	shl	R8(%rcx), %r10
82	mov	-16(up,n,8), %r11
83	shl	R8(%rcx), %r11
84	neg	R32(%rcx)		C put rsh count in cl
85	mov	-16(up,n,8), %r8
86	mov	-24(up,n,8), %r9
87	shr	R8(%rcx), %r8
88	or	%r8, %r10
89	shr	R8(%rcx), %r9
90	or	%r9, %r11
91	not	%r10
92	not	%r11
93	mov	%r10, -8(rp,n,8)
94	mov	%r11, -16(rp,n,8)
95	sub	$2, n
96
C L(rll) is entered with the rsh count in cl, L(rlx) with the lsh count.
C Start the left shifts for the two top remaining limbs; their low bits are
C merged in later, either at L(top) or at L(end).
97L(rll):	neg	R32(%rcx)		C put lsh count in cl
98L(rlx):	mov	-8(up,n,8), %r10
99	shl	R8(%rcx), %r10
100	mov	-16(up,n,8), %r11
101	shl	R8(%rcx), %r11
102
C A borrow here means fewer than four limbs were left, i.e. n was 3.
103	sub	$4, n			C				      4
104	jb	L(end)			C				      2
105	ALIGN(16)
C Invariant at L(top): cl holds the lsh count, n has already been decreased
C by 4, and r10, r11 hold up[n+3] << cnt and up[n+2] << cnt, still waiting
C for the bits shifted in from the limb below.  Each pass stores four result
C limbs, rp[n+3] down to rp[n], and starts the next two.
106L(top):
107	C finish stuff from lsh block
108	neg	R32(%rcx)		C put rsh count in cl
109	mov	16(up,n,8), %r8
110	mov	8(up,n,8), %r9
111	shr	R8(%rcx), %r8
112	or	%r8, %r10
113	shr	R8(%rcx), %r9
114	or	%r9, %r11
115	not	%r10
116	not	%r11
117	mov	%r10, 24(rp,n,8)
118	mov	%r11, 16(rp,n,8)
119	C start two new rsh
120	mov	0(up,n,8), %r8
121	mov	-8(up,n,8), %r9
122	shr	R8(%rcx), %r8
123	shr	R8(%rcx), %r9
124
125	C finish stuff from rsh block
126	neg	R32(%rcx)		C put lsh count in cl
127	mov	8(up,n,8), %r10
128	mov	0(up,n,8), %r11
129	shl	R8(%rcx), %r10
130	or	%r10, %r8
131	shl	R8(%rcx), %r11
132	or	%r11, %r9
133	not	%r8
134	not	%r9
135	mov	%r8, 8(rp,n,8)
136	mov	%r9, 0(rp,n,8)
137	C start two new lsh
138	mov	-8(up,n,8), %r10
139	mov	-16(up,n,8), %r11
140	shl	R8(%rcx), %r10
141	shl	R8(%rcx), %r11
142
143	sub	$4, n
144	jae	L(top)			C				      2
C n is -1 here (it was 3 mod 4 before the subtraction that borrowed).
C Finish the two pending limbs in r10 and r11; with n = -1 the stores
C below land in rp[2] and rp[1].
145L(end):
146	neg	R32(%rcx)		C put rsh count in cl
147	mov	16(up,n,8), %r8
148	shr	R8(%rcx), %r8
149	or	%r8, %r10
150	mov	8(up,n,8), %r9
151	shr	R8(%rcx), %r9
152	or	%r9, %r11
153	not	%r10
154	not	%r11
155	mov	%r10, 24(rp,n,8)
156	mov	%r11, 16(rp,n,8)
157
158	neg	R32(%rcx)		C put lsh count in cl
C Limb 0 has no lower neighbour, so it is just the complemented left shift.
159L(ast):	mov	(up), %r10
160	shl	R8(%rcx), %r10
161	not	%r10
162	mov	%r10, (rp)
163	ret
164EPILOGUE()
165