lshiftc.asm revision 1.1.1.1
1dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
25C			 unsigned cnt);
26
27C				cycles/limb
28C P5
29C P6 model 0-8,10-12
30C P6 model 9  (Banias)
31C P6 model 13 (Dothan)
32C P4 model 0  (Willamette)
33C P4 model 1  (?)
34C P4 model 2  (Northwood)
35C P4 model 3  (Prescott)
36C P4 model 4  (Nocona)
37C Intel Atom			 5.5
38C AMD K6
39C AMD K7
40C AMD K8
41C AMD K10
42
C Argument slots.  The offsets follow the argument order of the prototype
C (dst, src, size, cnt) in 4-byte steps.  defframe is defined outside this
C file; presumably each name expands to a stack-pointer-relative operand
C that is adjusted by the current FRAME value -- confirm against its
C definition.
43defframe(PARAM_CNT, 16)
44defframe(PARAM_SIZE,12)
45defframe(PARAM_SRC,  8)
46defframe(PARAM_DST,  4)
47
48dnl  re-use parameter space
C Each slot is overwritten only after the argument it carried has been
C loaded into a register:
C   SAVE_UP    (slot of cnt)   keeps the entry value of %esi
C   VAR_COUNT  (slot of size)  keeps the loop counter, size/2 initially
C   SAVE_EBX   (slot of src)   keeps the entry value of %ebx
C   SAVE_EBP   (slot of dst)   keeps the entry value of %ebp
49define(SAVE_UP,`PARAM_CNT')
50define(VAR_COUNT,`PARAM_SIZE')
51define(SAVE_EBX,`PARAM_SRC')
52define(SAVE_EBP,`PARAM_DST')
53
C Symbolic register names used in the function body.
54define(`rp',  `%edi')
55define(`up',  `%esi')
56define(`cnt',  `%ecx')
57
58ASM_START()
59	TEXT
60
C mpn_lshiftc -- shift {src,size} left by cnt bits and store the bitwise
C complement of the result at {dst,size}.
C
C Both operands are walked from the highest limb down to the lowest.  Every
C result limb except the lowest is
C     not ((limb[i] << cnt) | (limb[i-1] >> (32-cnt)))
C and the lowest is not (limb[0] << cnt).  The main loop produces two limbs
C per pass.
C
C Register use:
C   rp (%edi)    destination pointer, moving downwards
C   up (%esi)    source pointer, moving downwards
C   %cl          toggled between cnt and -cnt with neg.  32-bit x86 shifts
C                use only the low 5 bits of %cl, so -cnt shifts by 32-cnt
C                for cnt in 1..31.
C   %ebp, %ebx   copies of source limbs waiting to be shifted left by cnt
C   %eax, %edx   source limbs shifted right by 32-cnt
C   %eax         at exit holds the top source limb shifted right by 32-cnt,
C                presumably the mp_limb_t result of the prototype
C
C %esi, %ebx and %ebp are stored into argument slots once those arguments
C have been read, %edi is pushed, and the values are reloaded before ret.
C
C NOTE(review): size 0 is not checked; the first load reads the limb at
C src + 4*size - 4.  Presumably callers guarantee size >= 1 -- confirm.
61PROLOGUE(mpn_lshiftc)
62deflit(`FRAME',0)
C Load the arguments, saving %esi and %ebx into slots already consumed.
63	mov	PARAM_CNT, cnt
64	mov	PARAM_SIZE, %edx
65	mov	up, SAVE_UP
66	mov	PARAM_SRC, up
67	push	rp			FRAME_pushl()
68	mov	PARAM_DST, rp
69
C Point up and rp at the highest limb of each operand.
70	lea	-4(up,%edx,4), up
71	mov	%ebx, SAVE_EBX
72	lea	-4(rp,%edx,4), rp
73
C %edx = size/2, carry = low bit of size.  The two mov instructions below
C do not touch the flags, so jnc still tests the parity of size.
74	shr	%edx
75	mov	(up), %eax
76	mov	%edx, VAR_COUNT
77	jnc	L(evn)
78
C Odd size.  %ebx = top limb << cnt, %eax = top limb >> (32-cnt).
C After the neg, %cl holds -cnt.
79	mov	%eax, %ebx
80	shl	%cl, %ebx
81	neg	cnt
82	shr	%cl, %eax
C size/2 == 0 here means size == 1: store the single complemented limb
C and leave; %ebp has not been touched on this path.
83	test	%edx, %edx
84	jnz	L(gt1)
85	not	%ebx
86	mov	%ebx, (rp)
87	jmp	L(quit)
88
C Odd size above 1.  Park the out-shifted bits on the stack, fetch the next
C lower limb and enter the loop at its second half with %cl = -cnt.
C NOTE(review): this push carries no FRAME_pushl().  That looks deliberate:
C FRAME appears to be tracked in text order, and the code that follows in
C the text, L(evn), is entered with only the push of rp outstanding.
C Confirm against the FRAME_pushl definition before changing it.
89L(gt1):	mov	%ebp, SAVE_EBP
90	push	%eax
91	mov	-4(up), %eax
92	mov	%eax, %ebp
93	shr	%cl, %eax
94	jmp	L(lo1)
95
C Even size.  %ebp = top limb, %ebx = second limb, and with %cl = -cnt
C %eax = top limb >> (32-cnt), %edx = second limb >> (32-cnt).
C The second neg puts cnt back into %cl.
96L(evn):	mov	%ebp, SAVE_EBP
97	neg	cnt
98	mov	%eax, %ebp
99	mov	-4(up), %edx
100	shr	%cl, %eax
101	mov	%edx, %ebx
102	shr	%cl, %edx
103	neg	cnt
C One limb pair is handled by the code at L(end), so count it off here.
C lea does not modify the flags, so jz still sees the result of decl.
C rp is biased up and up biased down by one limb to suit the offsets used
C at L(top) and L(end).
104	decl	VAR_COUNT
105	lea	4(rp), rp
106	lea	-4(up), up
107	jz	L(end)
C Keep the out-shifted bits on the stack while %eax is used in the loop.
108	push	%eax			FRAME_pushl()
109
C Loop, two limbs per pass.
C At L(top): %cl = cnt, %ebp = limb[i], %ebx = limb[i-1],
C            %edx = limb[i-1] >> (32-cnt).
C First half: finish and store result limb i at -4(rp), then load
C limb[i-2] into %eax and %ebp and switch %cl to -cnt.
110L(top):	shl	%cl, %ebp
111	or	%ebp, %edx
112	shl	%cl, %ebx
113	neg	cnt
114	not	%edx
115	mov	-4(up), %eax
116	mov	%eax, %ebp
117	mov	%edx, -4(rp)
118	shr	%cl, %eax
119	lea	-8(rp), rp
C At L(lo1): %cl = -cnt, %ebx = upper limb << cnt,
C            %eax = next lower limb >> (32-cnt), %ebp = that lower limb.
C Second half: combine and store one result limb at (rp), load the next
C source limb into %edx and %ebx, and switch %cl back to cnt.
120L(lo1):	mov	-8(up), %edx
121	or	%ebx, %eax
122	mov	%edx, %ebx
123	shr	%cl, %edx
124	not	%eax
125	lea	-8(up), up
126	neg	cnt
127	mov	%eax, (rp)
C Repeat while the pair counter is still positive.
128	decl	VAR_COUNT
129	jg	L(top)
130
C Recover the out-shifted bits of the top limb into %eax.
131	pop	%eax			FRAME_popl()
C Wind down: the last two result limbs.  -4(rp) receives
C not ((%ebp << cnt) | %edx) and -8(rp), the lowest limb, receives
C not (%ebx << cnt).  %ebp is reloaded once its last use is done.
132L(end):
133	shl	%cl, %ebp
134	shl	%cl, %ebx
135	or	%ebp, %edx
136	mov	SAVE_EBP, %ebp
137	not	%edx
138	not	%ebx
139	mov	%edx, -4(rp)
140	mov	%ebx, -8(rp)
141
C Common exit: reload %esi and %ebx from their slots and pop %edi.
142L(quit):
143	mov	SAVE_UP, up
144	mov	SAVE_EBX, %ebx
145	pop	rp			FRAME_popl()
146	ret
147EPILOGUE()
148ASM_END()
149