1dnl  X86-64 mpn_add_n, mpn_sub_n, optimised for Intel Atom.
2
3dnl  Copyright 2011, 2017 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Marco Bodrato.  Ported to 64-bit by
6dnl  Torbjörn Granlund.
7
8dnl  This file is part of the GNU MP Library.
9dnl
10dnl  The GNU MP Library is free software; you can redistribute it and/or modify
11dnl  it under the terms of either:
12dnl
13dnl    * the GNU Lesser General Public License as published by the Free
14dnl      Software Foundation; either version 3 of the License, or (at your
15dnl      option) any later version.
16dnl
17dnl  or
18dnl
19dnl    * the GNU General Public License as published by the Free Software
20dnl      Foundation; either version 2 of the License, or (at your option) any
21dnl      later version.
22dnl
23dnl  or both in parallel, as here.
24dnl
25dnl  The GNU MP Library is distributed in the hope that it will be useful, but
26dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28dnl  for more details.
29dnl
30dnl  You should have received copies of the GNU General Public License and the
31dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
32dnl  see https://www.gnu.org/licenses/.
33
34include(`../config.m4')
35
36C	    cycles/limb
37C AMD K8,K9	 2
38C AMD K10	 2
39C AMD bull	 2.34\2.63
40C AMD pile	 2.27\2.52
41C AMD steam
42C AMD excavator
43C AMD bobcat	 2.79
44C AMD jaguar	 2.78
45C Intel P4	11
46C Intel core2	 7.5
47C Intel NHM	 8.5
48C Intel SBR	 2.11
49C Intel IBR	 2.07
50C Intel HWL	 1.75
51C Intel BWL	 1.51
52C Intel SKL	 1.52
53C Intel atom	 3
54C Intel SLM	 4
55C VIA nano
56
C Symbolic names for the operands used by the code in this file:
C   rp  destination pointer (results are stored through it)
C   up  first source pointer (limbs are loaded from it into registers)
C   vp  second source pointer (used as the memory operand of ADCSBB)
C   n   limb count
C   cy  carry-in; only bit 0 is consumed, and only the _nc entry point
C       takes it from the caller
C The macro value is the register the code works with.  The trailing comment
C on each line names a second location, presumably where the same argument
C arrives under the DOS64 ABI before FUNC_ENTRY runs -- confirm against the
C FUNC_ENTRY definition.
57define(`rp',	`%rdi')	C rcx
58define(`up',	`%rsi')	C rdx
59define(`vp',	`%rdx')	C r8
60define(`n',	`%rcx')	C r9
61define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
62
C Select the operation this build of the file provides.
C   OPERATION_add_n defined: ADCSBB is adc, entry points are mpn_add_n and
C                            mpn_add_nc.
C   OPERATION_sub_n defined: ADCSBB is sbb, entry points are mpn_sub_n and
C                            mpn_sub_nc.
C ADCSBB is the only instruction that differs between the two variants.
C There is no fallback: if neither symbol is defined, this file defines none
C of ADCSBB, func_n and func_nc.
63ifdef(`OPERATION_add_n', `
64  define(ADCSBB,    adc)
65  define(func_n,    mpn_add_n)
66  define(func_nc,   mpn_add_nc)')
67ifdef(`OPERATION_sub_n', `
68  define(ADCSBB,    sbb)
69  define(func_n,    mpn_sub_n)
70  define(func_nc,   mpn_sub_nc)')
71
C The first line below lists the four entry points this one source can
C provide (two per OPERATION_* setting); the other two list the ABIs the
C code is written for.
C NOTE(review): both macros are defined outside this file; presumably they
C are declarations consumed by the build system rather than code -- confirm.
72MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
73
74ABI_SUPPORT(DOS64)
75ABI_SUPPORT(STD64)
76
C func_n (rp, up, vp, n)
C
C For each of the n 64-bit limbs, in order from the lowest address:
C   adc variant:  rp[i] = up[i] + vp[i] + carry
C   sbb variant:  rp[i] = up[i] - vp[i] - borrow
C where the carry/borrow comes from limb i-1 and is zero for limb 0.
C Returns the carry/borrow out of the last limb (0 or 1) in %rax.
C
C Registers written here: rp, up, vp, n, cy, %r9, %r10, %rax and the flags.
C
C Invariant: from the shr of cy until the final adc, CF holds the carry
C between limbs.  Every instruction placed between two ADCSBB is therefore
C one that leaves CF alone (mov, lea, dec, jumps).
C
C Structure: the loop handles two limbs per pass.  n is halved on entry and
C then counts passes; the bit shifted out of n selects where the loop is
C entered.  Exactly one limb is always left in %r9 for L(end).
77ASM_START()
78	TEXT
79	ALIGN(16)
80PROLOGUE(func_n)
81	FUNC_ENTRY(4)
82	xor	cy, cy			C plain entry point: carry-in is zero
83
C Common path; the _nc entry point jumps here with its carry-in in cy.
C shr by one leaves the old bit 0 of n in CF and sets ZF if the result is 0.
84L(com):	shr	n			C n >> 1
C Also taken for n == 0, which would then process one limb; this assumes
C callers always pass n >= 1 -- TODO confirm.
85	jz	L(1)			C n == 1
86	jc	L(1m2)			C n % 2 == 1
87
C Even n.  Enter the loop at L(mid), which expects the limb to combine in
C %r10, the matching vp limb at -8(vp), the next up limb at (up), and rp such
C that after rp += 16 the store to -8(rp) hits the right slot.  Bias the three
C pointers by one limb so that all of this refers to limb 0 / limb 1.
88L(0m2):	shr	cy			C CF = bit 0 of cy (carry-in)
89	mov	(up), %r10		C limb 0
90	lea	8(up), up
91	lea	8(vp), vp
92	lea	-8(rp), rp
93	jmp	L(mid)
94
C Single limb: load it and go straight to the tail.
95L(1):	shr	cy			C CF = bit 0 of cy (carry-in)
96	mov	(up), %r9
97	jmp	L(end)
98
C Odd n with at least one full pair: load limb 0 and fall into the loop.
99L(1m2):	shr	cy			C CF = bit 0 of cy (carry-in)
100	mov	(up), %r9
101
102	ALIGN(16)
C Each pass: combine the limb preloaded in %r9, then the next limb via %r10,
C and preload %r9 again for the following pass or for L(end).
103L(top):	ADCSBB	(vp), %r9		C first limb of the pair
104	lea	16(up), up
105	mov	-8(up), %r10		C second limb of the pair
106	lea	16(vp), vp
107	mov	%r9, (rp)
108L(mid):	ADCSBB	-8(vp), %r10
109	lea	16(rp), rp
110	dec	n			C sets ZF for the jnz below; CF is untouched
111	mov	(up), %r9		C preload: consumed at L(top) or at L(end)
112	mov	%r10, -8(rp)
113	jnz	L(top)
114
C Tail: combine the last limb, then turn the final CF into the return value.
C R32(%rax) is presumably the 32-bit name of %rax -- confirm against the
C macro definition.
115L(end):	ADCSBB	(vp), %r9		C last limb
116	mov	$0, R32(%rax)		C zero via mov, which does not disturb CF
117	mov	%r9, (rp)
118	adc	R32(%rax), R32(%rax)	C 0 + 0 + CF
119	FUNC_EXIT()
120	ret
121EPILOGUE()
122
C func_nc (rp, up, vp, n, cy)
C
C Same operation as func_n, but the carry/borrow into limb 0 is taken from
C the cy argument instead of being zero.  Only bit 0 of cy is used: the code
C at L(com) shifts it into CF with a one-bit shr.
C
C This entry point only gets cy into place and then jumps to L(com) inside
C func_n; the loop, FUNC_EXIT and ret are shared with func_n.
123PROLOGUE(func_nc)
124	FUNC_ENTRY(4)
C DOS64 only: fetch the fifth argument from the stack.  The offset is taken
C after FUNC_ENTRY(4) has run, so it need not match the rsp+40 noted next to
C the cy define -- presumably FUNC_ENTRY moves the stack pointer by 16 under
C DOS64; confirm against its definition.  Without the load, cy is used as it
C stands in %r8, presumably already the fifth argument under STD64.
125IFDOS(`	mov	56(%rsp), cy	')
126	jmp	L(com)
127EPILOGUE()
128ASM_END()
129