1dnl  x86 mpn_copyi -- copy limb vector, incrementing.
2
3dnl  Copyright 1999-2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C     cycles/limb  startup (approx)
35C P5	  1.0	      35
36C P6	  0.75	      45
37C K6	  1.0	      30
38C K7	  1.3	      65
39C P4	  1.0	     120
40C
41C (Startup time includes some function call overheads.)
42
43
44C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
45C
46C Copy src,size to dst,size, working from low to high addresses.
47C
48C The code here is very generic and can be expected to be reasonable on all
49C the x86 family.
50C
51C P6 -  An MMX based copy was tried, but was found to be slower than a rep
52C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
53C       fully aligned).  A rep movs seems to have a startup time of about 15
54C       cycles, but doing something special for small sizes could lead to a
55C       branch misprediction that would destroy any saving.  For now a plain
56C       rep movs seems ok.
57C
58C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
59C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
60C       it only got an advantage half the time, and even then only showed it
61C       above 50 limbs or so.
62
defframe(PARAM_DST, 4)
defframe(PARAM_SRC, 8)
defframe(PARAM_SIZE,12)
deflit(`FRAME',0)

	TEXT
	ALIGN(32)

	C Register roles inside mpn_copyi:
	C   eax   keeps the incoming esi while esi is borrowed
	C   edx   keeps the incoming edi while edi is borrowed
	C   ecx   repeat count for the string move (size, in limbs)
	C   esi   source pointer, advanced by the string move
	C   edi   destination pointer, advanced by the string move
	C   ebx, ebp   not used

PROLOGUE(mpn_copyi)

	C Park esi and edi in scratch registers instead of pushing them,
	C so nothing is pushed and the parameter offsets stay as defined.
	movl	%esi, %eax
	movl	%edi, %edx

	C Load the three stack parameters into the registers that the
	C string instruction uses implicitly.
	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi
	movl	PARAM_SIZE, %ecx

	cld	C make the copy run upwards regardless of caller state, see mpn/x86/README

	C Move ecx 32-bit words from (esi) to (edi), low address first.
	C A count of zero moves nothing.
	rep
	movsl

	C Hand esi and edi back to the caller unchanged.
	movl	%edx, %edi
	movl	%eax, %esi

	ret

EPILOGUE()
100