/* AMD64 submul_1 -- Multiply a limb vector with a limb and subtract
 *		      the result from a second limb vector.
 *
 *      Copyright (C) 1992, 1994, 1998,
 *                    2001, 2002, 2006 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing sensitive data due to paging etc.
 */


#include "sysdep.h"
#include "asm-syntax.h"


/*******************
 * mpi_limb_t
 * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,   (rdi)
 *		     mpi_ptr_t s1_ptr,	     (rsi)
 *		     mpi_size_t s1_size,     (rdx)
 *		     mpi_limb_t s2_limb)     (rcx)
 */
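
/* For reference, a rough C model of what this routine computes.
 * This is a sketch only: it assumes a 64-bit mpi_limb_t and a
 * compiler providing unsigned __int128; it is not the code that
 * libgcrypt actually builds for the generic case.
 *
 *	mpi_limb_t cy = 0;
 *	for (mpi_size_t j = 0; j < s1_size; j++) {
 *	    unsigned __int128 prod = (unsigned __int128)s1_ptr[j] * s2_limb;
 *	    mpi_limb_t lo = (mpi_limb_t)prod;
 *	    mpi_limb_t hi = (mpi_limb_t)(prod >> 64);
 *	    mpi_limb_t x  = res_ptr[j];
 *	    lo += cy;			// fold in borrow from the limb below
 *	    hi += (lo < cy);		// which may carry into the high half
 *	    res_ptr[j] = x - lo;
 *	    cy = hi + ((x - lo) > x);	// borrow out of this limb
 *	}
 *	return cy;			// most significant borrow
 */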
	TEXT
	GLOBL	C_SYMBOL_NAME(_gcry_mpih_submul_1)
C_SYMBOL_NAME(_gcry_mpih_submul_1:)

	movq	%rdx, %r11		/* save s1_size; mulq clobbers rdx */
	leaq	(%rsi,%r11,8), %rsi	/* point one past the end of s1 */
	leaq	(%rdi,%r11,8), %rdi	/* point one past the end of res */
	negq	%r11			/* index runs from -s1_size up to 0 */
	xorl	%r8d, %r8d		/* clear the carry (borrow) limb */

	ALIGN(3)			/* minimal alignment for claimed speed */
.Loop:	movq	(%rsi,%r11,8), %rax	/* rax = s1 limb */
	movq	(%rdi,%r11,8), %r10	/* r10 = res limb */
	mulq	%rcx			/* rdx:rax = s1 limb * s2_limb */
	subq	%r8, %r10		/* subtract carry-in from res limb */
	movl	$0, %r8d		/* zero r8 without touching the flags */
	adcl	%r8d, %r8d		/* r8 = borrow from that subtraction */
	subq	%rax, %r10		/* subtract low product limb */
	adcq	%rdx, %r8		/* carry-out = high limb + both borrows */
	movq	%r10, (%rdi,%r11,8)	/* store result limb */
	incq	%r11
	jne	.Loop			/* until the index reaches zero */
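
/* Note that the final adcq cannot overflow: with B = 2^64, the amount
 * subtracted per limb is at most (B-1) + (B-1)*(B-1) = B*(B-1), so the
 * borrow propagated out always fits in a single limb. */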

	movq	%r8, %rax		/* return the borrow limb */
	ret