1/*-
 * The white paper on the AES-NI instructions can be downloaded from:
3 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
4 *
5 * Copyright (C) 2008-2010, Intel Corporation
6 *    Author: Huang Ying <ying.huang@intel.com>
7 *            Vinodh Gopal <vinodh.gopal@intel.com>
8 *            Kahraman Akdemir
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above copyright
15 *   notice, this list of conditions and the following disclaimer.
16 *
17 * - Redistributions in binary form must reproduce the above copyright
18 *   notice, this list of conditions and the following disclaimer in the
19 *   documentation and/or other materials provided with the
20 *   distribution.
21 *
22 * - Neither the name of Intel Corporation nor the names of its
23 *   contributors may be used to endorse or promote products
24 *   derived from this software without specific prior written
25 *   permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
31 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <machine/asmacros.h>
41
42	.text
43
/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * One key-schedule step on the four 32-bit words held in %xmm0.
 *
 * In:   %xmm0 = key words (w0, w1, w2, w3), updated in place
 *       %xmm1 = aeskeygenassist result; only dword 3 is consumed
 *       %xmm4 = scratch whose low dword must be zero
 *       %edx  = destination slot (stored with movaps, so 16-byte aligned)
 * Out:  %xmm0 = new key words, also written to (%edx)
 *       %edx  = advanced by 0x10
 * Clobbers: %xmm1, %xmm4 (its low dword is still zero on return), flags.
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
	.cfi_startproc
	/*
	 * Running XOR across the lanes:
	 * xmm0 = (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
	 */
	shufps	$0x10,%xmm0,%xmm4	/* xmm4 = (0, 0, w1, w0) */
	pxor	%xmm4,%xmm0
	shufps	$0x8c,%xmm0,%xmm4	/* xmm4 = (0, w0, w0, w1^w2) */
	pxor	%xmm4,%xmm0

	/* Broadcast dword 3 of the keygen-assist value and fold it in. */
	pshufd	$0xff,%xmm1,%xmm1
	pxor	%xmm1,%xmm0

	/* Emit the round key and step to the next slot. */
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_128)
58
/*
 * _key_expansion_192a
 *
 * 192-bit key-schedule step that emits 32 bytes of schedule.
 *
 * In:   %xmm0 = four key words, updated in place
 *       %xmm1 = aeskeygenassist result; only dword 1 is consumed
 *       %xmm2 = remaining key words (dwords 0 and 1 are the ones stored)
 *       %xmm4 = scratch whose low dword must be zero
 *       %edx  = destination (stored with movaps, so 16-byte aligned)
 * Out:  (%edx)     = (old xmm2[0], old xmm2[1], new xmm0[0], new xmm0[1])
 *       0x10(%edx) = (new xmm0[2], new xmm0[3], new xmm2[0], new xmm2[1])
 *       %xmm0, %xmm2 = updated key words
 *       %edx  = advanced by 0x20
 * Clobbers: %xmm1, %xmm3, %xmm4 (low dword stays zero), %xmm5, %xmm6, flags.
 */
ENTRY(_key_expansion_192a)
	.cfi_startproc
	/* Keep the incoming xmm2 around; its low half opens the first store. */
	movaps	%xmm2,%xmm6

	/* Running XOR across the lanes of xmm0, then fold in the assist word. */
	shufps	$0x10,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0x8c,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pshufd	$0x55,%xmm1,%xmm1	/* broadcast dword 1 */
	pxor	%xmm1,%xmm0

	/* xmm2 ^= (xmm2 shifted up one dword) ^ broadcast(new xmm0[3]). */
	pshufd	$0xff,%xmm0,%xmm3
	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2

	/* First 16 bytes: old xmm2 low half followed by new xmm0 low half. */
	shufps	$0x44,%xmm0,%xmm6
	movaps	%xmm6,(%edx)

	/* Next 16 bytes: new xmm0 high half followed by new xmm2 low half. */
	movaps	%xmm0,%xmm1
	shufps	$0x4e,%xmm2,%xmm1
	movaps	%xmm1,0x10(%edx)

	addl	$0x20,%edx
	retl
	.cfi_endproc
END(_key_expansion_192a)
82
/*
 * _key_expansion_192b
 *
 * 192-bit key-schedule step that emits 16 bytes of schedule.
 *
 * In:   %xmm0 = four key words, updated in place
 *       %xmm1 = aeskeygenassist result; only dword 1 is consumed
 *       %xmm2 = remaining key words, updated in place
 *       %xmm4 = scratch whose low dword must be zero
 *       %edx  = destination slot (stored with movaps, so 16-byte aligned)
 * Out:  (%edx) = new %xmm0
 *       %xmm0, %xmm2 = updated key words
 *       %edx  = advanced by 0x10
 * Clobbers: %xmm1, %xmm3, %xmm4 (low dword stays zero), %xmm5, flags.
 */
ENTRY(_key_expansion_192b)
	.cfi_startproc
	/* Running XOR across the lanes of xmm0, then fold in the assist word. */
	shufps	$0x10,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0x8c,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pshufd	$0x55,%xmm1,%xmm1	/* broadcast dword 1 */
	pxor	%xmm1,%xmm0

	/* xmm2 ^= (xmm2 shifted up one dword) ^ broadcast(new xmm0[3]). */
	pshufd	$0xff,%xmm0,%xmm3
	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2

	/* Only xmm0 is written out by this variant. */
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_192b)
101
/*
 * _key_expansion_256b
 *
 * Key-schedule step for the second half of a 256-bit key; works on
 * %xmm2 the way _key_expansion_256a works on %xmm0, but takes dword 2
 * of the keygen-assist value.
 *
 * In:   %xmm2 = key words (w0, w1, w2, w3), updated in place
 *       %xmm1 = aeskeygenassist result; only dword 2 is consumed
 *       %xmm4 = scratch whose low dword must be zero
 *       %edx  = destination slot (stored with movaps, so 16-byte aligned)
 * Out:  %xmm2 = new key words, also written to (%edx)
 *       %edx  = advanced by 0x10
 * Clobbers: %xmm1, %xmm4 (its low dword is still zero on return), flags.
 */
ENTRY(_key_expansion_256b)
	.cfi_startproc
	/*
	 * Running XOR across the lanes:
	 * xmm2 = (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
	 */
	shufps	$0x10,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	shufps	$0x8c,%xmm2,%xmm4
	pxor	%xmm4,%xmm2

	/* Broadcast dword 2 of the keygen-assist value and fold it in. */
	pshufd	$0xaa,%xmm1,%xmm1
	pxor	%xmm1,%xmm2

	/* Emit the round key and step to the next slot. */
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_256b)
115
/*
 * aesni_set_enckey: expand a user AES key into an encryption key schedule.
 *
 * Stack arguments (addressed through %ebp after the prologue):
 *    8(%ebp)  user key; read with unaligned loads (movups/movq)
 *   12(%ebp)  output schedule; written with movaps, so it must be
 *             16-byte aligned
 *   16(%ebp)  number of rounds (unsigned compare against 12):
 *             below 12      -> 128-bit schedule, 11 round keys written
 *             exactly 12    -> 192-bit schedule, 13 round keys written
 *             anything else -> 256-bit schedule, 15 round keys written
 *
 * Register roles:
 *   %ecx  = user key pointer
 *   %edx  = next free schedule slot; advanced by the _key_expansion_*
 *           helpers as they store
 *   %xmm0 = running key words (first 16 bytes of the key initially)
 *   %xmm2 = remaining key words for the 192/256-bit paths
 *   %xmm1 = aeskeygenassist result handed to the helper
 *   %xmm4 = zeroed here; the helpers rely on its low dword being zero
 * Clobbers: %ecx, %edx, %xmm0-%xmm6, flags.
 *
 * The AES-NI instructions are emitted as raw .byte sequences with the
 * mnemonic in the comment above each one (presumably so the file also
 * builds with assemblers that lack AES-NI support).
 *
 * All three key-size paths have their own epilogue inside one CFI
 * region.  The frame description is saved before each early epilogue
 * and restored after it, so every path is described with the CFA offset
 * that is actually in effect instead of an accumulating adjustment.
 */
ENTRY(aesni_set_enckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp
	movl	8(%ebp),%ecx		/* ecx = user key */
	movl	12(%ebp),%edx		/* edx = schedule */
	movups	(%ecx),%xmm0		/* user key (first 16 bytes) */
	movaps	%xmm0,(%edx)		/* round key 0 is the key itself */
	addl	$0x10,%edx		/* next schedule slot */
	pxor	%xmm4,%xmm4		/* xmm4 is assumed 0 in _key_expansion_x */
	cmpl	$12,16(%ebp)		/* rounds */
	jb	.Lenc_key128
	je	.Lenc_key192

	/* 256-bit key: the second 16 key bytes form round key 1. */
	movups	0x10(%ecx),%xmm2	/* other user key */
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_256a
//	aeskeygenassist $0x1,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_256b
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_256a
//	aeskeygenassist $0x2,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_256b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_256a
//	aeskeygenassist $0x4,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_256b
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_256a
//	aeskeygenassist $0x8,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_256b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_256a
//	aeskeygenassist $0x10,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_256b
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_256a
//	aeskeygenassist $0x20,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_256b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_256a
	.cfi_remember_state		/* frame is still live on the other paths */
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_restore_state

.Lenc_key192:
	/* 192-bit key: the trailing 8 key bytes go in the low half of xmm2. */
	movq	0x10(%ecx),%xmm2		/* other user key */
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_192a
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_192b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_192a
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_192b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_192a
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_192b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_192a
//	aeskeygenassist $0x80,%xmm2,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x80
	call	_key_expansion_192b
	.cfi_remember_state		/* frame is still live on the 128-bit path */
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_restore_state

.Lenc_key128:
	/* 128-bit key: ten steps, each deriving the next key from xmm0. */
//	aeskeygenassist $0x1,%xmm0,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_128
//	aeskeygenassist $0x2,%xmm0,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_128
//	aeskeygenassist $0x4,%xmm0,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_128
//	aeskeygenassist $0x8,%xmm0,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_128
//	aeskeygenassist $0x10,%xmm0,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_128
//	aeskeygenassist $0x20,%xmm0,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_128
//	aeskeygenassist $0x40,%xmm0,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x40
	call	_key_expansion_128
//	aeskeygenassist $0x80,%xmm0,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x80
	call	_key_expansion_128
//	aeskeygenassist $0x1b,%xmm0,%xmm1	# round 9
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x1b
	call	_key_expansion_128
//	aeskeygenassist $0x36,%xmm0,%xmm1	# round 10
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x36
	call	_key_expansion_128
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_enckey)
240
/*
 * aesni_set_deckey: derive the decryption key schedule from an
 * encryption key schedule.
 *
 * Stack arguments (addressed through %ebp after the prologue):
 *    8(%ebp)  encryption schedule (rounds + 1 entries of 16 bytes)
 *   12(%ebp)  decryption schedule to fill in (same size)
 *   16(%ebp)  number of rounds
 *
 * The encryption schedule is walked from its last entry down to its
 * first while the decryption schedule is filled front to back.  The two
 * end entries are copied unchanged; every entry in between goes through
 * aesimc.  Both schedules are accessed with aligned 16-byte operations
 * (movdqa, aesimc with a memory operand), so both must be 16-byte aligned.
 *
 * The middle loop is a do-while that runs (rounds - 1) times, so it
 * expects rounds to be at least 2.
 *
 * Clobbers: %eax, %ecx, %edx, %xmm0, %xmm1, flags.
 */
ENTRY(aesni_set_deckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp

	movl	12(%ebp),%edx	/* decrypt_schedule */
	movl	16(%ebp),%eax	/* rounds */
	movl	%eax,%ecx
	shll	$4,%ecx		/* byte offset of the last entry */
	addl	8(%ebp),%ecx	/* encrypt_schedule last quad */

	/* First decryption entry = last encryption entry, unchanged. */
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)
	decl	%eax		/* rounds - 1 middle entries remain */

.Ldeckey_middle:
	addl	$0x10,%edx
	subl	$0x10,%ecx
//	aesimc	(%ecx),%xmm1
	.byte	0x66,0x0f,0x38,0xdb,0x09
	movdqa	%xmm1,(%edx)
	decl	%eax
	jne	.Ldeckey_middle

	/* Last decryption entry = first encryption entry, unchanged. */
	addl	$0x10,%edx
	subl	$0x10,%ecx
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)

	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_deckey)
272
273	.ident	"$FreeBSD$"
274