1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6#ifdef  __arch64__
7.register	%g2,#scratch
8.register	%g3,#scratch
9#endif
10
11.section	".text",#alloc,#execinstr
12
! rem_4bit: sixteen 8-byte constants used by gcm_ghash_4bit and
! gcm_gmult_4bit.  Those routines take the four bits about to be shifted
! out of the low accumulator word, scale them by 8 and fetch the matching
! entry with ldx, then xor it into the high accumulator word.
! Each entry is written as two 32-bit words; the second is always zero, so
! with big-endian ldx the first word lands in bits 63..32.  First words in
! hex, entry 0 to 15, each shifted left by 16:
!   0000 1C20 3840 2460 7080 6CA0 48C0 54E0
!   E100 FD20 D940 C560 9180 8DA0 A9C0 B5E0
13.align	64
14rem_4bit:
15	.long	0,0,471859200,0,943718400,0,610271232,0
16	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
17	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
18	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
19.type	rem_4bit,#object
20.size	rem_4bit,(.-rem_4bit)
21
! gcm_ghash_4bit -- table-driven hash update over a buffer, four bits per
! step.
!
! Incoming registers, as used by the code:
!   %i0  16-byte state; read bytewise, rewritten as two 64-bit words
!        ([%i0] = high half, [%i0+8] = low half) after every input block
!   %i1  table of sixteen 16-byte entries; entry n is at %i1+16*n with
!        the high 64 bits at +0 and the low 64 bits at +8
!   %i2  input pointer, consumed 16 bytes at a time
!   %i3  input length in bytes (turned into an end pointer below)
! NOTE(review): the first block is processed before any length test and
! the loop ends only when %i2 equals the end pointer exactly, so the
! length is presumably a nonzero multiple of 16 -- confirm against callers.
!
! Working registers:
!   %o0:%o1  accumulator Z (high:low)
!   %o2:%o3  table entry T (high:low) for the current nibble
!   %o4      rem_4bit entry for the four bits last shifted out of Z
!   %o5      low four bits of Z.high, moved to the top of Z.low
!   %l0/%l1  high/low nibble of the current byte as table offsets (n*16)
!   %l2/%l3  state bytes 15 and 14 (%l3 later holds the next state byte)
!   %l4      address of rem_4bit      %l5  rem_4bit offset (n*8)
!   %l6      %i1+8                    %l7  index of the next byte to fetch
!
! The code is software-pipelined: the xors of T.high and of the rem_4bit
! entry into Z.high are deferred into the following nibble step, and the
! next input/state bytes are loaded while the current ones are processed.
22.globl	gcm_ghash_4bit
23.align	32
24gcm_ghash_4bit:
25	save	%sp,-STACK_FRAME,%sp
26	ldub	[%i2+15],%l1		! last byte of first input block
27	ldub	[%i0+15],%l2		! state byte 15
28	ldub	[%i0+14],%l3		! state byte 14
29	add	%i3,%i2,%i3		! %i3 = end of input
30	add	%i1,8,%l6		! %l6 = base for low halves of table
31
! Position-independent address of rem_4bit: call leaves the address of
! label 1 in %o7, and the delay slot adds the distance to the table.
321:	call	.+8
33	add	%o7,rem_4bit-1b,%l4
34
! One pass of .Louter per 16-byte input block.
35.Louter:
36	xor	%l2,%l1,%l1		! byte 15 of (state xor input)
37	and	%l1,0xf0,%l0		! high nibble, already scaled by 16
38	and	%l1,0x0f,%l1
39	sll	%l1,4,%l1		! low nibble scaled by 16
40	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
41	ldx	[%i1+%l1],%o0
42
43	ldub	[%i2+14],%l1		! next input byte
44
! High nibble of byte 15: Z is shifted right by four, T.low is folded in.
45	ldx	[%l6+%l0],%o3
46	and	%o1,0xf,%l5		! bits about to be shifted out
47	ldx	[%i1+%l0],%o2
48	sll	%l5,3,%l5
49	ldx	[%l4+%l5],%o4
50	srlx	%o1,4,%o1
51	mov	13,%l7			! bytes 13..0 are fetched in the loop
52	sllx	%o0,60,%o5
53	xor	%o3,%o1,%o1
54	srlx	%o0,4,%o0
55	xor	%o1,%o5,%o1
56
57	xor	%l3,%l1,%l1		! byte 14 of (state xor input)
58	and	%o1,0xf,%l5
59	and	%l1,0xf0,%l0
60	and	%l1,0x0f,%l1
61	ba	.Lghash_inner
62	sll	%l1,4,%l1		! delay slot
63.align	32
! Each pass handles one byte: first its low nibble (offset %l1), then its
! high nibble (offset %l0), while fetching byte %l7 of input and state.
64.Lghash_inner:
65	ldx	[%l6+%l1],%o3
66	sll	%l5,3,%l5
67	xor	%o2,%o0,%o0		! deferred: Z.high ^= previous T.high
68	ldx	[%i1+%l1],%o2
69	srlx	%o1,4,%o1
70	xor	%o4,%o0,%o0		! deferred: Z.high ^= previous rem_4bit entry
71	ldx	[%l4+%l5],%o4
72	sllx	%o0,60,%o5
73	xor	%o3,%o1,%o1
74	ldub	[%i2+%l7],%l1		! next input byte
75	srlx	%o0,4,%o0
76	xor	%o1,%o5,%o1
77	ldub	[%i0+%l7],%l3		! next state byte
78	xor	%o2,%o0,%o0
79	and	%o1,0xf,%l5
80
81	ldx	[%l6+%l0],%o3
82	sll	%l5,3,%l5
83	xor	%o4,%o0,%o0
84	ldx	[%i1+%l0],%o2
85	srlx	%o1,4,%o1
86	ldx	[%l4+%l5],%o4
87	sllx	%o0,60,%o5
88	xor	%l3,%l1,%l1
89	srlx	%o0,4,%o0
90	and	%l1,0xf0,%l0
! Adding -1 produces a carry as long as %l7 was nonzero, so the blu below
! (branch on carry set) repeats the loop for %l7 = 13 down to 0.
91	addcc	%l7,-1,%l7
92	xor	%o1,%o5,%o1
93	and	%l1,0x0f,%l1
94	xor	%o3,%o1,%o1
95	sll	%l1,4,%l1
96	blu	.Lghash_inner
97	and	%o1,0xf,%l5		! delay slot
98
! Low nibble of the last byte (byte 0) fetched by the loop.
99	ldx	[%l6+%l1],%o3
100	sll	%l5,3,%l5
101	xor	%o2,%o0,%o0
102	ldx	[%i1+%l1],%o2
103	srlx	%o1,4,%o1
104	xor	%o4,%o0,%o0
105	ldx	[%l4+%l5],%o4
106	sllx	%o0,60,%o5
107	xor	%o3,%o1,%o1
108	srlx	%o0,4,%o0
109	xor	%o1,%o5,%o1
110	xor	%o2,%o0,%o0
111
112	add	%i2,16,%i2		! advance to the next input block
113	cmp	%i2,%i3
114	be,pn	SIZE_T_CC,.Ldone	! input exhausted
115	and	%o1,0xf,%l5		! delay slot, executed on both paths
116
! High nibble of byte 0, then store the state and set up the next block.
117	ldx	[%l6+%l0],%o3
118	sll	%l5,3,%l5
119	xor	%o4,%o0,%o0
120	ldx	[%i1+%l0],%o2
121	srlx	%o1,4,%o1
122	ldx	[%l4+%l5],%o4
123	sllx	%o0,60,%o5
124	xor	%o3,%o1,%o1
125	ldub	[%i2+15],%l1		! last byte of the next input block
126	srlx	%o0,4,%o0
127	xor	%o1,%o5,%o1
128	xor	%o2,%o0,%o0
129	stx	%o1,[%i0+8]
130	xor	%o4,%o0,%o0
131	stx	%o0,[%i0]
! State bytes 14 and 15 are the two lowest bytes of the low word just
! stored (big-endian), so they are taken from %o1 instead of memory.
132	srl	%o1,8,%l3
133	and	%o1,0xff,%l2
134	ba	.Louter
135	and	%l3,0xff,%l3		! delay slot
136.align	32
! Same final high-nibble step as above, without preparing another block.
137.Ldone:
138	ldx	[%l6+%l0],%o3
139	sll	%l5,3,%l5
140	xor	%o4,%o0,%o0
141	ldx	[%i1+%l0],%o2
142	srlx	%o1,4,%o1
143	ldx	[%l4+%l5],%o4
144	sllx	%o0,60,%o5
145	xor	%o3,%o1,%o1
146	srlx	%o0,4,%o0
147	xor	%o1,%o5,%o1
148	xor	%o2,%o0,%o0
149	stx	%o1,[%i0+8]		! store low half of the state
150	xor	%o4,%o0,%o0
151	stx	%o0,[%i0]		! store high half of the state
152
153	ret
154	restore
155.type	gcm_ghash_4bit,#function
156.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)
! gcm_gmult_4bit -- one table-driven multiplication of the 16-byte state,
! four bits per step.  Same scheme as gcm_ghash_4bit but with no input
! buffer: the nibbles come from the state bytes alone.
!
! Incoming registers, as used by the code:
!   %i0  16-byte state; bytes 15 down to 0 are read, and the result is
!        stored as two 64-bit words ([%i0] = high, [%i0+8] = low)
!   %i1  table of sixteen 16-byte entries; entry n is at %i1+16*n with
!        the high 64 bits at +0 and the low 64 bits at +8
!
! Working registers:
!   %o0:%o1  accumulator Z (high:low)
!   %o2:%o3  table entry T (high:low) for the current nibble
!   %o4      rem_4bit entry for the four bits last shifted out of Z
!   %o5      low four bits of Z.high, moved to the top of Z.low
!   %l0/%l1  high/low nibble of the current byte as table offsets (n*16)
!   %l4      address of rem_4bit      %l5  rem_4bit offset (n*8)
!   %l6      %i1+8                    %l7  index of the next byte to fetch
!
! As in gcm_ghash_4bit, the xors of T.high and of the rem_4bit entry into
! Z.high are deferred into the following nibble step.
157.globl	gcm_gmult_4bit
158.align	32
159gcm_gmult_4bit:
160	save	%sp,-STACK_FRAME,%sp
161	ldub	[%i0+15],%l1		! state byte 15
162	add	%i1,8,%l6		! %l6 = base for low halves of table
163
! Position-independent address of rem_4bit: call leaves the address of
! label 1 in %o7, and the delay slot adds the distance to the table.
1641:	call	.+8
165	add	%o7,rem_4bit-1b,%l4
166
167	and	%l1,0xf0,%l0		! high nibble, already scaled by 16
168	and	%l1,0x0f,%l1
169	sll	%l1,4,%l1		! low nibble scaled by 16
170	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
171	ldx	[%i1+%l1],%o0
172
173	ldub	[%i0+14],%l1		! state byte 14
174
! High nibble of byte 15: Z is shifted right by four, T.low is folded in.
175	ldx	[%l6+%l0],%o3
176	and	%o1,0xf,%l5		! bits about to be shifted out
177	ldx	[%i1+%l0],%o2
178	sll	%l5,3,%l5
179	ldx	[%l4+%l5],%o4
180	srlx	%o1,4,%o1
181	mov	13,%l7			! bytes 13..0 are fetched in the loop
182	sllx	%o0,60,%o5
183	xor	%o3,%o1,%o1
184	srlx	%o0,4,%o0
185	xor	%o1,%o5,%o1
186
187	and	%o1,0xf,%l5
188	and	%l1,0xf0,%l0
189	and	%l1,0x0f,%l1
190	ba	.Lgmult_inner
191	sll	%l1,4,%l1		! delay slot
192.align	32
! Each pass handles one byte: first its low nibble (offset %l1), then its
! high nibble (offset %l0), while fetching state byte %l7.
193.Lgmult_inner:
194	ldx	[%l6+%l1],%o3
195	sll	%l5,3,%l5
196	xor	%o2,%o0,%o0		! deferred: Z.high ^= previous T.high
197	ldx	[%i1+%l1],%o2
198	srlx	%o1,4,%o1
199	xor	%o4,%o0,%o0		! deferred: Z.high ^= previous rem_4bit entry
200	ldx	[%l4+%l5],%o4
201	sllx	%o0,60,%o5
202	xor	%o3,%o1,%o1
203	ldub	[%i0+%l7],%l1		! next state byte
204	srlx	%o0,4,%o0
205	xor	%o1,%o5,%o1
206	xor	%o2,%o0,%o0
207	and	%o1,0xf,%l5
208
209	ldx	[%l6+%l0],%o3
210	sll	%l5,3,%l5
211	xor	%o4,%o0,%o0
212	ldx	[%i1+%l0],%o2
213	srlx	%o1,4,%o1
214	ldx	[%l4+%l5],%o4
215	sllx	%o0,60,%o5
216	srlx	%o0,4,%o0
217	and	%l1,0xf0,%l0
! Adding -1 produces a carry as long as %l7 was nonzero, so the blu below
! (branch on carry set) repeats the loop for %l7 = 13 down to 0.
218	addcc	%l7,-1,%l7
219	xor	%o1,%o5,%o1
220	and	%l1,0x0f,%l1
221	xor	%o3,%o1,%o1
222	sll	%l1,4,%l1
223	blu	.Lgmult_inner
224	and	%o1,0xf,%l5		! delay slot
225
! Low nibble of the last byte (byte 0) fetched by the loop.
226	ldx	[%l6+%l1],%o3
227	sll	%l5,3,%l5
228	xor	%o2,%o0,%o0
229	ldx	[%i1+%l1],%o2
230	srlx	%o1,4,%o1
231	xor	%o4,%o0,%o0
232	ldx	[%l4+%l5],%o4
233	sllx	%o0,60,%o5
234	xor	%o3,%o1,%o1
235	srlx	%o0,4,%o0
236	xor	%o1,%o5,%o1
237	xor	%o2,%o0,%o0
238	and	%o1,0xf,%l5
239
! High nibble of byte 0, then store the result.
240	ldx	[%l6+%l0],%o3
241	sll	%l5,3,%l5
242	xor	%o4,%o0,%o0
243	ldx	[%i1+%l0],%o2
244	srlx	%o1,4,%o1
245	ldx	[%l4+%l5],%o4
246	sllx	%o0,60,%o5
247	xor	%o3,%o1,%o1
248	srlx	%o0,4,%o0
249	xor	%o1,%o5,%o1
250	xor	%o2,%o0,%o0
251	stx	%o1,[%i0+8]		! store low half of the state
252	xor	%o4,%o0,%o0
253	stx	%o0,[%i0]		! store high half of the state
254
255	ret
256	restore
257.type	gcm_gmult_4bit,#function
258.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)
! gcm_init_vis3 -- prepare the 24-byte key block read by gcm_gmult_vis3 and
! gcm_ghash_vis3.
!
! Incoming registers, as used by the code:
!   %i0  output: "twisted" H as two 64-bit words (high at +0, low at +8)
!        followed by a 64-bit lookup constant at +16
!   %i1  input: H as two 64-bit words (high at +0, low at +8)
!
! The twist is H shifted left by one bit across 128 bits; if the bit
! shifted out of the top was set, 1 is xored into the low word and
! 0xE1<<57 into the high word.
259.globl	gcm_init_vis3
260.align	32
261gcm_init_vis3:
262	save	%sp,-STACK_FRAME,%sp
263
264	ldx	[%i1+0],%o2
265	ldx	[%i1+8],%o1
266	mov	0xE1,%o4
267	mov	1,%o3
268	sllx	%o4,57,%o4
269	srax	%o2,63,%g1		! broadcast carry
270	addcc	%o1,%o1,%o1		! H<<=1
! The VIS3 instruction is emitted as a raw word, with the mnemonic kept in
! the comment (presumably for assemblers without VIS3 support).
271	.word	0x95b2822a !addxc	%o2,%o2,%o2
272	and	%g1,%o3,%o3
273	and	%g1,%o4,%o4
274	xor	%o3,%o1,%o1
275	xor	%o4,%o2,%o2
276	stx	%o1,[%i0+8]		! save twisted H
277	stx	%o2,[%i0+0]
278
! Build the 64-bit constant from two 32-bit halves.
279	sethi	%hi(0xA0406080),%g5
280	sethi	%hi(0x20C0E000),%l0
281	or	%g5,%lo(0xA0406080),%g5
282	or	%l0,%lo(0x20C0E000),%l0
283	sllx	%g5,32,%g5
284	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
285	stx	%g5,[%i0+16]
286
287	ret
288	restore
289.type	gcm_init_vis3,#function
290.size	gcm_init_vis3,.-gcm_init_vis3
291
! gcm_gmult_vis3 -- one multiplication of the 16-byte state by the twisted
! H, using the VIS3 xmulx/xmulxhi instructions (emitted as raw words, with
! the mnemonics kept in the comments).
!
! Incoming registers, as used by the code:
!   %i0  state Xi: high word at +0, low word at +8; updated in place
!   %i1  key block: twisted H (high at +0, low at +8) and the 64-bit
!        lookup constant at +16, i.e. the layout gcm_init_vis3 stores
!
! Working registers:
!   %o3/%o4  Xi low/high            %o1/%o2  H low/high
!   %o0      H high xor H low       %o5      0xE1<<57
!   %g5      lookup constant        %g1..%g4 partial products
!   %o7      scratch (no calls are made, so it is free here)
292.globl	gcm_gmult_vis3
293.align	32
294gcm_gmult_vis3:
295	save	%sp,-STACK_FRAME,%sp
296
297	ldx	[%i0+8],%o3		! load Xi
298	ldx	[%i0+0],%o4
299	ldx	[%i1+8],%o1	! load twisted H
300	ldx	[%i1+0],%o2
301
302	mov	0xE1,%l7
303	sllx	%l7,57,%o5		! 57 is not a typo
304	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
305
! Three products: low*low, (low xor high)*(Hlow xor Hhigh), high*high.
306	xor	%o2,%o1,%o0		! Karatsuba pre-processing
307	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
308	xor	%o3,%o4,%g3		! Karatsuba pre-processing
309	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
310	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
311	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
312	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
313	.word	0x99b322aa !xmulx	%o4,%o2,%o4
314
! The low three bits of %g1 select one byte of the lookup constant: the
! shift count is taken modulo 64, and the final shift by 57 discards all
! but the low seven bits.
315	sll	%g1,3,%o7
316	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
317	xor	%g1,%o7,%o7
318	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
319
320	xor	%g1,%g2,%g2		! Karatsuba post-processing
321	xor	%o3,%g3,%g3
322	 xor	%o7,%o3,%o3		! real destination is %g2
323	xor	%g4,%g3,%g3
324	xor	%o3,%g2,%g2
325	xor	%o4,%g3,%g3
326	xor	%o4,%g2,%g2
327
! Reduction: multiply the two low partial words by 0xE1<<57 and fold the
! results into the upper two words.
328	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
329	 xor	%g1,%g3,%g3
330	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
331	 xor	%g2,%g4,%g4
332	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
333
334	xor	%o3,%g3,%g3
335	xor	%g1,%g3,%g3
336	xor	%g2,%g4,%g4
337
338	stx	%g3,[%i0+8]		! save Xi
339	stx	%g4,[%i0+0]
340
341	ret
342	restore
343.type	gcm_gmult_vis3,#function
344.size	gcm_gmult_vis3,.-gcm_gmult_vis3
345
! gcm_ghash_vis3 -- hash update over a buffer using the VIS3
! xmulx/xmulxhi instructions (emitted as raw words, with the mnemonics kept
! in the comments).  Per 16-byte block: the block is xored into the state
! and the state is multiplied by the twisted H, as in gcm_gmult_vis3.
!
! Incoming registers, as used by the code:
!   %i0  state Xi: high word at +0, low word at +8; loaded once, stored
!        once at the end
!   %i1  key block: twisted H (high at +0, low at +8) and the 64-bit
!        lookup constant at +16, i.e. the layout gcm_init_vis3 stores
!   %i2  input pointer; any byte alignment is handled
!   %i3  input length in bytes
! NOTE(review): the loop runs at least once and ends only when the
! remaining length reaches exactly zero, so the length is presumably a
! nonzero multiple of 16 -- confirm against callers.
!
! Working registers:
!   %g3/%g4  running Xi low/high    %o1/%o2  H low/high
!   %o0      H high xor H low       %o5      0xE1<<57
!   %g5      lookup constant        %g1,%g2  partial products
!   %o3/%o4  current block low/high, then partial products
!   %l0/%l1  left/right shift counts for misaligned input
!   %o7      scratch (no calls are made, so it is free here)
346.globl	gcm_ghash_vis3
347.align	32
348gcm_ghash_vis3:
349	save	%sp,-STACK_FRAME,%sp
350	nop
351	srln	%i3,0,%i3		! needed on v8+, "nop" on v9
352
353	ldx	[%i0+8],%g3		! load Xi
354	ldx	[%i0+0],%g4
355	ldx	[%i1+8],%o1	! load twisted H
356	ldx	[%i1+0],%o2
357
358	mov	0xE1,%l7
359	sllx	%l7,57,%o5		! 57 is not a typo
360	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
361
! Round the input pointer down to an 8-byte boundary and remember the
! misalignment as a bit count; %l1 is its negation, which the 64-bit
! shifts use modulo 64.
362	and	%i2,7,%l0
363	andn	%i2,7,%i2
364	sll	%l0,3,%l0
365	prefetch [%i2+63], 20
366	sub	%g0,%l0,%l1
367
368	xor	%o2,%o1,%o0		! Karatsuba pre-processing
369.Loop:
370	ldx	[%i2+8],%o3
371	brz,pt	%l0,1f			! aligned input: skip the merge
372	ldx	[%i2+0],%o4		! delay slot, executed on both paths
373
! Misaligned input: assemble the 16 bytes from three aligned words.
374	ldx	[%i2+16],%g2		! align data
375	srlx	%o3,%l1,%g1
376	sllx	%o3,%l0,%o3
377	sllx	%o4,%l0,%o4
378	srlx	%g2,%l1,%g2
379	or	%g1,%o4,%o4
380	or	%g2,%o3,%o3
3811:
382	add	%i2,16,%i2
383	sub	%i3,16,%i3
384	xor	%g3,%o3,%o3		! Xi ^= input block
385	xor	%g4,%o4,%o4
386	prefetch [%i2+63], 20
387
! Three products: low*low, (low xor high)*(Hlow xor Hhigh), high*high.
388	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
389	xor	%o3,%o4,%g3		! Karatsuba pre-processing
390	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
391	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
392	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
393	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
394	.word	0x99b322aa !xmulx	%o4,%o2,%o4
395
! The low three bits of %g1 select one byte of the lookup constant: the
! shift count is taken modulo 64, and the final shift by 57 discards all
! but the low seven bits.
396	sll	%g1,3,%o7
397	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
398	xor	%g1,%o7,%o7
399	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
400
401	xor	%g1,%g2,%g2		! Karatsuba post-processing
402	xor	%o3,%g3,%g3
403	 xor	%o7,%o3,%o3		! real destination is %g2
404	xor	%g4,%g3,%g3
405	xor	%o3,%g2,%g2
406	xor	%o4,%g3,%g3
407	xor	%o4,%g2,%g2
408
! Reduction: multiply the two low partial words by 0xE1<<57 and fold the
! results into the upper two words.
409	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
410	 xor	%g1,%g3,%g3
411	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
412	 xor	%g2,%g4,%g4
413	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
414
415	xor	%o3,%g3,%g3
416	xor	%g1,%g3,%g3
417	brnz,pt	%i3,.Loop		! more input left
418	xor	%g2,%g4,%g4		! delay slot, executed on both paths
419
420	stx	%g3,[%i0+8]		! save Xi
421	stx	%g4,[%i0+0]
422
423	ret
424	restore
425.type	gcm_ghash_vis3,#function
426.size	gcm_ghash_vis3,.-gcm_ghash_vis3
427.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
428.align	4
429