1#include "sparc_arch.h"
2
3#ifdef  __arch64__
4.register	%g2,#scratch
5.register	%g3,#scratch
6#endif
7
8.section	".text",#alloc,#execinstr
9
! rem_4bit: 16 entries of 8 bytes each, emitted as (constant, 0) pairs of
! 32-bit words. The 4-bit routines in this file fetch an entry with a 64-bit
! ldx at byte offset (Zlo & 0xf) << 3, i.e. the entry is selected by the four
! bits that are about to be shifted out of the accumulator.
! Each non-zero word is a 16-bit value shifted left by 16; the hex form of
! every row is given alongside.
10.align	64
11rem_4bit:
12	.long	0,0,471859200,0,943718400,0,610271232,0		! 0x0000 0x1C20 0x3840 0x2460 (each <<16)
13	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0		! 0x7080 0x6CA0 0x48C0 0x54E0
14	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0		! 0xE100 0xFD20 0xD940 0xC560
15	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0		! 0x9180 0x8DA0 0xA9C0 0xB5E0
16.type	rem_4bit,#object
17.size	rem_4bit,(.-rem_4bit)
18
!-----------------------------------------------------------------------
! gcm_ghash_4bit -- table-driven hash update over one or more 16-byte
! blocks, four bits at a time.
!
! Arguments as seen after the register-window save:
!   %i0  16-byte state Xi; bytes 14 and 15 are read up front and both
!        64-bit halves are written back after every block
!   %i1  lookup table with 16-byte entries selected by nibble*16
!        (one 64-bit half at +0, the other at +8)
!   %i2  input pointer, advanced by 16 per block
!   %i3  byte count, converted below into the end pointer %i2+%i3
!
! Working registers:
!   %o0:%o1  accumulator Z (word stored at Xi+0 : word stored at Xi+8)
!   %o2:%o3  current table entry T (same half order)
!   %o4      rem_4bit entry          %o5  low nibble of %o0 moved to bit 60
!   %l0/%l1  high/low nibble of the current byte, pre-scaled to offsets
!   %l2/%l3  Xi bytes to be xored with the matching input bytes
!   %l4      address of rem_4bit     %l5  offset into rem_4bit
!   %l6      %i1+8                   %l7  byte index for the inner loop
!
! The instruction stream is software-pipelined: loads for the next nibble
! are interleaved with the xors that finish the previous one, so the
! statement order must not be changed.
!
! NOTE(review): the first block is consumed before the end-pointer test
! and that test is for equality, so callers presumably always pass a
! non-zero multiple of 16 bytes -- confirm against the C caller.
!-----------------------------------------------------------------------
19.globl	gcm_ghash_4bit
20.align	32
21gcm_ghash_4bit:
22	save	%sp,-STACK_FRAME,%sp
23	ldub	[%i2+15],%l1		! last byte of the first input block
24	ldub	[%i0+15],%l2		! Xi[15]
25	ldub	[%i0+14],%l3		! Xi[14]
26	add	%i3,%i2,%i3		! %i3 = end-of-input pointer
27	add	%i1,8,%l6		! %l6 -> second half of each table entry
28
! Position-independent address of rem_4bit: the call leaves the address of
! label 1 in %o7 and the delay-slot add applies the assembly-time distance.
291:	call	.+8
30	add	%o7,rem_4bit-1b,%l4
31
! Start of one 16-byte block. Byte 15 is handled here and byte 14 is
! prepared for the first pass of the inner loop.
32.Louter:
33	xor	%l2,%l1,%l1		! Xi[15] ^ inp[15]
34	and	%l1,0xf0,%l0		! high nibble, already nibble*16
35	and	%l1,0x0f,%l1
36	sll	%l1,4,%l1		! low nibble * 16
37	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
38	ldx	[%i1+%l1],%o0
39
40	ldub	[%i2+14],%l1
41
42	ldx	[%l6+%l0],%o3		! T = table[high nibble]
43	and	%o1,0xf,%l5		! four bits about to leave Z
44	ldx	[%i1+%l0],%o2
45	sll	%l5,3,%l5		! scale to 8-byte rem_4bit entries
46	ldx	[%l4+%l5],%o4
47	srlx	%o1,4,%o1		! Z >>= 4, low half
48	mov	13,%l7		! next byte index fetched inside the loop
49	sllx	%o0,60,%o5		! nibble crossing from %o0 into %o1
50	xor	%o3,%o1,%o1
51	srlx	%o0,4,%o0		! Z >>= 4, high half
52	xor	%o1,%o5,%o1
53
54	xor	%l3,%l1,%l1		! Xi[14] ^ inp[14]
55	and	%o1,0xf,%l5
56	and	%l1,0xf0,%l0
57	and	%l1,0x0f,%l1
58	ba	.Lghash_inner
59	sll	%l1,4,%l1		! (delay slot)
60.align	32
! Each pass folds in both nibbles (low first, then high) of the byte that
! the previous pass prepared, and fetches byte %l7 of Xi and of the input
! for the next pass.
61.Lghash_inner:
62	ldx	[%l6+%l1],%o3
63	sll	%l5,3,%l5
64	xor	%o2,%o0,%o0
65	ldx	[%i1+%l1],%o2
66	srlx	%o1,4,%o1
67	xor	%o4,%o0,%o0		! apply the rem_4bit term
68	ldx	[%l4+%l5],%o4
69	sllx	%o0,60,%o5
70	xor	%o3,%o1,%o1
71	ldub	[%i2+%l7],%l1
72	srlx	%o0,4,%o0
73	xor	%o1,%o5,%o1
74	ldub	[%i0+%l7],%l3
75	xor	%o2,%o0,%o0
76	and	%o1,0xf,%l5
77
78	ldx	[%l6+%l0],%o3
79	sll	%l5,3,%l5
80	xor	%o4,%o0,%o0
81	ldx	[%i1+%l0],%o2
82	srlx	%o1,4,%o1
83	ldx	[%l4+%l5],%o4
84	sllx	%o0,60,%o5
85	xor	%l3,%l1,%l1
86	srlx	%o0,4,%o0
87	and	%l1,0xf0,%l0
88	addcc	%l7,-1,%l7		! carry is set while the old index was non-zero
89	xor	%o1,%o5,%o1
90	and	%l1,0x0f,%l1
91	xor	%o3,%o1,%o1
92	sll	%l1,4,%l1
93	blu	.Lghash_inner		! loop until byte 0 has been fetched
94	and	%o1,0xf,%l5		! (delay slot)
95
! Loop epilogue: low nibble of byte 0.
96	ldx	[%l6+%l1],%o3
97	sll	%l5,3,%l5
98	xor	%o2,%o0,%o0
99	ldx	[%i1+%l1],%o2
100	srlx	%o1,4,%o1
101	xor	%o4,%o0,%o0
102	ldx	[%l4+%l5],%o4
103	sllx	%o0,60,%o5
104	xor	%o3,%o1,%o1
105	srlx	%o0,4,%o0
106	xor	%o1,%o5,%o1
107	xor	%o2,%o0,%o0
108
109	add	%i2,16,%i2		! advance to the next block
110	cmp	%i2,%i3
111	be,pn	SIZE_T_CC,.Ldone		! input exhausted
112	and	%o1,0xf,%l5		! (delay slot, runs on both paths)
113
! More input follows: finish the high nibble of byte 0, store Xi and take
! the next Xi[15]/Xi[14] values straight from the register copy.
114	ldx	[%l6+%l0],%o3
115	sll	%l5,3,%l5
116	xor	%o4,%o0,%o0
117	ldx	[%i1+%l0],%o2
118	srlx	%o1,4,%o1
119	ldx	[%l4+%l5],%o4
120	sllx	%o0,60,%o5
121	xor	%o3,%o1,%o1
122	ldub	[%i2+15],%l1		! last byte of the next input block
123	srlx	%o0,4,%o0
124	xor	%o1,%o5,%o1
125	xor	%o2,%o0,%o0
126	stx	%o1,[%i0+8]
127	xor	%o4,%o0,%o0
128	stx	%o0,[%i0]
129	srl	%o1,8,%l3
130	and	%o1,0xff,%l2		! lowest byte of the word stored at Xi+8
131	ba	.Louter
132	and	%l3,0xff,%l3		! (delay slot) next-lowest byte of that word
133.align	32
! Last block: finish the high nibble of byte 0, store Xi and return.
134.Ldone:
135	ldx	[%l6+%l0],%o3
136	sll	%l5,3,%l5
137	xor	%o4,%o0,%o0
138	ldx	[%i1+%l0],%o2
139	srlx	%o1,4,%o1
140	ldx	[%l4+%l5],%o4
141	sllx	%o0,60,%o5
142	xor	%o3,%o1,%o1
143	srlx	%o0,4,%o0
144	xor	%o1,%o5,%o1
145	xor	%o2,%o0,%o0
146	stx	%o1,[%i0+8]
147	xor	%o4,%o0,%o0
148	stx	%o0,[%i0]
149
150	ret
151	restore
152.type	gcm_ghash_4bit,#function
153.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)
!-----------------------------------------------------------------------
! gcm_gmult_4bit -- single-block form of the 4-bit table-driven multiply:
! the nibbles come from the 16-byte state alone and the result replaces it.
!
! Arguments as seen after the register-window save:
!   %i0  16-byte state Xi, read byte by byte (15 down to 0) and overwritten
!   %i1  lookup table with 16-byte entries selected by nibble*16
!        (one 64-bit half at +0, the other at +8)
!
! Working registers:
!   %o0:%o1  accumulator Z (word stored at Xi+0 : word stored at Xi+8)
!   %o2:%o3  current table entry T (same half order)
!   %o4      rem_4bit entry          %o5  low nibble of %o0 moved to bit 60
!   %l0/%l1  high/low nibble of the current byte, pre-scaled to offsets
!   %l4      address of rem_4bit     %l5  offset into rem_4bit
!   %l6      %i1+8                   %l7  byte index for the inner loop
!
! The instruction stream is software-pipelined; statement order matters.
!-----------------------------------------------------------------------
154.globl	gcm_gmult_4bit
155.align	32
156gcm_gmult_4bit:
157	save	%sp,-STACK_FRAME,%sp
158	ldub	[%i0+15],%l1		! Xi[15]
159	add	%i1,8,%l6		! %l6 -> second half of each table entry
160
! Position-independent address of rem_4bit: the call leaves the address of
! label 1 in %o7 and the delay-slot add applies the assembly-time distance.
1611:	call	.+8
162	add	%o7,rem_4bit-1b,%l4
163
164	and	%l1,0xf0,%l0		! high nibble, already nibble*16
165	and	%l1,0x0f,%l1
166	sll	%l1,4,%l1		! low nibble * 16
167	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
168	ldx	[%i1+%l1],%o0
169
170	ldub	[%i0+14],%l1		! Xi[14], prepared for the first loop pass
171
172	ldx	[%l6+%l0],%o3		! T = table[high nibble]
173	and	%o1,0xf,%l5		! four bits about to leave Z
174	ldx	[%i1+%l0],%o2
175	sll	%l5,3,%l5		! scale to 8-byte rem_4bit entries
176	ldx	[%l4+%l5],%o4
177	srlx	%o1,4,%o1
178	mov	13,%l7		! next byte index fetched inside the loop
179	sllx	%o0,60,%o5		! nibble crossing from %o0 into %o1
180	xor	%o3,%o1,%o1
181	srlx	%o0,4,%o0
182	xor	%o1,%o5,%o1
183
184	and	%o1,0xf,%l5
185	and	%l1,0xf0,%l0
186	and	%l1,0x0f,%l1
187	ba	.Lgmult_inner
188	sll	%l1,4,%l1		! (delay slot)
189.align	32
! Each pass folds in both nibbles (low first, then high) of the byte that
! the previous pass prepared, and fetches Xi[%l7] for the next pass.
190.Lgmult_inner:
191	ldx	[%l6+%l1],%o3
192	sll	%l5,3,%l5
193	xor	%o2,%o0,%o0
194	ldx	[%i1+%l1],%o2
195	srlx	%o1,4,%o1
196	xor	%o4,%o0,%o0		! apply the rem_4bit term
197	ldx	[%l4+%l5],%o4
198	sllx	%o0,60,%o5
199	xor	%o3,%o1,%o1
200	ldub	[%i0+%l7],%l1
201	srlx	%o0,4,%o0
202	xor	%o1,%o5,%o1
203	xor	%o2,%o0,%o0
204	and	%o1,0xf,%l5
205
206	ldx	[%l6+%l0],%o3
207	sll	%l5,3,%l5
208	xor	%o4,%o0,%o0
209	ldx	[%i1+%l0],%o2
210	srlx	%o1,4,%o1
211	ldx	[%l4+%l5],%o4
212	sllx	%o0,60,%o5
213	srlx	%o0,4,%o0
214	and	%l1,0xf0,%l0
215	addcc	%l7,-1,%l7		! carry is set while the old index was non-zero
216	xor	%o1,%o5,%o1
217	and	%l1,0x0f,%l1
218	xor	%o3,%o1,%o1
219	sll	%l1,4,%l1
220	blu	.Lgmult_inner		! loop until byte 0 has been fetched
221	and	%o1,0xf,%l5		! (delay slot)
222
! Loop epilogue: low nibble of byte 0.
223	ldx	[%l6+%l1],%o3
224	sll	%l5,3,%l5
225	xor	%o2,%o0,%o0
226	ldx	[%i1+%l1],%o2
227	srlx	%o1,4,%o1
228	xor	%o4,%o0,%o0
229	ldx	[%l4+%l5],%o4
230	sllx	%o0,60,%o5
231	xor	%o3,%o1,%o1
232	srlx	%o0,4,%o0
233	xor	%o1,%o5,%o1
234	xor	%o2,%o0,%o0
235	and	%o1,0xf,%l5
236
! High nibble of byte 0, then store the result over Xi.
237	ldx	[%l6+%l0],%o3
238	sll	%l5,3,%l5
239	xor	%o4,%o0,%o0
240	ldx	[%i1+%l0],%o2
241	srlx	%o1,4,%o1
242	ldx	[%l4+%l5],%o4
243	sllx	%o0,60,%o5
244	xor	%o3,%o1,%o1
245	srlx	%o0,4,%o0
246	xor	%o1,%o5,%o1
247	xor	%o2,%o0,%o0
248	stx	%o1,[%i0+8]
249	xor	%o4,%o0,%o0
250	stx	%o0,[%i0]
251
252	ret
253	restore
254.type	gcm_gmult_4bit,#function
255.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)
!-----------------------------------------------------------------------
! gcm_init_vis3 -- prepare the 24-byte key block used by the VIS3 routines.
!
! Arguments as seen after the register-window save:
!   %i0  output: high word at +0, low word at +8, byte table at +16
!   %i1  input: 128-bit H, high word at +0, low word at +8
!
! The value is shifted left by one bit across 128 bits. If the bit shifted
! out was set, 1 is xored into the low word and 0xE1<<57 into the high
! word. The result is what the comments below call "twisted H".
!-----------------------------------------------------------------------
256.globl	gcm_init_vis3
257.align	32
258gcm_init_vis3:
259	save	%sp,-STACK_FRAME,%sp
260
261	ldx	[%i1+0],%o2
262	ldx	[%i1+8],%o1
263	mov	0xE1,%o4
264	mov	1,%o3
265	sllx	%o4,57,%o4
266	srax	%o2,63,%g1		! broadcast carry
267	addcc	%o1,%o1,%o1		! H<<=1
! The next instruction is emitted as a raw word; its mnemonic is given in
! the trailing comment. It adds the carry from the low word into the high.
268	.word	0x95b2822a !addxc	%o2,%o2,%o2
269	and	%g1,%o3,%o3		! keep the correction terms only if a bit was shifted out
270	and	%g1,%o4,%o4
271	xor	%o3,%o1,%o1
272	xor	%o4,%o2,%o2
273	stx	%o1,[%i0+8]		! save twisted H
274	stx	%o2,[%i0+0]
275
! Build the 64-bit constant from two 32-bit halves. Byte i of the constant,
! counted from the least significant end, is the low byte of the carry-less
! product of 0xE0 and i, for i = 0..7.
276	sethi	%hi(0xA0406080),%g5
277	sethi	%hi(0x20C0E000),%l0
278	or	%g5,%lo(0xA0406080),%g5
279	or	%l0,%lo(0x20C0E000),%l0
280	sllx	%g5,32,%g5
281	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
282	stx	%g5,[%i0+16]
283
284	ret
285	restore
286.type	gcm_init_vis3,#function
287.size	gcm_init_vis3,.-gcm_init_vis3
288
!-----------------------------------------------------------------------
! gcm_gmult_vis3 -- multiply the 16-byte state by the key block using the
! carry-less multiply instructions (emitted as raw words; each mnemonic is
! given in the trailing comment).
!
! Arguments as seen after the register-window save:
!   %i0  16-byte Xi (high word at +0, low word at +8), read and overwritten
!   %i1  key block in the layout gcm_init_vis3 stores: twisted H at +0/+8
!        and the 0xE0 byte table at +16
!
! Working registers:
!   %o3/%o4  Xi low/high            %o1/%o2  H low/high
!   %o0      H high ^ H low         %o5      0xE1<<57
!   %g5      byte table             %o7      scratch for the table lookup
!   %g1,%g2,%g3,%g4  the four 64-bit words of the 256-bit product, lowest
!                    first; %g3/%g4 end up holding the new Xi low/high
!
! In these comments the dot (·) denotes carry-less multiplication.
!-----------------------------------------------------------------------
289.globl	gcm_gmult_vis3
290.align	32
291gcm_gmult_vis3:
292	save	%sp,-STACK_FRAME,%sp
293
294	ldx	[%i0+8],%o3		! load Xi
295	ldx	[%i0+0],%o4
296	ldx	[%i1+8],%o1	! load twisted H
297	ldx	[%i1+0],%o2
298
299	mov	0xE1,%l7
300	sllx	%l7,57,%o5		! 57 is not a typo
301	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
302
! Three 64x64 carry-less products, each as a low/high pair:
! Xlo·Hlo, (Xlo^Xhi)·(Hlo^Hhi) and Xhi·Hhi.
303	xor	%o2,%o1,%o0		! Karatsuba pre-processing
304	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
305	xor	%o3,%o4,%g3		! Karatsuba pre-processing
306	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
307	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
308	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
309	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
310	.word	0x99b322aa !xmulx	%o4,%o2,%o4
311
! Table lookup on the low three bits of %g1; the variable shift uses only
! the low six bits of the count, hence the implicit masks noted below.
312	sll	%g1,3,%o7
313	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
314	xor	%g1,%o7,%o7
315	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
316
317	xor	%g1,%g2,%g2		! Karatsuba post-processing
318	xor	%o3,%g3,%g3
319	 xor	%o7,%o3,%o3		! real destination is %g2
320	xor	%g4,%g3,%g3
321	xor	%o3,%g2,%g2
322	xor	%o4,%g3,%g3
323	xor	%o4,%g2,%g2
324
! Reduction: products of the two low words with %o5 are folded into the
! two high words, which are then stored as the new Xi.
325	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
326	 xor	%g1,%g3,%g3
327	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
328	 xor	%g2,%g4,%g4
329	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
330
331	xor	%o3,%g3,%g3
332	xor	%g1,%g3,%g3
333	xor	%g2,%g4,%g4
334
335	stx	%g3,[%i0+8]		! save Xi
336	stx	%g4,[%i0+0]
337
338	ret
339	restore
340.type	gcm_gmult_vis3,#function
341.size	gcm_gmult_vis3,.-gcm_gmult_vis3
342
!-----------------------------------------------------------------------
! gcm_ghash_vis3 -- hash update over one or more 16-byte input blocks using
! the carry-less multiply instructions (emitted as raw words; each mnemonic
! is given in the trailing comment).
!
! Arguments as seen after the register-window save:
!   %i0  16-byte Xi (high word at +0, low word at +8), read once, written
!        once after the last block
!   %i1  key block in the layout gcm_init_vis3 stores: twisted H at +0/+8
!        and the 0xE0 byte table at +16
!   %i2  input pointer; any byte alignment is accepted
!   %i3  byte count, decremented by 16 per block
!
! Working registers:
!   %g3/%g4  running Xi low/high    %o1/%o2  H low/high
!   %o0      H high ^ H low         %o5      0xE1<<57
!   %g5      byte table             %o7      scratch for the table lookup
!   %o3/%o4  current input block low/high, then Xi ^ block
!   %l0      left shift in bits for misaligned input (0 when aligned)
!   %l1      negated %l0, used as the complementary right shift
!   %g1,%g2  scratch during alignment, then low product words
!
! In these comments the dot (·) denotes carry-less multiplication.
!
! NOTE(review): the loop exits only when %i3 reaches exactly zero after the
! subtraction, and the first block is processed unconditionally, so callers
! presumably always pass a non-zero multiple of 16 -- confirm.
!-----------------------------------------------------------------------
343.globl	gcm_ghash_vis3
344.align	32
345gcm_ghash_vis3:
346	save	%sp,-STACK_FRAME,%sp
347	nop
! NOTE(review): srln is not defined in this file; presumably a macro from
! sparc_arch.h that normalises a size_t argument -- confirm.
348	srln	%i3,0,%i3		! needed on v8+, "nop" on v9
349
350	ldx	[%i0+8],%g3		! load Xi
351	ldx	[%i0+0],%g4
352	ldx	[%i1+8],%o1	! load twisted H
353	ldx	[%i1+0],%o2
354
355	mov	0xE1,%l7
356	sllx	%l7,57,%o5		! 57 is not a typo
357	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
358
! Split the input pointer into an 8-byte-aligned base and a bit offset.
359	and	%i2,7,%l0
360	andn	%i2,7,%i2
361	sll	%l0,3,%l0		! byte offset -> bit shift
362	prefetch [%i2+63], 20
363	sub	%g0,%l0,%l1		! shift counts use the low six bits only
364
365	xor	%o2,%o1,%o0		! Karatsuba pre-processing
! One 16-byte block per pass.
366.Loop:
367	ldx	[%i2+8],%o3
368	brz,pt	%l0,1f		! aligned input: skip the merge
369	ldx	[%i2+0],%o4		! (delay slot)
370
! Misaligned input: a third aligned doubleword is read and the block is
! assembled from the three by shifting and or-ing.
371	ldx	[%i2+16],%g2		! align data
372	srlx	%o3,%l1,%g1
373	sllx	%o3,%l0,%o3
374	sllx	%o4,%l0,%o4
375	srlx	%g2,%l1,%g2
376	or	%g1,%o4,%o4
377	or	%g2,%o3,%o3
3781:
379	add	%i2,16,%i2
380	sub	%i3,16,%i3
381	xor	%g3,%o3,%o3		! Xi ^= input block
382	xor	%g4,%o4,%o4
383	prefetch [%i2+63], 20
384
! Three 64x64 carry-less products, each as a low/high pair:
! Xlo·Hlo, (Xlo^Xhi)·(Hlo^Hhi) and Xhi·Hhi.
385	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
386	xor	%o3,%o4,%g3		! Karatsuba pre-processing
387	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
388	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
389	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
390	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
391	.word	0x99b322aa !xmulx	%o4,%o2,%o4
392
! Table lookup on the low three bits of %g1; the variable shift uses only
! the low six bits of the count, hence the implicit masks noted below.
393	sll	%g1,3,%o7
394	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
395	xor	%g1,%o7,%o7
396	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
397
398	xor	%g1,%g2,%g2		! Karatsuba post-processing
399	xor	%o3,%g3,%g3
400	 xor	%o7,%o3,%o3		! real destination is %g2
401	xor	%g4,%g3,%g3
402	xor	%o3,%g2,%g2
403	xor	%o4,%g3,%g3
404	xor	%o4,%g2,%g2
405
! Reduction: products of the two low words with %o5 are folded into the
! two high words, which become the running Xi in %g3/%g4.
406	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
407	 xor	%g1,%g3,%g3
408	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
409	 xor	%g2,%g4,%g4
410	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
411
412	xor	%o3,%g3,%g3
413	xor	%g1,%g3,%g3
414	brnz,pt	%i3,.Loop		! more input while the remaining count is non-zero
415	xor	%g2,%g4,%g4		! (delay slot)
416
417	stx	%g3,[%i0+8]		! save Xi
418	stx	%g4,[%i0+0]
419
420	ret
421	restore
422.type	gcm_ghash_vis3,#function
423.size	gcm_ghash_vis3,.-gcm_ghash_vis3
424.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
425.align	4
426