1#ifdef __linux__
2#include <asm/regdef.h>
3#else
4#include <asm.h>
5#include <regdef.h>
6#endif
7
8.text
9
10.set	noat
11.set	noreorder
/*
 * gcm_gmult_4bit
 *
 * Updates the 16-byte block at a0 in place.  The block is read as two
 * quadwords (t10 = 0(a0), t11 = 8(a0)) and consumed one byte at a time,
 * from byte 7 of t11 down to byte 0 of t10; within each byte the low
 * nibble is used first, then the high nibble.  Each nibble selects a
 * 16-byte entry (quadwords at +0 and +8) of the table at a1.  A 128-bit
 * accumulator (t8 = upper quadword, t9 = lower quadword) is shifted right
 * by 4 bits between nibbles; the 4 bits shifted out index the 8-byte
 * entries of rem_4bit and the value read is XORed back into t8.  The
 * accumulator is finally byte-reversed and stored to 0(a0) / 8(a0).
 *
 * In:    a0 = 16-byte block, read and written with ldq/stq
 *        a1 = table of 16 entries x 16 bytes (offsets 0x00..0xf8 are read)
 * Out:   block at a0 updated; v0 only holds the loop counter
 * Uses:  v0, t0-t12, a4, a5, AT; ra, a0, a1 are not modified
 * Stack: none (.frame sp,0,ra)
 *
 * NOTE(review): presumably called from C as
 *   void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
 * confirm against the caller.
 */
12.globl	gcm_gmult_4bit
13.align	4
14.ent	gcm_gmult_4bit
15gcm_gmult_4bit:
16	.frame	sp,0,ra
17	.prologue 0
18
19	ldq	t11,8(a0)	/* t11 = quadword at a0+8, consumed first */
20	ldq	t10,0(a0)	/* t10 = quadword at a0+0, consumed second */
21
	/* picmeup returns through t0 (ra stays intact) with AT = &rem_4bit. */
22	bsr	t0,picmeup
23	nop
24.align	4
	/*
	 * Pipeline start-up with byte 7 of t11:
	 * a4 = low nibble * 16, a5 = high nibble * 16 (byte offsets into a1).
	 */
25	extbl	t11,7,a4
26	and	a4,0xf0,a5
27	sll	a4,4,a4
28	and	a4,0xf0,a4
29
	/* Accumulator t8:t9 = table entry selected by the low nibble. */
30	addq	a4,a1,a4
31	ldq	t9,8(a4)
32	addq	a5,a1,a5
33	ldq	t8,0(a4)
34
	/*
	 * Shift t8:t9 right by 4: t12 = the 4 bits about to fall out,
	 * t0 = the 4 bits moving from t8 into the top of t9.
	 * Interleaved: v0 = 6 and byte 6 of t11 is extracted for the next step.
	 */
35	and	t9,0x0f,t12
36	sll	t8,60,t0
37	lda	v0,6(zero)
38	extbl	t11,6,a4
39
40	ldq	t6,8(a5)	/* t5:t6 = entry for the high nibble of byte 7 */
41	s8addq	t12,AT,t12	/* t12 = AT + 8*t12 = &rem_4bit[bits shifted out] */
42	ldq	t5,0(a5)
43	srl	t9,4,t9
44
45	ldq	t7,0(t12)	/* t7 = rem_4bit entry; XORed into t8 inside the loop */
46	srl	t8,4,t8
47	xor	t0,t9,t9
48	and	a4,0xf0,a5
49
50	xor	t6,t9,t9
51	sll	a4,4,a4
52	xor	t5,t8,t8
53	and	a4,0xf0,a4
54
	/* t3:t4 = entry for the low nibble of byte 6; a5 -> its high-nibble entry. */
55	addq	a4,a1,a4
56	ldq	t4,8(a4)
57	addq	a5,a1,a5
58	ldq	t3,0(a4)
59
	/*
	 * Each pass folds in both nibbles of the byte extracted on the previous
	 * pass (t3:t4 = low-nibble entry, t5:t6 = high-nibble entry loaded from
	 * a5), applies two 4-bit shifts with their rem_4bit corrections (t7),
	 * and extracts byte v0 of t11 for the next pass.  v0 runs 5..0.
	 */
60.Looplo1:
61	and	t9,0x0f,t12
62	sll	t8,60,t0
63	subq	v0,1,v0
64	srl	t9,4,t9
65
66	ldq	t6,8(a5)
67	xor	t7,t8,t8
68	ldq	t5,0(a5)
69	s8addq	t12,AT,t12
70
71	ldq	t7,0(t12)
72	srl	t8,4,t8
73	xor	t0,t9,t9
74	extbl	t11,v0,a4
75
76	and	a4,0xf0,a5
77	xor	t3,t8,t8
78	xor	t4,t9,t9
79	sll	a4,4,a4
80
81
82	and	t9,0x0f,t12
83	sll	t8,60,t0
84	and	a4,0xf0,a4
85	srl	t9,4,t9
86
87	s8addq	t12,AT,t12
88	xor	t7,t8,t8
89	addq	a4,a1,a4
90	addq	a5,a1,a5
91
92	ldq	t7,0(t12)
93	srl	t8,4,t8
94	ldq	t4,8(a4)
95	xor	t0,t9,t9
96
97	xor	t6,t9,t9
98	xor	t5,t8,t8
99	ldq	t3,0(a4)
100	bne	v0,.Looplo1
101
102
	/*
	 * Hand-over from t11 to t10: same step as one loop pass, but the counter
	 * is reloaded with 7 and the next byte is extracted from t10.  This
	 * folds in byte 0 of t11.
	 */
103	and	t9,0x0f,t12
104	sll	t8,60,t0
105	lda	v0,7(zero)
106	srl	t9,4,t9
107
108	ldq	t6,8(a5)
109	xor	t7,t8,t8
110	ldq	t5,0(a5)
111	s8addq	t12,AT,t12
112
113	ldq	t7,0(t12)
114	srl	t8,4,t8
115	xor	t0,t9,t9
116	extbl	t10,v0,a4
117
118	and	a4,0xf0,a5
119	xor	t3,t8,t8
120	xor	t4,t9,t9
121	sll	a4,4,a4
122
123	and	t9,0x0f,t12
124	sll	t8,60,t0
125	and	a4,0xf0,a4
126	srl	t9,4,t9
127
128	s8addq	t12,AT,t12
129	xor	t7,t8,t8
130	addq	a4,a1,a4
131	addq	a5,a1,a5
132
133	ldq	t7,0(t12)
134	srl	t8,4,t8
135	ldq	t4,8(a4)
136	xor	t0,t9,t9
137
138	xor	t6,t9,t9
139	xor	t5,t8,t8
140	ldq	t3,0(a4)
141	unop
142
143
	/* Same loop body as .Looplo1, extracting bytes 6..0 of t10. */
144.Loophi1:
145	and	t9,0x0f,t12
146	sll	t8,60,t0
147	subq	v0,1,v0
148	srl	t9,4,t9
149
150	ldq	t6,8(a5)
151	xor	t7,t8,t8
152	ldq	t5,0(a5)
153	s8addq	t12,AT,t12
154
155	ldq	t7,0(t12)
156	srl	t8,4,t8
157	xor	t0,t9,t9
158	extbl	t10,v0,a4
159
160	and	a4,0xf0,a5
161	xor	t3,t8,t8
162	xor	t4,t9,t9
163	sll	a4,4,a4
164
165
166	and	t9,0x0f,t12
167	sll	t8,60,t0
168	and	a4,0xf0,a4
169	srl	t9,4,t9
170
171	s8addq	t12,AT,t12
172	xor	t7,t8,t8
173	addq	a4,a1,a4
174	addq	a5,a1,a5
175
176	ldq	t7,0(t12)
177	srl	t8,4,t8
178	ldq	t4,8(a4)
179	xor	t0,t9,t9
180
181	xor	t6,t9,t9
182	xor	t5,t8,t8
183	ldq	t3,0(a4)
184	bne	v0,.Loophi1
185
186
	/*
	 * Pipeline drain: fold in the two nibbles of byte 0 of t10.  No further
	 * byte is extracted; the last rem_4bit value (t7) is applied at the end.
	 */
187	and	t9,0x0f,t12
188	sll	t8,60,t0
189	srl	t9,4,t9
190
191	ldq	t6,8(a5)
192	xor	t7,t8,t8
193	ldq	t5,0(a5)
194	s8addq	t12,AT,t12
195
196	ldq	t7,0(t12)
197	srl	t8,4,t8
198	xor	t0,t9,t9
199
200	xor	t4,t9,t9
201	xor	t3,t8,t8
202
203	and	t9,0x0f,t12
204	sll	t8,60,t0
205	srl	t9,4,t9
206
207	s8addq	t12,AT,t12
208	xor	t7,t8,t8
209
210	ldq	t7,0(t12)
211	srl	t8,4,t8
212	xor	t6,t9,t9
213	xor	t5,t8,t8
214	xor	t0,t9,t9
215	xor	t7,t8,t8
	/*
	 * Reverse the byte order of t9 (result in t11) and of t8 (result in
	 * t10).  The shift/zapnot/or steps reverse the bytes inside each
	 * 32-bit half; the final srl 32 / sll 32 / or swaps the two halves.
	 * The two reversals are interleaved.
	 */
216	srl	t9,24,t0	# byte swap
217	srl	t9,8,t1
218
219	sll	t9,8,t2
220	sll	t9,24,t9
221	zapnot	t0,0x11,t0
222	zapnot	t1,0x22,t1
223
224	zapnot	t9,0x88,t9
225	or	t0,t1,t0
226	zapnot	t2,0x44,t2
227
228	or	t9,t0,t9
229	srl	t8,24,t0
230	srl	t8,8,t1
231
232	or	t9,t2,t9
233	sll	t8,8,t2
234	sll	t8,24,t8
235
236	srl	t9,32,t11
237	sll	t9,32,t9
238
239	zapnot	t0,0x11,t0
240	zapnot	t1,0x22,t1
241	or	t9,t11,t11
242
243	zapnot	t8,0x88,t8
244	or	t0,t1,t0
245	zapnot	t2,0x44,t2
246
247	or	t8,t0,t8
248	or	t8,t2,t8
249
250	srl	t8,32,t10
251	sll	t8,32,t8
252
253	or	t8,t10,t10
	/* Write the result back over the input block. */
254	stq	t11,8(a0)
255	stq	t10,0(a0)
256
257	ret	(ra)
258.end	gcm_gmult_4bit
/*
 * gcm_ghash_4bit
 *
 * For each 16-byte block of input at a2: XOR the block into the running
 * value (t10/t11, initially loaded from 0(a0)/8(a0)), then run the same
 * nibble-by-nibble table multiply as gcm_gmult_4bit, using the table at
 * a1 and rem_4bit.  The running value stays in t10/t11 between blocks and
 * is stored to 0(a0)/8(a0) only once, at .Ldone.
 *
 * In:    a0 = 16-byte running value, read and written with ldq/stq
 *        a1 = table of 16 entries x 16 bytes (offsets 0x00..0xf8 are read)
 *        a2 = input; read with ldq_u + extql/extqh, so any alignment works
 *        a3 = input length in bytes
 * Uses:  v0, t0-t12, a2, a3, a4, a5, AT; s0 and s1 are saved and restored
 * Stack: 32-byte frame: ra at 0(sp), s0 at 8(sp), s1 at 16(sp)
 *
 * The first block is processed before a3 is examined, and a3 is only
 * tested for being exactly zero after subtracting 16 per block, so the
 * code as written expects a3 to be a non-zero multiple of 16.
 *
 * NOTE(review): presumably called from C as
 *   void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 *                       const u8 *inp, size_t len);
 * confirm against the caller.
 */
259.globl	gcm_ghash_4bit
260.align	4
261.ent	gcm_ghash_4bit
262gcm_ghash_4bit:
263	lda	sp,-32(sp)
264	stq	ra,0(sp)
265	stq	s0,8(sp)
266	stq	s1,16(sp)
267	.mask	0x04000600,-32
268	.frame	sp,32,ra
269	.prologue 0
270
	/*
	 * Fetch the aligned quadwords that cover input bytes 0..7 (s0, t3) and
	 * 8..15 (s1, t4); they are merged at .Louter.
	 */
271	ldq_u	s0,0(a2)
272	ldq_u	t3,7(a2)
273	ldq_u	s1,8(a2)
274	ldq_u	t4,15(a2)
275	ldq	t10,0(a0)
276	ldq	t11,8(a0)
277
	/* picmeup returns through t0 (ra stays intact) with AT = &rem_4bit. */
278	bsr	t0,picmeup
279	nop
280
281.Louter:
	/* s0 = input bytes 0..7, assembled from the two covering quadwords. */
282	extql	s0,a2,s0
283	extqh	t3,a2,t3
284	or	s0,t3,s0
285	lda	a2,16(a2)	/* advance to the next block */
286
	/*
	 * s1 = input bytes 8..15.  a2 has already been advanced by 16, which
	 * leaves its low three bits (the only bits extql/extqh use) unchanged.
	 */
287	extql	s1,a2,s1
288	extqh	t4,a2,t4
289	or	s1,t4,s1
290	subq	a3,16,a3	/* bytes remaining after this block */
291
	/* Fold the input block into the running value. */
292	xor	t11,s1,t11
293	xor	t10,s0,t10
294.align	4
	/*
	 * Pipeline start-up with byte 7 of t11:
	 * a4 = low nibble * 16, a5 = high nibble * 16 (byte offsets into a1).
	 */
295	extbl	t11,7,a4
296	and	a4,0xf0,a5
297	sll	a4,4,a4
298	and	a4,0xf0,a4
299
	/* Accumulator t8:t9 = table entry selected by the low nibble. */
300	addq	a4,a1,a4
301	ldq	t9,8(a4)
302	addq	a5,a1,a5
303	ldq	t8,0(a4)
304
	/*
	 * Shift t8:t9 right by 4: t12 = the 4 bits about to fall out,
	 * t0 = the 4 bits moving from t8 into the top of t9.
	 * Interleaved: v0 = 6 and byte 6 of t11 is extracted for the next step.
	 */
305	and	t9,0x0f,t12
306	sll	t8,60,t0
307	lda	v0,6(zero)
308	extbl	t11,6,a4
309
310	ldq	t6,8(a5)	/* t5:t6 = entry for the high nibble of byte 7 */
311	s8addq	t12,AT,t12	/* t12 = AT + 8*t12 = &rem_4bit[bits shifted out] */
312	ldq	t5,0(a5)
313	srl	t9,4,t9
314
315	ldq	t7,0(t12)	/* t7 = rem_4bit entry; XORed into t8 inside the loop */
316	srl	t8,4,t8
317	xor	t0,t9,t9
318	and	a4,0xf0,a5
319
320	xor	t6,t9,t9
321	sll	a4,4,a4
322	xor	t5,t8,t8
323	and	a4,0xf0,a4
324
	/* t3:t4 = entry for the low nibble of byte 6; a5 -> its high-nibble entry. */
325	addq	a4,a1,a4
326	ldq	t4,8(a4)
327	addq	a5,a1,a5
328	ldq	t3,0(a4)
329
	/*
	 * Each pass folds in both nibbles of the byte extracted on the previous
	 * pass (t3:t4 = low-nibble entry, t5:t6 = high-nibble entry loaded from
	 * a5), applies two 4-bit shifts with their rem_4bit corrections (t7),
	 * and extracts byte v0 of t11 for the next pass.  v0 runs 5..0.
	 */
330.Looplo2:
331	and	t9,0x0f,t12
332	sll	t8,60,t0
333	subq	v0,1,v0
334	srl	t9,4,t9
335
336	ldq	t6,8(a5)
337	xor	t7,t8,t8
338	ldq	t5,0(a5)
339	s8addq	t12,AT,t12
340
341	ldq	t7,0(t12)
342	srl	t8,4,t8
343	xor	t0,t9,t9
344	extbl	t11,v0,a4
345
346	and	a4,0xf0,a5
347	xor	t3,t8,t8
348	xor	t4,t9,t9
349	sll	a4,4,a4
350
351
352	and	t9,0x0f,t12
353	sll	t8,60,t0
354	and	a4,0xf0,a4
355	srl	t9,4,t9
356
357	s8addq	t12,AT,t12
358	xor	t7,t8,t8
359	addq	a4,a1,a4
360	addq	a5,a1,a5
361
362	ldq	t7,0(t12)
363	srl	t8,4,t8
364	ldq	t4,8(a4)
365	xor	t0,t9,t9
366
367	xor	t6,t9,t9
368	xor	t5,t8,t8
369	ldq	t3,0(a4)
370	bne	v0,.Looplo2
371
372
	/*
	 * Hand-over from t11 to t10: same step as one loop pass, but the counter
	 * is reloaded with 7 and the next byte is extracted from t10.  This
	 * folds in byte 0 of t11.
	 */
373	and	t9,0x0f,t12
374	sll	t8,60,t0
375	lda	v0,7(zero)
376	srl	t9,4,t9
377
378	ldq	t6,8(a5)
379	xor	t7,t8,t8
380	ldq	t5,0(a5)
381	s8addq	t12,AT,t12
382
383	ldq	t7,0(t12)
384	srl	t8,4,t8
385	xor	t0,t9,t9
386	extbl	t10,v0,a4
387
388	and	a4,0xf0,a5
389	xor	t3,t8,t8
390	xor	t4,t9,t9
391	sll	a4,4,a4
392
393	and	t9,0x0f,t12
394	sll	t8,60,t0
395	and	a4,0xf0,a4
396	srl	t9,4,t9
397
398	s8addq	t12,AT,t12
399	xor	t7,t8,t8
400	addq	a4,a1,a4
401	addq	a5,a1,a5
402
403	ldq	t7,0(t12)
404	srl	t8,4,t8
405	ldq	t4,8(a4)
406	xor	t0,t9,t9
407
408	xor	t6,t9,t9
409	xor	t5,t8,t8
410	ldq	t3,0(a4)
411	unop
412
413
	/* Same loop body as .Looplo2, extracting bytes 6..0 of t10. */
414.Loophi2:
415	and	t9,0x0f,t12
416	sll	t8,60,t0
417	subq	v0,1,v0
418	srl	t9,4,t9
419
420	ldq	t6,8(a5)
421	xor	t7,t8,t8
422	ldq	t5,0(a5)
423	s8addq	t12,AT,t12
424
425	ldq	t7,0(t12)
426	srl	t8,4,t8
427	xor	t0,t9,t9
428	extbl	t10,v0,a4
429
430	and	a4,0xf0,a5
431	xor	t3,t8,t8
432	xor	t4,t9,t9
433	sll	a4,4,a4
434
435
436	and	t9,0x0f,t12
437	sll	t8,60,t0
438	and	a4,0xf0,a4
439	srl	t9,4,t9
440
441	s8addq	t12,AT,t12
442	xor	t7,t8,t8
443	addq	a4,a1,a4
444	addq	a5,a1,a5
445
446	ldq	t7,0(t12)
447	srl	t8,4,t8
448	ldq	t4,8(a4)
449	xor	t0,t9,t9
450
451	xor	t6,t9,t9
452	xor	t5,t8,t8
453	ldq	t3,0(a4)
454	bne	v0,.Loophi2
455
456
	/*
	 * Pipeline drain: fold in the two nibbles of byte 0 of t10.  No further
	 * byte is extracted; the last rem_4bit value (t7) is applied at the end.
	 */
457	and	t9,0x0f,t12
458	sll	t8,60,t0
459	srl	t9,4,t9
460
461	ldq	t6,8(a5)
462	xor	t7,t8,t8
463	ldq	t5,0(a5)
464	s8addq	t12,AT,t12
465
466	ldq	t7,0(t12)
467	srl	t8,4,t8
468	xor	t0,t9,t9
469
470	xor	t4,t9,t9
471	xor	t3,t8,t8
472
473	and	t9,0x0f,t12
474	sll	t8,60,t0
475	srl	t9,4,t9
476
477	s8addq	t12,AT,t12
478	xor	t7,t8,t8
479
480	ldq	t7,0(t12)
481	srl	t8,4,t8
482	xor	t6,t9,t9
483	xor	t5,t8,t8
484	xor	t0,t9,t9
485	xor	t7,t8,t8
	/*
	 * Reverse the byte order of t9 (result in t11) and of t8 (result in
	 * t10).  The first part is shared; the remainder is duplicated on the
	 * loop path and at .Ldone.
	 */
486	srl	t9,24,t0	# byte swap
487	srl	t9,8,t1
488
489	sll	t9,8,t2
490	sll	t9,24,t9
491	zapnot	t0,0x11,t0
492	zapnot	t1,0x22,t1
493
494	zapnot	t9,0x88,t9
495	or	t0,t1,t0
496	zapnot	t2,0x44,t2
497
498	or	t9,t0,t9
499	srl	t8,24,t0
500	srl	t8,8,t1
501
502	or	t9,t2,t9
503	sll	t8,8,t2
504	sll	t8,24,t8
505
506	srl	t9,32,t11
507	sll	t9,32,t9
508	beq	a3,.Ldone	/* no input left: finish the swap and store */
509
	/*
	 * More input: finish the byte swap while issuing the unaligned loads
	 * for the next block (s0/t3 and s1/t4, as in the prologue).  t3 and t4
	 * are free here because their table-entry values have been consumed.
	 */
510	zapnot	t0,0x11,t0
511	zapnot	t1,0x22,t1
512	or	t9,t11,t11
513	ldq_u	s0,0(a2)
514
515	zapnot	t8,0x88,t8
516	or	t0,t1,t0
517	zapnot	t2,0x44,t2
518	ldq_u	t3,7(a2)
519
520	or	t8,t0,t8
521	or	t8,t2,t8
522	ldq_u	s1,8(a2)
523	ldq_u	t4,15(a2)
524
525	srl	t8,32,t10
526	sll	t8,32,t8
527
528	or	t8,t10,t10
529	br	zero,.Louter
530
	/* Last block: finish the byte swap without touching the input again. */
531.Ldone:
532	zapnot	t0,0x11,t0
533	zapnot	t1,0x22,t1
534	or	t9,t11,t11
535
536	zapnot	t8,0x88,t8
537	or	t0,t1,t0
538	zapnot	t2,0x44,t2
539
540	or	t8,t0,t8
541	or	t8,t2,t8
542
543	srl	t8,32,t10
544	sll	t8,32,t8
545
546	or	t8,t10,t10
547
	/* Write the final running value back to a0. */
548	stq	t11,8(a0)
549	stq	t10,0(a0)
550
	/*
	 * Epilogue.  ra is never written in this function (the call to picmeup
	 * links through t0), so its reload is left commented out.
	 */
551	.set	noreorder
552	/*ldq	ra,0(sp)*/
553	ldq	s0,8(sp)
554	ldq	s1,16(sp)
555	lda	sp,32(sp)
556	ret	(ra)
557.end	gcm_ghash_4bit
558
/*
 * picmeup
 *
 * Position-independent way of obtaining the address of rem_4bit.
 * Called with "bsr t0,picmeup": the return address is in t0, not ra.
 *
 * In:    t0 = return address
 * Out:   AT = address of rem_4bit
 * Uses:  AT only; no stack (.frame sp,0,t0)
 *
 * "br AT,.Lpic" leaves the address of .Lpic in AT.  Adding 12 skips the
 * three 4-byte instructions lda, ret and the trailing nop, which lands
 * on rem_4bit.  Do not insert or remove anything between .Lpic and
 * rem_4bit without adjusting that constant.
 */
559.align	4
560.ent	picmeup
561picmeup:
562	.frame	sp,0,t0
563	.prologue 0
564	br	AT,.Lpic
565.Lpic:	lda	AT,12(AT)
566	ret	(t0)
567.end	picmeup
568	nop
/*
 * rem_4bit: 16 entries of 8 bytes each (two .long values: 0, then a 16-bit
 * constant shifted left by 16).  The multiply code indexes it with
 * "s8addq nibble,AT" and reads each entry with a single ldq, then XORs
 * the value into the upper accumulator quadword.
 * NOTE(review): on a little-endian target the second .long is the upper
 * 32 bits of the quadword, placing the constant in bits 48..63; confirm
 * that the target is little-endian.
 */
569rem_4bit:
570	.long	0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
571	.long	0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
572	.long	0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
573	.long	0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
/* Identification string embedded in the object; .ascii adds no NUL terminator. */
574.ascii	"GHASH for Alpha, CRYPTOGAMS by <appro@openssl.org>"
575.align	4
576
577