1#include "mips_arch.h"
2
# Byte offsets, inside an 8-byte field, of the most significant (MSB) and
# least significant (LSB) byte for the selected endianness.  They are
# added to the displacements of the ldl/ldr load pairs further down.
3#ifdef MIPSEB
4# define MSB 0
5# define LSB 7
6#else
7# define MSB 7
8# define LSB 0
9#endif
10
11.text
12.set	noat		# $1 is used explicitly as a temporary below
13.set	noreorder	# default mode; each function switches as needed
14
# poly1305_init
#
# Inputs:  $4 = context pointer, $5 = key pointer (zero is accepted)
# Output:  $2 = 0 in every case
# Scratch: $8-$10 always; $1, $2, $11 and $24 in the big-endian
#          byte-swap fallback
#
# Effect on the context at $4 (byte offsets):
#   ctx+0, ctx+8, ctx+16   set to zero
#   ctx+24                 first key doubleword  AND 0x0ffffffc0fffffff
#   ctx+32                 second key doubleword AND 0x0ffffffc0ffffffc
#   ctx+40                 (ctx+32 value) + ((ctx+32 value) >> 2)
# The key doublewords are the 8 bytes at $5+0 and $5+8 taken as
# little-endian integers (big-endian builds byte-swap after loading).
# With a zero key pointer only the three zero stores are performed.
15.align	5
16.globl	poly1305_init
17.ent	poly1305_init
18poly1305_init:
19	.frame	$29,0,$31
20	.set	reorder
21
22	sd	$0,0($4)		# zero the three words at ctx+0/8/16
23	sd	$0,8($4)
24	sd	$0,16($4)
25
26	beqz	$5,.Lno_key		# zero key pointer: skip the key setup
27
28#if defined(_MIPS_ARCH_MIPS64R6)
29	ld	$8,0($5)		# R6 build: plain doubleword loads
30	ld	$9,8($5)
31#else
32	ldl	$8,0+MSB($5)		# ldl/ldr pairs assemble each doubleword,
33	ldl	$9,8+MSB($5)		# so the key address need not be aligned
34	ldr	$8,0+LSB($5)
35	ldr	$9,8+LSB($5)
36#endif
37#ifdef	MIPSEB
38# if defined(_MIPS_ARCH_MIPS64R2)
39	dsbh	$8,$8		# byte swap: swap bytes inside each halfword
40	 dsbh	$9,$9
41	dshd	$8,$8		# then reverse the order of the four halfwords
42	 dshd	$9,$9
43# else
	# Fallback byte reversal with shifts and masks.  Lines indented by one
	# extra space do the same work for the second doubleword in $9, using
	# $2 and $24 where the first one uses $11 and $1.
44	ori	$10,$0,0xFF
45	dsll	$1,$10,32
46	or	$10,$1		# 0x000000FF000000FF
47
	# Stage 1: reverse the four bytes inside each 32-bit half.
48	and	$11,$8,$10	# byte swap
49	 and	$2,$9,$10
50	dsrl	$1,$8,24
51	 dsrl	$24,$9,24
52	dsll	$11,24
53	 dsll	$2,24
54	and	$1,$10
55	 and	$24,$10
56	dsll	$10,8			# 0x0000FF000000FF00
57	or	$11,$1
58	 or	$2,$24
59	and	$1,$8,$10
60	 and	$24,$9,$10
61	dsrl	$8,8
62	 dsrl	$9,8
63	dsll	$1,8
64	 dsll	$24,8
65	and	$8,$10
66	 and	$9,$10
67	or	$11,$1
68	 or	$2,$24
69	or	$8,$11
70	 or	$9,$2
	# Stage 2: exchange the two 32-bit halves.
71	dsrl	$11,$8,32
72	 dsrl	$2,$9,32
73	dsll	$8,32
74	 dsll	$9,32
75	or	$8,$11
76	 or	$9,$2
77# endif
78#endif
	# Build the mask constants arithmetically instead of loading them.
79	li	$10,1
80	dsll	$10,32			# 0x0000000100000000
81	daddiu	$10,-63			# 0x00000000ffffffc1
82	dsll	$10,28			# 0x0ffffffc10000000
83	daddiu	$10,-1		# 0ffffffc0fffffff
84
85	and	$8,$10			# r0 = first key doubleword, masked
86	daddiu	$10,-3		# 0ffffffc0ffffffc
87	and	$9,$10			# r1 = second key doubleword, masked
88
89	sd	$8,24($4)		# ctx+24 = r0
90	dsrl	$10,$9,2
91	sd	$9,32($4)		# ctx+32 = r1
92	daddu	$10,$9		# s1 = r1 + (r1 >> 2)
93	sd	$10,40($4)		# ctx+40 = s1
94
95.Lno_key:
96	li	$2,0			# return 0
97	jr	$31			# .set reorder: assembler handles the delay slot
98.end	poly1305_init
# poly1305_blocks
#
# Inputs: $6 = length in bytes; $4, $5 and $7 are left untouched for
#         poly1305_blocks_internal
# Turns $6 into a count of complete 16-byte blocks (any remainder is
# dropped by the shift).  A non-zero count branches, without linking, to
# poly1305_blocks_internal, which returns to the original caller through
# $31.  A zero count returns at once.  The nop instructions occupy the
# branch delay slots because .set noreorder is in effect.
99.align	5
100.globl	poly1305_blocks
101.ent	poly1305_blocks
102poly1305_blocks:
103	.set	noreorder
104	dsrl	$6,4			# number of complete blocks
105	bnez	$6,poly1305_blocks_internal
106	nop				# delay slot of the bnez
107	jr	$31			# nothing to process
108	nop				# delay slot of the jr
109.end	poly1305_blocks
110
# poly1305_blocks_internal
#
# Not exported (there is no .globl); the visible entry path is the branch
# from poly1305_blocks.
# Inputs:  $4 = context pointer
#          $5 = input pointer, advanced by 16 per block
#          $6 = number of 16-byte blocks; must be non-zero because the
#               loop decrements before it tests
#          $7 = value added to the top hash word once per block
#               (presumably the pad bit of the C-level API - confirm)
# Frame:   48 bytes; $17 saved at 40($29), $16 at 32($29); the other
#          slots are not written here
# Register roles:
#   $12,$13,$14  hash words h0,h1,h2 (ctx+0, ctx+8, ctx+16)
#   $15,$16      key words r0,r1 (ctx+24, ctx+32)
#   $17          word from ctx+40, the operand the product comments
#                label 5*r1 (called s1 below)
#   $8,$9        input block, afterwards product accumulators d0,d1
#   $25          product accumulator d2
#   $1,$2,$10,$11,$24  temporaries
# The parenthesised dmultu, mflo and mfhi forms are not plain
# instructions; presumably they are macros from mips_arch.h, which is
# not visible here.  Lines indented by one extra space are carry and
# accumulate work placed between a multiply and the reads of its result.
111.align	5
112.ent	poly1305_blocks_internal
113poly1305_blocks_internal:
114	.frame	$29,6*8,$31
115	.mask	0x00030000,-8		# bits 16,17: $16/$17 saved, top slot at frame top - 8
116	.set	noreorder
117	dsubu	$29,6*8			# allocate the frame
118	sd	$17,40($29)
119	sd	$16,32($29)
120	.set	reorder
121
122	ld	$12,0($4)		# load hash value
123	ld	$13,8($4)
124	ld	$14,16($4)
125
126	ld	$15,24($4)		# load key
127	ld	$16,32($4)
128	ld	$17,40($4)
129
130.Loop:
131#if defined(_MIPS_ARCH_MIPS64R6)
132	ld	$8,0($5)		# load input
133	ld	$9,8($5)
134#else
135	ldl	$8,0+MSB($5)	# load input
136	ldl	$9,8+MSB($5)
137	ldr	$8,0+LSB($5)
138	ldr	$9,8+LSB($5)
139#endif
140	daddiu	$6,-1			# one block fewer to go
141	daddiu	$5,16			# advance the input pointer
142#ifdef	MIPSEB
143# if defined(_MIPS_ARCH_MIPS64R2)
144	dsbh	$8,$8		# byte swap
145	 dsbh	$9,$9
146	dshd	$8,$8
147	 dshd	$9,$9
148# else
	# Fallback byte reversal: first inside each 32-bit half, then the two
	# halves are exchanged.  $9 is handled on the indented lines.
149	ori	$10,$0,0xFF
150	dsll	$1,$10,32
151	or	$10,$1		# 0x000000FF000000FF
152
153	and	$11,$8,$10	# byte swap
154	 and	$2,$9,$10
155	dsrl	$1,$8,24
156	 dsrl	$24,$9,24
157	dsll	$11,24
158	 dsll	$2,24
159	and	$1,$10
160	 and	$24,$10
161	dsll	$10,8			# 0x0000FF000000FF00
162	or	$11,$1
163	 or	$2,$24
164	and	$1,$8,$10
165	 and	$24,$9,$10
166	dsrl	$8,8
167	 dsrl	$9,8
168	dsll	$1,8
169	 dsll	$24,8
170	and	$8,$10
171	 and	$9,$10
172	or	$11,$1
173	 or	$2,$24
174	or	$8,$11
175	 or	$9,$2
176	dsrl	$11,$8,32
177	 dsrl	$2,$9,32
178	dsll	$8,32
179	 dsll	$9,32
180	or	$8,$11
181	 or	$9,$2
182# endif
183#endif
184	daddu	$12,$8		# accumulate input
185	daddu	$13,$9
186	sltu	$10,$12,$8		# carry out of the h0 addition
187	sltu	$11,$13,$9		# carry out of the h1 addition
188	daddu	$13,$10			# h1 += carry from h0
189
190	dmultu	($15,$12)		# h0*r0
191	 daddu	$14,$7			# h2 += $7
192	 sltu	$10,$13,$10		# carry from adding the h0 carry to h1
193	mflo	($8,$15,$12)		# d0 = low half of h0*r0
194	mfhi	($9,$15,$12)		# d1 = high half of h0*r0
195
196	dmultu	($17,$13)		# h1*5*r1
197	 daddu	$10,$11			# merge both carries out of h1
198	 daddu	$14,$10			# h2 += merged carries
199	mflo	($10,$17,$13)
200	mfhi	($11,$17,$13)
201
202	dmultu	($16,$12)		# h0*r1
203	 daddu	$8,$10			# d0 += low half of h1*s1
204	 daddu	$9,$11			# d1 += high half of h1*s1
205	mflo	($1,$16,$12)
206	mfhi	($25,$16,$12)		# d2 = high half of h0*r1
207	 sltu	$10,$8,$10		# carry out of d0
208	 daddu	$9,$10			# d1 += carry
209
210	dmultu	($15,$13)		# h1*r0
211	 daddu	$9,$1			# d1 += low half of h0*r1
212	 sltu	$1,$9,$1		# carry out of d1
213	mflo	($10,$15,$13)
214	mfhi	($11,$15,$13)
215	 daddu	$25,$1			# d2 += carry
216
217	dmultu	($17,$14)		# h2*5*r1
218	 daddu	$9,$10			# d1 += low half of h1*r0
219	 daddu	$25,$11			# d2 += high half of h1*r0
220	mflo	($1,$17,$14)		# only the low half is read
221
222	dmultu	($15,$14)		# h2*r0
223	 sltu	$10,$9,$10		# carry out of d1
224	 daddu	$25,$10			# d2 += carry
225	mflo	($2,$15,$14)		# only the low half is read
226
227	daddu	$9,$1			# d1 += low half of h2*s1
228	daddu	$25,$2			# d2 += low half of h2*r0
229	sltu	$1,$9,$1		# carry out of d1
230	daddu	$25,$1			# d2 += carry
231
232	li	$10,-4		# final reduction
233	and	$10,$25			# d2 with its two low bits cleared
234	dsrl	$11,$25,2		# d2 >> 2
235	andi	$14,$25,3		# h2 = two low bits of d2
236	daddu	$10,$11			# (d2 & -4) + (d2 >> 2), i.e. 5*(d2 >> 2)
237	daddu	$12,$8,$10		# h0 = d0 + that sum
238	sltu	$10,$12,$10		# carry out of h0
239	daddu	$13,$9,$10		# h1 = d1 + carry
240	sltu	$10,$13,$10		# carry out of h1
241	daddu	$14,$14,$10		# h2 += carry
242
243	bnez	$6,.Loop		# more blocks remaining
244
245	sd	$12,0($4)		# store hash value
246	sd	$13,8($4)
247	sd	$14,16($4)
248
249	.set	noreorder
250	ld	$17,40($29)		# epilogue
251	ld	$16,32($29)
252	jr	$31
253	daddu	$29,6*8			# delay slot of the jr: release the frame
254.end	poly1305_blocks_internal
# poly1305_emit
#
# Inputs:  $4 = context pointer (hash words read from ctx+0, +8, +16)
#          $5 = output pointer; 16 bytes are stored one byte at a time
#          $6 = pointer to four 32-bit words that are added to the
#               result (labelled nonce by the comments below)
# Output:  none; $2 serves only as a temporary
# Scratch: $1, $2, $8-$11
#
# Steps: compute hash + 5 over the three words, derive an all-ones or
# all-zero mask from the top word, select hash + 5 or the unchanged hash
# with and/or (no branch), add the 128-bit nonce, then store the low
# 128 bits least significant byte first.
255.align	5
256.globl	poly1305_emit
257.ent	poly1305_emit
258poly1305_emit:
259	.frame	$29,0,$31
260	.set	reorder
261
262	ld	$10,0($4)		# h0
263	ld	$11,8($4)		# h1
264	ld	$1,16($4)		# h2
265
266	daddiu	$8,$10,5		# compare to modulus
267	sltiu	$2,$8,5			# carry out of h0 + 5
268	daddu	$9,$11,$2		# h1 + carry
269	sltu	$2,$9,$2		# carry out of h1
270	daddu	$1,$1,$2		# h2 + carry
271
272	dsrl	$1,2			# see if it carried/borrowed
273	dsubu	$1,$0,$1		# 0 - value: all ones when the value is 1, zero when 0
274	nor	$2,$0,$1		# complement of the mask
275
276	and	$8,$1			# keep hash + 5 where the mask is set
277	and	$10,$2			# keep the original hash otherwise
278	and	$9,$1
279	and	$11,$2
280	or	$8,$10			# selected low word
281	or	$9,$11			# selected high word
282
283	lwu	$10,0($6)		# load nonce
284	lwu	$11,4($6)
285	lwu	$1,8($6)
286	lwu	$2,12($6)
287	dsll	$11,32			# words 1 and 3 form the upper halves
288	dsll	$2,32
289	or	$10,$11			# low 64 bits of the nonce
290	or	$1,$2			# high 64 bits of the nonce
291
292	daddu	$8,$10		# accumulate nonce
293	daddu	$9,$1
294	sltu	$10,$8,$10		# carry out of the low word
295	daddu	$9,$10			# carry out of the high word is not kept
296
	# $8 supplies output bytes 0-7 and $9 bytes 8-15.  Each shift is
	# issued a few instructions ahead of the store that uses its result.
297	dsrl	$10,$8,8		# write mac value
298	dsrl	$11,$8,16
299	dsrl	$1,$8,24
300	sb	$8,0($5)
301	dsrl	$2,$8,32
302	sb	$10,1($5)
303	dsrl	$10,$8,40
304	sb	$11,2($5)
305	dsrl	$11,$8,48
306	sb	$1,3($5)
307	dsrl	$1,$8,56
308	sb	$2,4($5)
309	dsrl	$2,$9,8
310	sb	$10,5($5)
311	dsrl	$10,$9,16
312	sb	$11,6($5)
313	dsrl	$11,$9,24
314	sb	$1,7($5)
315
316	sb	$9,8($5)
317	dsrl	$1,$9,32
318	sb	$2,9($5)
319	dsrl	$2,$9,40
320	sb	$10,10($5)
321	dsrl	$10,$9,48
322	sb	$11,11($5)
323	dsrl	$11,$9,56
324	sb	$1,12($5)
325	sb	$2,13($5)
326	sb	$10,14($5)
327	sb	$11,15($5)
328
329	jr	$31			# .set reorder: assembler handles the delay slot
330.end	poly1305_emit
# Identification string kept in the read-only data section; nothing in
# the code visible here refers to it.
331.rdata
332.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by <appro@openssl.org>"
333.align	2
334