/* poly1305-mips64.S revision 1.1 */

/*
 * Byte offsets used with the ldl/ldr instruction pairs below.
 *
 * ldl is given the address of the most significant byte of the unaligned
 * doubleword and ldr the address of the least significant byte.  Which of
 * the two sits at offset 0 and which at offset 7 depends on the byte order
 * the file is built for, selected here by the MIPSEB preprocessor symbol.
 */
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

/*
 * noat:      $1 is used as an ordinary scratch register in this file, so
 *            the assembler must not use it for macro expansion.
 * noreorder: default for the file; individual routines switch to
 *            ".set reorder" where they want the assembler to fill branch
 *            delay slots.
 */
.text
.set	noat
.set	noreorder

/*
 * poly1305_init
 *
 * Clears the accumulator and, when a key pointer is supplied, stores the
 * clamped multiplier derived from the first 16 key bytes.
 *
 * In:   $4 = state block; six doublewords are written:
 *               0($4), 8($4), 16($4)   accumulator h0, h1, h2 (set to 0)
 *              24($4)                  r0 = key[0..7]  & 0x0ffffffc0fffffff
 *              32($4)                  r1 = key[8..15] & 0x0ffffffc0ffffffc
 *              40($4)                  s1 = r1 + (r1 >> 2)
 *       $5 = key bytes (read as two little-endian doublewords, any
 *            alignment).  If $5 is 0 only the accumulator is cleared.
 * Out:  $2 = 0
 * Uses: $8, $9, $10; big-endian builds without MIPS64R2 additionally use
 *       $1, $2, $11 and $24 for the byte swap.
 *
 * NOTE(review): the C-level prototype is not visible in this file; the
 * register assignment above is read directly from the code.
 */
.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$29,0,$31
	.set	reorder

	sd	$0,0($4)		# h0 = 0
	sd	$0,8($4)		# h1 = 0
	sd	$0,16($4)		# h2 = 0

	beqz	$5,.Lno_key		# no key supplied: leave r/s1 untouched

	ldl	$8,0+MSB($5)		# unaligned 64-bit loads of key[0..7]
	ldl	$9,8+MSB($5)		# and key[8..15]
	ldr	$8,0+LSB($5)
	ldr	$9,8+LSB($5)
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8		# byte swap
	 dsbh	$9,$9
	dshd	$8,$8
	 dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1		# 0x000000FF000000FF

	and	$11,$8,$10	# byte swap
	 and	$2,$9,$10
	dsrl	$1,$8,24
	 dsrl	$24,$9,24
	dsll	$11,24
	 dsll	$2,24
	and	$1,$10
	 and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	 or	$2,$24
	and	$1,$8,$10
	 and	$24,$9,$10
	dsrl	$8,8
	 dsrl	$9,8
	dsll	$1,8
	 dsll	$24,8
	and	$8,$10
	 and	$9,$10
	or	$11,$1
	 or	$2,$24
	or	$8,$11
	 or	$9,$2
	dsrl	$11,$8,32
	 dsrl	$2,$9,32
	dsll	$8,32
	 dsll	$9,32
	or	$8,$11
	 or	$9,$2
# endif
#endif
	li	$10,1			# build the clamp mask without a literal pool:
	dsll	$10,32			#   0x0000000100000000
	daddiu	$10,-63			#   0x00000000ffffffc1
	dsll	$10,28			#   0x0ffffffc10000000
	daddiu	$10,-1		# 0ffffffc0fffffff

	and	$8,$10			# r0 = clamped low key half
	daddiu	$10,-3		# 0ffffffc0ffffffc
	and	$9,$10			# r1 = clamped high key half

	sd	$8,24($4)
	dsrl	$10,$9,2
	sd	$9,32($4)
	daddu	$10,$9		# s1 = r1 + (r1 >> 2)
	sd	$10,40($4)

.Lno_key:
	li	$2,0			# return 0
	jr	$31
.end	poly1305_init
/*
 * poly1305_blocks
 *
 * Public entry point.  Converts the byte length in $6 into a count of
 * complete 16-byte blocks (any remainder is ignored) and, if that count
 * is non-zero, branches to poly1305_blocks_internal, which returns
 * straight to the caller.  With no complete block it returns at once and
 * touches nothing.
 *
 * In:   $4 = state block (layout described at poly1305_blocks_internal)
 *       $5 = input bytes, any alignment
 *       $6 = input length in bytes
 *       $7 = value added to the top accumulator limb for every block
 */
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$6,4			# number of complete blocks
	bnez	$6,poly1305_blocks_internal
	nop				# branch delay slot
	jr	$31
	nop				# return delay slot
.end	poly1305_blocks

/*
 * poly1305_blocks_internal (file-local, reached only from poly1305_blocks)
 *
 * For each 16-byte block: adds the block (read as two little-endian
 * doublewords) and $7 into the three-limb accumulator, multiplies by the
 * stored key, and folds everything above bit 130 back into the low limbs.
 *
 * In:   $4 = state block
 *               0($4), 8($4), 16($4)   accumulator h0, h1, h2 (read, updated)
 *              24($4), 32($4), 40($4)  r0, r1, s1 (read only)
 *       $5 = input bytes, any alignment
 *       $6 = number of blocks, must be non-zero (the loop tests it only
 *            after the first block has been processed)
 *       $7 = per-block addend for h2
 * Regs: $12,$13,$14 = h0,h1,h2      $15,$16,$17 = r0,r1,s1
 *       $8,$9       = input words, then product limbs d0,d1
 *       $25         = product limb d2
 *       $1,$2,$10,$11 scratch ($24 too on big-endian pre-R2 builds)
 * Stack: 48-byte frame; $17 saved at 40($29), $16 at 32($29).
 *
 * The stack pointer is adjusted with dsubu/daddu: the trapping dsub/dadd
 * forms raise an Integer Overflow exception on signed overflow, which is
 * never wanted for address arithmetic.
 */
.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.frame	$29,6*8,$31
	.mask	0x00030000,-8
	.set	noreorder
	dsubu	$29,6*8			# non-trapping stack allocation
	sd	$17,40($29)
	sd	$16,32($29)
	.set	reorder

	ld	$12,0($4)		# load hash value
	ld	$13,8($4)
	ld	$14,16($4)

	ld	$15,24($4)		# load key
	ld	$16,32($4)
	ld	$17,40($4)

.Loop:
	ldl	$8,0+MSB($5)	# load input
	ldl	$9,8+MSB($5)
	ldr	$8,0+LSB($5)
	daddiu	$6,-1			# one block consumed
	ldr	$9,8+LSB($5)
	daddiu	$5,16			# advance input pointer
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8		# byte swap
	 dsbh	$9,$9
	dshd	$8,$8
	 dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1		# 0x000000FF000000FF

	and	$11,$8,$10	# byte swap
	 and	$2,$9,$10
	dsrl	$1,$8,24
	 dsrl	$24,$9,24
	dsll	$11,24
	 dsll	$2,24
	and	$1,$10
	 and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	 or	$2,$24
	and	$1,$8,$10
	 and	$24,$9,$10
	dsrl	$8,8
	 dsrl	$9,8
	dsll	$1,8
	 dsll	$24,8
	and	$8,$10
	 and	$9,$10
	or	$11,$1
	 or	$2,$24
	or	$8,$11
	 or	$9,$2
	dsrl	$11,$8,32
	 dsrl	$2,$9,32
	dsll	$8,32
	 dsll	$9,32
	or	$8,$11
	 or	$9,$2
# endif
#endif
	daddu	$12,$8		# accumulate input
	daddu	$13,$9
	sltu	$10,$12,$8		# carry out of h0
	sltu	$11,$13,$9		# carry out of h1 (from the input add)
	daddu	$13,$10			# propagate h0 carry into h1

	dmultu	$15,$12			# h0*r0
	 daddu	$14,$7			# h2 += per-block addend
	 sltu	$10,$13,$10		# carry out of h1 (from the carry add)
	mflo	$8
	mfhi	$9

	dmultu	$17,$13			# h1*5*r1
	 daddu	$10,$11			# combine both h1 carries
	 daddu	$14,$10			# ... and fold them into h2
	mflo	$10
	mfhi	$11

	dmultu	$16,$12			# h0*r1
	 daddu	$8,$10
	 daddu	$9,$11
	mflo	$1
	mfhi	$25
	 sltu	$10,$8,$10		# carry out of d0
	 daddu	$9,$10

	dmultu	$15,$13			# h1*r0
	 daddu	$9,$1
	 sltu	$1,$9,$1		# carry out of d1
	mflo	$10
	mfhi	$11
	 daddu	$25,$1

	dmultu	$17,$14			# h2*5*r1
	 daddu	$9,$10
	 daddu	$25,$11
	mflo	$1

	dmultu	$15,$14			# h2*r0
	 sltu	$10,$9,$10		# carry out of d1
	 daddu	$25,$10
	mflo	$2

	daddu	$9,$1
	daddu	$25,$2
	sltu	$1,$9,$1		# carry out of d1
	daddu	$25,$1

	li	$10,-4		# final reduction
	and	$10,$25			# 4*(d2 >> 2)
	dsrl	$11,$25,2		#   (d2 >> 2)
	andi	$14,$25,3		# h2 keeps only the low two bits of d2
	daddu	$10,$11			# 5*(d2 >> 2): the part above bit 130, times 5
	daddu	$12,$8,$10
	sltu	$10,$12,$10
	daddu	$13,$9,$10
	sltu	$10,$13,$10
	daddu	$14,$14,$10

	bnez	$6,.Loop

	sd	$12,0($4)		# store hash value
	sd	$13,8($4)
	sd	$14,16($4)

	.set	noreorder
	ld	$17,40($29)		# epilogue
	ld	$16,32($29)
	jr	$31
	daddu	$29,6*8			# delay slot: non-trapping stack release
.end	poly1305_blocks_internal
/*
 * poly1305_emit
 *
 * Produces the 16-byte tag: conditionally subtracts the modulus
 * 2^130 - 5 from the accumulator, adds the 128-bit nonce modulo 2^128,
 * and stores the result byte by byte in little-endian order.
 *
 * In:   $4 = state block; 0($4), 8($4), 16($4) = accumulator h0, h1, h2
 *            (read only)
 *       $5 = 16-byte output buffer, any alignment (written with sb)
 *       $6 = nonce as four 32-bit words in native byte order (read with
 *            lwu, so the address is expected to be word aligned)
 * Out:  16 bytes at 0($5)..15($5); no return value is set.
 * Uses: $1, $2, $8, $9, $10, $11
 *
 * The selection between h and h + 5 is done with masks rather than a
 * branch.  NOTE(review): the mask is built as -((h + 5) >> 130), which is
 * all-ones only when that quotient is exactly 1 -- this presumes h2 is
 * small on entry; confirm against the producer of the state.
 */
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$29,0,$31
	.set	reorder

	ld	$10,0($4)		# h0
	ld	$11,8($4)		# h1
	ld	$1,16($4)		# h2

	daddiu	$8,$10,5		# compare to modulus
	sltiu	$2,$8,5			# carry out of h0 + 5
	daddu	$9,$11,$2
	sltu	$2,$9,$2		# carry out of h1
	daddu	$1,$1,$2

	dsrl	$1,2			# see if it carried/borrowed
	dsubu	$1,$0,$1		# mask: all ones when h + 5 reached 2^130
	nor	$2,$0,$1		# complementary mask

	and	$8,$1			# keep h + 5 (low 128 bits) under the mask ...
	and	$10,$2			# ... or the original h under its complement
	and	$9,$1
	and	$11,$2
	or	$8,$10
	or	$9,$11

	lwu	$10,0($6)		# load nonce
	lwu	$11,4($6)
	lwu	$1,8($6)
	lwu	$2,12($6)
	dsll	$11,32
	dsll	$2,32
	or	$10,$11			# low  64 bits of the nonce
	or	$1,$2			# high 64 bits of the nonce

	daddu	$8,$10		# accumulate nonce
	daddu	$9,$1
	sltu	$10,$8,$10		# carry out of the low half
	daddu	$9,$10

	dsrl	$10,$8,8		# write mac value
	dsrl	$11,$8,16
	dsrl	$1,$8,24
	sb	$8,0($5)
	dsrl	$2,$8,32
	sb	$10,1($5)
	dsrl	$10,$8,40
	sb	$11,2($5)
	dsrl	$11,$8,48
	sb	$1,3($5)
	dsrl	$1,$8,56
	sb	$2,4($5)
	dsrl	$2,$9,8
	sb	$10,5($5)
	dsrl	$10,$9,16
	sb	$11,6($5)
	dsrl	$11,$9,24
	sb	$1,7($5)

	sb	$9,8($5)
	dsrl	$1,$9,32
	sb	$2,9($5)
	dsrl	$2,$9,40
	sb	$10,10($5)
	dsrl	$10,$9,48
	sb	$11,11($5)
	dsrl	$11,$9,56
	sb	$1,12($5)
	sb	$2,13($5)
	sb	$10,14($5)
	sb	$11,15($5)

	jr	$31
.end	poly1305_emit
/* Identification string, placed in read-only data. */
.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by <appro@openssl.org>"
.align	2
