1#include "mips_arch.h"
2
3.text
4.set	noat
5#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
6.option	pic2
7#endif
8
9.align	5
10.globl	sha256_block_data_order
11.ent	sha256_block_data_order
12sha256_block_data_order:
13	.frame	$29,128,$31
14	.mask	0xc0ff0000,-4
15	.set	noreorder
16	.cpload	$25
17	subu $29,128
18	sw	$31,128-1*4($29)
19	sw	$30,128-2*4($29)
20	sw	$23,128-3*4($29)
21	sw	$22,128-4*4($29)
22	sw	$21,128-5*4($29)
23	sw	$20,128-6*4($29)
24	sw	$19,128-7*4($29)
25	sw	$18,128-8*4($29)
26	sw	$17,128-9*4($29)
27	sw	$16,128-10*4($29)
28	sll $23,$6,6
29	.set	reorder
30	la	$6,K256		# PIC-ified 'load address'
31
32	lw	$1,0*4($4)		# load context
33	lw	$2,1*4($4)
34	lw	$3,2*4($4)
35	lw	$7,3*4($4)
36	lw	$24,4*4($4)
37	lw	$25,5*4($4)
38	lw	$30,6*4($4)
39	lw	$31,7*4($4)
40
41	addu $23,$5		# pointer to the end of input
42	sw	$23,16*4($29)
43	b	.Loop
44
45.align	5
46.Loop:
47#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
48	lw	$8,($5)
49#else
50	lwl	$8,3($5)
51	lwr	$8,0($5)
52#endif
53#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
54	lw	$9,4($5)
55#else
56	lwl	$9,7($5)
57	lwr	$9,4($5)
58#endif
59#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
60	wsbh	$8,$8		# byte swap(0)
61	rotr	$8,$8,16
62#else
63	srl	$13,$8,24		# byte swap(0)
64	srl	$14,$8,8
65	andi	$15,$8,0xFF00
66	sll	$8,$8,24
67	andi	$14,0xFF00
68	sll	$15,$15,8
69	or	$8,$13
70	or	$14,$15
71	or	$8,$14
72#endif
73#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
74	xor	$15,$25,$30			# 0
75	rotr	$13,$24,6
76	addu	$12,$8,$31
77	rotr	$14,$24,11
78	and	$15,$24
79	rotr	$31,$24,25
80	xor	$13,$14
81	rotr	$14,$1,2
82	xor	$15,$30			# Ch(e,f,g)
83	xor	$13,$31			# Sigma1(e)
84
85	rotr	$31,$1,13
86	addu	$12,$15
87	lw	$15,0($6)		# K[0]
88	xor	$31,$14
89	rotr	$14,$1,22
90	addu	$12,$13
91	and	$13,$2,$3
92	xor	$31,$14			# Sigma0(a)
93	xor	$14,$2,$3
94#else
95	addu	$12,$8,$31			# 0
96	srl	$31,$24,6
97	xor	$15,$25,$30
98	sll	$14,$24,7
99	and	$15,$24
100	srl	$13,$24,11
101	xor	$31,$14
102	sll	$14,$24,21
103	xor	$31,$13
104	srl	$13,$24,25
105	xor	$31,$14
106	sll	$14,$24,26
107	xor	$31,$13
108	xor	$15,$30			# Ch(e,f,g)
109	xor	$13,$14,$31			# Sigma1(e)
110
111	srl	$31,$1,2
112	addu	$12,$15
113	lw	$15,0($6)		# K[0]
114	sll	$14,$1,10
115	addu	$12,$13
116	srl	$13,$1,13
117	xor	$31,$14
118	sll	$14,$1,19
119	xor	$31,$13
120	srl	$13,$1,22
121	xor	$31,$14
122	sll	$14,$1,30
123	xor	$31,$13
124	and	$13,$2,$3
125	xor	$31,$14			# Sigma0(a)
126	xor	$14,$2,$3
127#endif
128	sw	$8,0($29)	# offload to ring buffer
129	addu	$31,$13
130	and	$14,$1
131	addu	$12,$15			# +=K[0]
132	addu	$31,$14			# +=Maj(a,b,c)
133	addu	$7,$12
134	addu	$31,$12
135#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
136	lw	$10,8($5)
137#else
138	lwl	$10,11($5)
139	lwr	$10,8($5)
140#endif
141#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
142	wsbh	$9,$9		# byte swap(1)
143	rotr	$9,$9,16
144#else
145	srl	$14,$9,24		# byte swap(1)
146	srl	$15,$9,8
147	andi	$16,$9,0xFF00
148	sll	$9,$9,24
149	andi	$15,0xFF00
150	sll	$16,$16,8
151	or	$9,$14
152	or	$15,$16
153	or	$9,$15
154#endif
155#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
156	xor	$16,$24,$25			# 1
157	rotr	$14,$7,6
158	addu	$13,$9,$30
159	rotr	$15,$7,11
160	and	$16,$7
161	rotr	$30,$7,25
162	xor	$14,$15
163	rotr	$15,$31,2
164	xor	$16,$25			# Ch(e,f,g)
165	xor	$14,$30			# Sigma1(e)
166
167	rotr	$30,$31,13
168	addu	$13,$16
169	lw	$16,4($6)		# K[1]
170	xor	$30,$15
171	rotr	$15,$31,22
172	addu	$13,$14
173	and	$14,$1,$2
174	xor	$30,$15			# Sigma0(a)
175	xor	$15,$1,$2
176#else
177	addu	$13,$9,$30			# 1
178	srl	$30,$7,6
179	xor	$16,$24,$25
180	sll	$15,$7,7
181	and	$16,$7
182	srl	$14,$7,11
183	xor	$30,$15
184	sll	$15,$7,21
185	xor	$30,$14
186	srl	$14,$7,25
187	xor	$30,$15
188	sll	$15,$7,26
189	xor	$30,$14
190	xor	$16,$25			# Ch(e,f,g)
191	xor	$14,$15,$30			# Sigma1(e)
192
193	srl	$30,$31,2
194	addu	$13,$16
195	lw	$16,4($6)		# K[1]
196	sll	$15,$31,10
197	addu	$13,$14
198	srl	$14,$31,13
199	xor	$30,$15
200	sll	$15,$31,19
201	xor	$30,$14
202	srl	$14,$31,22
203	xor	$30,$15
204	sll	$15,$31,30
205	xor	$30,$14
206	and	$14,$1,$2
207	xor	$30,$15			# Sigma0(a)
208	xor	$15,$1,$2
209#endif
210	sw	$9,4($29)	# offload to ring buffer
211	addu	$30,$14
212	and	$15,$31
213	addu	$13,$16			# +=K[1]
214	addu	$30,$15			# +=Maj(a,b,c)
215	addu	$3,$13
216	addu	$30,$13
217#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
218	lw	$11,12($5)
219#else
220	lwl	$11,15($5)
221	lwr	$11,12($5)
222#endif
223#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
224	wsbh	$10,$10		# byte swap(2)
225	rotr	$10,$10,16
226#else
227	srl	$15,$10,24		# byte swap(2)
228	srl	$16,$10,8
229	andi	$17,$10,0xFF00
230	sll	$10,$10,24
231	andi	$16,0xFF00
232	sll	$17,$17,8
233	or	$10,$15
234	or	$16,$17
235	or	$10,$16
236#endif
237#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
238	xor	$17,$7,$24			# 2
239	rotr	$15,$3,6
240	addu	$14,$10,$25
241	rotr	$16,$3,11
242	and	$17,$3
243	rotr	$25,$3,25
244	xor	$15,$16
245	rotr	$16,$30,2
246	xor	$17,$24			# Ch(e,f,g)
247	xor	$15,$25			# Sigma1(e)
248
249	rotr	$25,$30,13
250	addu	$14,$17
251	lw	$17,8($6)		# K[2]
252	xor	$25,$16
253	rotr	$16,$30,22
254	addu	$14,$15
255	and	$15,$31,$1
256	xor	$25,$16			# Sigma0(a)
257	xor	$16,$31,$1
258#else
259	addu	$14,$10,$25			# 2
260	srl	$25,$3,6
261	xor	$17,$7,$24
262	sll	$16,$3,7
263	and	$17,$3
264	srl	$15,$3,11
265	xor	$25,$16
266	sll	$16,$3,21
267	xor	$25,$15
268	srl	$15,$3,25
269	xor	$25,$16
270	sll	$16,$3,26
271	xor	$25,$15
272	xor	$17,$24			# Ch(e,f,g)
273	xor	$15,$16,$25			# Sigma1(e)
274
275	srl	$25,$30,2
276	addu	$14,$17
277	lw	$17,8($6)		# K[2]
278	sll	$16,$30,10
279	addu	$14,$15
280	srl	$15,$30,13
281	xor	$25,$16
282	sll	$16,$30,19
283	xor	$25,$15
284	srl	$15,$30,22
285	xor	$25,$16
286	sll	$16,$30,30
287	xor	$25,$15
288	and	$15,$31,$1
289	xor	$25,$16			# Sigma0(a)
290	xor	$16,$31,$1
291#endif
292	sw	$10,8($29)	# offload to ring buffer
293	addu	$25,$15
294	and	$16,$30
295	addu	$14,$17			# +=K[2]
296	addu	$25,$16			# +=Maj(a,b,c)
297	addu	$2,$14
298	addu	$25,$14
299#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
300	lw	$12,16($5)
301#else
302	lwl	$12,19($5)
303	lwr	$12,16($5)
304#endif
305#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
306	wsbh	$11,$11		# byte swap(3)
307	rotr	$11,$11,16
308#else
309	srl	$16,$11,24		# byte swap(3)
310	srl	$17,$11,8
311	andi	$18,$11,0xFF00
312	sll	$11,$11,24
313	andi	$17,0xFF00
314	sll	$18,$18,8
315	or	$11,$16
316	or	$17,$18
317	or	$11,$17
318#endif
319#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
320	xor	$18,$3,$7			# 3
321	rotr	$16,$2,6
322	addu	$15,$11,$24
323	rotr	$17,$2,11
324	and	$18,$2
325	rotr	$24,$2,25
326	xor	$16,$17
327	rotr	$17,$25,2
328	xor	$18,$7			# Ch(e,f,g)
329	xor	$16,$24			# Sigma1(e)
330
331	rotr	$24,$25,13
332	addu	$15,$18
333	lw	$18,12($6)		# K[3]
334	xor	$24,$17
335	rotr	$17,$25,22
336	addu	$15,$16
337	and	$16,$30,$31
338	xor	$24,$17			# Sigma0(a)
339	xor	$17,$30,$31
340#else
341	addu	$15,$11,$24			# 3
342	srl	$24,$2,6
343	xor	$18,$3,$7
344	sll	$17,$2,7
345	and	$18,$2
346	srl	$16,$2,11
347	xor	$24,$17
348	sll	$17,$2,21
349	xor	$24,$16
350	srl	$16,$2,25
351	xor	$24,$17
352	sll	$17,$2,26
353	xor	$24,$16
354	xor	$18,$7			# Ch(e,f,g)
355	xor	$16,$17,$24			# Sigma1(e)
356
357	srl	$24,$25,2
358	addu	$15,$18
359	lw	$18,12($6)		# K[3]
360	sll	$17,$25,10
361	addu	$15,$16
362	srl	$16,$25,13
363	xor	$24,$17
364	sll	$17,$25,19
365	xor	$24,$16
366	srl	$16,$25,22
367	xor	$24,$17
368	sll	$17,$25,30
369	xor	$24,$16
370	and	$16,$30,$31
371	xor	$24,$17			# Sigma0(a)
372	xor	$17,$30,$31
373#endif
374	sw	$11,12($29)	# offload to ring buffer
375	addu	$24,$16
376	and	$17,$25
377	addu	$15,$18			# +=K[3]
378	addu	$24,$17			# +=Maj(a,b,c)
379	addu	$1,$15
380	addu	$24,$15
381#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
382	lw	$13,20($5)
383#else
384	lwl	$13,23($5)
385	lwr	$13,20($5)
386#endif
387#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
388	wsbh	$12,$12		# byte swap(4)
389	rotr	$12,$12,16
390#else
391	srl	$17,$12,24		# byte swap(4)
392	srl	$18,$12,8
393	andi	$19,$12,0xFF00
394	sll	$12,$12,24
395	andi	$18,0xFF00
396	sll	$19,$19,8
397	or	$12,$17
398	or	$18,$19
399	or	$12,$18
400#endif
401#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
402	xor	$19,$2,$3			# 4
403	rotr	$17,$1,6
404	addu	$16,$12,$7
405	rotr	$18,$1,11
406	and	$19,$1
407	rotr	$7,$1,25
408	xor	$17,$18
409	rotr	$18,$24,2
410	xor	$19,$3			# Ch(e,f,g)
411	xor	$17,$7			# Sigma1(e)
412
413	rotr	$7,$24,13
414	addu	$16,$19
415	lw	$19,16($6)		# K[4]
416	xor	$7,$18
417	rotr	$18,$24,22
418	addu	$16,$17
419	and	$17,$25,$30
420	xor	$7,$18			# Sigma0(a)
421	xor	$18,$25,$30
422#else
423	addu	$16,$12,$7			# 4
424	srl	$7,$1,6
425	xor	$19,$2,$3
426	sll	$18,$1,7
427	and	$19,$1
428	srl	$17,$1,11
429	xor	$7,$18
430	sll	$18,$1,21
431	xor	$7,$17
432	srl	$17,$1,25
433	xor	$7,$18
434	sll	$18,$1,26
435	xor	$7,$17
436	xor	$19,$3			# Ch(e,f,g)
437	xor	$17,$18,$7			# Sigma1(e)
438
439	srl	$7,$24,2
440	addu	$16,$19
441	lw	$19,16($6)		# K[4]
442	sll	$18,$24,10
443	addu	$16,$17
444	srl	$17,$24,13
445	xor	$7,$18
446	sll	$18,$24,19
447	xor	$7,$17
448	srl	$17,$24,22
449	xor	$7,$18
450	sll	$18,$24,30
451	xor	$7,$17
452	and	$17,$25,$30
453	xor	$7,$18			# Sigma0(a)
454	xor	$18,$25,$30
455#endif
456	sw	$12,16($29)	# offload to ring buffer
457	addu	$7,$17
458	and	$18,$24
459	addu	$16,$19			# +=K[4]
460	addu	$7,$18			# +=Maj(a,b,c)
461	addu	$31,$16
462	addu	$7,$16
463#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
464	lw	$14,24($5)
465#else
466	lwl	$14,27($5)
467	lwr	$14,24($5)
468#endif
469#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
470	wsbh	$13,$13		# byte swap(5)
471	rotr	$13,$13,16
472#else
473	srl	$18,$13,24		# byte swap(5)
474	srl	$19,$13,8
475	andi	$20,$13,0xFF00
476	sll	$13,$13,24
477	andi	$19,0xFF00
478	sll	$20,$20,8
479	or	$13,$18
480	or	$19,$20
481	or	$13,$19
482#endif
483#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
484	xor	$20,$1,$2			# 5
485	rotr	$18,$31,6
486	addu	$17,$13,$3
487	rotr	$19,$31,11
488	and	$20,$31
489	rotr	$3,$31,25
490	xor	$18,$19
491	rotr	$19,$7,2
492	xor	$20,$2			# Ch(e,f,g)
493	xor	$18,$3			# Sigma1(e)
494
495	rotr	$3,$7,13
496	addu	$17,$20
497	lw	$20,20($6)		# K[5]
498	xor	$3,$19
499	rotr	$19,$7,22
500	addu	$17,$18
501	and	$18,$24,$25
502	xor	$3,$19			# Sigma0(a)
503	xor	$19,$24,$25
504#else
505	addu	$17,$13,$3			# 5
506	srl	$3,$31,6
507	xor	$20,$1,$2
508	sll	$19,$31,7
509	and	$20,$31
510	srl	$18,$31,11
511	xor	$3,$19
512	sll	$19,$31,21
513	xor	$3,$18
514	srl	$18,$31,25
515	xor	$3,$19
516	sll	$19,$31,26
517	xor	$3,$18
518	xor	$20,$2			# Ch(e,f,g)
519	xor	$18,$19,$3			# Sigma1(e)
520
521	srl	$3,$7,2
522	addu	$17,$20
523	lw	$20,20($6)		# K[5]
524	sll	$19,$7,10
525	addu	$17,$18
526	srl	$18,$7,13
527	xor	$3,$19
528	sll	$19,$7,19
529	xor	$3,$18
530	srl	$18,$7,22
531	xor	$3,$19
532	sll	$19,$7,30
533	xor	$3,$18
534	and	$18,$24,$25
535	xor	$3,$19			# Sigma0(a)
536	xor	$19,$24,$25
537#endif
538	sw	$13,20($29)	# offload to ring buffer
539	addu	$3,$18
540	and	$19,$7
541	addu	$17,$20			# +=K[5]
542	addu	$3,$19			# +=Maj(a,b,c)
543	addu	$30,$17
544	addu	$3,$17
545#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
546	lw	$15,28($5)
547#else
548	lwl	$15,31($5)
549	lwr	$15,28($5)
550#endif
551#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
552	wsbh	$14,$14		# byte swap(6)
553	rotr	$14,$14,16
554#else
555	srl	$19,$14,24		# byte swap(6)
556	srl	$20,$14,8
557	andi	$21,$14,0xFF00
558	sll	$14,$14,24
559	andi	$20,0xFF00
560	sll	$21,$21,8
561	or	$14,$19
562	or	$20,$21
563	or	$14,$20
564#endif
565#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
566	xor	$21,$31,$1			# 6
567	rotr	$19,$30,6
568	addu	$18,$14,$2
569	rotr	$20,$30,11
570	and	$21,$30
571	rotr	$2,$30,25
572	xor	$19,$20
573	rotr	$20,$3,2
574	xor	$21,$1			# Ch(e,f,g)
575	xor	$19,$2			# Sigma1(e)
576
577	rotr	$2,$3,13
578	addu	$18,$21
579	lw	$21,24($6)		# K[6]
580	xor	$2,$20
581	rotr	$20,$3,22
582	addu	$18,$19
583	and	$19,$7,$24
584	xor	$2,$20			# Sigma0(a)
585	xor	$20,$7,$24
586#else
587	addu	$18,$14,$2			# 6
588	srl	$2,$30,6
589	xor	$21,$31,$1
590	sll	$20,$30,7
591	and	$21,$30
592	srl	$19,$30,11
593	xor	$2,$20
594	sll	$20,$30,21
595	xor	$2,$19
596	srl	$19,$30,25
597	xor	$2,$20
598	sll	$20,$30,26
599	xor	$2,$19
600	xor	$21,$1			# Ch(e,f,g)
601	xor	$19,$20,$2			# Sigma1(e)
602
603	srl	$2,$3,2
604	addu	$18,$21
605	lw	$21,24($6)		# K[6]
606	sll	$20,$3,10
607	addu	$18,$19
608	srl	$19,$3,13
609	xor	$2,$20
610	sll	$20,$3,19
611	xor	$2,$19
612	srl	$19,$3,22
613	xor	$2,$20
614	sll	$20,$3,30
615	xor	$2,$19
616	and	$19,$7,$24
617	xor	$2,$20			# Sigma0(a)
618	xor	$20,$7,$24
619#endif
620	sw	$14,24($29)	# offload to ring buffer
621	addu	$2,$19
622	and	$20,$3
623	addu	$18,$21			# +=K[6]
624	addu	$2,$20			# +=Maj(a,b,c)
625	addu	$25,$18
626	addu	$2,$18
627#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
628	lw	$16,32($5)
629#else
630	lwl	$16,35($5)
631	lwr	$16,32($5)
632#endif
633#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
634	wsbh	$15,$15		# byte swap(7)
635	rotr	$15,$15,16
636#else
637	srl	$20,$15,24		# byte swap(7)
638	srl	$21,$15,8
639	andi	$22,$15,0xFF00
640	sll	$15,$15,24
641	andi	$21,0xFF00
642	sll	$22,$22,8
643	or	$15,$20
644	or	$21,$22
645	or	$15,$21
646#endif
647#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
648	xor	$22,$30,$31			# 7
649	rotr	$20,$25,6
650	addu	$19,$15,$1
651	rotr	$21,$25,11
652	and	$22,$25
653	rotr	$1,$25,25
654	xor	$20,$21
655	rotr	$21,$2,2
656	xor	$22,$31			# Ch(e,f,g)
657	xor	$20,$1			# Sigma1(e)
658
659	rotr	$1,$2,13
660	addu	$19,$22
661	lw	$22,28($6)		# K[7]
662	xor	$1,$21
663	rotr	$21,$2,22
664	addu	$19,$20
665	and	$20,$3,$7
666	xor	$1,$21			# Sigma0(a)
667	xor	$21,$3,$7
668#else
669	addu	$19,$15,$1			# 7
670	srl	$1,$25,6
671	xor	$22,$30,$31
672	sll	$21,$25,7
673	and	$22,$25
674	srl	$20,$25,11
675	xor	$1,$21
676	sll	$21,$25,21
677	xor	$1,$20
678	srl	$20,$25,25
679	xor	$1,$21
680	sll	$21,$25,26
681	xor	$1,$20
682	xor	$22,$31			# Ch(e,f,g)
683	xor	$20,$21,$1			# Sigma1(e)
684
685	srl	$1,$2,2
686	addu	$19,$22
687	lw	$22,28($6)		# K[7]
688	sll	$21,$2,10
689	addu	$19,$20
690	srl	$20,$2,13
691	xor	$1,$21
692	sll	$21,$2,19
693	xor	$1,$20
694	srl	$20,$2,22
695	xor	$1,$21
696	sll	$21,$2,30
697	xor	$1,$20
698	and	$20,$3,$7
699	xor	$1,$21			# Sigma0(a)
700	xor	$21,$3,$7
701#endif
702	sw	$15,28($29)	# offload to ring buffer
703	addu	$1,$20
704	and	$21,$2
705	addu	$19,$22			# +=K[7]
706	addu	$1,$21			# +=Maj(a,b,c)
707	addu	$24,$19
708	addu	$1,$19
709#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
710	lw	$17,36($5)
711#else
712	lwl	$17,39($5)
713	lwr	$17,36($5)
714#endif
715#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
716	wsbh	$16,$16		# byte swap(8)
717	rotr	$16,$16,16
718#else
719	srl	$21,$16,24		# byte swap(8)
720	srl	$22,$16,8
721	andi	$23,$16,0xFF00
722	sll	$16,$16,24
723	andi	$22,0xFF00
724	sll	$23,$23,8
725	or	$16,$21
726	or	$22,$23
727	or	$16,$22
728#endif
729#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
730	xor	$23,$25,$30			# 8
731	rotr	$21,$24,6
732	addu	$20,$16,$31
733	rotr	$22,$24,11
734	and	$23,$24
735	rotr	$31,$24,25
736	xor	$21,$22
737	rotr	$22,$1,2
738	xor	$23,$30			# Ch(e,f,g)
739	xor	$21,$31			# Sigma1(e)
740
741	rotr	$31,$1,13
742	addu	$20,$23
743	lw	$23,32($6)		# K[8]
744	xor	$31,$22
745	rotr	$22,$1,22
746	addu	$20,$21
747	and	$21,$2,$3
748	xor	$31,$22			# Sigma0(a)
749	xor	$22,$2,$3
750#else
751	addu	$20,$16,$31			# 8
752	srl	$31,$24,6
753	xor	$23,$25,$30
754	sll	$22,$24,7
755	and	$23,$24
756	srl	$21,$24,11
757	xor	$31,$22
758	sll	$22,$24,21
759	xor	$31,$21
760	srl	$21,$24,25
761	xor	$31,$22
762	sll	$22,$24,26
763	xor	$31,$21
764	xor	$23,$30			# Ch(e,f,g)
765	xor	$21,$22,$31			# Sigma1(e)
766
767	srl	$31,$1,2
768	addu	$20,$23
769	lw	$23,32($6)		# K[8]
770	sll	$22,$1,10
771	addu	$20,$21
772	srl	$21,$1,13
773	xor	$31,$22
774	sll	$22,$1,19
775	xor	$31,$21
776	srl	$21,$1,22
777	xor	$31,$22
778	sll	$22,$1,30
779	xor	$31,$21
780	and	$21,$2,$3
781	xor	$31,$22			# Sigma0(a)
782	xor	$22,$2,$3
783#endif
784	sw	$16,32($29)	# offload to ring buffer
785	addu	$31,$21
786	and	$22,$1
787	addu	$20,$23			# +=K[8]
788	addu	$31,$22			# +=Maj(a,b,c)
789	addu	$7,$20
790	addu	$31,$20
791#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
792	lw	$18,40($5)
793#else
794	lwl	$18,43($5)
795	lwr	$18,40($5)
796#endif
797#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
798	wsbh	$17,$17		# byte swap(9)
799	rotr	$17,$17,16
800#else
801	srl	$22,$17,24		# byte swap(9)
802	srl	$23,$17,8
803	andi	$8,$17,0xFF00
804	sll	$17,$17,24
805	andi	$23,0xFF00
806	sll	$8,$8,8
807	or	$17,$22
808	or	$23,$8
809	or	$17,$23
810#endif
811#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
812	xor	$8,$24,$25			# 9
813	rotr	$22,$7,6
814	addu	$21,$17,$30
815	rotr	$23,$7,11
816	and	$8,$7
817	rotr	$30,$7,25
818	xor	$22,$23
819	rotr	$23,$31,2
820	xor	$8,$25			# Ch(e,f,g)
821	xor	$22,$30			# Sigma1(e)
822
823	rotr	$30,$31,13
824	addu	$21,$8
825	lw	$8,36($6)		# K[9]
826	xor	$30,$23
827	rotr	$23,$31,22
828	addu	$21,$22
829	and	$22,$1,$2
830	xor	$30,$23			# Sigma0(a)
831	xor	$23,$1,$2
832#else
833	addu	$21,$17,$30			# 9
834	srl	$30,$7,6
835	xor	$8,$24,$25
836	sll	$23,$7,7
837	and	$8,$7
838	srl	$22,$7,11
839	xor	$30,$23
840	sll	$23,$7,21
841	xor	$30,$22
842	srl	$22,$7,25
843	xor	$30,$23
844	sll	$23,$7,26
845	xor	$30,$22
846	xor	$8,$25			# Ch(e,f,g)
847	xor	$22,$23,$30			# Sigma1(e)
848
849	srl	$30,$31,2
850	addu	$21,$8
851	lw	$8,36($6)		# K[9]
852	sll	$23,$31,10
853	addu	$21,$22
854	srl	$22,$31,13
855	xor	$30,$23
856	sll	$23,$31,19
857	xor	$30,$22
858	srl	$22,$31,22
859	xor	$30,$23
860	sll	$23,$31,30
861	xor	$30,$22
862	and	$22,$1,$2
863	xor	$30,$23			# Sigma0(a)
864	xor	$23,$1,$2
865#endif
866	sw	$17,36($29)	# offload to ring buffer
867	addu	$30,$22
868	and	$23,$31
869	addu	$21,$8			# +=K[9]
870	addu	$30,$23			# +=Maj(a,b,c)
871	addu	$3,$21
872	addu	$30,$21
873#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
874	lw	$19,44($5)
875#else
876	lwl	$19,47($5)
877	lwr	$19,44($5)
878#endif
879#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
880	wsbh	$18,$18		# byte swap(10)
881	rotr	$18,$18,16
882#else
883	srl	$23,$18,24		# byte swap(10)
884	srl	$8,$18,8
885	andi	$9,$18,0xFF00
886	sll	$18,$18,24
887	andi	$8,0xFF00
888	sll	$9,$9,8
889	or	$18,$23
890	or	$8,$9
891	or	$18,$8
892#endif
893#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
894	xor	$9,$7,$24			# 10
895	rotr	$23,$3,6
896	addu	$22,$18,$25
897	rotr	$8,$3,11
898	and	$9,$3
899	rotr	$25,$3,25
900	xor	$23,$8
901	rotr	$8,$30,2
902	xor	$9,$24			# Ch(e,f,g)
903	xor	$23,$25			# Sigma1(e)
904
905	rotr	$25,$30,13
906	addu	$22,$9
907	lw	$9,40($6)		# K[10]
908	xor	$25,$8
909	rotr	$8,$30,22
910	addu	$22,$23
911	and	$23,$31,$1
912	xor	$25,$8			# Sigma0(a)
913	xor	$8,$31,$1
914#else
915	addu	$22,$18,$25			# 10
916	srl	$25,$3,6
917	xor	$9,$7,$24
918	sll	$8,$3,7
919	and	$9,$3
920	srl	$23,$3,11
921	xor	$25,$8
922	sll	$8,$3,21
923	xor	$25,$23
924	srl	$23,$3,25
925	xor	$25,$8
926	sll	$8,$3,26
927	xor	$25,$23
928	xor	$9,$24			# Ch(e,f,g)
929	xor	$23,$8,$25			# Sigma1(e)
930
931	srl	$25,$30,2
932	addu	$22,$9
933	lw	$9,40($6)		# K[10]
934	sll	$8,$30,10
935	addu	$22,$23
936	srl	$23,$30,13
937	xor	$25,$8
938	sll	$8,$30,19
939	xor	$25,$23
940	srl	$23,$30,22
941	xor	$25,$8
942	sll	$8,$30,30
943	xor	$25,$23
944	and	$23,$31,$1
945	xor	$25,$8			# Sigma0(a)
946	xor	$8,$31,$1
947#endif
948	sw	$18,40($29)	# offload to ring buffer
949	addu	$25,$23
950	and	$8,$30
951	addu	$22,$9			# +=K[10]
952	addu	$25,$8			# +=Maj(a,b,c)
953	addu	$2,$22
954	addu	$25,$22
955#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
956	lw	$20,48($5)
957#else
958	lwl	$20,51($5)
959	lwr	$20,48($5)
960#endif
961#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
962	wsbh	$19,$19		# byte swap(11)
963	rotr	$19,$19,16
964#else
965	srl	$8,$19,24		# byte swap(11)
966	srl	$9,$19,8
967	andi	$10,$19,0xFF00
968	sll	$19,$19,24
969	andi	$9,0xFF00
970	sll	$10,$10,8
971	or	$19,$8
972	or	$9,$10
973	or	$19,$9
974#endif
975#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
976	xor	$10,$3,$7			# 11
977	rotr	$8,$2,6
978	addu	$23,$19,$24
979	rotr	$9,$2,11
980	and	$10,$2
981	rotr	$24,$2,25
982	xor	$8,$9
983	rotr	$9,$25,2
984	xor	$10,$7			# Ch(e,f,g)
985	xor	$8,$24			# Sigma1(e)
986
987	rotr	$24,$25,13
988	addu	$23,$10
989	lw	$10,44($6)		# K[11]
990	xor	$24,$9
991	rotr	$9,$25,22
992	addu	$23,$8
993	and	$8,$30,$31
994	xor	$24,$9			# Sigma0(a)
995	xor	$9,$30,$31
996#else
997	addu	$23,$19,$24			# 11
998	srl	$24,$2,6
999	xor	$10,$3,$7
1000	sll	$9,$2,7
1001	and	$10,$2
1002	srl	$8,$2,11
1003	xor	$24,$9
1004	sll	$9,$2,21
1005	xor	$24,$8
1006	srl	$8,$2,25
1007	xor	$24,$9
1008	sll	$9,$2,26
1009	xor	$24,$8
1010	xor	$10,$7			# Ch(e,f,g)
1011	xor	$8,$9,$24			# Sigma1(e)
1012
1013	srl	$24,$25,2
1014	addu	$23,$10
1015	lw	$10,44($6)		# K[11]
1016	sll	$9,$25,10
1017	addu	$23,$8
1018	srl	$8,$25,13
1019	xor	$24,$9
1020	sll	$9,$25,19
1021	xor	$24,$8
1022	srl	$8,$25,22
1023	xor	$24,$9
1024	sll	$9,$25,30
1025	xor	$24,$8
1026	and	$8,$30,$31
1027	xor	$24,$9			# Sigma0(a)
1028	xor	$9,$30,$31
1029#endif
1030	sw	$19,44($29)	# offload to ring buffer
1031	addu	$24,$8
1032	and	$9,$25
1033	addu	$23,$10			# +=K[11]
1034	addu	$24,$9			# +=Maj(a,b,c)
1035	addu	$1,$23
1036	addu	$24,$23
1037#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1038	lw	$21,52($5)
1039#else
1040	lwl	$21,55($5)
1041	lwr	$21,52($5)
1042#endif
1043#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1044	wsbh	$20,$20		# byte swap(12)
1045	rotr	$20,$20,16
1046#else
1047	srl	$9,$20,24		# byte swap(12)
1048	srl	$10,$20,8
1049	andi	$11,$20,0xFF00
1050	sll	$20,$20,24
1051	andi	$10,0xFF00
1052	sll	$11,$11,8
1053	or	$20,$9
1054	or	$10,$11
1055	or	$20,$10
1056#endif
1057#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1058	xor	$11,$2,$3			# 12
1059	rotr	$9,$1,6
1060	addu	$8,$20,$7
1061	rotr	$10,$1,11
1062	and	$11,$1
1063	rotr	$7,$1,25
1064	xor	$9,$10
1065	rotr	$10,$24,2
1066	xor	$11,$3			# Ch(e,f,g)
1067	xor	$9,$7			# Sigma1(e)
1068
1069	rotr	$7,$24,13
1070	addu	$8,$11
1071	lw	$11,48($6)		# K[12]
1072	xor	$7,$10
1073	rotr	$10,$24,22
1074	addu	$8,$9
1075	and	$9,$25,$30
1076	xor	$7,$10			# Sigma0(a)
1077	xor	$10,$25,$30
1078#else
1079	addu	$8,$20,$7			# 12
1080	srl	$7,$1,6
1081	xor	$11,$2,$3
1082	sll	$10,$1,7
1083	and	$11,$1
1084	srl	$9,$1,11
1085	xor	$7,$10
1086	sll	$10,$1,21
1087	xor	$7,$9
1088	srl	$9,$1,25
1089	xor	$7,$10
1090	sll	$10,$1,26
1091	xor	$7,$9
1092	xor	$11,$3			# Ch(e,f,g)
1093	xor	$9,$10,$7			# Sigma1(e)
1094
1095	srl	$7,$24,2
1096	addu	$8,$11
1097	lw	$11,48($6)		# K[12]
1098	sll	$10,$24,10
1099	addu	$8,$9
1100	srl	$9,$24,13
1101	xor	$7,$10
1102	sll	$10,$24,19
1103	xor	$7,$9
1104	srl	$9,$24,22
1105	xor	$7,$10
1106	sll	$10,$24,30
1107	xor	$7,$9
1108	and	$9,$25,$30
1109	xor	$7,$10			# Sigma0(a)
1110	xor	$10,$25,$30
1111#endif
1112	sw	$20,48($29)	# offload to ring buffer
1113	addu	$7,$9
1114	and	$10,$24
1115	addu	$8,$11			# +=K[12]
1116	addu	$7,$10			# +=Maj(a,b,c)
1117	addu	$31,$8
1118	addu	$7,$8
1119#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1120	lw	$22,56($5)
1121#else
1122	lwl	$22,59($5)
1123	lwr	$22,56($5)
1124#endif
1125#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1126	wsbh	$21,$21		# byte swap(13)
1127	rotr	$21,$21,16
1128#else
1129	srl	$10,$21,24		# byte swap(13)
1130	srl	$11,$21,8
1131	andi	$12,$21,0xFF00
1132	sll	$21,$21,24
1133	andi	$11,0xFF00
1134	sll	$12,$12,8
1135	or	$21,$10
1136	or	$11,$12
1137	or	$21,$11
1138#endif
1139#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1140	xor	$12,$1,$2			# 13
1141	rotr	$10,$31,6
1142	addu	$9,$21,$3
1143	rotr	$11,$31,11
1144	and	$12,$31
1145	rotr	$3,$31,25
1146	xor	$10,$11
1147	rotr	$11,$7,2
1148	xor	$12,$2			# Ch(e,f,g)
1149	xor	$10,$3			# Sigma1(e)
1150
1151	rotr	$3,$7,13
1152	addu	$9,$12
1153	lw	$12,52($6)		# K[13]
1154	xor	$3,$11
1155	rotr	$11,$7,22
1156	addu	$9,$10
1157	and	$10,$24,$25
1158	xor	$3,$11			# Sigma0(a)
1159	xor	$11,$24,$25
1160#else
1161	addu	$9,$21,$3			# 13
1162	srl	$3,$31,6
1163	xor	$12,$1,$2
1164	sll	$11,$31,7
1165	and	$12,$31
1166	srl	$10,$31,11
1167	xor	$3,$11
1168	sll	$11,$31,21
1169	xor	$3,$10
1170	srl	$10,$31,25
1171	xor	$3,$11
1172	sll	$11,$31,26
1173	xor	$3,$10
1174	xor	$12,$2			# Ch(e,f,g)
1175	xor	$10,$11,$3			# Sigma1(e)
1176
1177	srl	$3,$7,2
1178	addu	$9,$12
1179	lw	$12,52($6)		# K[13]
1180	sll	$11,$7,10
1181	addu	$9,$10
1182	srl	$10,$7,13
1183	xor	$3,$11
1184	sll	$11,$7,19
1185	xor	$3,$10
1186	srl	$10,$7,22
1187	xor	$3,$11
1188	sll	$11,$7,30
1189	xor	$3,$10
1190	and	$10,$24,$25
1191	xor	$3,$11			# Sigma0(a)
1192	xor	$11,$24,$25
1193#endif
1194	sw	$21,52($29)	# offload to ring buffer
1195	addu	$3,$10
1196	and	$11,$7
1197	addu	$9,$12			# +=K[13]
1198	addu	$3,$11			# +=Maj(a,b,c)
1199	addu	$30,$9
1200	addu	$3,$9
1201	lw	$8,0($29)	# prefetch from ring buffer
1202#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1203	lw	$23,60($5)
1204#else
1205	lwl	$23,63($5)
1206	lwr	$23,60($5)
1207#endif
1208#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1209	wsbh	$22,$22		# byte swap(14)
1210	rotr	$22,$22,16
1211#else
1212	srl	$11,$22,24		# byte swap(14)
1213	srl	$12,$22,8
1214	andi	$13,$22,0xFF00
1215	sll	$22,$22,24
1216	andi	$12,0xFF00
1217	sll	$13,$13,8
1218	or	$22,$11
1219	or	$12,$13
1220	or	$22,$12
1221#endif
1222#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1223	xor	$13,$31,$1			# 14
1224	rotr	$11,$30,6
1225	addu	$10,$22,$2
1226	rotr	$12,$30,11
1227	and	$13,$30
1228	rotr	$2,$30,25
1229	xor	$11,$12
1230	rotr	$12,$3,2
1231	xor	$13,$1			# Ch(e,f,g)
1232	xor	$11,$2			# Sigma1(e)
1233
1234	rotr	$2,$3,13
1235	addu	$10,$13
1236	lw	$13,56($6)		# K[14]
1237	xor	$2,$12
1238	rotr	$12,$3,22
1239	addu	$10,$11
1240	and	$11,$7,$24
1241	xor	$2,$12			# Sigma0(a)
1242	xor	$12,$7,$24
1243#else
1244	addu	$10,$22,$2			# 14
1245	srl	$2,$30,6
1246	xor	$13,$31,$1
1247	sll	$12,$30,7
1248	and	$13,$30
1249	srl	$11,$30,11
1250	xor	$2,$12
1251	sll	$12,$30,21
1252	xor	$2,$11
1253	srl	$11,$30,25
1254	xor	$2,$12
1255	sll	$12,$30,26
1256	xor	$2,$11
1257	xor	$13,$1			# Ch(e,f,g)
1258	xor	$11,$12,$2			# Sigma1(e)
1259
1260	srl	$2,$3,2
1261	addu	$10,$13
1262	lw	$13,56($6)		# K[14]
1263	sll	$12,$3,10
1264	addu	$10,$11
1265	srl	$11,$3,13
1266	xor	$2,$12
1267	sll	$12,$3,19
1268	xor	$2,$11
1269	srl	$11,$3,22
1270	xor	$2,$12
1271	sll	$12,$3,30
1272	xor	$2,$11
1273	and	$11,$7,$24
1274	xor	$2,$12			# Sigma0(a)
1275	xor	$12,$7,$24
1276#endif
1277	sw	$22,56($29)	# offload to ring buffer
1278	addu	$2,$11
1279	and	$12,$3
1280	addu	$10,$13			# +=K[14]
1281	addu	$2,$12			# +=Maj(a,b,c)
1282	addu	$25,$10
1283	addu	$2,$10
1284	lw	$9,4($29)	# prefetch from ring buffer
1285#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1286	wsbh	$23,$23		# byte swap(15)
1287	rotr	$23,$23,16
1288#else
1289	srl	$12,$23,24		# byte swap(15)
1290	srl	$13,$23,8
1291	andi	$14,$23,0xFF00
1292	sll	$23,$23,24
1293	andi	$13,0xFF00
1294	sll	$14,$14,8
1295	or	$23,$12
1296	or	$13,$14
1297	or	$23,$13
1298#endif
1299#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1300	xor	$14,$30,$31			# 15
1301	rotr	$12,$25,6
1302	addu	$11,$23,$1
1303	rotr	$13,$25,11
1304	and	$14,$25
1305	rotr	$1,$25,25
1306	xor	$12,$13
1307	rotr	$13,$2,2
1308	xor	$14,$31			# Ch(e,f,g)
1309	xor	$12,$1			# Sigma1(e)
1310
1311	rotr	$1,$2,13
1312	addu	$11,$14
1313	lw	$14,60($6)		# K[15]
1314	xor	$1,$13
1315	rotr	$13,$2,22
1316	addu	$11,$12
1317	and	$12,$3,$7
1318	xor	$1,$13			# Sigma0(a)
1319	xor	$13,$3,$7
1320#else
1321	addu	$11,$23,$1			# 15
1322	srl	$1,$25,6
1323	xor	$14,$30,$31
1324	sll	$13,$25,7
1325	and	$14,$25
1326	srl	$12,$25,11
1327	xor	$1,$13
1328	sll	$13,$25,21
1329	xor	$1,$12
1330	srl	$12,$25,25
1331	xor	$1,$13
1332	sll	$13,$25,26
1333	xor	$1,$12
1334	xor	$14,$31			# Ch(e,f,g)
1335	xor	$12,$13,$1			# Sigma1(e)
1336
1337	srl	$1,$2,2
1338	addu	$11,$14
1339	lw	$14,60($6)		# K[15]
1340	sll	$13,$2,10
1341	addu	$11,$12
1342	srl	$12,$2,13
1343	xor	$1,$13
1344	sll	$13,$2,19
1345	xor	$1,$12
1346	srl	$12,$2,22
1347	xor	$1,$13
1348	sll	$13,$2,30
1349	xor	$1,$12
1350	and	$12,$3,$7
1351	xor	$1,$13			# Sigma0(a)
1352	xor	$13,$3,$7
1353#endif
1354	sw	$23,60($29)	# offload to ring buffer
1355	addu	$1,$12
1356	and	$13,$2
1357	addu	$11,$14			# +=K[15]
1358	addu	$1,$13			# +=Maj(a,b,c)
1359	addu	$24,$11
1360	addu	$1,$11
1361	lw	$10,8($29)	# prefetch from ring buffer
1362	b	.L16_xx
1363.align	4
1364.L16_xx:
1365#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1366	srl	$14,$9,3		# Xupdate(16)
1367	rotr	$12,$9,7
1368	addu	$8,$17			# +=X[i+9]
1369	xor	$14,$12
1370	rotr	$12,$9,18
1371
1372	srl	$15,$22,10
1373	rotr	$13,$22,17
1374	xor	$14,$12			# sigma0(X[i+1])
1375	rotr	$12,$22,19
1376	xor	$15,$13
1377	addu	$8,$14
1378#else
1379	srl	$14,$9,3		# Xupdate(16)
1380	addu	$8,$17			# +=X[i+9]
1381	sll	$13,$9,14
1382	srl	$12,$9,7
1383	xor	$14,$13
1384	sll	$13,11
1385	xor	$14,$12
1386	srl	$12,$9,18
1387	xor	$14,$13
1388
1389	srl	$15,$22,10
1390	xor	$14,$12			# sigma0(X[i+1])
1391	sll	$13,$22,13
1392	addu	$8,$14
1393	srl	$12,$22,17
1394	xor	$15,$13
1395	sll	$13,2
1396	xor	$15,$12
1397	srl	$12,$22,19
1398	xor	$15,$13
1399#endif
1400	xor	$15,$12			# sigma1(X[i+14])
1401	addu	$8,$15
1402#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1403	xor	$15,$25,$30			# 16
1404	rotr	$13,$24,6
1405	addu	$12,$8,$31
1406	rotr	$14,$24,11
1407	and	$15,$24
1408	rotr	$31,$24,25
1409	xor	$13,$14
1410	rotr	$14,$1,2
1411	xor	$15,$30			# Ch(e,f,g)
1412	xor	$13,$31			# Sigma1(e)
1413
1414	rotr	$31,$1,13
1415	addu	$12,$15
1416	lw	$15,64($6)		# K[16]
1417	xor	$31,$14
1418	rotr	$14,$1,22
1419	addu	$12,$13
1420	and	$13,$2,$3
1421	xor	$31,$14			# Sigma0(a)
1422	xor	$14,$2,$3
1423#else
1424	addu	$12,$8,$31			# 16
1425	srl	$31,$24,6
1426	xor	$15,$25,$30
1427	sll	$14,$24,7
1428	and	$15,$24
1429	srl	$13,$24,11
1430	xor	$31,$14
1431	sll	$14,$24,21
1432	xor	$31,$13
1433	srl	$13,$24,25
1434	xor	$31,$14
1435	sll	$14,$24,26
1436	xor	$31,$13
1437	xor	$15,$30			# Ch(e,f,g)
1438	xor	$13,$14,$31			# Sigma1(e)
1439
1440	srl	$31,$1,2
1441	addu	$12,$15
1442	lw	$15,64($6)		# K[16]
1443	sll	$14,$1,10
1444	addu	$12,$13
1445	srl	$13,$1,13
1446	xor	$31,$14
1447	sll	$14,$1,19
1448	xor	$31,$13
1449	srl	$13,$1,22
1450	xor	$31,$14
1451	sll	$14,$1,30
1452	xor	$31,$13
1453	and	$13,$2,$3
1454	xor	$31,$14			# Sigma0(a)
1455	xor	$14,$2,$3
1456#endif
1457	sw	$8,0($29)	# offload to ring buffer
1458	addu	$31,$13
1459	and	$14,$1
1460	addu	$12,$15			# +=K[16]
1461	addu	$31,$14			# +=Maj(a,b,c)
1462	addu	$7,$12
1463	addu	$31,$12
1464	lw	$11,12($29)	# prefetch from ring buffer
1465#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1466	srl	$15,$10,3		# Xupdate(17)
1467	rotr	$13,$10,7
1468	addu	$9,$18			# +=X[i+9]
1469	xor	$15,$13
1470	rotr	$13,$10,18
1471
1472	srl	$16,$23,10
1473	rotr	$14,$23,17
1474	xor	$15,$13			# sigma0(X[i+1])
1475	rotr	$13,$23,19
1476	xor	$16,$14
1477	addu	$9,$15
1478#else
1479	srl	$15,$10,3		# Xupdate(17)
1480	addu	$9,$18			# +=X[i+9]
1481	sll	$14,$10,14
1482	srl	$13,$10,7
1483	xor	$15,$14
1484	sll	$14,11
1485	xor	$15,$13
1486	srl	$13,$10,18
1487	xor	$15,$14
1488
1489	srl	$16,$23,10
1490	xor	$15,$13			# sigma0(X[i+1])
1491	sll	$14,$23,13
1492	addu	$9,$15
1493	srl	$13,$23,17
1494	xor	$16,$14
1495	sll	$14,2
1496	xor	$16,$13
1497	srl	$13,$23,19
1498	xor	$16,$14
1499#endif
1500	xor	$16,$13			# sigma1(X[i+14])
1501	addu	$9,$16
1502#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1503	xor	$16,$24,$25			# 17
1504	rotr	$14,$7,6
1505	addu	$13,$9,$30
1506	rotr	$15,$7,11
1507	and	$16,$7
1508	rotr	$30,$7,25
1509	xor	$14,$15
1510	rotr	$15,$31,2
1511	xor	$16,$25			# Ch(e,f,g)
1512	xor	$14,$30			# Sigma1(e)
1513
1514	rotr	$30,$31,13
1515	addu	$13,$16
1516	lw	$16,68($6)		# K[17]
1517	xor	$30,$15
1518	rotr	$15,$31,22
1519	addu	$13,$14
1520	and	$14,$1,$2
1521	xor	$30,$15			# Sigma0(a)
1522	xor	$15,$1,$2
1523#else
1524	addu	$13,$9,$30			# 17
1525	srl	$30,$7,6
1526	xor	$16,$24,$25
1527	sll	$15,$7,7
1528	and	$16,$7
1529	srl	$14,$7,11
1530	xor	$30,$15
1531	sll	$15,$7,21
1532	xor	$30,$14
1533	srl	$14,$7,25
1534	xor	$30,$15
1535	sll	$15,$7,26
1536	xor	$30,$14
1537	xor	$16,$25			# Ch(e,f,g)
1538	xor	$14,$15,$30			# Sigma1(e)
1539
1540	srl	$30,$31,2
1541	addu	$13,$16
1542	lw	$16,68($6)		# K[17]
1543	sll	$15,$31,10
1544	addu	$13,$14
1545	srl	$14,$31,13
1546	xor	$30,$15
1547	sll	$15,$31,19
1548	xor	$30,$14
1549	srl	$14,$31,22
1550	xor	$30,$15
1551	sll	$15,$31,30
1552	xor	$30,$14
1553	and	$14,$1,$2
1554	xor	$30,$15			# Sigma0(a)
1555	xor	$15,$1,$2
1556#endif
1557	sw	$9,4($29)	# offload to ring buffer
1558	addu	$30,$14
1559	and	$15,$31
1560	addu	$13,$16			# +=K[17]
1561	addu	$30,$15			# +=Maj(a,b,c)
1562	addu	$3,$13
1563	addu	$30,$13
1564	lw	$12,16($29)	# prefetch from ring buffer
1565#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1566	srl	$16,$11,3		# Xupdate(18)
1567	rotr	$14,$11,7
1568	addu	$10,$19			# +=X[i+9]
1569	xor	$16,$14
1570	rotr	$14,$11,18
1571
1572	srl	$17,$8,10
1573	rotr	$15,$8,17
1574	xor	$16,$14			# sigma0(X[i+1])
1575	rotr	$14,$8,19
1576	xor	$17,$15
1577	addu	$10,$16
1578#else
1579	srl	$16,$11,3		# Xupdate(18)
1580	addu	$10,$19			# +=X[i+9]
1581	sll	$15,$11,14
1582	srl	$14,$11,7
1583	xor	$16,$15
1584	sll	$15,11
1585	xor	$16,$14
1586	srl	$14,$11,18
1587	xor	$16,$15
1588
1589	srl	$17,$8,10
1590	xor	$16,$14			# sigma0(X[i+1])
1591	sll	$15,$8,13
1592	addu	$10,$16
1593	srl	$14,$8,17
1594	xor	$17,$15
1595	sll	$15,2
1596	xor	$17,$14
1597	srl	$14,$8,19
1598	xor	$17,$15
1599#endif
1600	xor	$17,$14			# sigma1(X[i+14])
1601	addu	$10,$17
1602#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1603	xor	$17,$7,$24			# 18
1604	rotr	$15,$3,6
1605	addu	$14,$10,$25
1606	rotr	$16,$3,11
1607	and	$17,$3
1608	rotr	$25,$3,25
1609	xor	$15,$16
1610	rotr	$16,$30,2
1611	xor	$17,$24			# Ch(e,f,g)
1612	xor	$15,$25			# Sigma1(e)
1613
1614	rotr	$25,$30,13
1615	addu	$14,$17
1616	lw	$17,72($6)		# K[18]
1617	xor	$25,$16
1618	rotr	$16,$30,22
1619	addu	$14,$15
1620	and	$15,$31,$1
1621	xor	$25,$16			# Sigma0(a)
1622	xor	$16,$31,$1
1623#else
1624	addu	$14,$10,$25			# 18
1625	srl	$25,$3,6
1626	xor	$17,$7,$24
1627	sll	$16,$3,7
1628	and	$17,$3
1629	srl	$15,$3,11
1630	xor	$25,$16
1631	sll	$16,$3,21
1632	xor	$25,$15
1633	srl	$15,$3,25
1634	xor	$25,$16
1635	sll	$16,$3,26
1636	xor	$25,$15
1637	xor	$17,$24			# Ch(e,f,g)
1638	xor	$15,$16,$25			# Sigma1(e)
1639
1640	srl	$25,$30,2
1641	addu	$14,$17
1642	lw	$17,72($6)		# K[18]
1643	sll	$16,$30,10
1644	addu	$14,$15
1645	srl	$15,$30,13
1646	xor	$25,$16
1647	sll	$16,$30,19
1648	xor	$25,$15
1649	srl	$15,$30,22
1650	xor	$25,$16
1651	sll	$16,$30,30
1652	xor	$25,$15
1653	and	$15,$31,$1
1654	xor	$25,$16			# Sigma0(a)
1655	xor	$16,$31,$1
1656#endif
1657	sw	$10,8($29)	# offload to ring buffer
1658	addu	$25,$15
1659	and	$16,$30
1660	addu	$14,$17			# +=K[18]
1661	addu	$25,$16			# +=Maj(a,b,c)
1662	addu	$2,$14
1663	addu	$25,$14
1664	lw	$13,20($29)	# prefetch from ring buffer
1665#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1666	srl	$17,$12,3		# Xupdate(19)
1667	rotr	$15,$12,7
1668	addu	$11,$20			# +=X[i+9]
1669	xor	$17,$15
1670	rotr	$15,$12,18
1671
1672	srl	$18,$9,10
1673	rotr	$16,$9,17
1674	xor	$17,$15			# sigma0(X[i+1])
1675	rotr	$15,$9,19
1676	xor	$18,$16
1677	addu	$11,$17
1678#else
1679	srl	$17,$12,3		# Xupdate(19)
1680	addu	$11,$20			# +=X[i+9]
1681	sll	$16,$12,14
1682	srl	$15,$12,7
1683	xor	$17,$16
1684	sll	$16,11
1685	xor	$17,$15
1686	srl	$15,$12,18
1687	xor	$17,$16
1688
1689	srl	$18,$9,10
1690	xor	$17,$15			# sigma0(X[i+1])
1691	sll	$16,$9,13
1692	addu	$11,$17
1693	srl	$15,$9,17
1694	xor	$18,$16
1695	sll	$16,2
1696	xor	$18,$15
1697	srl	$15,$9,19
1698	xor	$18,$16
1699#endif
1700	xor	$18,$15			# sigma1(X[i+14])
1701	addu	$11,$18
1702#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1703	xor	$18,$3,$7			# 19
1704	rotr	$16,$2,6
1705	addu	$15,$11,$24
1706	rotr	$17,$2,11
1707	and	$18,$2
1708	rotr	$24,$2,25
1709	xor	$16,$17
1710	rotr	$17,$25,2
1711	xor	$18,$7			# Ch(e,f,g)
1712	xor	$16,$24			# Sigma1(e)
1713
1714	rotr	$24,$25,13
1715	addu	$15,$18
1716	lw	$18,76($6)		# K[19]
1717	xor	$24,$17
1718	rotr	$17,$25,22
1719	addu	$15,$16
1720	and	$16,$30,$31
1721	xor	$24,$17			# Sigma0(a)
1722	xor	$17,$30,$31
1723#else
1724	addu	$15,$11,$24			# 19
1725	srl	$24,$2,6
1726	xor	$18,$3,$7
1727	sll	$17,$2,7
1728	and	$18,$2
1729	srl	$16,$2,11
1730	xor	$24,$17
1731	sll	$17,$2,21
1732	xor	$24,$16
1733	srl	$16,$2,25
1734	xor	$24,$17
1735	sll	$17,$2,26
1736	xor	$24,$16
1737	xor	$18,$7			# Ch(e,f,g)
1738	xor	$16,$17,$24			# Sigma1(e)
1739
1740	srl	$24,$25,2
1741	addu	$15,$18
1742	lw	$18,76($6)		# K[19]
1743	sll	$17,$25,10
1744	addu	$15,$16
1745	srl	$16,$25,13
1746	xor	$24,$17
1747	sll	$17,$25,19
1748	xor	$24,$16
1749	srl	$16,$25,22
1750	xor	$24,$17
1751	sll	$17,$25,30
1752	xor	$24,$16
1753	and	$16,$30,$31
1754	xor	$24,$17			# Sigma0(a)
1755	xor	$17,$30,$31
1756#endif
1757	sw	$11,12($29)	# offload to ring buffer
1758	addu	$24,$16
1759	and	$17,$25
1760	addu	$15,$18			# +=K[19]
1761	addu	$24,$17			# +=Maj(a,b,c)
1762	addu	$1,$15
1763	addu	$24,$15
1764	lw	$14,24($29)	# prefetch from ring buffer
1765#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1766	srl	$18,$13,3		# Xupdate(20)
1767	rotr	$16,$13,7
1768	addu	$12,$21			# +=X[i+9]
1769	xor	$18,$16
1770	rotr	$16,$13,18
1771
1772	srl	$19,$10,10
1773	rotr	$17,$10,17
1774	xor	$18,$16			# sigma0(X[i+1])
1775	rotr	$16,$10,19
1776	xor	$19,$17
1777	addu	$12,$18
1778#else
1779	srl	$18,$13,3		# Xupdate(20)
1780	addu	$12,$21			# +=X[i+9]
1781	sll	$17,$13,14
1782	srl	$16,$13,7
1783	xor	$18,$17
1784	sll	$17,11
1785	xor	$18,$16
1786	srl	$16,$13,18
1787	xor	$18,$17
1788
1789	srl	$19,$10,10
1790	xor	$18,$16			# sigma0(X[i+1])
1791	sll	$17,$10,13
1792	addu	$12,$18
1793	srl	$16,$10,17
1794	xor	$19,$17
1795	sll	$17,2
1796	xor	$19,$16
1797	srl	$16,$10,19
1798	xor	$19,$17
1799#endif
1800	xor	$19,$16			# sigma1(X[i+14])
1801	addu	$12,$19
1802#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1803	xor	$19,$2,$3			# 20
1804	rotr	$17,$1,6
1805	addu	$16,$12,$7
1806	rotr	$18,$1,11
1807	and	$19,$1
1808	rotr	$7,$1,25
1809	xor	$17,$18
1810	rotr	$18,$24,2
1811	xor	$19,$3			# Ch(e,f,g)
1812	xor	$17,$7			# Sigma1(e)
1813
1814	rotr	$7,$24,13
1815	addu	$16,$19
1816	lw	$19,80($6)		# K[20]
1817	xor	$7,$18
1818	rotr	$18,$24,22
1819	addu	$16,$17
1820	and	$17,$25,$30
1821	xor	$7,$18			# Sigma0(a)
1822	xor	$18,$25,$30
1823#else
1824	addu	$16,$12,$7			# 20
1825	srl	$7,$1,6
1826	xor	$19,$2,$3
1827	sll	$18,$1,7
1828	and	$19,$1
1829	srl	$17,$1,11
1830	xor	$7,$18
1831	sll	$18,$1,21
1832	xor	$7,$17
1833	srl	$17,$1,25
1834	xor	$7,$18
1835	sll	$18,$1,26
1836	xor	$7,$17
1837	xor	$19,$3			# Ch(e,f,g)
1838	xor	$17,$18,$7			# Sigma1(e)
1839
1840	srl	$7,$24,2
1841	addu	$16,$19
1842	lw	$19,80($6)		# K[20]
1843	sll	$18,$24,10
1844	addu	$16,$17
1845	srl	$17,$24,13
1846	xor	$7,$18
1847	sll	$18,$24,19
1848	xor	$7,$17
1849	srl	$17,$24,22
1850	xor	$7,$18
1851	sll	$18,$24,30
1852	xor	$7,$17
1853	and	$17,$25,$30
1854	xor	$7,$18			# Sigma0(a)
1855	xor	$18,$25,$30
1856#endif
1857	sw	$12,16($29)	# offload to ring buffer
1858	addu	$7,$17
1859	and	$18,$24
1860	addu	$16,$19			# +=K[20]
1861	addu	$7,$18			# +=Maj(a,b,c)
1862	addu	$31,$16
1863	addu	$7,$16
1864	lw	$15,28($29)	# prefetch from ring buffer
1865#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1866	srl	$19,$14,3		# Xupdate(21)
1867	rotr	$17,$14,7
1868	addu	$13,$22			# +=X[i+9]
1869	xor	$19,$17
1870	rotr	$17,$14,18
1871
1872	srl	$20,$11,10
1873	rotr	$18,$11,17
1874	xor	$19,$17			# sigma0(X[i+1])
1875	rotr	$17,$11,19
1876	xor	$20,$18
1877	addu	$13,$19
1878#else
1879	srl	$19,$14,3		# Xupdate(21)
1880	addu	$13,$22			# +=X[i+9]
1881	sll	$18,$14,14
1882	srl	$17,$14,7
1883	xor	$19,$18
1884	sll	$18,11
1885	xor	$19,$17
1886	srl	$17,$14,18
1887	xor	$19,$18
1888
1889	srl	$20,$11,10
1890	xor	$19,$17			# sigma0(X[i+1])
1891	sll	$18,$11,13
1892	addu	$13,$19
1893	srl	$17,$11,17
1894	xor	$20,$18
1895	sll	$18,2
1896	xor	$20,$17
1897	srl	$17,$11,19
1898	xor	$20,$18
1899#endif
1900	xor	$20,$17			# sigma1(X[i+14])
1901	addu	$13,$20
1902#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1903	xor	$20,$1,$2			# 21
1904	rotr	$18,$31,6
1905	addu	$17,$13,$3
1906	rotr	$19,$31,11
1907	and	$20,$31
1908	rotr	$3,$31,25
1909	xor	$18,$19
1910	rotr	$19,$7,2
1911	xor	$20,$2			# Ch(e,f,g)
1912	xor	$18,$3			# Sigma1(e)
1913
1914	rotr	$3,$7,13
1915	addu	$17,$20
1916	lw	$20,84($6)		# K[21]
1917	xor	$3,$19
1918	rotr	$19,$7,22
1919	addu	$17,$18
1920	and	$18,$24,$25
1921	xor	$3,$19			# Sigma0(a)
1922	xor	$19,$24,$25
1923#else
1924	addu	$17,$13,$3			# 21
1925	srl	$3,$31,6
1926	xor	$20,$1,$2
1927	sll	$19,$31,7
1928	and	$20,$31
1929	srl	$18,$31,11
1930	xor	$3,$19
1931	sll	$19,$31,21
1932	xor	$3,$18
1933	srl	$18,$31,25
1934	xor	$3,$19
1935	sll	$19,$31,26
1936	xor	$3,$18
1937	xor	$20,$2			# Ch(e,f,g)
1938	xor	$18,$19,$3			# Sigma1(e)
1939
1940	srl	$3,$7,2
1941	addu	$17,$20
1942	lw	$20,84($6)		# K[21]
1943	sll	$19,$7,10
1944	addu	$17,$18
1945	srl	$18,$7,13
1946	xor	$3,$19
1947	sll	$19,$7,19
1948	xor	$3,$18
1949	srl	$18,$7,22
1950	xor	$3,$19
1951	sll	$19,$7,30
1952	xor	$3,$18
1953	and	$18,$24,$25
1954	xor	$3,$19			# Sigma0(a)
1955	xor	$19,$24,$25
1956#endif
1957	sw	$13,20($29)	# offload to ring buffer
1958	addu	$3,$18
1959	and	$19,$7
1960	addu	$17,$20			# +=K[21]
1961	addu	$3,$19			# +=Maj(a,b,c)
1962	addu	$30,$17
1963	addu	$3,$17
1964	lw	$16,32($29)	# prefetch from ring buffer
1965#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1966	srl	$20,$15,3		# Xupdate(22)
1967	rotr	$18,$15,7
1968	addu	$14,$23			# +=X[i+9]
1969	xor	$20,$18
1970	rotr	$18,$15,18
1971
1972	srl	$21,$12,10
1973	rotr	$19,$12,17
1974	xor	$20,$18			# sigma0(X[i+1])
1975	rotr	$18,$12,19
1976	xor	$21,$19
1977	addu	$14,$20
1978#else
1979	srl	$20,$15,3		# Xupdate(22)
1980	addu	$14,$23			# +=X[i+9]
1981	sll	$19,$15,14
1982	srl	$18,$15,7
1983	xor	$20,$19
1984	sll	$19,11
1985	xor	$20,$18
1986	srl	$18,$15,18
1987	xor	$20,$19
1988
1989	srl	$21,$12,10
1990	xor	$20,$18			# sigma0(X[i+1])
1991	sll	$19,$12,13
1992	addu	$14,$20
1993	srl	$18,$12,17
1994	xor	$21,$19
1995	sll	$19,2
1996	xor	$21,$18
1997	srl	$18,$12,19
1998	xor	$21,$19
1999#endif
2000	xor	$21,$18			# sigma1(X[i+14])
2001	addu	$14,$21
2002#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2003	xor	$21,$31,$1			# 22
2004	rotr	$19,$30,6
2005	addu	$18,$14,$2
2006	rotr	$20,$30,11
2007	and	$21,$30
2008	rotr	$2,$30,25
2009	xor	$19,$20
2010	rotr	$20,$3,2
2011	xor	$21,$1			# Ch(e,f,g)
2012	xor	$19,$2			# Sigma1(e)
2013
2014	rotr	$2,$3,13
2015	addu	$18,$21
2016	lw	$21,88($6)		# K[22]
2017	xor	$2,$20
2018	rotr	$20,$3,22
2019	addu	$18,$19
2020	and	$19,$7,$24
2021	xor	$2,$20			# Sigma0(a)
2022	xor	$20,$7,$24
2023#else
2024	addu	$18,$14,$2			# 22
2025	srl	$2,$30,6
2026	xor	$21,$31,$1
2027	sll	$20,$30,7
2028	and	$21,$30
2029	srl	$19,$30,11
2030	xor	$2,$20
2031	sll	$20,$30,21
2032	xor	$2,$19
2033	srl	$19,$30,25
2034	xor	$2,$20
2035	sll	$20,$30,26
2036	xor	$2,$19
2037	xor	$21,$1			# Ch(e,f,g)
2038	xor	$19,$20,$2			# Sigma1(e)
2039
2040	srl	$2,$3,2
2041	addu	$18,$21
2042	lw	$21,88($6)		# K[22]
2043	sll	$20,$3,10
2044	addu	$18,$19
2045	srl	$19,$3,13
2046	xor	$2,$20
2047	sll	$20,$3,19
2048	xor	$2,$19
2049	srl	$19,$3,22
2050	xor	$2,$20
2051	sll	$20,$3,30
2052	xor	$2,$19
2053	and	$19,$7,$24
2054	xor	$2,$20			# Sigma0(a)
2055	xor	$20,$7,$24
2056#endif
2057	sw	$14,24($29)	# offload to ring buffer
2058	addu	$2,$19
2059	and	$20,$3
2060	addu	$18,$21			# +=K[22]
2061	addu	$2,$20			# +=Maj(a,b,c)
2062	addu	$25,$18
2063	addu	$2,$18
2064	lw	$17,36($29)	# prefetch from ring buffer
2065#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2066	srl	$21,$16,3		# Xupdate(23)
2067	rotr	$19,$16,7
2068	addu	$15,$8			# +=X[i+9]
2069	xor	$21,$19
2070	rotr	$19,$16,18
2071
2072	srl	$22,$13,10
2073	rotr	$20,$13,17
2074	xor	$21,$19			# sigma0(X[i+1])
2075	rotr	$19,$13,19
2076	xor	$22,$20
2077	addu	$15,$21
2078#else
2079	srl	$21,$16,3		# Xupdate(23)
2080	addu	$15,$8			# +=X[i+9]
2081	sll	$20,$16,14
2082	srl	$19,$16,7
2083	xor	$21,$20
2084	sll	$20,11
2085	xor	$21,$19
2086	srl	$19,$16,18
2087	xor	$21,$20
2088
2089	srl	$22,$13,10
2090	xor	$21,$19			# sigma0(X[i+1])
2091	sll	$20,$13,13
2092	addu	$15,$21
2093	srl	$19,$13,17
2094	xor	$22,$20
2095	sll	$20,2
2096	xor	$22,$19
2097	srl	$19,$13,19
2098	xor	$22,$20
2099#endif
2100	xor	$22,$19			# sigma1(X[i+14])
2101	addu	$15,$22
2102#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2103	xor	$22,$30,$31			# 23
2104	rotr	$20,$25,6
2105	addu	$19,$15,$1
2106	rotr	$21,$25,11
2107	and	$22,$25
2108	rotr	$1,$25,25
2109	xor	$20,$21
2110	rotr	$21,$2,2
2111	xor	$22,$31			# Ch(e,f,g)
2112	xor	$20,$1			# Sigma1(e)
2113
2114	rotr	$1,$2,13
2115	addu	$19,$22
2116	lw	$22,92($6)		# K[23]
2117	xor	$1,$21
2118	rotr	$21,$2,22
2119	addu	$19,$20
2120	and	$20,$3,$7
2121	xor	$1,$21			# Sigma0(a)
2122	xor	$21,$3,$7
2123#else
2124	addu	$19,$15,$1			# 23
2125	srl	$1,$25,6
2126	xor	$22,$30,$31
2127	sll	$21,$25,7
2128	and	$22,$25
2129	srl	$20,$25,11
2130	xor	$1,$21
2131	sll	$21,$25,21
2132	xor	$1,$20
2133	srl	$20,$25,25
2134	xor	$1,$21
2135	sll	$21,$25,26
2136	xor	$1,$20
2137	xor	$22,$31			# Ch(e,f,g)
2138	xor	$20,$21,$1			# Sigma1(e)
2139
2140	srl	$1,$2,2
2141	addu	$19,$22
2142	lw	$22,92($6)		# K[23]
2143	sll	$21,$2,10
2144	addu	$19,$20
2145	srl	$20,$2,13
2146	xor	$1,$21
2147	sll	$21,$2,19
2148	xor	$1,$20
2149	srl	$20,$2,22
2150	xor	$1,$21
2151	sll	$21,$2,30
2152	xor	$1,$20
2153	and	$20,$3,$7
2154	xor	$1,$21			# Sigma0(a)
2155	xor	$21,$3,$7
2156#endif
2157	sw	$15,28($29)	# offload to ring buffer
2158	addu	$1,$20
2159	and	$21,$2
2160	addu	$19,$22			# +=K[23]
2161	addu	$1,$21			# +=Maj(a,b,c)
2162	addu	$24,$19
2163	addu	$1,$19
2164	lw	$18,40($29)	# prefetch from ring buffer
2165#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2166	srl	$22,$17,3		# Xupdate(24)
2167	rotr	$20,$17,7
2168	addu	$16,$9			# +=X[i+9]
2169	xor	$22,$20
2170	rotr	$20,$17,18
2171
2172	srl	$23,$14,10
2173	rotr	$21,$14,17
2174	xor	$22,$20			# sigma0(X[i+1])
2175	rotr	$20,$14,19
2176	xor	$23,$21
2177	addu	$16,$22
2178#else
2179	srl	$22,$17,3		# Xupdate(24)
2180	addu	$16,$9			# +=X[i+9]
2181	sll	$21,$17,14
2182	srl	$20,$17,7
2183	xor	$22,$21
2184	sll	$21,11
2185	xor	$22,$20
2186	srl	$20,$17,18
2187	xor	$22,$21
2188
2189	srl	$23,$14,10
2190	xor	$22,$20			# sigma0(X[i+1])
2191	sll	$21,$14,13
2192	addu	$16,$22
2193	srl	$20,$14,17
2194	xor	$23,$21
2195	sll	$21,2
2196	xor	$23,$20
2197	srl	$20,$14,19
2198	xor	$23,$21
2199#endif
2200	xor	$23,$20			# sigma1(X[i+14])
2201	addu	$16,$23
2202#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2203	xor	$23,$25,$30			# 24
2204	rotr	$21,$24,6
2205	addu	$20,$16,$31
2206	rotr	$22,$24,11
2207	and	$23,$24
2208	rotr	$31,$24,25
2209	xor	$21,$22
2210	rotr	$22,$1,2
2211	xor	$23,$30			# Ch(e,f,g)
2212	xor	$21,$31			# Sigma1(e)
2213
2214	rotr	$31,$1,13
2215	addu	$20,$23
2216	lw	$23,96($6)		# K[24]
2217	xor	$31,$22
2218	rotr	$22,$1,22
2219	addu	$20,$21
2220	and	$21,$2,$3
2221	xor	$31,$22			# Sigma0(a)
2222	xor	$22,$2,$3
2223#else
2224	addu	$20,$16,$31			# 24
2225	srl	$31,$24,6
2226	xor	$23,$25,$30
2227	sll	$22,$24,7
2228	and	$23,$24
2229	srl	$21,$24,11
2230	xor	$31,$22
2231	sll	$22,$24,21
2232	xor	$31,$21
2233	srl	$21,$24,25
2234	xor	$31,$22
2235	sll	$22,$24,26
2236	xor	$31,$21
2237	xor	$23,$30			# Ch(e,f,g)
2238	xor	$21,$22,$31			# Sigma1(e)
2239
2240	srl	$31,$1,2
2241	addu	$20,$23
2242	lw	$23,96($6)		# K[24]
2243	sll	$22,$1,10
2244	addu	$20,$21
2245	srl	$21,$1,13
2246	xor	$31,$22
2247	sll	$22,$1,19
2248	xor	$31,$21
2249	srl	$21,$1,22
2250	xor	$31,$22
2251	sll	$22,$1,30
2252	xor	$31,$21
2253	and	$21,$2,$3
2254	xor	$31,$22			# Sigma0(a)
2255	xor	$22,$2,$3
2256#endif
2257	sw	$16,32($29)	# offload to ring buffer
2258	addu	$31,$21
2259	and	$22,$1
2260	addu	$20,$23			# +=K[24]
2261	addu	$31,$22			# +=Maj(a,b,c)
2262	addu	$7,$20
2263	addu	$31,$20
2264	lw	$19,44($29)	# prefetch from ring buffer
2265#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2266	srl	$23,$18,3		# Xupdate(25)
2267	rotr	$21,$18,7
2268	addu	$17,$10			# +=X[i+9]
2269	xor	$23,$21
2270	rotr	$21,$18,18
2271
2272	srl	$8,$15,10
2273	rotr	$22,$15,17
2274	xor	$23,$21			# sigma0(X[i+1])
2275	rotr	$21,$15,19
2276	xor	$8,$22
2277	addu	$17,$23
2278#else
2279	srl	$23,$18,3		# Xupdate(25)
2280	addu	$17,$10			# +=X[i+9]
2281	sll	$22,$18,14
2282	srl	$21,$18,7
2283	xor	$23,$22
2284	sll	$22,11
2285	xor	$23,$21
2286	srl	$21,$18,18
2287	xor	$23,$22
2288
2289	srl	$8,$15,10
2290	xor	$23,$21			# sigma0(X[i+1])
2291	sll	$22,$15,13
2292	addu	$17,$23
2293	srl	$21,$15,17
2294	xor	$8,$22
2295	sll	$22,2
2296	xor	$8,$21
2297	srl	$21,$15,19
2298	xor	$8,$22
2299#endif
2300	xor	$8,$21			# sigma1(X[i+14])
2301	addu	$17,$8
2302#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2303	xor	$8,$24,$25			# 25
2304	rotr	$22,$7,6
2305	addu	$21,$17,$30
2306	rotr	$23,$7,11
2307	and	$8,$7
2308	rotr	$30,$7,25
2309	xor	$22,$23
2310	rotr	$23,$31,2
2311	xor	$8,$25			# Ch(e,f,g)
2312	xor	$22,$30			# Sigma1(e)
2313
2314	rotr	$30,$31,13
2315	addu	$21,$8
2316	lw	$8,100($6)		# K[25]
2317	xor	$30,$23
2318	rotr	$23,$31,22
2319	addu	$21,$22
2320	and	$22,$1,$2
2321	xor	$30,$23			# Sigma0(a)
2322	xor	$23,$1,$2
2323#else
2324	addu	$21,$17,$30			# 25
2325	srl	$30,$7,6
2326	xor	$8,$24,$25
2327	sll	$23,$7,7
2328	and	$8,$7
2329	srl	$22,$7,11
2330	xor	$30,$23
2331	sll	$23,$7,21
2332	xor	$30,$22
2333	srl	$22,$7,25
2334	xor	$30,$23
2335	sll	$23,$7,26
2336	xor	$30,$22
2337	xor	$8,$25			# Ch(e,f,g)
2338	xor	$22,$23,$30			# Sigma1(e)
2339
2340	srl	$30,$31,2
2341	addu	$21,$8
2342	lw	$8,100($6)		# K[25]
2343	sll	$23,$31,10
2344	addu	$21,$22
2345	srl	$22,$31,13
2346	xor	$30,$23
2347	sll	$23,$31,19
2348	xor	$30,$22
2349	srl	$22,$31,22
2350	xor	$30,$23
2351	sll	$23,$31,30
2352	xor	$30,$22
2353	and	$22,$1,$2
2354	xor	$30,$23			# Sigma0(a)
2355	xor	$23,$1,$2
2356#endif
2357	sw	$17,36($29)	# offload to ring buffer
2358	addu	$30,$22
2359	and	$23,$31
2360	addu	$21,$8			# +=K[25]
2361	addu	$30,$23			# +=Maj(a,b,c)
2362	addu	$3,$21
2363	addu	$30,$21
2364	lw	$20,48($29)	# prefetch from ring buffer
2365#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2366	srl	$8,$19,3		# Xupdate(26)
2367	rotr	$22,$19,7
2368	addu	$18,$11			# +=X[i+9]
2369	xor	$8,$22
2370	rotr	$22,$19,18
2371
2372	srl	$9,$16,10
2373	rotr	$23,$16,17
2374	xor	$8,$22			# sigma0(X[i+1])
2375	rotr	$22,$16,19
2376	xor	$9,$23
2377	addu	$18,$8
2378#else
2379	srl	$8,$19,3		# Xupdate(26)
2380	addu	$18,$11			# +=X[i+9]
2381	sll	$23,$19,14
2382	srl	$22,$19,7
2383	xor	$8,$23
2384	sll	$23,11
2385	xor	$8,$22
2386	srl	$22,$19,18
2387	xor	$8,$23
2388
2389	srl	$9,$16,10
2390	xor	$8,$22			# sigma0(X[i+1])
2391	sll	$23,$16,13
2392	addu	$18,$8
2393	srl	$22,$16,17
2394	xor	$9,$23
2395	sll	$23,2
2396	xor	$9,$22
2397	srl	$22,$16,19
2398	xor	$9,$23
2399#endif
2400	xor	$9,$22			# sigma1(X[i+14])
2401	addu	$18,$9
2402#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2403	xor	$9,$7,$24			# 26
2404	rotr	$23,$3,6
2405	addu	$22,$18,$25
2406	rotr	$8,$3,11
2407	and	$9,$3
2408	rotr	$25,$3,25
2409	xor	$23,$8
2410	rotr	$8,$30,2
2411	xor	$9,$24			# Ch(e,f,g)
2412	xor	$23,$25			# Sigma1(e)
2413
2414	rotr	$25,$30,13
2415	addu	$22,$9
2416	lw	$9,104($6)		# K[26]
2417	xor	$25,$8
2418	rotr	$8,$30,22
2419	addu	$22,$23
2420	and	$23,$31,$1
2421	xor	$25,$8			# Sigma0(a)
2422	xor	$8,$31,$1
2423#else
2424	addu	$22,$18,$25			# 26
2425	srl	$25,$3,6
2426	xor	$9,$7,$24
2427	sll	$8,$3,7
2428	and	$9,$3
2429	srl	$23,$3,11
2430	xor	$25,$8
2431	sll	$8,$3,21
2432	xor	$25,$23
2433	srl	$23,$3,25
2434	xor	$25,$8
2435	sll	$8,$3,26
2436	xor	$25,$23
2437	xor	$9,$24			# Ch(e,f,g)
2438	xor	$23,$8,$25			# Sigma1(e)
2439
2440	srl	$25,$30,2
2441	addu	$22,$9
2442	lw	$9,104($6)		# K[26]
2443	sll	$8,$30,10
2444	addu	$22,$23
2445	srl	$23,$30,13
2446	xor	$25,$8
2447	sll	$8,$30,19
2448	xor	$25,$23
2449	srl	$23,$30,22
2450	xor	$25,$8
2451	sll	$8,$30,30
2452	xor	$25,$23
2453	and	$23,$31,$1
2454	xor	$25,$8			# Sigma0(a)
2455	xor	$8,$31,$1
2456#endif
2457	sw	$18,40($29)	# offload to ring buffer
2458	addu	$25,$23
2459	and	$8,$30
2460	addu	$22,$9			# +=K[26]
2461	addu	$25,$8			# +=Maj(a,b,c)
2462	addu	$2,$22
2463	addu	$25,$22
2464	lw	$21,52($29)	# prefetch from ring buffer
2465#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2466	srl	$9,$20,3		# Xupdate(27)
2467	rotr	$23,$20,7
2468	addu	$19,$12			# +=X[i+9]
2469	xor	$9,$23
2470	rotr	$23,$20,18
2471
2472	srl	$10,$17,10
2473	rotr	$8,$17,17
2474	xor	$9,$23			# sigma0(X[i+1])
2475	rotr	$23,$17,19
2476	xor	$10,$8
2477	addu	$19,$9
2478#else
2479	srl	$9,$20,3		# Xupdate(27)
2480	addu	$19,$12			# +=X[i+9]
2481	sll	$8,$20,14
2482	srl	$23,$20,7
2483	xor	$9,$8
2484	sll	$8,11
2485	xor	$9,$23
2486	srl	$23,$20,18
2487	xor	$9,$8
2488
2489	srl	$10,$17,10
2490	xor	$9,$23			# sigma0(X[i+1])
2491	sll	$8,$17,13
2492	addu	$19,$9
2493	srl	$23,$17,17
2494	xor	$10,$8
2495	sll	$8,2
2496	xor	$10,$23
2497	srl	$23,$17,19
2498	xor	$10,$8
2499#endif
2500	xor	$10,$23			# sigma1(X[i+14])
2501	addu	$19,$10
2502#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2503	xor	$10,$3,$7			# 27
2504	rotr	$8,$2,6
2505	addu	$23,$19,$24
2506	rotr	$9,$2,11
2507	and	$10,$2
2508	rotr	$24,$2,25
2509	xor	$8,$9
2510	rotr	$9,$25,2
2511	xor	$10,$7			# Ch(e,f,g)
2512	xor	$8,$24			# Sigma1(e)
2513
2514	rotr	$24,$25,13
2515	addu	$23,$10
2516	lw	$10,108($6)		# K[27]
2517	xor	$24,$9
2518	rotr	$9,$25,22
2519	addu	$23,$8
2520	and	$8,$30,$31
2521	xor	$24,$9			# Sigma0(a)
2522	xor	$9,$30,$31
2523#else
2524	addu	$23,$19,$24			# 27
2525	srl	$24,$2,6
2526	xor	$10,$3,$7
2527	sll	$9,$2,7
2528	and	$10,$2
2529	srl	$8,$2,11
2530	xor	$24,$9
2531	sll	$9,$2,21
2532	xor	$24,$8
2533	srl	$8,$2,25
2534	xor	$24,$9
2535	sll	$9,$2,26
2536	xor	$24,$8
2537	xor	$10,$7			# Ch(e,f,g)
2538	xor	$8,$9,$24			# Sigma1(e)
2539
2540	srl	$24,$25,2
2541	addu	$23,$10
2542	lw	$10,108($6)		# K[27]
2543	sll	$9,$25,10
2544	addu	$23,$8
2545	srl	$8,$25,13
2546	xor	$24,$9
2547	sll	$9,$25,19
2548	xor	$24,$8
2549	srl	$8,$25,22
2550	xor	$24,$9
2551	sll	$9,$25,30
2552	xor	$24,$8
2553	and	$8,$30,$31
2554	xor	$24,$9			# Sigma0(a)
2555	xor	$9,$30,$31
2556#endif
2557	sw	$19,44($29)	# offload to ring buffer
2558	addu	$24,$8
2559	and	$9,$25
2560	addu	$23,$10			# +=K[27]
2561	addu	$24,$9			# +=Maj(a,b,c)
2562	addu	$1,$23
2563	addu	$24,$23
2564	lw	$22,56($29)	# prefetch from ring buffer
2565#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2566	srl	$10,$21,3		# Xupdate(28)
2567	rotr	$8,$21,7
2568	addu	$20,$13			# +=X[i+9]
2569	xor	$10,$8
2570	rotr	$8,$21,18
2571
2572	srl	$11,$18,10
2573	rotr	$9,$18,17
2574	xor	$10,$8			# sigma0(X[i+1])
2575	rotr	$8,$18,19
2576	xor	$11,$9
2577	addu	$20,$10
2578#else
2579	srl	$10,$21,3		# Xupdate(28)
2580	addu	$20,$13			# +=X[i+9]
2581	sll	$9,$21,14
2582	srl	$8,$21,7
2583	xor	$10,$9
2584	sll	$9,11
2585	xor	$10,$8
2586	srl	$8,$21,18
2587	xor	$10,$9
2588
2589	srl	$11,$18,10
2590	xor	$10,$8			# sigma0(X[i+1])
2591	sll	$9,$18,13
2592	addu	$20,$10
2593	srl	$8,$18,17
2594	xor	$11,$9
2595	sll	$9,2
2596	xor	$11,$8
2597	srl	$8,$18,19
2598	xor	$11,$9
2599#endif
2600	xor	$11,$8			# sigma1(X[i+14])
2601	addu	$20,$11
2602#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2603	xor	$11,$2,$3			# 28
2604	rotr	$9,$1,6
2605	addu	$8,$20,$7
2606	rotr	$10,$1,11
2607	and	$11,$1
2608	rotr	$7,$1,25
2609	xor	$9,$10
2610	rotr	$10,$24,2
2611	xor	$11,$3			# Ch(e,f,g)
2612	xor	$9,$7			# Sigma1(e)
2613
2614	rotr	$7,$24,13
2615	addu	$8,$11
2616	lw	$11,112($6)		# K[28]
2617	xor	$7,$10
2618	rotr	$10,$24,22
2619	addu	$8,$9
2620	and	$9,$25,$30
2621	xor	$7,$10			# Sigma0(a)
2622	xor	$10,$25,$30
2623#else
2624	addu	$8,$20,$7			# 28
2625	srl	$7,$1,6
2626	xor	$11,$2,$3
2627	sll	$10,$1,7
2628	and	$11,$1
2629	srl	$9,$1,11
2630	xor	$7,$10
2631	sll	$10,$1,21
2632	xor	$7,$9
2633	srl	$9,$1,25
2634	xor	$7,$10
2635	sll	$10,$1,26
2636	xor	$7,$9
2637	xor	$11,$3			# Ch(e,f,g)
2638	xor	$9,$10,$7			# Sigma1(e)
2639
2640	srl	$7,$24,2
2641	addu	$8,$11
2642	lw	$11,112($6)		# K[28]
2643	sll	$10,$24,10
2644	addu	$8,$9
2645	srl	$9,$24,13
2646	xor	$7,$10
2647	sll	$10,$24,19
2648	xor	$7,$9
2649	srl	$9,$24,22
2650	xor	$7,$10
2651	sll	$10,$24,30
2652	xor	$7,$9
2653	and	$9,$25,$30
2654	xor	$7,$10			# Sigma0(a)
2655	xor	$10,$25,$30
2656#endif
2657	sw	$20,48($29)	# offload to ring buffer
2658	addu	$7,$9
2659	and	$10,$24
2660	addu	$8,$11			# +=K[28]
2661	addu	$7,$10			# +=Maj(a,b,c)
2662	addu	$31,$8
2663	addu	$7,$8
2664	lw	$23,60($29)	# prefetch from ring buffer
2665#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2666	srl	$11,$22,3		# Xupdate(29)
2667	rotr	$9,$22,7
2668	addu	$21,$14			# +=X[i+9]
2669	xor	$11,$9
2670	rotr	$9,$22,18
2671
2672	srl	$12,$19,10
2673	rotr	$10,$19,17
2674	xor	$11,$9			# sigma0(X[i+1])
2675	rotr	$9,$19,19
2676	xor	$12,$10
2677	addu	$21,$11
2678#else
2679	srl	$11,$22,3		# Xupdate(29)
2680	addu	$21,$14			# +=X[i+9]
2681	sll	$10,$22,14
2682	srl	$9,$22,7
2683	xor	$11,$10
2684	sll	$10,11
2685	xor	$11,$9
2686	srl	$9,$22,18
2687	xor	$11,$10
2688
2689	srl	$12,$19,10
2690	xor	$11,$9			# sigma0(X[i+1])
2691	sll	$10,$19,13
2692	addu	$21,$11
2693	srl	$9,$19,17
2694	xor	$12,$10
2695	sll	$10,2
2696	xor	$12,$9
2697	srl	$9,$19,19
2698	xor	$12,$10
2699#endif
2700	xor	$12,$9			# sigma1(X[i+14])
2701	addu	$21,$12
2702#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2703	xor	$12,$1,$2			# 29
2704	rotr	$10,$31,6
2705	addu	$9,$21,$3
2706	rotr	$11,$31,11
2707	and	$12,$31
2708	rotr	$3,$31,25
2709	xor	$10,$11
2710	rotr	$11,$7,2
2711	xor	$12,$2			# Ch(e,f,g)
2712	xor	$10,$3			# Sigma1(e)
2713
2714	rotr	$3,$7,13
2715	addu	$9,$12
2716	lw	$12,116($6)		# K[29]
2717	xor	$3,$11
2718	rotr	$11,$7,22
2719	addu	$9,$10
2720	and	$10,$24,$25
2721	xor	$3,$11			# Sigma0(a)
2722	xor	$11,$24,$25
2723#else
2724	addu	$9,$21,$3			# 29
2725	srl	$3,$31,6
2726	xor	$12,$1,$2
2727	sll	$11,$31,7
2728	and	$12,$31
2729	srl	$10,$31,11
2730	xor	$3,$11
2731	sll	$11,$31,21
2732	xor	$3,$10
2733	srl	$10,$31,25
2734	xor	$3,$11
2735	sll	$11,$31,26
2736	xor	$3,$10
2737	xor	$12,$2			# Ch(e,f,g)
2738	xor	$10,$11,$3			# Sigma1(e)
2739
2740	srl	$3,$7,2
2741	addu	$9,$12
2742	lw	$12,116($6)		# K[29]
2743	sll	$11,$7,10
2744	addu	$9,$10
2745	srl	$10,$7,13
2746	xor	$3,$11
2747	sll	$11,$7,19
2748	xor	$3,$10
2749	srl	$10,$7,22
2750	xor	$3,$11
2751	sll	$11,$7,30
2752	xor	$3,$10
2753	and	$10,$24,$25
2754	xor	$3,$11			# Sigma0(a)
2755	xor	$11,$24,$25
2756#endif
2757	sw	$21,52($29)	# offload to ring buffer
2758	addu	$3,$10
2759	and	$11,$7
2760	addu	$9,$12			# +=K[29]
2761	addu	$3,$11			# +=Maj(a,b,c)
2762	addu	$30,$9
2763	addu	$3,$9
2764	lw	$8,0($29)	# prefetch from ring buffer
2765#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2766	srl	$12,$23,3		# Xupdate(30)
2767	rotr	$10,$23,7
2768	addu	$22,$15			# +=X[i+9]
2769	xor	$12,$10
2770	rotr	$10,$23,18
2771
2772	srl	$13,$20,10
2773	rotr	$11,$20,17
2774	xor	$12,$10			# sigma0(X[i+1])
2775	rotr	$10,$20,19
2776	xor	$13,$11
2777	addu	$22,$12
2778#else
2779	srl	$12,$23,3		# Xupdate(30)
2780	addu	$22,$15			# +=X[i+9]
2781	sll	$11,$23,14
2782	srl	$10,$23,7
2783	xor	$12,$11
2784	sll	$11,11
2785	xor	$12,$10
2786	srl	$10,$23,18
2787	xor	$12,$11
2788
2789	srl	$13,$20,10
2790	xor	$12,$10			# sigma0(X[i+1])
2791	sll	$11,$20,13
2792	addu	$22,$12
2793	srl	$10,$20,17
2794	xor	$13,$11
2795	sll	$11,2
2796	xor	$13,$10
2797	srl	$10,$20,19
2798	xor	$13,$11
2799#endif
2800	xor	$13,$10			# sigma1(X[i+14])
2801	addu	$22,$13
2802#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2803	xor	$13,$31,$1			# 30
2804	rotr	$11,$30,6
2805	addu	$10,$22,$2
2806	rotr	$12,$30,11
2807	and	$13,$30
2808	rotr	$2,$30,25
2809	xor	$11,$12
2810	rotr	$12,$3,2
2811	xor	$13,$1			# Ch(e,f,g)
2812	xor	$11,$2			# Sigma1(e)
2813
2814	rotr	$2,$3,13
2815	addu	$10,$13
2816	lw	$13,120($6)		# K[30]
2817	xor	$2,$12
2818	rotr	$12,$3,22
2819	addu	$10,$11
2820	and	$11,$7,$24
2821	xor	$2,$12			# Sigma0(a)
2822	xor	$12,$7,$24
2823#else
2824	addu	$10,$22,$2			# 30
2825	srl	$2,$30,6
2826	xor	$13,$31,$1
2827	sll	$12,$30,7
2828	and	$13,$30
2829	srl	$11,$30,11
2830	xor	$2,$12
2831	sll	$12,$30,21
2832	xor	$2,$11
2833	srl	$11,$30,25
2834	xor	$2,$12
2835	sll	$12,$30,26
2836	xor	$2,$11
2837	xor	$13,$1			# Ch(e,f,g)
2838	xor	$11,$12,$2			# Sigma1(e)
2839
2840	srl	$2,$3,2
2841	addu	$10,$13
2842	lw	$13,120($6)		# K[30]
2843	sll	$12,$3,10
2844	addu	$10,$11
2845	srl	$11,$3,13
2846	xor	$2,$12
2847	sll	$12,$3,19
2848	xor	$2,$11
2849	srl	$11,$3,22
2850	xor	$2,$12
2851	sll	$12,$3,30
2852	xor	$2,$11
2853	and	$11,$7,$24
2854	xor	$2,$12			# Sigma0(a)
2855	xor	$12,$7,$24
2856#endif
2857	sw	$22,56($29)	# offload to ring buffer
2858	addu	$2,$11
2859	and	$12,$3
2860	addu	$10,$13			# +=K[30]
2861	addu	$2,$12			# +=Maj(a,b,c)
2862	addu	$25,$10
2863	addu	$2,$10
2864	lw	$9,4($29)	# prefetch from ring buffer
2865#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2866	srl	$13,$8,3		# Xupdate(31)
2867	rotr	$11,$8,7
2868	addu	$23,$16			# +=X[i+9]
2869	xor	$13,$11
2870	rotr	$11,$8,18
2871
2872	srl	$14,$21,10
2873	rotr	$12,$21,17
2874	xor	$13,$11			# sigma0(X[i+1])
2875	rotr	$11,$21,19
2876	xor	$14,$12
2877	addu	$23,$13
2878#else
2879	srl	$13,$8,3		# Xupdate(31)
2880	addu	$23,$16			# +=X[i+9]
2881	sll	$12,$8,14
2882	srl	$11,$8,7
2883	xor	$13,$12
2884	sll	$12,11
2885	xor	$13,$11
2886	srl	$11,$8,18
2887	xor	$13,$12
2888
2889	srl	$14,$21,10
2890	xor	$13,$11			# sigma0(X[i+1])
2891	sll	$12,$21,13
2892	addu	$23,$13
2893	srl	$11,$21,17
2894	xor	$14,$12
2895	sll	$12,2
2896	xor	$14,$11
2897	srl	$11,$21,19
2898	xor	$14,$12
2899#endif
2900	xor	$14,$11			# sigma1(X[i+14])
2901	addu	$23,$14
2902#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2903	xor	$14,$30,$31			# 31
2904	rotr	$12,$25,6
2905	addu	$11,$23,$1
2906	rotr	$13,$25,11
2907	and	$14,$25
2908	rotr	$1,$25,25
2909	xor	$12,$13
2910	rotr	$13,$2,2
2911	xor	$14,$31			# Ch(e,f,g)
2912	xor	$12,$1			# Sigma1(e)
2913
2914	rotr	$1,$2,13
2915	addu	$11,$14
2916	lw	$14,124($6)		# K[31]
2917	xor	$1,$13
2918	rotr	$13,$2,22
2919	addu	$11,$12
2920	and	$12,$3,$7
2921	xor	$1,$13			# Sigma0(a)
2922	xor	$13,$3,$7
2923#else
2924	addu	$11,$23,$1			# 31
2925	srl	$1,$25,6
2926	xor	$14,$30,$31
2927	sll	$13,$25,7
2928	and	$14,$25
2929	srl	$12,$25,11
2930	xor	$1,$13
2931	sll	$13,$25,21
2932	xor	$1,$12
2933	srl	$12,$25,25
2934	xor	$1,$13
2935	sll	$13,$25,26
2936	xor	$1,$12
2937	xor	$14,$31			# Ch(e,f,g)
2938	xor	$12,$13,$1			# Sigma1(e)
2939
2940	srl	$1,$2,2
2941	addu	$11,$14
2942	lw	$14,124($6)		# K[31]
2943	sll	$13,$2,10
2944	addu	$11,$12
2945	srl	$12,$2,13
2946	xor	$1,$13
2947	sll	$13,$2,19
2948	xor	$1,$12
2949	srl	$12,$2,22
2950	xor	$1,$13
2951	sll	$13,$2,30
2952	xor	$1,$12
2953	and	$12,$3,$7
2954	xor	$1,$13			# Sigma0(a)
2955	xor	$13,$3,$7
2956#endif
2957	sw	$23,60($29)	# offload to ring buffer
2958	addu	$1,$12
2959	and	$13,$2
2960	addu	$11,$14			# +=K[31]
2961	addu	$1,$13			# +=Maj(a,b,c)
2962	addu	$24,$11
2963	addu	$1,$11
2964	lw	$10,8($29)	# prefetch from ring buffer
2965	and	$14,0xfff
2966	li	$15,2290
2967	.set	noreorder
2968	bne	$14,$15,.L16_xx
2969	addu $6,16*4		# Ktbl+=16
2970
2971	lw	$23,16*4($29)	# restore pointer to the end of input
2972	lw	$8,0*4($4)
2973	lw	$9,1*4($4)
2974	lw	$10,2*4($4)
2975	addu $5,16*4
2976	lw	$11,3*4($4)
2977	addu	$1,$8
2978	lw	$12,4*4($4)
2979	addu	$2,$9
2980	lw	$13,5*4($4)
2981	addu	$3,$10
2982	lw	$14,6*4($4)
2983	addu	$7,$11
2984	lw	$15,7*4($4)
2985	addu	$24,$12
2986	sw	$1,0*4($4)
2987	addu	$25,$13
2988	sw	$2,1*4($4)
2989	addu	$30,$14
2990	sw	$3,2*4($4)
2991	addu	$31,$15
2992	sw	$7,3*4($4)
2993	sw	$24,4*4($4)
2994	sw	$25,5*4($4)
2995	sw	$30,6*4($4)
2996	sw	$31,7*4($4)
2997
2998	bne	$5,$23,.Loop
2999	subu $6,192	# rewind $6
3000
3001	lw	$31,128-1*4($29)
3002	lw	$30,128-2*4($29)
3003	lw	$23,128-3*4($29)
3004	lw	$22,128-4*4($29)
3005	lw	$21,128-5*4($29)
3006	lw	$20,128-6*4($29)
3007	lw	$19,128-7*4($29)
3008	lw	$18,128-8*4($29)
3009	lw	$17,128-9*4($29)
3010	lw	$16,128-10*4($29)
3011	jr	$31
3012	addu $29,128
3013.end	sha256_block_data_order
3014
.rdata
.align	5
/*
 * K256 - the 64 SHA-256 round constants K[0..63], one 32-bit word each,
 * laid out in round order (eight words per directive below).
 *
 * sha256_block_data_order steps its table pointer through this data 16
 * words at a time and decides that it has reached the end by masking the
 * most recently loaded constant with 0xfff and comparing the result with
 * 2290 (0x8f2), i.e. the low 12 bits of the final word 0xc67178f2.  The
 * words that close the earlier 16-word groups (0xc19bf174, 0x14292967,
 * 0x106aa070) all differ from it in those bits, so the test fires only on
 * the last group.  Keep the order and the word count exactly as they are.
 */
K256:
	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	# K[ 0.. 7]
	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	# K[ 8..15]
	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	# K[16..23]
	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	# K[24..31]
	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	# K[32..39]
	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	# K[40..47]
	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	# K[48..55]
	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	# K[56..63]
/* Identification string placed directly after the table; then realign to 32 bytes. */
.asciiz	"SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"
.align	5
3036
3037