1#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
2.set     mips2
3#endif
4#include "mips_arch.h"
5
# Quotient/remainder helper macros.  Both R6 branches define the divide
# macro as empty and make mfqt/mfrm expand to three-operand
# quotient/remainder instructions; the fallback branch issues a
# two-operand divide and makes mfqt/mfrm read LO and HI.
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt)	divu	rd,rs,rt
# define mfrm(rd,rs,rt)	modu	rd,rs,rt
#else
# define divu(rs,rt)	divu	$0,rs,rt
# define mfqt(rd,rs,rt)	mflo	rd
# define mfrm(rd,rs,rt)	mfhi	rd
#endif
19
20.rdata
21.asciiz	"mips3.s, Version 1.2"
22.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
23
24.text
25.set	noat
26
# bn_mul_add_words: public entry point.  Clears the carry/return
# register $2, then continues in bn_mul_add_words_internal when the
# word count in $6 is positive; otherwise returns 0.
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0			# delay slot: carry = 0
	jr	$31
	move	$4,$2			# delay slot: copy result to $4
.end	bn_mul_add_words
37
# bn_mul_add_words_internal: multiply-accumulate loop.
#   $4 - words that are loaded, updated and stored back
#   $5 - source words
#   $6 - number of words left (positive on entry)
#   $7 - multiplier word
#   $2 - running carry (zeroed by the entry stub), returned to caller
#   $3 - constant -4, used to test whether 4 or more words remain
#   $1 - scratch (the file runs under .set noat)
# Per word: t = r[i] + carry; t += low half of a[i]*w; r[i] = t;
#           carry = high half of a[i]*w + both carry-outs.
# The main loop handles four words per pass, the tail up to three.
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words

.L_bn_mul_add_words_loop:
	lw	$12,0($5)
	multu	($12,$7)
	lw	$13,0($4)
	lw	$14,4($5)
	lw	$15,4($4)
	lw	$8,2*4($5)
	lw	$9,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	 multu	($14,$7)
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1

	lw	$10,3*4($5)
	lw	$11,3*4($4)
	addu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$15,$1
	addu	$2,$14
	 multu	($8,$7)
	sltu	$1,$15,$1
	sw	$15,4($4)
	addu	$2,$1

	subu	$6,4
	addu $4,4*4
	addu $5,4*4
	addu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$9,$1
	addu	$2,$8
	 multu	($10,$7)
	sltu	$1,$9,$1
	sw	$9,-2*4($4)	# pointers were already advanced
	addu	$2,$1


	and	$8,$6,$3	# nonzero while 4 or more words remain
	addu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$11,$1
	addu	$2,$10
	sltu	$1,$11,$1
	sw	$11,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	addu	$2,$1		# delay slot: fold in last carry-out

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	lw	$13,0($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,4($5)
	multu	($12,$7)
	lw	$13,4($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,4($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	lw	$13,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,2*4($4)
	addu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: copy carry to $4
.end	bn_mul_add_words_internal
161
# bn_mul_words: public entry point.  Clears the carry/return register
# $2, then continues in bn_mul_words_internal when the word count in
# $6 is positive; otherwise returns 0.
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0			# delay slot: carry = 0
	jr	$31
	move	$4,$2			# delay slot: copy result to $4
.end	bn_mul_words
172
# bn_mul_words_internal: word-by-vector multiply.
#   $4 - destination words
#   $5 - source words
#   $6 - number of words left (positive on entry)
#   $7 - multiplier word
#   $2 - running carry (zeroed by the entry stub), returned to caller
#   $3 - constant -4, used to test whether 4 or more words remain
#   $1 - scratch (the file runs under .set noat)
# Per word: r[i] = carry + low half of a[i]*w;
#           carry = high half of a[i]*w + carry-out of that addition.
# The main loop handles four words per pass, the tail up to three.
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words

.L_bn_mul_words_loop:
	lw	$12,0($5)
	multu	($12,$7)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	 multu	($14,$7)
	sw	$2,0($4)
	addu	$2,$13,$12

	subu	$6,4
	addu $4,4*4
	addu $5,4*4
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$2,$1
	sltu	$15,$2,$1
	 multu	($8,$7)
	sw	$2,-3*4($4)	# pointers were already advanced
	addu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$2,$1
	sltu	$9,$2,$1
	 multu	($10,$7)
	sw	$2,-2*4($4)
	addu	$2,$9,$8

	and	$8,$6,$3	# nonzero while 4 or more words remain
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$2,$1
	sltu	$11,$2,$1
	sw	$2,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	addu	$2,$11,$10	# delay slot: carry for the next word

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,0($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,4($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,4($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,2*4($4)
	addu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: copy carry to $4
.end	bn_mul_words_internal
265
# bn_sqr_words: public entry point.  Sets $2 to 0, then continues in
# bn_sqr_words_internal when the word count in $6 is positive;
# otherwise returns immediately.
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0			# delay slot
	jr	$31
	move	$4,$2			# delay slot
.end	bn_sqr_words
276
# bn_sqr_words_internal: squares each source word separately.
#   $4 - destination; two words (low, high) are stored per source word
#   $5 - source words
#   $6 - number of source words left (positive on entry)
#   $3 - constant -4, used to test whether 4 or more words remain
# No carry is propagated between words.  $2 is not written here, so
# the value returned is the 0 set up by the entry stub.
# The main loop handles four source words per pass, the tail up to
# three.
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words

.L_bn_sqr_words_loop:
	lw	$12,0($5)
	multu	($12,$12)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)
	sw	$12,4($4)

	multu	($14,$14)
	subu	$6,4
	addu $4,8*4
	addu $5,4*4
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sw	$15,-6*4($4)	# pointers were already advanced
	sw	$14,-5*4($4)

	multu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sw	$9,-4*4($4)
	sw	$8,-3*4($4)


	multu	($10,$10)
	and	$8,$6,$3	# nonzero while 4 or more words remain
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sw	$11,-2*4($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sw	$10,-4($4)	# delay slot: store last high half

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)
	sw	$12,4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,4($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,2*4($4)
	sw	$12,3*4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,2*4($5)
	multu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,4*4($4)
	sw	$12,5*4($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot

.end	bn_sqr_words_internal
358
# bn_add_words: public entry point.  Clears the carry/return register
# $2, then continues in bn_add_words_internal when the word count in
# $7 is positive; otherwise returns 0.
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0			# delay slot: carry = 0
	jr	$31
	move	$4,$2			# delay slot: copy result to $4
.end	bn_add_words
369
# bn_add_words_internal: word-wise addition with carry.
#   $4 - destination words
#   $5 - first operand words
#   $6 - second operand words
#   $7 - number of words left (positive on entry)
#   $2 - running carry (zeroed by the entry stub), returned to caller
#   $3 - constant -4, used to test whether 4 or more words remain
#   $1 - holds that test result (the file runs under .set noat)
#   $24,$25 - carry-out of the operand addition for alternating words
# Per word: s = a[i] + b[i]; r[i] = s + carry;
#           carry = carry-out of the first or of the second addition.
# The main loop handles four words per pass, the tail up to three.
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words

.L_bn_add_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3	# nonzero while 4 or more words remain
	lw	$14,2*4($5)
	addu $6,4*4
	lw	$15,3*4($5)
	addu $4,4*4
	lw	$9,-3*4($6)	# $6 was already advanced
	addu $5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	addu	$8,$12
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	addu	$9,$13
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25

	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	addu	$11,$15
	sltu	$25,$11,$15
	addu	$15,$11,$2
	sltu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	addu	$2,$25		# delay slot: fold in last carry-out

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	addu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_add_words_return

	lw	$13,4($5)
	lw	$9,4($6)
	addu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_add_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: copy carry to $4

.end	bn_add_words_internal
465
# bn_sub_words: public entry point.  Clears the borrow/return register
# $2, then continues in bn_sub_words_internal when the word count in
# $7 is positive; otherwise returns 0.
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0			# delay slot: borrow = 0
	jr	$31
	move	$4,$0			# delay slot: $4 = 0, same value as $2
.end	bn_sub_words
476
# bn_sub_words_internal: word-wise subtraction with borrow.
#   $4 - destination words
#   $5 - minuend words
#   $6 - subtrahend words
#   $7 - number of words left (positive on entry)
#   $2 - running borrow (zeroed by the entry stub), returned to caller
#   $3 - constant -4, used to test whether 4 or more words remain
#   $1 - holds that test result (the file runs under .set noat)
#   $24,$25 - borrow-out of the operand subtraction for alternating words
# Per word: d = a[i] - b[i]; r[i] = d - borrow;
#           borrow = borrow-out of the first or of the second subtraction.
# The main loop handles four words per pass, the tail up to three.
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words

.L_bn_sub_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3	# nonzero while 4 or more words remain
	lw	$14,2*4($5)
	addu $6,4*4
	lw	$15,3*4($5)
	addu $4,4*4
	lw	$9,-3*4($6)	# $6 was already advanced
	addu $5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25


	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	sltu	$25,$15,$11
	subu	$11,$15,$11
	subu	$15,$11,$2
	sgtu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	addu	$2,$25		# delay slot: fold in last borrow-out

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	lw	$13,4($5)
	subu	$7,1
	lw	$9,4($6)
	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: copy borrow to $4
.end	bn_sub_words_internal
572
#if 0
/*
 * The bn_div_3_words entry point is re-used for constant-time interface.
 * Implementation is retained as historical reference.
 *
 * This whole region is excluded from assembly by the surrounding
 * conditional.  As written, the code reads the words at 0, -4 and -8
 * relative to the incoming $4, returns -1 when the word at 0 equals $6,
 * and otherwise calls bn_div_words_internal for a quotient estimate
 * that the inner loop then decrements while it is too large.
 */
.align 5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	lw	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	lw	$5,-4($7)
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31
	bal	bn_div_words_internal
	move	$31,$11
	multu	($10,$2)
	lw	$14,-2*4($7)
	move	$8,$0
	mfhi	($13,$10,$2)
	mflo	($12,$10,$2)
	sltu	$24,$13,$5
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25
	sltu	$15,$12,$10
	addu	$5,$6
	subu	$13,$15
	subu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	subu	$2,1
	addu	$2,1
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal
#endif
634
# bn_div_words: public entry point.  Continues in
# bn_div_words_internal when the divisor in $6 is nonzero; for a zero
# divisor it returns -1 (all bits set) instead of trapping.
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words
646
# bn_div_words_internal: divides the two-word value $4:$5 (high:low)
# by the single word $6 and returns the quotient word.
#   $4 - high dividend word, later the running remainder
#   $5 - low dividend word
#   $6 - divisor (nonzero; shifted left during the routine and
#        shifted back before returning)
#   $25 - number of bits the divisor was shifted to set its top bit
#   $3 - upper 16 bits of the shifted divisor; reused as scratch in
#        the second inner loop and for the remainder at the end
#   $8 - current 16-bit quotient digit estimate
#   $9 - upper 16 bits of the running remainder
#   $12,$13 - low/high half of divisor * digit estimate
#   $15 - remainder shifted up by 16 with the next 16 dividend bits
#   $2 - quotient being assembled, returned ($4 gets a copy)
# The quotient is produced as two 16-bit digits.  Each digit is
# estimated by dividing by the upper divisor half and then decremented
# in an inner loop while divisor * estimate exceeds the dividend part.
# A "break 6" is executed when dividend bits would be shifted out
# during normalization.
# The first instructions still run under the .set noreorder selected
# in the bn_div_words stub, so each branch is followed by its delay
# slot.
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body	# top bit already set
	move	$25,$3			# delay slot: shift count = 0
	sll	$6,1
	bgtz	$6,.-4			# back to the sll until top bit is set
	addu	$25,1			# delay slot: count the shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	sll	$14,$13
	and	$14,$4		# dividend bits that the shift would drop
	srl	$1,$5,$13	# low-word bits that move into the high word
	.set	noreorder
	beqz	$14,.+12	# skip the break when nothing is dropped
	nop
	break	6		# signal overflow
	.set	reorder
	sll	$4,$25
	sll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	srl	$3,$6,4*4	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12		# skip the subtract when $4 < $6
	nop
	subu	$4,$6
	.set	reorder

	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0x0000ffff (largest 16-bit digit)
	beq	$3,$9,.L_bn_div_words_skip_div1
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div1:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6	# borrow of the low-half subtraction below
	or	$1,$14		# set while the product is still too large
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	subu	$13,$2		# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop1
	subu	$8,1		# delay slot: lower the estimate
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	sll	$5,4*4	# bits
	subu	$4,$15,$12
	sll	$2,$8,4*4	# bits

	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0x0000ffff (largest 16-bit digit)
	beq	$3,$9,.L_bn_div_words_skip_div2
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div2:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6	# borrow of the low-half subtraction below
	or	$1,$14		# set while the product is still too large
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	subu	$13,$3		# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop2
	subu	$8,1		# delay slot: lower the estimate
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	subu	$4,$15,$12
	or	$2,$8
	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
	srl	$6,$25		# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal
752
# bn_mul_comba8: 8x8-word product, r[0..15] = a[0..7] * b[0..7].
#   $4 - r (16 result words are stored)
#   $5 - a, $6 - b (both pointers are overwritten by a[7] and b[7]
#        once the last operand words have been loaded)
#   a[0..7] live in $12,$13,$14,$15,$16,$18,$20,$5
#   b[0..7] live in $8,$9,$10,$11,$17,$19,$21,$6
#   $2,$3,$7 - three-word column accumulator; the roles c1,c2,c3
#        rotate after every stored result word
#   $24,$25 - low/high half of the current product, $1 - carry scratch
# $16-$21 are saved in a 24-byte stack frame and restored on exit.
# Each multiply is issued ahead of the additions that consume the
# previous product.
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*4,$31
	.mask	0x003f0000,-4
	subu $29,6*4
	sw	$21,5*4($29)
	sw	$20,4*4($29)
	sw	$19,3*4($29)
	sw	$18,2*4($29)
	sw	$17,1*4($29)
	sw	$16,0*4($29)

	.set	reorder
	lw	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# 1ine with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning. If anybody out there
				# has a clue about how to circumvent
				# this do send me a note.
				#		<appro@fy.chalmers.se>
				# NOTE(review): "1ine" above is spelled
				# with a digit one, presumably so that
				# the preprocessor does not take the
				# comment for a line-number directive;
				# leave it as is.

	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)

	lw	$16,4*4($5)
	lw	$18,5*4($5)
	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	lw	$20,6*4($5)
	lw	$5,7*4($5)
	lw	$17,4*4($6)
	lw	$19,5*4($6)
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	lw	$21,6*4($6)
	lw	$6,7*4($6)
	sw	$2,0($4)	# r[0]=c1;
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	($24,$16,$8)
	mfhi	($25,$16,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$12,$17)
	mfhi	($25,$12,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	($24,$12,$19)
	mfhi	($25,$12,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$13,$17)
	mfhi	($25,$13,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$9)
	mfhi	($25,$16,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$8)
	mfhi	($25,$18,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	($24,$20,$8)
	mfhi	($25,$20,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$18,$9)
	mfhi	($25,$18,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$10)
	mfhi	($25,$16,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$17)
	mfhi	($25,$14,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$13,$19)
	mfhi	($25,$13,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$12,$21)
	mfhi	($25,$12,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	($24,$12,$6)
	mfhi	($25,$12,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$13,$21)
	mfhi	($25,$13,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$19)
	mfhi	($25,$14,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$17)
	mfhi	($25,$15,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$11)
	mfhi	($25,$16,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$10)
	mfhi	($25,$18,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$20,$9)
	mfhi	($25,$20,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$5,$8)
	mfhi	($25,$5,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	($24,$5,$9)
	mfhi	($25,$5,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$20,$10)
	mfhi	($25,$20,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$11)
	mfhi	($25,$18,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$17)
	mfhi	($25,$16,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$19)
	mfhi	($25,$15,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$21)
	mfhi	($25,$14,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$13,$6)
	mfhi	($25,$13,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)	# r[8]=c3;

	mflo	($24,$14,$6)
	mfhi	($25,$14,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$15,$21)
	mfhi	($25,$15,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$19)
	mfhi	($25,$16,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$17)
	mfhi	($25,$18,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$20,$11)
	mfhi	($25,$20,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$5,$10)
	mfhi	($25,$5,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)	# r[9]=c1;

	mflo	($24,$5,$11)
	mfhi	($25,$5,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$20,$17)
	mfhi	($25,$20,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$19)
	mfhi	($25,$18,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$21)
	mfhi	($25,$16,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$6)
	mfhi	($25,$15,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)	# r[10]=c2;

	mflo	($24,$16,$6)
	mfhi	($25,$16,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$18,$21)
	mfhi	($25,$18,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$20,$19)
	mfhi	($25,$20,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$5,$17)
	mfhi	($25,$5,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)	# r[11]=c3;

	mflo	($24,$5,$19)
	mfhi	($25,$5,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$20,$21)
	mfhi	($25,$20,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$6)
	mfhi	($25,$18,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)	# r[12]=c1;

	mflo	($24,$20,$6)
	mfhi	($25,$20,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$5,$21)
	mfhi	($25,$5,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)	# r[13]=c2;

	mflo	($24,$5,$6)
	mfhi	($25,$5,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)	# r[14]=c3;
	sw	$2,15*4($4)	# r[15]=c1;

	.set	noreorder
	lw	$21,5*4($29)
	lw	$20,4*4($29)
	lw	$19,3*4($29)
	lw	$18,2*4($29)
	lw	$17,1*4($29)
	lw	$16,0*4($29)
	jr	$31
	addu $29,6*4		# delay slot: release the stack frame
.end	bn_mul_comba8
1384
# bn_mul_comba4: 4x4-word product, r[0..7] = a[0..3] * b[0..3].
#   $4 - r (8 result words are stored)
#   $5 - a, $6 - b
#   a[0..3] live in $12,$13,$14,$15
#   b[0..3] live in $8,$9,$10,$11
#   $2,$3,$7 - three-word column accumulator; the roles c1,c2,c3
#        rotate after every stored result word
#   $24,$25 - low/high half of the current product, $1 - carry scratch
# No stack frame is used and no callee-saved register is touched.
.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)
	sw	$2,0($4)	# r[0]=c1;

	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)	# r[6]=c1;
	sw	$3,7*4($4)	# r[7]=c2;

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4
1543
# bn_sqr_comba8: square the eight 32-bit words read from $5 and store the
# sixteen-word result through $4.
#
# Inputs  : $4 = destination (words 0..15 are written)
#           $5 = source      (words 0..7 are read)
# Scratch : $1 ($at), $2, $3, $6, $7, $8-$15, $24, $25.  No stack is used.
# Method  : column-by-column accumulation.  $12-$15 hold a[0..3] and $8-$11
#           hold a[4..7].  $2/$3/$7 form a three-word accumulator whose
#           roles rotate each time a column is stored; $24/$25 receive the
#           low/high word of every product and $1 holds carry bits.
#           A cross product a[i]*a[j] (i != j) is added twice, a square
#           a[i]*a[i] once.  Every multiply is issued in the middle of the
#           previous accumulate step and fetched afterwards.
# NOTE(review): multu/mflo/mfhi with parenthesised operands look like
#           preprocessor macros, presumably from mips_arch.h -- confirm.
1544.align	5
1545.globl	bn_sqr_comba8
1546.ent	bn_sqr_comba8
1547bn_sqr_comba8:
1548	.set	reorder
1549	lw	$12,0($5)	# $12 = a[0]
1550	lw	$13,4($5)	# $13 = a[1]
1551	lw	$14,2*4($5)	# $14 = a[2]
1552	lw	$15,3*4($5)	# $15 = a[3]
1553
1554	multu	($12,$12)		# column 0: start a[0]*a[0]
1555	lw	$8,4*4($5)	# $8  = a[4]
1556	lw	$9,5*4($5)	# $9  = a[5]
1557	lw	$10,6*4($5)	# $10 = a[6]
1558	lw	$11,7*4($5)	# $11 = a[7]
1559	mflo	($2,$12,$12)	# $2 = low word of a[0]*a[0]
1560	mfhi	($3,$12,$12)	# $3 = high word, carried into column 1
1561	sw	$2,0($4)	# r[0]
1562
1563	multu	($12,$13)		# column 1: a[0]*a[1], to be added twice
1564	mflo	($24,$12,$13)	# $24 = low word of the product
1565	mfhi	($25,$12,$13)	# $25 = high word of the product
1566	slt	$2,$25,$0	# $2 = sign bit of $25, i.e. the bit shifted out by doubling
1567	sll	$25,1	# double the high word
1568	 multu	($14,$12)		# start a[2]*a[0] for column 2
1569	slt	$6,$24,$0	# $6 = sign bit of $24 (overwrites $6)
1570	addu	$25,$6	# ...which becomes bit 0 of the doubled high word
1571	sll	$24,1	# double the low word
1572	addu	$3,$24	# $3 += doubled low word
1573	sltu	$1,$3,$24	# $1 = carry out of that add
1574	addu	$7,$25,$1	# $7 = doubled high word + carry
1575	sw	$3,4($4)	# r[1]
1576	mflo	($24,$14,$12)	# $24/$25 = a[2]*a[0]
1577	mfhi	($25,$14,$12)
1578	addu	$7,$24	# column 2: ($7,$2) += 2*a[2]*a[0]; first add of the low word
1579	sltu	$1,$7,$24	# $1 = carry of the first add
1580	 multu	($13,$13)		# start a[1]*a[1] for the next step
1581	addu	$7,$24	# second add of the low word
1582	addu	$1,$25	# $1 = high word + first carry
1583	sltu	$24,$7,$24	# $24 = carry of the second add
1584	addu	$2,$1	# $2 += high word + first carry
1585	addu	$25,$24	# $25 = high word + second carry
1586	sltu	$3,$2,$1	# $3 = carry-out; replaces old $3, already stored as r[1]
1587	addu	$2,$25	# $2 += high word + second carry
1588	sltu	$25,$2,$25	# carry out of that add
1589	addu	$3,$25	# $3 += carry
1590	mflo	($24,$13,$13)
1591	mfhi	($25,$13,$13)
1592	addu	$7,$24	# ($7,$2,$3) += a[1]*a[1] (a square is added once)
1593	sltu	$1,$7,$24	# $1 = carry out of the low word
1594	 multu	($12,$15)		# start a[0]*a[3] for column 3
1595	addu	$25,$1	# fold the carry into the high word
1596	addu	$2,$25
1597	sltu	$1,$2,$25	# $1 = carry out of the middle word
1598	addu	$3,$1
1599	sw	$7,2*4($4)	# r[2]
1600	mflo	($24,$12,$15)
1601	mfhi	($25,$12,$15)
1602	addu	$2,$24	# column 3: ($2,$3) += 2*a[0]*a[3]; $7 := carry-out
1603	sltu	$1,$2,$24
1604	 multu	($13,$14)		# start a[1]*a[2] for the next step
1605	addu	$2,$24
1606	addu	$1,$25
1607	sltu	$24,$2,$24
1608	addu	$3,$1
1609	addu	$25,$24
1610	sltu	$7,$3,$1
1611	addu	$3,$25
1612	sltu	$25,$3,$25
1613	addu	$7,$25
1614	mflo	($24,$13,$14)
1615	mfhi	($25,$13,$14)
1616	addu	$2,$24	# ($2,$3,$7) += 2*a[1]*a[2]
1617	sltu	$1,$2,$24
1618	 multu	($8,$12)		# start a[4]*a[0] for column 4
1619	addu	$2,$24
1620	addu	$1,$25
1621	sltu	$24,$2,$24
1622	addu	$3,$1
1623	addu	$25,$24
1624	sltu	$1,$3,$1	# $7 already holds carries for this column, so the carry goes to $1...
1625	addu	$3,$25
1626	addu	$7,$1	# ...and is added to $7 rather than replacing it
1627	sltu	$25,$3,$25
1628	addu	$7,$25
1629	mflo	($24,$8,$12)
1630	mfhi	($25,$8,$12)
1631	sw	$2,3*4($4)	# r[3]
1632	addu	$3,$24	# column 4: ($3,$7) += 2*a[4]*a[0]; $2 := carry-out
1633	sltu	$1,$3,$24
1634	 multu	($15,$13)		# start a[3]*a[1] for the next step
1635	addu	$3,$24
1636	addu	$1,$25
1637	sltu	$24,$3,$24
1638	addu	$7,$1
1639	addu	$25,$24
1640	sltu	$2,$7,$1
1641	addu	$7,$25
1642	sltu	$25,$7,$25
1643	addu	$2,$25
1644	mflo	($24,$15,$13)
1645	mfhi	($25,$15,$13)
1646	addu	$3,$24	# ($3,$7,$2) += 2*a[3]*a[1]
1647	sltu	$1,$3,$24
1648	 multu	($14,$14)		# start a[2]*a[2] for the next step
1649	addu	$3,$24
1650	addu	$1,$25
1651	sltu	$24,$3,$24
1652	addu	$7,$1
1653	addu	$25,$24
1654	sltu	$1,$7,$1
1655	addu	$7,$25
1656	addu	$2,$1
1657	sltu	$25,$7,$25
1658	addu	$2,$25
1659	mflo	($24,$14,$14)
1660	mfhi	($25,$14,$14)
1661	addu	$3,$24	# ($3,$7,$2) += a[2]*a[2]
1662	sltu	$1,$3,$24
1663	 multu	($12,$9)		# start a[0]*a[5] for column 5
1664	addu	$25,$1
1665	addu	$7,$25
1666	sltu	$1,$7,$25
1667	addu	$2,$1
1668	sw	$3,4*4($4)	# r[4]
1669	mflo	($24,$12,$9)
1670	mfhi	($25,$12,$9)
1671	addu	$7,$24	# column 5: ($7,$2) += 2*a[0]*a[5]; $3 := carry-out
1672	sltu	$1,$7,$24
1673	 multu	($13,$8)		# start a[1]*a[4] for the next step
1674	addu	$7,$24
1675	addu	$1,$25
1676	sltu	$24,$7,$24
1677	addu	$2,$1
1678	addu	$25,$24
1679	sltu	$3,$2,$1
1680	addu	$2,$25
1681	sltu	$25,$2,$25
1682	addu	$3,$25
1683	mflo	($24,$13,$8)
1684	mfhi	($25,$13,$8)
1685	addu	$7,$24	# ($7,$2,$3) += 2*a[1]*a[4]
1686	sltu	$1,$7,$24
1687	 multu	($14,$15)		# start a[2]*a[3] for the next step
1688	addu	$7,$24
1689	addu	$1,$25
1690	sltu	$24,$7,$24
1691	addu	$2,$1
1692	addu	$25,$24
1693	sltu	$1,$2,$1
1694	addu	$2,$25
1695	addu	$3,$1
1696	sltu	$25,$2,$25
1697	addu	$3,$25
1698	mflo	($24,$14,$15)
1699	mfhi	($25,$14,$15)
1700	addu	$7,$24	# ($7,$2,$3) += 2*a[2]*a[3]
1701	sltu	$1,$7,$24
1702	 multu	($10,$12)		# start a[6]*a[0] for column 6
1703	addu	$7,$24
1704	addu	$1,$25
1705	sltu	$24,$7,$24
1706	addu	$2,$1
1707	addu	$25,$24
1708	sltu	$1,$2,$1
1709	addu	$2,$25
1710	addu	$3,$1
1711	sltu	$25,$2,$25
1712	addu	$3,$25
1713	mflo	($24,$10,$12)
1714	mfhi	($25,$10,$12)
1715	sw	$7,5*4($4)	# r[5]
1716	addu	$2,$24	# column 6: ($2,$3) += 2*a[6]*a[0]; $7 := carry-out
1717	sltu	$1,$2,$24
1718	 multu	($9,$13)		# start a[5]*a[1] for the next step
1719	addu	$2,$24
1720	addu	$1,$25
1721	sltu	$24,$2,$24
1722	addu	$3,$1
1723	addu	$25,$24
1724	sltu	$7,$3,$1
1725	addu	$3,$25
1726	sltu	$25,$3,$25
1727	addu	$7,$25
1728	mflo	($24,$9,$13)
1729	mfhi	($25,$9,$13)
1730	addu	$2,$24	# ($2,$3,$7) += 2*a[5]*a[1]
1731	sltu	$1,$2,$24
1732	 multu	($8,$14)		# start a[4]*a[2] for the next step
1733	addu	$2,$24
1734	addu	$1,$25
1735	sltu	$24,$2,$24
1736	addu	$3,$1
1737	addu	$25,$24
1738	sltu	$1,$3,$1
1739	addu	$3,$25
1740	addu	$7,$1
1741	sltu	$25,$3,$25
1742	addu	$7,$25
1743	mflo	($24,$8,$14)
1744	mfhi	($25,$8,$14)
1745	addu	$2,$24	# ($2,$3,$7) += 2*a[4]*a[2]
1746	sltu	$1,$2,$24
1747	 multu	($15,$15)		# start a[3]*a[3] for the next step
1748	addu	$2,$24
1749	addu	$1,$25
1750	sltu	$24,$2,$24
1751	addu	$3,$1
1752	addu	$25,$24
1753	sltu	$1,$3,$1
1754	addu	$3,$25
1755	addu	$7,$1
1756	sltu	$25,$3,$25
1757	addu	$7,$25
1758	mflo	($24,$15,$15)
1759	mfhi	($25,$15,$15)
1760	addu	$2,$24	# ($2,$3,$7) += a[3]*a[3]
1761	sltu	$1,$2,$24
1762	 multu	($12,$11)		# start a[0]*a[7] for column 7
1763	addu	$25,$1
1764	addu	$3,$25
1765	sltu	$1,$3,$25
1766	addu	$7,$1
1767	sw	$2,6*4($4)	# r[6]
1768	mflo	($24,$12,$11)
1769	mfhi	($25,$12,$11)
1770	addu	$3,$24	# column 7: ($3,$7) += 2*a[0]*a[7]; $2 := carry-out
1771	sltu	$1,$3,$24
1772	 multu	($13,$10)		# start a[1]*a[6] for the next step
1773	addu	$3,$24
1774	addu	$1,$25
1775	sltu	$24,$3,$24
1776	addu	$7,$1
1777	addu	$25,$24
1778	sltu	$2,$7,$1
1779	addu	$7,$25
1780	sltu	$25,$7,$25
1781	addu	$2,$25
1782	mflo	($24,$13,$10)
1783	mfhi	($25,$13,$10)
1784	addu	$3,$24	# ($3,$7,$2) += 2*a[1]*a[6]
1785	sltu	$1,$3,$24
1786	 multu	($14,$9)		# start a[2]*a[5] for the next step
1787	addu	$3,$24
1788	addu	$1,$25
1789	sltu	$24,$3,$24
1790	addu	$7,$1
1791	addu	$25,$24
1792	sltu	$1,$7,$1
1793	addu	$7,$25
1794	addu	$2,$1
1795	sltu	$25,$7,$25
1796	addu	$2,$25
1797	mflo	($24,$14,$9)
1798	mfhi	($25,$14,$9)
1799	addu	$3,$24	# ($3,$7,$2) += 2*a[2]*a[5]
1800	sltu	$1,$3,$24
1801	 multu	($15,$8)		# start a[3]*a[4] for the next step
1802	addu	$3,$24
1803	addu	$1,$25
1804	sltu	$24,$3,$24
1805	addu	$7,$1
1806	addu	$25,$24
1807	sltu	$1,$7,$1
1808	addu	$7,$25
1809	addu	$2,$1
1810	sltu	$25,$7,$25
1811	addu	$2,$25
1812	mflo	($24,$15,$8)
1813	mfhi	($25,$15,$8)
1814	addu	$3,$24	# ($3,$7,$2) += 2*a[3]*a[4]
1815	sltu	$1,$3,$24
1816	 multu	($11,$13)		# start a[7]*a[1] for column 8
1817	addu	$3,$24
1818	addu	$1,$25
1819	sltu	$24,$3,$24
1820	addu	$7,$1
1821	addu	$25,$24
1822	sltu	$1,$7,$1
1823	addu	$7,$25
1824	addu	$2,$1
1825	sltu	$25,$7,$25
1826	addu	$2,$25
1827	mflo	($24,$11,$13)
1828	mfhi	($25,$11,$13)
1829	sw	$3,7*4($4)	# r[7]
1830	addu	$7,$24	# column 8: ($7,$2) += 2*a[7]*a[1]; $3 := carry-out
1831	sltu	$1,$7,$24
1832	 multu	($10,$14)		# start a[6]*a[2] for the next step
1833	addu	$7,$24
1834	addu	$1,$25
1835	sltu	$24,$7,$24
1836	addu	$2,$1
1837	addu	$25,$24
1838	sltu	$3,$2,$1
1839	addu	$2,$25
1840	sltu	$25,$2,$25
1841	addu	$3,$25
1842	mflo	($24,$10,$14)
1843	mfhi	($25,$10,$14)
1844	addu	$7,$24	# ($7,$2,$3) += 2*a[6]*a[2]
1845	sltu	$1,$7,$24
1846	 multu	($9,$15)		# start a[5]*a[3] for the next step
1847	addu	$7,$24
1848	addu	$1,$25
1849	sltu	$24,$7,$24
1850	addu	$2,$1
1851	addu	$25,$24
1852	sltu	$1,$2,$1
1853	addu	$2,$25
1854	addu	$3,$1
1855	sltu	$25,$2,$25
1856	addu	$3,$25
1857	mflo	($24,$9,$15)
1858	mfhi	($25,$9,$15)
1859	addu	$7,$24	# ($7,$2,$3) += 2*a[5]*a[3]
1860	sltu	$1,$7,$24
1861	 multu	($8,$8)		# start a[4]*a[4] for the next step
1862	addu	$7,$24
1863	addu	$1,$25
1864	sltu	$24,$7,$24
1865	addu	$2,$1
1866	addu	$25,$24
1867	sltu	$1,$2,$1
1868	addu	$2,$25
1869	addu	$3,$1
1870	sltu	$25,$2,$25
1871	addu	$3,$25
1872	mflo	($24,$8,$8)
1873	mfhi	($25,$8,$8)
1874	addu	$7,$24	# ($7,$2,$3) += a[4]*a[4]
1875	sltu	$1,$7,$24
1876	 multu	($14,$11)		# start a[2]*a[7] for column 9
1877	addu	$25,$1
1878	addu	$2,$25
1879	sltu	$1,$2,$25
1880	addu	$3,$1
1881	sw	$7,8*4($4)	# r[8]
1882	mflo	($24,$14,$11)
1883	mfhi	($25,$14,$11)
1884	addu	$2,$24	# column 9: ($2,$3) += 2*a[2]*a[7]; $7 := carry-out
1885	sltu	$1,$2,$24
1886	 multu	($15,$10)		# start a[3]*a[6] for the next step
1887	addu	$2,$24
1888	addu	$1,$25
1889	sltu	$24,$2,$24
1890	addu	$3,$1
1891	addu	$25,$24
1892	sltu	$7,$3,$1
1893	addu	$3,$25
1894	sltu	$25,$3,$25
1895	addu	$7,$25
1896	mflo	($24,$15,$10)
1897	mfhi	($25,$15,$10)
1898	addu	$2,$24	# ($2,$3,$7) += 2*a[3]*a[6]
1899	sltu	$1,$2,$24
1900	 multu	($8,$9)		# start a[4]*a[5] for the next step
1901	addu	$2,$24
1902	addu	$1,$25
1903	sltu	$24,$2,$24
1904	addu	$3,$1
1905	addu	$25,$24
1906	sltu	$1,$3,$1
1907	addu	$3,$25
1908	addu	$7,$1
1909	sltu	$25,$3,$25
1910	addu	$7,$25
1911	mflo	($24,$8,$9)
1912	mfhi	($25,$8,$9)
1913	addu	$2,$24	# ($2,$3,$7) += 2*a[4]*a[5]
1914	sltu	$1,$2,$24
1915	 multu	($11,$15)		# start a[7]*a[3] for column 10
1916	addu	$2,$24
1917	addu	$1,$25
1918	sltu	$24,$2,$24
1919	addu	$3,$1
1920	addu	$25,$24
1921	sltu	$1,$3,$1
1922	addu	$3,$25
1923	addu	$7,$1
1924	sltu	$25,$3,$25
1925	addu	$7,$25
1926	mflo	($24,$11,$15)
1927	mfhi	($25,$11,$15)
1928	sw	$2,9*4($4)	# r[9]
1929	addu	$3,$24	# column 10: ($3,$7) += 2*a[7]*a[3]; $2 := carry-out
1930	sltu	$1,$3,$24
1931	 multu	($10,$8)		# start a[6]*a[4] for the next step
1932	addu	$3,$24
1933	addu	$1,$25
1934	sltu	$24,$3,$24
1935	addu	$7,$1
1936	addu	$25,$24
1937	sltu	$2,$7,$1
1938	addu	$7,$25
1939	sltu	$25,$7,$25
1940	addu	$2,$25
1941	mflo	($24,$10,$8)
1942	mfhi	($25,$10,$8)
1943	addu	$3,$24	# ($3,$7,$2) += 2*a[6]*a[4]
1944	sltu	$1,$3,$24
1945	 multu	($9,$9)		# start a[5]*a[5] for the next step
1946	addu	$3,$24
1947	addu	$1,$25
1948	sltu	$24,$3,$24
1949	addu	$7,$1
1950	addu	$25,$24
1951	sltu	$1,$7,$1
1952	addu	$7,$25
1953	addu	$2,$1
1954	sltu	$25,$7,$25
1955	addu	$2,$25
1956	mflo	($24,$9,$9)
1957	mfhi	($25,$9,$9)
1958	addu	$3,$24	# ($3,$7,$2) += a[5]*a[5]
1959	sltu	$1,$3,$24
1960	 multu	($8,$11)		# start a[4]*a[7] for column 11
1961	addu	$25,$1
1962	addu	$7,$25
1963	sltu	$1,$7,$25
1964	addu	$2,$1
1965	sw	$3,10*4($4)	# r[10]
1966	mflo	($24,$8,$11)
1967	mfhi	($25,$8,$11)
1968	addu	$7,$24	# column 11: ($7,$2) += 2*a[4]*a[7]; $3 := carry-out
1969	sltu	$1,$7,$24
1970	 multu	($9,$10)		# start a[5]*a[6] for the next step
1971	addu	$7,$24
1972	addu	$1,$25
1973	sltu	$24,$7,$24
1974	addu	$2,$1
1975	addu	$25,$24
1976	sltu	$3,$2,$1
1977	addu	$2,$25
1978	sltu	$25,$2,$25
1979	addu	$3,$25
1980	mflo	($24,$9,$10)
1981	mfhi	($25,$9,$10)
1982	addu	$7,$24	# ($7,$2,$3) += 2*a[5]*a[6]
1983	sltu	$1,$7,$24
1984	 multu	($11,$9)		# start a[7]*a[5] for column 12
1985	addu	$7,$24
1986	addu	$1,$25
1987	sltu	$24,$7,$24
1988	addu	$2,$1
1989	addu	$25,$24
1990	sltu	$1,$2,$1
1991	addu	$2,$25
1992	addu	$3,$1
1993	sltu	$25,$2,$25
1994	addu	$3,$25
1995	mflo	($24,$11,$9)
1996	mfhi	($25,$11,$9)
1997	sw	$7,11*4($4)	# r[11]
1998	addu	$2,$24	# column 12: ($2,$3) += 2*a[7]*a[5]; $7 := carry-out
1999	sltu	$1,$2,$24
2000	 multu	($10,$10)		# start a[6]*a[6] for the next step
2001	addu	$2,$24
2002	addu	$1,$25
2003	sltu	$24,$2,$24
2004	addu	$3,$1
2005	addu	$25,$24
2006	sltu	$7,$3,$1
2007	addu	$3,$25
2008	sltu	$25,$3,$25
2009	addu	$7,$25
2010	mflo	($24,$10,$10)
2011	mfhi	($25,$10,$10)
2012	addu	$2,$24	# ($2,$3,$7) += a[6]*a[6]
2013	sltu	$1,$2,$24
2014	 multu	($10,$11)		# start a[6]*a[7] for column 13
2015	addu	$25,$1
2016	addu	$3,$25
2017	sltu	$1,$3,$25
2018	addu	$7,$1
2019	sw	$2,12*4($4)	# r[12]
2020	mflo	($24,$10,$11)
2021	mfhi	($25,$10,$11)
2022	addu	$3,$24	# column 13: ($3,$7) += 2*a[6]*a[7]; $2 := carry-out
2023	sltu	$1,$3,$24
2024	 multu	($11,$11)		# start a[7]*a[7] for column 14
2025	addu	$3,$24
2026	addu	$1,$25
2027	sltu	$24,$3,$24
2028	addu	$7,$1
2029	addu	$25,$24
2030	sltu	$2,$7,$1
2031	addu	$7,$25
2032	sltu	$25,$7,$25
2033	addu	$2,$25
2034	mflo	($24,$11,$11)
2035	mfhi	($25,$11,$11)
2036	sw	$3,13*4($4)	# r[13]
2037
2038	addu	$7,$24	# column 14: $7 += low word of a[7]*a[7]
2039	sltu	$1,$7,$24	# $1 = carry
2040	addu	$25,$1	# high word + carry
2041	addu	$2,$25	# $2 += high word + carry; this is the top result word
2042	sw	$7,14*4($4)	# r[14]
2043	sw	$2,15*4($4)	# r[15]
2044
2045	.set	noreorder
2046	jr	$31
2047	nop	# branch delay slot, written out because noreorder is in effect
2048.end	bn_sqr_comba8
2049
# bn_sqr_comba4: square the four 32-bit words read from $5 and store the
# eight-word result through $4.
#
# Inputs  : $4 = destination (words 0..7 are written)
#           $5 = source      (words 0..3 are read)
# Scratch : $1 ($at), $2, $3, $6, $7, $12-$15, $24, $25.  No stack is used.
# Method  : column-by-column accumulation.  $12-$15 hold a[0..3].  $2/$3/$7
#           form a three-word accumulator whose roles rotate each time a
#           column is stored; $24/$25 receive the low/high word of every
#           product and $1 holds carry bits.  A cross product a[i]*a[j]
#           (i != j) is added twice, a square a[i]*a[i] once.  Every
#           multiply is issued in the middle of the previous accumulate
#           step and fetched afterwards.
# NOTE(review): multu/mflo/mfhi with parenthesised operands look like
#           preprocessor macros, presumably from mips_arch.h -- confirm.
2050.align	5
2051.globl	bn_sqr_comba4
2052.ent	bn_sqr_comba4
2053bn_sqr_comba4:
2054	.set	reorder
2055	lw	$12,0($5)	# $12 = a[0]
2056	lw	$13,4($5)	# $13 = a[1]
2057	multu	($12,$12)		# column 0: start a[0]*a[0]
2058	lw	$14,2*4($5)	# $14 = a[2]
2059	lw	$15,3*4($5)	# $15 = a[3]
2060	mflo	($2,$12,$12)	# $2 = low word of a[0]*a[0]
2061	mfhi	($3,$12,$12)	# $3 = high word, carried into column 1
2062	sw	$2,0($4)	# r[0]
2063
2064	multu	($12,$13)		# column 1: a[0]*a[1], to be added twice
2065	mflo	($24,$12,$13)	# $24 = low word of the product
2066	mfhi	($25,$12,$13)	# $25 = high word of the product
2067	slt	$2,$25,$0	# $2 = sign bit of $25, i.e. the bit shifted out by doubling
2068	sll	$25,1	# double the high word
2069	 multu	($14,$12)		# start a[2]*a[0] for column 2
2070	slt	$6,$24,$0	# $6 = sign bit of $24 (overwrites $6)
2071	addu	$25,$6	# ...which becomes bit 0 of the doubled high word
2072	sll	$24,1	# double the low word
2073	addu	$3,$24	# $3 += doubled low word
2074	sltu	$1,$3,$24	# $1 = carry out of that add
2075	addu	$7,$25,$1	# $7 = doubled high word + carry
2076	sw	$3,4($4)	# r[1]
2077	mflo	($24,$14,$12)	# $24/$25 = a[2]*a[0]
2078	mfhi	($25,$14,$12)
2079	addu	$7,$24	# column 2: ($7,$2) += 2*a[2]*a[0]; first add of the low word
2080	sltu	$1,$7,$24	# $1 = carry of the first add
2081	 multu	($13,$13)		# start a[1]*a[1] for the next step
2082	addu	$7,$24	# second add of the low word
2083	addu	$1,$25	# $1 = high word + first carry
2084	sltu	$24,$7,$24	# $24 = carry of the second add
2085	addu	$2,$1	# $2 += high word + first carry
2086	addu	$25,$24	# $25 = high word + second carry
2087	sltu	$3,$2,$1	# $3 = carry-out; replaces old $3, already stored as r[1]
2088	addu	$2,$25	# $2 += high word + second carry
2089	sltu	$25,$2,$25	# carry out of that add
2090	addu	$3,$25	# $3 += carry
2091	mflo	($24,$13,$13)
2092	mfhi	($25,$13,$13)
2093	addu	$7,$24	# ($7,$2,$3) += a[1]*a[1] (a square is added once)
2094	sltu	$1,$7,$24	# $1 = carry out of the low word
2095	 multu	($12,$15)		# start a[0]*a[3] for column 3
2096	addu	$25,$1	# fold the carry into the high word
2097	addu	$2,$25
2098	sltu	$1,$2,$25	# $1 = carry out of the middle word
2099	addu	$3,$1
2100	sw	$7,2*4($4)	# r[2]
2101	mflo	($24,$12,$15)
2102	mfhi	($25,$12,$15)
2103	addu	$2,$24	# column 3: ($2,$3) += 2*a[0]*a[3]; $7 := carry-out
2104	sltu	$1,$2,$24
2105	 multu	($13,$14)		# start a[1]*a[2] for the next step
2106	addu	$2,$24
2107	addu	$1,$25
2108	sltu	$24,$2,$24
2109	addu	$3,$1
2110	addu	$25,$24
2111	sltu	$7,$3,$1
2112	addu	$3,$25
2113	sltu	$25,$3,$25
2114	addu	$7,$25
2115	mflo	($24,$13,$14)
2116	mfhi	($25,$13,$14)
2117	addu	$2,$24	# ($2,$3,$7) += 2*a[1]*a[2]
2118	sltu	$1,$2,$24
2119	 multu	($15,$13)		# start a[3]*a[1] for column 4
2120	addu	$2,$24
2121	addu	$1,$25
2122	sltu	$24,$2,$24
2123	addu	$3,$1
2124	addu	$25,$24
2125	sltu	$1,$3,$1	# $7 already holds carries for this column, so the carry goes to $1...
2126	addu	$3,$25
2127	addu	$7,$1	# ...and is added to $7 rather than replacing it
2128	sltu	$25,$3,$25
2129	addu	$7,$25
2130	mflo	($24,$15,$13)
2131	mfhi	($25,$15,$13)
2132	sw	$2,3*4($4)	# r[3]
2133	addu	$3,$24	# column 4: ($3,$7) += 2*a[3]*a[1]; $2 := carry-out
2134	sltu	$1,$3,$24
2135	 multu	($14,$14)		# start a[2]*a[2] for the next step
2136	addu	$3,$24
2137	addu	$1,$25
2138	sltu	$24,$3,$24
2139	addu	$7,$1
2140	addu	$25,$24
2141	sltu	$2,$7,$1
2142	addu	$7,$25
2143	sltu	$25,$7,$25
2144	addu	$2,$25
2145	mflo	($24,$14,$14)
2146	mfhi	($25,$14,$14)
2147	addu	$3,$24	# ($3,$7,$2) += a[2]*a[2]
2148	sltu	$1,$3,$24
2149	 multu	($14,$15)		# start a[2]*a[3] for column 5
2150	addu	$25,$1
2151	addu	$7,$25
2152	sltu	$1,$7,$25
2153	addu	$2,$1
2154	sw	$3,4*4($4)	# r[4]
2155	mflo	($24,$14,$15)
2156	mfhi	($25,$14,$15)
2157	addu	$7,$24	# column 5: ($7,$2) += 2*a[2]*a[3]; $3 := carry-out
2158	sltu	$1,$7,$24
2159	 multu	($15,$15)		# start a[3]*a[3] for column 6
2160	addu	$7,$24
2161	addu	$1,$25
2162	sltu	$24,$7,$24
2163	addu	$2,$1
2164	addu	$25,$24
2165	sltu	$3,$2,$1
2166	addu	$2,$25
2167	sltu	$25,$2,$25
2168	addu	$3,$25
2169	mflo	($24,$15,$15)
2170	mfhi	($25,$15,$15)
2171	sw	$7,5*4($4)	# r[5]
2172
2173	addu	$2,$24	# column 6: $2 += low word of a[3]*a[3]
2174	sltu	$1,$2,$24	# $1 = carry
2175	addu	$25,$1	# high word + carry
2176	addu	$3,$25	# $3 += high word + carry; this is the top result word
2177	sw	$2,6*4($4)	# r[6]
2178	sw	$3,7*4($4)	# r[7]
2179
2180	.set	noreorder
2181	jr	$31
2182	nop	# branch delay slot, written out because noreorder is in effect
2183.end	bn_sqr_comba4
2184