/* This assembler is for R2000/R3000 machines, or higher ones that do
 * not want to do any 64 bit arithmetic.
 * Make sure that the SSLeay bignum library is compiled with
 * THIRTY_TWO_BIT set.
 * This must either be compiled with the system CC, or, if you use GNU gas,
 * cc -E mips1.s|gas -o mips1.o
 */
8	.set	reorder
9	.set	noat
10
11#define R1	$1
12#define CC	$2
13#define	R2	$3
14#define R3	$8
15#define R4	$9
16#define L1	$10
17#define L2 	$11
18#define L3	$12
19#define L4 	$13
20#define H1 	$14
21#define H2	$15
22#define H3	$24
23#define H4	$25
24
25#define P1	$4
26#define P2	$5
27#define P3	$6
28#define P4	$7
29
30	.align	2
31	.ent	bn_mul_add_words
32	.globl	bn_mul_add_words
33.text
34bn_mul_add_words:
35	.frame	$sp,0,$31
36	.mask	0x00000000,0
37	.fmask	0x00000000,0
38
39	#blt	P3,4,$lab34
40
41	subu	R1,P3,4
42	move	CC,$0
43	bltz	R1,$lab34
44$lab2:
45	lw	R1,0(P1)
46	 lw	L1,0(P2)
47	lw	R2,4(P1)
48	 lw	L2,4(P2)
49	lw	R3,8(P1)
50	 lw	L3,8(P2)
51	lw	R4,12(P1)
52	 lw	L4,12(P2)
53	multu	L1,P4
54	 addu	R1,R1,CC
55	mflo	L1
56	 sltu	CC,R1,CC
57	addu	R1,R1,L1
58	 mfhi	H1
59	sltu	L1,R1,L1
60	 sw	R1,0(P1)
61	addu	CC,CC,L1
62	 multu	L2,P4
63	addu	CC,H1,CC
64	mflo	L2
65	 addu	R2,R2,CC
66	sltu	CC,R2,CC
67	 mfhi	H2
68	addu	R2,R2,L2
69	 addu	P2,P2,16
70	sltu	L2,R2,L2
71	 sw	R2,4(P1)
72	addu	CC,CC,L2
73	 multu	L3,P4
74	addu	CC,H2,CC
75	mflo	L3
76	 addu	R3,R3,CC
77	sltu	CC,R3,CC
78	 mfhi	H3
79	addu	R3,R3,L3
80	 addu	P1,P1,16
81	sltu	L3,R3,L3
82	 sw	R3,-8(P1)
83	addu	CC,CC,L3
84	 multu	L4,P4
85	addu	CC,H3,CC
86	mflo	L4
87	 addu	R4,R4,CC
88	sltu	CC,R4,CC
89	 mfhi	H4
90	addu	R4,R4,L4
91	 subu	P3,P3,4
92	sltu	L4,R4,L4
93	addu	CC,CC,L4
94	addu	CC,H4,CC
95
96	subu	R1,P3,4
97	sw	R4,-4(P1)	# delay slot
98	bgez	R1,$lab2
99
100	bleu	P3,0,$lab3
101	.align	2
102$lab33:
103	lw	L1,0(P2)
104	 lw	R1,0(P1)
105	multu	L1,P4
106	 addu	R1,R1,CC
107	sltu	CC,R1,CC
108	 addu	P1,P1,4
109	mflo	L1
110	 mfhi	H1
111	addu	R1,R1,L1
112	 addu	P2,P2,4
113	sltu	L1,R1,L1
114	 subu	P3,P3,1
115	addu	CC,CC,L1
116	 sw	R1,-4(P1)
117	addu	CC,H1,CC
118	 bgtz	P3,$lab33
119	j	$31
120	.align	2
121$lab3:
122	j	$31
123	.align	2
124$lab34:
125	bgt	P3,0,$lab33
126	j	$31
127	.end	bn_mul_add_words
128
129	.align	2
130	# Program Unit: bn_mul_words
131	.ent	bn_mul_words
132	.globl	bn_mul_words
133.text
134bn_mul_words:
135	.frame	$sp,0,$31
136	.mask	0x00000000,0
137	.fmask	0x00000000,0
138
139	subu	P3,P3,4
140	move	CC,$0
141	bltz	P3,$lab45
142$lab44:
143	lw	L1,0(P2)
144	 lw	L2,4(P2)
145	lw	L3,8(P2)
146	 lw	L4,12(P2)
147	multu	L1,P4
148	 subu	P3,P3,4
149	mflo	L1
150	 mfhi	H1
151	addu	L1,L1,CC
152	 multu	L2,P4
153	sltu	CC,L1,CC
154	 sw	L1,0(P1)
155	addu	CC,H1,CC
156	 mflo	L2
157	mfhi	H2
158	 addu	L2,L2,CC
159	multu	L3,P4
160	 sltu	CC,L2,CC
161	sw	L2,4(P1)
162	 addu	CC,H2,CC
163	mflo	L3
164	 mfhi	H3
165	addu	L3,L3,CC
166	 multu	L4,P4
167	sltu	CC,L3,CC
168	 sw	L3,8(P1)
169	addu	CC,H3,CC
170	 mflo	L4
171	mfhi	H4
172	 addu	L4,L4,CC
173	addu	P1,P1,16
174	 sltu	CC,L4,CC
175	addu	P2,P2,16
176	 addu	CC,H4,CC
177	sw	L4,-4(P1)
178
179	bgez	P3,$lab44
180	b	$lab45
181$lab46:
182	lw	L1,0(P2)
183	 addu	P1,P1,4
184	multu	L1,P4
185	 addu	P2,P2,4
186	mflo	L1
187	 mfhi	H1
188	addu	L1,L1,CC
189	 subu	P3,P3,1
190	sltu	CC,L1,CC
191	 sw	L1,-4(P1)
192	addu	CC,H1,CC
193	 bgtz	P3,$lab46
194	j	$31
195$lab45:
196	addu	P3,P3,4
197	bgtz	P3,$lab46
198	j	$31
199	.align	2
200	.end	bn_mul_words
201
202	# Program Unit: bn_sqr_words
203	.ent	bn_sqr_words
204	.globl	bn_sqr_words
205.text
206bn_sqr_words:
207	.frame	$sp,0,$31
208	.mask	0x00000000,0
209	.fmask	0x00000000,0
210
211	subu	P3,P3,4
212	bltz	P3,$lab55
213$lab54:
214	lw	L1,0(P2)
215	 lw	L2,4(P2)
216	lw	L3,8(P2)
217	 lw	L4,12(P2)
218
219	multu	L1,L1
220	 subu	P3,P3,4
221	mflo	L1
222	 mfhi	H1
223	sw	L1,0(P1)
224	 sw	H1,4(P1)
225
226	multu	L2,L2
227	 addu	P1,P1,32
228	mflo	L2
229	 mfhi	H2
230	sw	L2,-24(P1)
231	 sw	H2,-20(P1)
232
233	multu	L3,L3
234	 addu	P2,P2,16
235	mflo	L3
236	 mfhi	H3
237	sw	L3,-16(P1)
238	 sw	H3,-12(P1)
239
240	multu	L4,L4
241
242	mflo	L4
243	 mfhi	H4
244	sw	L4,-8(P1)
245	 sw	H4,-4(P1)
246
247	bgtz	P3,$lab54
248	b	$lab55
249$lab56:
250	lw	L1,0(P2)
251	addu	P1,P1,8
252	multu	L1,L1
253	addu	P2,P2,4
254	subu	P3,P3,1
255	mflo	L1
256	mfhi	H1
257	sw	L1,-8(P1)
258	sw	H1,-4(P1)
259
260	bgtz	P3,$lab56
261	j	$31
262$lab55:
263	addu	P3,P3,4
264	bgtz	P3,$lab56
265	j	$31
266	.align	2
267	.end	bn_sqr_words
268
269	# Program Unit: bn_add_words
270	.ent	bn_add_words
271	.globl	bn_add_words
272.text
273bn_add_words: 	 # 0x590
274	.frame	$sp,0,$31
275	.mask	0x00000000,0
276	.fmask	0x00000000,0
277
278	subu	P4,P4,4
279	move	CC,$0
280	bltz	P4,$lab65
281$lab64:
282	lw	L1,0(P2)
283	lw	R1,0(P3)
284	lw	L2,4(P2)
285	lw	R2,4(P3)
286
287	addu	L1,L1,CC
288	 lw	L3,8(P2)
289	sltu	CC,L1,CC
290	 addu	L1,L1,R1
291	sltu	R1,L1,R1
292	 lw	R3,8(P3)
293	addu	CC,CC,R1
294	 lw	L4,12(P2)
295
296	addu	L2,L2,CC
297	 lw	R4,12(P3)
298	sltu	CC,L2,CC
299	 addu	L2,L2,R2
300	sltu	R2,L2,R2
301	 sw	L1,0(P1)
302	addu	CC,CC,R2
303	 addu	P1,P1,16
304	addu	L3,L3,CC
305	 sw	L2,-12(P1)
306
307	sltu	CC,L3,CC
308	 addu	L3,L3,R3
309	sltu	R3,L3,R3
310	 addu	P2,P2,16
311	addu	CC,CC,R3
312
313	addu	L4,L4,CC
314	 addu	P3,P3,16
315	sltu	CC,L4,CC
316	 addu	L4,L4,R4
317	subu	P4,P4,4
318	 sltu	R4,L4,R4
319	sw	L3,-8(P1)
320	 addu	CC,CC,R4
321	sw	L4,-4(P1)
322
323	bgtz	P4,$lab64
324	b	$lab65
325$lab66:
326	lw	L1,0(P2)
327	 lw	R1,0(P3)
328	addu	L1,L1,CC
329	 addu	P1,P1,4
330	sltu	CC,L1,CC
331	 addu	P2,P2,4
332	addu	P3,P3,4
333	 addu	L1,L1,R1
334	subu	P4,P4,1
335	 sltu	R1,L1,R1
336	sw	L1,-4(P1)
337	 addu	CC,CC,R1
338
339	bgtz	P4,$lab66
340	j	$31
341$lab65:
342	addu	P4,P4,4
343	bgtz	P4,$lab66
344	j	$31
345	.end	bn_add_words
346
347	# Program Unit: bn_div64
348	.set	at
349	.set	reorder
350	.text
351	.align	2
352	.globl	bn_div64
353 # 321		{
354	.ent	bn_div64 2
355bn_div64:
356	subu	$sp, 64
357	sw	$31, 56($sp)
358	sw	$16, 48($sp)
359	.mask	0x80010000, -56
360	.frame	$sp, 64, $31
361	move	$9, $4
362	move	$12, $5
363	move	$16, $6
364 # 322		BN_ULONG dh,dl,q,ret=0,th,tl,t;
365	move	$31, $0
366 # 323		int i,count=2;
367	li	$13, 2
368 # 324
369 # 325		if (d == 0) return(BN_MASK2);
370	bne	$16, 0, $80
371	li	$2, -1
372	b	$93
373$80:
374 # 326
375 # 327		i=BN_num_bits_word(d);
376	move	$4, $16
377	sw	$31, 16($sp)
378	sw	$9, 24($sp)
379	sw	$12, 32($sp)
380	sw	$13, 40($sp)
381	.livereg	0x800ff0e,0xfff
382	jal	BN_num_bits_word
383	li	$4, 32
384	lw	$31, 16($sp)
385	lw	$9, 24($sp)
386	lw	$12, 32($sp)
387	lw	$13, 40($sp)
388	move	$3, $2
389 # 328		if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
390	beq	$2, $4, $81
391	li	$14, 1
392	sll	$15, $14, $2
393	bleu	$9, $15, $81
394 # 329			{
395 # 330	#if !defined(NO_STDIO) && !defined(WIN16)
396 # 331			fprintf(stderr,"Division would overflow (%d)\n",i);
397 # 332	#endif
398 # 333			abort();
399	sw	$3, 8($sp)
400	sw	$9, 24($sp)
401	sw	$12, 32($sp)
402	sw	$13, 40($sp)
403	sw	$31, 26($sp)
404	.livereg	0xff0e,0xfff
405	jal	abort
406	lw	$3, 8($sp)
407	li	$4, 32
408	lw	$9, 24($sp)
409	lw	$12, 32($sp)
410	lw	$13, 40($sp)
411	lw	$31, 26($sp)
412 # 334			}
413$81:
414 # 335		i=BN_BITS2-i;
415	subu	$3, $4, $3
416 # 336		if (h >= d) h-=d;
417	bltu	$9, $16, $82
418	subu	$9, $9, $16
419$82:
420 # 337
421 # 338		if (i)
422	beq	$3, 0, $83
423 # 339			{
424 # 340			d<<=i;
425	sll	$16, $16, $3
426 # 341			h=(h<<i)|(l>>(BN_BITS2-i));
427	sll	$24, $9, $3
428	subu	$25, $4, $3
429	srl	$14, $12, $25
430	or	$9, $24, $14
431 # 342			l<<=i;
432	sll	$12, $12, $3
433 # 343			}
434$83:
435 # 344		dh=(d&BN_MASK2h)>>BN_BITS4;
436 # 345		dl=(d&BN_MASK2l);
437	and	$8, $16, -65536
438	srl	$8, $8, 16
439	and	$10, $16, 65535
440	li	$6, -65536
441$84:
442 # 346		for (;;)
443 # 347			{
444 # 348			if ((h>>BN_BITS4) == dh)
445	srl	$15, $9, 16
446	bne	$8, $15, $85
447 # 349				q=BN_MASK2l;
448	li	$5, 65535
449	b	$86
450$85:
451 # 350			else
452 # 351				q=h/dh;
453	divu	$5, $9, $8
454$86:
455 # 352
456 # 353			for (;;)
457 # 354				{
458 # 355				t=(h-q*dh);
459	mul	$4, $5, $8
460	subu	$2, $9, $4
461	move	$3, $2
462 # 356				if ((t&BN_MASK2h) ||
463 # 357					((dl*q) <= (
464 # 358						(t<<BN_BITS4)+
465 # 359						((l&BN_MASK2h)>>BN_BITS4))))
466	and	$25, $2, $6
467	bne	$25, $0, $87
468	mul	$24, $10, $5
469	sll	$14, $3, 16
470	and	$15, $12, $6
471	srl	$25, $15, 16
472	addu	$15, $14, $25
473	bgtu	$24, $15, $88
474$87:
475 # 360					break;
476	mul	$3, $10, $5
477	b	$89
478$88:
479 # 361				q--;
480	addu	$5, $5, -1
481 # 362				}
482	b	$86
483$89:
484 # 363			th=q*dh;
485 # 364			tl=q*dl;
486 # 365			t=(tl>>BN_BITS4);
487 # 366			tl=(tl<<BN_BITS4)&BN_MASK2h;
488	sll	$14, $3, 16
489	and	$2, $14, $6
490	move	$11, $2
491 # 367			th+=t;
492	srl	$25, $3, 16
493	addu	$7, $4, $25
494 # 368
495 # 369			if (l < tl) th++;
496	bgeu	$12, $2, $90
497	addu	$7, $7, 1
498$90:
499 # 370			l-=tl;
500	subu	$12, $12, $11
501 # 371			if (h < th)
502	bgeu	$9, $7, $91
503 # 372				{
504 # 373				h+=d;
505	addu	$9, $9, $16
506 # 374				q--;
507	addu	$5, $5, -1
508 # 375				}
509$91:
510 # 376			h-=th;
511	subu	$9, $9, $7
512 # 377
513 # 378			if (--count == 0) break;
514	addu	$13, $13, -1
515	beq	$13, 0, $92
516 # 379
517 # 380			ret=q<<BN_BITS4;
518	sll	$31, $5, 16
519 # 381			h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
520	sll	$24, $9, 16
521	srl	$15, $12, 16
522	or	$9, $24, $15
523 # 382			l=(l&BN_MASK2l)<<BN_BITS4;
524	and	$12, $12, 65535
525	sll	$12, $12, 16
526 # 383			}
527	b	$84
528$92:
529 # 384		ret|=q;
530	or	$31, $31, $5
531 # 385		return(ret);
532	move	$2, $31
533$93:
534	lw	$16, 48($sp)
535	lw	$31, 56($sp)
536	addu	$sp, 64
537	j	$31
538	.end	bn_div64
539
540