1# Test new instructions
# Branch target referenced by the bcl and bncl stanzas that follow.
branchpoint:

	.text
# bcl taking a label operand.
	.globl bcl
bcl:
	bcl     branchpoint

	.text
# bncl taking a label operand.
	.globl bncl
bncl:
	bncl    branchpoint
13
14	.text
15	.global cmpz
16cmpz:
17	cmpz fp
18
19	.text
20	.global cmpeq
21cmpeq:
22	cmpeq fp, fp
23
24	.text
25	.global maclh1
26maclh1:
27	maclh1 fp, fp
28
29	.text
30	.global macsl0
31msblo:
32	msblo fp, fp
33
34	.text
35	.global mulwu1
36mulwu1:
37	mulwu1 fp, fp
38
39	.text
40	.global macwu1
41macwu1:
42	macwu1 fp, fp
43
44	.text
45	.global sadd
46sadd:
47	sadd
48
49	.text
50	.global satb
51satb:
52	satb fp, fp
53
54
55	.text
56	.global mulhi
57mulhi:
58	mulhi fp, fp, a1
59
60	.text
61	.global mullo
62mullo:
63	mullo fp, fp, a0
64
65	.text
66	.global divh
67divh:
68	divh fp, fp
69
70	.text
71	.global machi
72machi:
73	machi fp, fp, a1
74
75	.text
76	.global maclo
77maclo:
78	maclo fp, fp, a0
79
80	.text
81	.global mvfachi
82mvfachi:
83	mvfachi fp, a1
84
85	.text
86	.global mvfacmi
87mvfacmi:
88	mvfacmi fp, a1
89
90	.text
91	.global mvfaclo
92mvfaclo:
93	mvfaclo fp, a1
94
95	.text
96	.global mvtachi
97mvtachi:
98	mvtachi fp, a1
99
100	.text
101	.global mvtaclo
102mvtaclo:
103	mvtaclo fp, a0
104
105	.text
106	.global rac
107rac:
108	rac a1
109
110	.text
111	.global rac_ds
112rac_ds:
113	rac a1, a0
114
115	.text
116	.global rac_dsi
117rac_dsi:
118	rac a0, a1, #1
119
120	.text
121	.global rach
122rach:
123	rach a1
124
125	.text
126	.global rach_ds
127rach_ds:
128	rach a0, a1
129
130	.text
131	.global rach_dsi
132rach_dsi:
133	rach a1, a0, #2
134
# Test explicitly parallel and implicitly parallel instructions,
# including apparent instruction sequence reordering.
137	.text
138	.global bc__add
139bc__add:
140	bc bcl || add fp, fp
141# Use bc.s here as bc is relaxable and thus a nop will be emitted.
142	bc.s bcl
143	add fp, fp
144
145	.text
146	.global bcl__addi
147bcl__addi:
148	bcl bcl || addi fp, #77
149	addi fp, #77
150# Use bcl.s here as bcl is relaxable and thus the parallelization won't happen.
151	bcl.s bcl
152
153	.text
154	.global bl__addv
155bl__addv:
156	bl bcl || addv fp, fp
157	addv fp, fp
158# Use bl.s here as bl is relaxable and thus the parallelization won't happen.
159	bl.s bcl
160
161	.text
162	.global bnc__addx
163bnc__addx:
164	bnc bcl || addx fp, fp
165# Use bnc.s here as bnc is relaxable and thus the parallelization attempt won't
166# happen.  Things still won't be parallelized, but we want this test to try.
167	bnc.s bcl
168	addx fp, fp
169
170	.text
171	.global bncl__and
172bncl__and:
173	bncl bcl || and fp, fp
174	and fp, fp
175	bncl.s bcl
176
177	.text
178	.global bra__cmp
179bra__cmp:
180	bra bcl || cmp fp, fp
181	cmp fp, fp
182# Use bra.s here as bra is relaxable and thus the parallelization won't happen.
183	bra.s bcl
184
185	.text
186	.global jl__cmpeq
187jl__cmpeq:
188	jl fp || cmpeq fp, fp
189	cmpeq fp, fp
190	jl fp
191
192	.text
193	.global jmp__cmpu
194jmp__cmpu:
195	jmp fp || cmpu fp, fp
196	cmpu fp, fp
197	jmp fp
198
199	.text
200	.global ld__cmpz
201ld__cmpz:
202	ld fp, @fp || cmpz r1
203	cmpz r1
204	ld fp, @fp
205
206	.text
207	.global ld__ldi
208ld__ldi:
209	ld fp, @r1+ || ldi r2, #77
210	ld fp, @r1+
211	ldi r2, #77
212
213	.text
214	.global ldb__mv
215ldb__mv:
216	ldb fp, @fp || mv r2, fp
217	ldb fp, @fp
218	mv r2, fp
219
220	.text
221	.global ldh__neg
222ldh__neg:
223	ldh fp, @fp || neg r2, fp
224	ldh fp, @fp
225	neg r2, fp
226
227	.text
228	.global ldub__nop
229ldub__nop:
230	ldub fp, @fp || nop
231	ldub fp, @fp
232	nop
233
234	.text
235	.global lduh__not
236lduh__not:
237	lduh fp, @fp || not r2, fp
238	lduh fp, @fp
239	not r2, fp
240
241	.text
242	.global lock__or
243lock__or:
244	lock fp, @fp || or r2, fp
245	lock fp, @fp
246	or r2, fp
247
248	.text
249	.global mvfc__sub
250mvfc__sub:
251	mvfc fp, cr1 || sub r2, fp
252	mvfc fp, cr1
253	sub r2, fp
254
255	.text
256	.global mvtc__subv
257mvtc__subv:
258	mvtc fp, cr2 || subv r2, fp
259	mvtc fp, cr2
260	subv r2, fp
261
262	.text
263	.global rte__subx
264rte__subx:
265	rte || sub r2, fp
266	rte
267	subx r2, fp
268
269	.text
270	.global sll__xor
271sll__xor:
272	sll fp, r1 || xor r2, fp
273	sll fp, r1
274	xor r2, fp
275
276	.text
277	.global slli__machi
278slli__machi:
279	slli fp, #22 || machi r2, fp
280	slli fp, #22
281	machi r2, fp
282
283	.text
284	.global sra__maclh1
285sra__maclh1:
286	sra fp, fp || maclh1 r2, fp
287	sra fp, fp
288	maclh1 r2, fp
289
290	.text
291	.global srai__maclo
292srai__maclo:
293	srai fp, #22 || maclo r2, fp
294	srai fp, #22
295	maclo r2, fp
296
297	.text
298	.global srl__macwhi
299srl__macwhi:
300	srl fp, fp || macwhi r2, fp
301	srl fp, fp
302	macwhi r2, fp
303
304	.text
305	.global srli__macwlo
306srli__macwlo:
307	srli fp, #22 || macwlo r2, fp
308	srli fp, #22
309	macwlo r2, fp
310
311	.text
312	.global st__macwu1
313st__macwu1:
314	st fp, @fp || macwu1 r2, fp
315	st fp, @fp
316	macwu1 r2, fp
317
318	.text
319	.global st__msblo
320st__msblo:
321	st fp, @+fp || msblo r2, fp
322	st fp, @+fp
323	msblo r2, fp
324
325	.text
326	.global st__mul
327st__mul:
328	st fp, @-fp || mul r2, fp
329	st fp, @-fp
330	mul r2, fp
331
332	.text
333	.global stb__mulhi
334stb__mulhi:
335	stb fp, @fp || mulhi r2, fp
336	stb fp, @fp
337	mulhi r2, fp
338
339	.text
340	.global sth__mullo
341sth__mullo:
342	sth fp, @fp || mullo r2, fp
343	sth fp, @fp
344	mullo r2, fp
345
346	.text
347	.global trap__mulwhi
348trap__mulwhi:
349	trap #2 || mulwhi r2, fp
350	trap #2
351	mulwhi r2, fp
352
353	.text
354	.global unlock__mulwlo
355unlock__mulwlo:
356	unlock fp, @fp || mulwlo r2, fp
357	unlock fp, @fp
358	mulwlo r2, fp
359
360	.text
361	.global add__mulwu1
362add__mulwu1:
363	add fp, fp || mulwu1 r2, fp
364	add fp, fp
365	mulwu1 r2, fp
366
367	.text
368	.global addi__mvfachi
369addi__mvfachi:
370	addi fp, #77 || mvfachi r2, a0
371	addi fp, #77
372	mvfachi r2, a0
373
374	.text
375	.global addv__mvfaclo
376addv__mvfaclo:
377	addv fp, fp || mvfaclo r2, a1
378	addv fp, fp
379	mvfaclo r2, a1
380
381	.text
382	.global addx__mvfacmi
383addx__mvfacmi:
384	addx fp, fp || mvfacmi r2, a0
385	addx fp, fp
386	mvfacmi r2, a0
387
388	.text
389	.global and__mvtachi
390and__mvtachi:
391	and fp, fp || mvtachi r2, a0
392	and fp, fp
393	mvtachi r2, a0
394
395	.text
396	.global cmp__mvtaclo
397cmp__mvtaclo:
398	cmp fp, fp || mvtaclo r2, a0
399	cmp fp, fp
400	mvtaclo r2, a0
401
402	.text
403	.global cmpeq__rac
404cmpeq__rac:
405	cmpeq fp, fp || rac a1
406	cmpeq fp, fp
407	rac a1
408
409	.text
410	.global cmpu__rach
411cmpu__rach:
412	cmpu fp, fp || rach a0, a1
413	cmpu fp, fp
414	rach a1, a1, #1
415
416	.text
417	.global cmpz__sadd
418cmpz__sadd:
419	cmpz fp || sadd
420	cmpz fp
421	sadd
422
423
424
425# Test private instructions
426	.text
427	.global sc
428sc:
429	sc
430	sadd
431
432	.text
433	.global snc
434snc:
435	snc
436	sadd
437
438	.text
439	.global jc
440jc:
441	jc fp
442
443	.text
444	.global jnc
445jnc:
446	jnc fp
447
448	.text
449	.global pcmpbz
450pcmpbz:
451	pcmpbz fp
452
453	.text
454	.global sat
455sat:
456	sat fp, fp
457
458	.text
459	.global sath
460sath:
461	sath fp, fp
462
463
464# Test parallel versions of the private instructions
465
466	.text
467	.global jc__pcmpbz
468jc__pcmpbz:
469	jc fp || pcmpbz fp
470	jc fp
471	pcmpbz fp
472
473	.text
474	.global jnc__ldi
475jnc__ldi:
476	jnc fp || ldi fp, #77
477	jnc fp
478	ldi fp, #77
479
480	.text
481	.global sc__mv
482sc__mv:
483	sc || mv fp, r2
484	sc
485	mv fp, r2
486
487	.text
488	.global snc__neg
489snc__neg:
490	snc || neg fp, r2
491	snc
492	neg fp, r2
493
494# Test automatic and explicit parallelisation of instructions
495	.text
496	.global nop__sadd
497nop__sadd:
498	nop
499	sadd
500
501	.text
502	.global sadd__nop
503sadd__nop:
504	sadd
505	nop
506
507	.text
508	.global sadd__nop_reverse
509sadd__nop_reverse:
510	sadd || nop
511
512	.text
513	.global add__not
514add__not:
515	add  r0, r1
516	not  r3, r5
517
518	.text
519	.global add__not__dest_clash
520add__not_dest_clash:
521	add  r3, r4
522	not  r3, r5
523
524	.text
525	.global add__not__src_clash
526add__not__src_clash:
527	add  r3, r4
528	not  r5, r3
529
530	.text
531	.global add__not__no_clash
532add__not__no_clash:
533	add  r3, r4
534	not  r4, r5
535
536	.text
537	.global mul__sra
538mul__sra:
539	mul  r1, r2
540	sra  r3, r4
541
542	.text
543	.global mul__sra__reverse_src_clash
544mul__sra__reverse_src_clash:
545	mul  r1, r3
546	sra  r3, r4
547
548	.text
549	.global bc__add_
550bc__add_:
551	bc.s label
552	add r1, r2
553
554	.text
555	.global add__bc
556add__bc:
557	add r3, r4
558	bc.s  label
559
560	.text
561	.global bc__add__forced_parallel
562bc__add__forced_parallel:
563	bc label || add r5, r6
564
565	.text
566	.global add__bc__forced_parallel
567add__bc__forced_parallel:
568	add r7, r8 || bc label
569label:
570	nop
571
572; Additional testcases.
573; These insns were added to the chip later.
574
575	.text
576mulwhi:
577	mulwhi fp, fp, a0
578	mulwhi fp, fp, a1
579
580mulwlo:
581	mulwlo fp, fp, a0
582	mulwlo fp, fp, a1
583
584macwhi:
585	macwhi fp, fp, a0
586	macwhi fp, fp, a1
587
588macwlo:
589	macwlo fp, fp, a0
590	macwlo fp, fp, a1
591