1#mach: crisv32
2#output: Basic clock cycles, total @: *\n
3#output: Memory source stall cycles: 82\n
4#output: Memory read-after-write stall cycles: 0\n
5#output: Movem source stall cycles: 6\n
6#output: Movem destination stall cycles: 880\n
7#output: Movem address stall cycles: 4\n
8#output: Multiplication source stall cycles: 18\n
9#output: Jump source stall cycles: 6\n
10#output: Branch misprediction stall cycles: 0\n
11#output: Jump target stall cycles: 0\n
12#sim: --cris-cycles=basic
13
14 .include "testutils.inc"
15
16; Macros for testing correctness of movem destination stall
17; cycles for various insn types.  Beware: macro parameters can
18; be comma or space-delimited.  There are problems (i.e. bugs)
19; with using space-delimited operands and operands with
20; non-alphanumeric characters, like "[]-." so use comma for
21; them.  Lots of trouble passing empty parameters and parameters
22; with comma.  Ugh.  FIXME: Report bugs, fix bugs, fix other
23; shortcomings, fix that darn old macro-parameter-in-string.
24
25; Helper macro.  Unfortunately I find no cleaner way to unify
26; one and two-operand cases, the main problem being the comma
27; operand delimiter clashing with macro operand delimiter.
28 .macro t_S_x_y S insn x y=none
 ; One test case: a movem load, immediately followed by the insn
 ; under test, followed by three nops.
 ;   S    - size suffix (.b, .w or .d) glued onto insn, or the word
 ;          none for no suffix.
 ;   insn - mnemonic of the insn under test.
 ;   x    - first (or only) operand.
 ;   y    - second operand; defaults to none, which selects the
 ;          one-operand form.
 ; Operands that themselves contain a comma are passed quoted by the
 ; callers in this file.
29 movem [r7],r6	; the load whose destination registers insn may have to wait for
30 .ifc \y,none
 ; One-operand form, without or with size suffix.
31  .ifc \S,none
32   \insn \x
33  .else
34   \insn\S \x
35  .endif
36 .else
 ; Two-operand form, without or with size suffix.
37  .ifc \S,none
38   \insn \x,\y
39  .else
40   \insn\S \x,\y
41  .endif
42 .endif
 ; NOTE(review): the nops presumably separate this test from the
 ; movem of the next one so that stalls do not overlap - confirm.
43 nop
44 nop
45 nop
46 .endm
47
48; An insn-type that has a single register operand.  The register
49; may or may not be a source register for the insn.
50 .macro t_r insn
 ; Expands two t_S_x_y tests of the suffix-less, one-operand insn:
 ; first with r3, then with r8.  Each test starts with movem [r7],r6,
 ; so r3 lies in the range named by that movem while r8 does not.
51 t_S_x_y none,\insn,r3
52 t_S_x_y none,\insn,r8
53 .endm
54
55; An insn-type that jumps to the address held in its register operand.
56 .macro t_r_j insn
 ; Runs the t_r tests for a jump-type insn.  Every register the jump
 ; may use is first made to hold the address of local label 1, which
 ; is placed right after the expanded tests.
 ; Side effect: r7 is left pointing at the local table (label 0)
 ; emitted below; this macro does not restore it.
57 move.d 0f,r7	; r7 = address of the table at label 0 below
58 move.d 1f,r8	; r8 = address of label 1 (end of this expansion)
59 move.d r8,r9	; r9 = same address
60 nop
61 nop
62 nop
 ; Movem source table for the tests below: seven dwords, each holding
 ; the address of label 1, aligned with .p2align 5 (2^5 = 32 bytes).
63 .section ".rodata"
64 .p2align 5
650:
66 .dword 1f
67 .dword 1f
68 .dword 1f
69 .dword 1f
70 .dword 1f
71 .dword 1f
72 .dword 1f
73 .previous
 ; NOTE(review): since r3 is loaded with the address of label 1, the
 ; first jump expanded by t_r presumably transfers control straight
 ; to label 1, in which case the r8 variant never executes - confirm.
74 t_r \insn
751:
76 .endm
77
78; An insn-type that has a size-modifier and two register
79; operands.
80 .macro t_xr_r S insn
 ; Four register,register tests of insn with size suffix S (or the
 ; word none).  The operand pairs are (r3,r8), (r8,r3), (r4,r3) and
 ; (r8,r9); r3 and r4 are within the range named by the movem in
 ; t_S_x_y, r8 and r9 are not.
81 t_S_x_y \S \insn r3 r8
82 t_S_x_y \S \insn r8 r3
 ; NOTE(review): copies r3 into r9 between tests; presumably keeps r9
 ; at a known value for the last test - confirm.
83 move.d r3,r9
84 t_S_x_y \S \insn r4 r3
85 t_S_x_y \S \insn r8 r9
86 .endm
87
88; An insn-type that has two register operands.
89 .macro t_r_r insn
 ; Suffix-less variant: forwards to t_xr_r with the word none as the
 ; size argument, giving the same four register,register tests.
90 t_xr_r none \insn
91 .endm
92
93; A t_r_r insn with a byte or word-size modifier.
94 .macro t_wbr_r insn
 ; Runs the t_xr_r register,register tests twice: once with the .b
 ; suffix and once with the .w suffix.
95 t_xr_r .b,\insn
96 t_xr_r .w,\insn
97 .endm
98
99; Ditto, but covering the dword-size modifier as well (.d, .w and .b).
100 .macro t_dwbr_r insn
 ; Runs the t_xr_r register,register tests with the .d suffix, then
 ; the .b and .w variants through t_wbr_r.
101 t_xr_r .d,\insn
102 t_wbr_r \insn
103 .endm
104
105; An insn-type that has a size-modifier, a constant and a
106; register operand.
107 .macro t_xc_r S insn
 ; Two constant,register tests of insn with size suffix S (or the
 ; word none): the constant 24 with r3, then with r8.  r3 is within
 ; the range named by the movem in t_S_x_y, r8 is not.
108 t_S_x_y \S \insn 24 r3
 ; NOTE(review): copies r3 into r9 between the tests; r9 is not an
 ; operand of either test here - purpose not evident, confirm.
109 move.d r3,r9
110 t_S_x_y \S \insn 24 r8
111 .endm
112
113; An insn-type that has a constant and a register operand.
114 .macro t_c_r insn
 ; Suffix-less variant: forwards to t_xc_r with the word none as the
 ; size argument, giving the same two constant,register tests.
115 t_xc_r none \insn
116 .endm
117
118; A t_c_r insn with a byte or word-size modifier.
119 .macro t_wbc_r insn
 ; Runs the t_xc_r constant,register tests twice: once with the .b
 ; suffix and once with the .w suffix.
120 t_xc_r .b,\insn
121 t_xc_r .w,\insn
122 .endm
123
124; Ditto, but covering the dword-size modifier as well (.d, .w and .b).
125 .macro t_dwbc_r insn
 ; Runs the t_xc_r constant,register tests with the .d suffix, then
 ; the .b and .w variants through t_wbc_r.
126 t_xc_r .d,\insn
127 t_wbc_r \insn
128 .endm
129
130; An insn-type that has size-modifier, a memory operand and a
131; register operand.
132 .macro t_xm_r S insn
 ; Four memory,register tests of insn with size suffix S (or the word
 ; none).  The operand pairs are ([r4],r3), ([r8],r5), ([r3],r9) and
 ; ([r8],r9).  Operands are comma-delimited here because they contain
 ; brackets.
 ; Uses the backward reference 9b, so label 9 must already be defined
 ; where this macro is expanded.
133 move.d 9b,r8	; r8 = address of the nearest preceding label 9
134 t_S_x_y \S,\insn,[r4],r3
 ; NOTE(review): the two copies into r9 below presumably keep r9 at a
 ; known value before it is used as an operand - confirm.
135 move.d r3,r9
136 t_S_x_y \S,\insn,[r8],r5
137 move.d r5,r9
138 t_S_x_y \S,\insn,[r3],r9
139 t_S_x_y \S,\insn,[r8],r9
140 .endm
141
142; Ditto, to memory.
143 .macro t_xr_m S insn
 ; Four register,memory tests of insn with size suffix S (or the word
 ; none).  The operand pairs are (r3,[r4]), (r8,[r3]), (r3,[r8]) and
 ; (r9,[r8]).
 ; Uses the backward reference 9b, so label 9 must already be defined
 ; where this macro is expanded.
144 move.d 9b,r8	; r8 = address of the nearest preceding label 9
145 t_S_x_y \S,\insn,r3,[r4]
146 t_S_x_y \S,\insn,r8,[r3]
147 t_S_x_y \S,\insn,r3,[r8]
148 t_S_x_y \S,\insn,r9,[r8]
149 .endm
150
151; An insn-type that has a memory operand and a register operand.
152 .macro t_m_r insn
 ; Suffix-less variant: forwards to t_xm_r with the word none as the
 ; size argument, giving the same four memory,register tests.
153 t_xm_r none \insn
154 .endm
155
156; A t_m_r insn with a byte or word-size modifier.
157 .macro t_wbm_r insn
 ; Runs the t_xm_r memory,register tests twice: once with the .b
 ; suffix and once with the .w suffix.
158 t_xm_r .b,\insn
159 t_xm_r .w,\insn
160 .endm
161
162; Ditto, but covering the dword-size modifier as well (.d, .w and .b).
163 .macro t_dwbm_r insn
 ; Runs the t_xm_r memory,register tests with the .d suffix, then
 ; the .b and .w variants through t_wbm_r.
164 t_xm_r .d,\insn
165 t_wbm_r \insn
166 .endm
167
168; Insn types of the regular type (r, c, m, size d w b).
169 .macro t_dwb insn
 ; Full matrix for a regular insn: register, constant and memory
 ; source forms, each with the .d, .w and .b suffixes.
170 t_dwbr_r \insn
171 t_dwbc_r \insn
172 t_dwbm_r \insn
173 .endm
174
175; Similar, sizes w b.
176 .macro t_wb insn
 ; Same matrix as t_dwb but without the .d suffix: register, constant
 ; and memory source forms, each with the .w and .b suffixes.
177 t_wbr_r \insn
178 t_wbc_r \insn
179 t_wbm_r \insn
180 .endm
181
182;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
183
184 startnostack	; start-up macro from testutils.inc (definition not visible here)
185
186; Initialize registers so they don't contain unknowns.
187
188 move.d 9f,r7	; r7 = address of the movem source area (label 9 below)
189 move.d r7,r8	; r8 = same address
190 moveq 0,r9
191
192; Movem source area.  Register contents must be valid
193; addresses, aligned on a cache boundary.
; Every entry holds the address of the area itself (label 9), so any
; register loaded from it can in turn be used as a memory operand.
; NOTE(review): presumably ten entries so that the largest movem in
; this file (up to r9) finds data for every register - confirm.
194 .section ".rodata"
195 .p2align 5
1969:
197 .dword 9b
198 .dword 9b
199 .dword 9b
200 .dword 9b
201 .dword 9b
202 .dword 9b
203 .dword 9b
204 .dword 9b
205 .dword 9b
206 .dword 9b
207 .previous
208
209; The actual tests.  The numbers in the comments specify the
210; number of movem destination stall cycles.  Some of them may be
211; filed as memory source address stalls, multiplication source
212; stalls or jump source stalls, duly marked so.
213
214 t_r_r abs		; 3+3
215
216 t_dwb add		; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
217
218 t_r_r addc		; (3+3+3)
219 t_c_r addc		; 3
220 t_m_r addc		; (3+3+3) (2 mem src)
221
222 t_dwb move		; (3+3)+(3+3+3)*2+3*2+(3+3+3)*3 (6 mem src)
223 t_xr_m .b move		; 3+3+3 (2 mem src)
224 t_xr_m .w move		; 3+3+3 (2 mem src)
225 t_xr_m .d move		; 3+3+3 (2 mem src)
226
; Two-operand addi; the operands contain a dot, but are still passed
; space-delimited here.
227 t_S_x_y none addi r3.b r8	; 3
228 t_S_x_y none addi r8.w r3	; 3
229 t_S_x_y none addi r4.d r3	; 3
230 t_S_x_y none addi r8.w r9	; no count given: neither r8 nor r9 is in the movem range r0..r6
231
232 ; Addi with an acr destination and addo have three-operand syntax,
233 ; so we have to expand (a useful subset of) "t_dwb" by hand.
 ; The last two operands are passed as one quoted macro argument so
 ; that the comma between them survives macro-argument splitting.
234 t_S_x_y none addi r3.b "r8,acr"	; 3
235 t_S_x_y none addi r8.w "r3,acr"	; 3
236 t_S_x_y none addi r4.d "r3,acr"	; 3
237 t_S_x_y none addi r8.w "r9,acr"	; no count given: r8 and r9 are outside the movem range
238
239 t_S_x_y .b addo 42 "r8,acr"	; no count given: r8 is outside the movem range
240 t_S_x_y .w addo 4200 "r3,acr"		; 3
241 t_S_x_y .d addo 420000 "r3,acr"	; 3
242
243 move.d 9b,r8	; r8 = address of the movem source area, for the [r8] operands below
244 t_S_x_y .d,addo,[r4],"r3,acr"		; 3 (1 mem src)
245 t_S_x_y .b,addo,[r3],"r8,acr"		; 3 (1 mem src)
246 t_S_x_y .w,addo,[r8],"r3,acr"		; 3
247 t_S_x_y .w,addo,[r8],"r9,acr"	; no count given: r8 and r9 are outside the movem range
248
249 ; Similar for addoq.
250 t_S_x_y none addoq 42 "r8,acr"	; no count given: r8 is outside the movem range
251 t_S_x_y none addoq 42 "r3,acr"		; 3
252
253 t_c_r addq				; 3
254
255 t_wb adds		; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
256 t_wb addu		; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
257
258 t_dwb and		; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
259 t_c_r andq		; 3
260
261 t_dwbr_r asr		; (3+3+3)*3
262 t_c_r asrq		; 3
263
264 t_dwbr_r bound		; (3+3+3)*3
265 t_dwbc_r bound		; 3*3
266
267 t_r_r btst		; (3+3+3)
268 t_c_r btstq		; 3
269
270 t_dwb cmp		; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
271 t_c_r cmpq		; 3
272
273 t_wbc_r cmps		; 3*2
274 t_wbc_r cmpu		; 3*2
275 t_wbm_r cmps		; (3+3+3)*2 (4 mem src)
276 t_wbm_r cmpu		; (3+3+3)*2 (4 mem src)
277
278 t_r_r dstep		; (3+3+3)
279
280 ; FIXME: idxd, fidxi, ftagd, ftagi when supported.
281
; Note: each t_r_j expansion leaves r7 pointing at its own table of
; jump-target addresses, so every later "movem [r7],r6" loads from
; that table until r7 is set to 9b again before the movem tests.
282 t_r_j jsr		; 3 (2 jump src)
283 t_r_j jump		; 3 (2 jump src)
284
285 t_c_r lapc.d	; no count given; presumably the register operand is only written
286
287; The "quick operand" must be in range [. to .+15*2] so we can't
288; use t_c_r.
289 t_S_x_y none lapcq .+4 r3	; no count given; presumably r3 is only written
290 t_S_x_y none lapcq .+4 r8
291
292 t_dwbr_r lsl		; (3+3+3)*3
293 t_c_r lslq		; 3
294
295 t_dwbr_r lsr		; (3+3+3)*3
296 t_c_r lsrq		; 3
297
298 t_r_r lz		; 3+3
299
300 t_S_x_y none mcp srp r3	; 3
301 t_S_x_y none mcp srp r8	; no count given: r8 is outside the movem range
302
303 t_c_r moveq	; no count given; presumably the register operand is only written
304
; Moves between srp and a general register: only the variant on the
; last line, which has r3 as its source, is annotated with a count.
305 t_S_x_y none move srp r8
306 t_S_x_y none move srp r3
307 t_S_x_y none move r8 srp
308 t_S_x_y none move r3 srp	; 3
309
310; FIXME: move supreg,Rd and move Rs,supreg when supported.
311
312 t_wb movs	; (3+3)*2+0+(3+3)*2 (4 mem src)
313 t_wb movu	; (3+3)*2+0+(3+3)*2 (4 mem src)
314
315 t_dwbr_r muls	; (3+3+3)*3 (9 mul src)
316 t_dwbr_r mulu	; (3+3+3)*3 (9 mul src)
317
318 t_dwbr_r neg	; (3+3)*3
319
320 t_r not	; 3 cycles.
321
322 t_dwb or	; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
323 t_c_r orq	; 3
324
325 t_r seq	; no count given; presumably seq only writes its register operand
326
327 t_dwb sub	; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
328 t_c_r subq	; 3
329
330 t_wb subs	; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
331 t_wb subu	; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
332
333 t_r swapw	; 3 cycles.
334 t_r swapnwbr	; 3 cycles.
335
; Like the jsr and jump tests: this t_r_j expansion also repoints r7
; at its own jump-target table.
336 t_r_j jsrc	; 3 (2 jump src)
337
338 t_r_r xor	; (3+3+3)
339
; Point r7 back at the movem source area (label 9); the t_r_j
; expansions above left it pointing at their own tables.
; NOTE(review): the nops presumably let the r7 write settle before
; the next movem uses it as an address - confirm.
340 move.d 9b,r7
341 nop
342 nop
343 nop
344 t_xm_r none movem	; (3+3) (2 mem src, 1+1 movem addr)
345 ; As implied by the comment, all movem destination penalty
346 ; cycles (but one) are accounted for as memory source address
347 ; and movem source penalties.  There are also two movem address
348 ; cache-line straddle penalties.
349 t_xr_m none movem	; (3+3+2+2) (2 mem, 6 movem src, +2 movem addr)
350
; NOTE(review): presumably ends the simulator run so that the cycle
; statistics listed in the #output lines are printed - confirm.
351 break 15
352