#mach: crisv32
#output: Basic clock cycles, total @: *\n
#output: Memory source stall cycles: 82\n
#output: Memory read-after-write stall cycles: 0\n
#output: Movem source stall cycles: 6\n
#output: Movem destination stall cycles: 880\n
#output: Movem address stall cycles: 4\n
#output: Multiplication source stall cycles: 18\n
#output: Jump source stall cycles: 6\n
#output: Branch misprediction stall cycles: 0\n
#output: Jump target stall cycles: 0\n
#sim: --cris-cycles=basic

 .include "testutils.inc"

; Macros for testing correctness of movem destination stall
; cycles for various insn types.  Beware: macro parameters can
; be comma or space-delimited.  There are problems (i.e. bugs)
; with using space-delimited operands and operands with
; non-alphanumeric characters, like "[]-." so use comma for
; them.  Lots of trouble passing empty parameters and parameters
; with comma.  Ugh.  FIXME: Report bugs, fix bugs, fix other
; shortcomings, fix that darn old macro-parameter-in-string.
;
; NOTE: the stall-cycle totals in the "#output:" header above are
; the sums of what every expansion below produces; adding, removing
; or reordering an emitted insn changes the expected numbers.

; Helper macro.  Unfortunately I find no cleaner way to unify
; one and two-operand cases, the main problem being the comma
; operand delimiter clashing with macro operand delimiter.
;
; t_S_x_y S insn x [y]
;   S     size suffix appended directly to the mnemonic (e.g. ".b"),
;         or the literal word "none" for no suffix.
;   insn  mnemonic of the insn under test.
;   x     first operand.
;   y     optional second operand; when omitted it defaults to the
;         literal word "none" and the one-operand form is emitted.
;
; Each expansion emits, in order: "movem [r7],r6", exactly one
; "\insn[\S] \x[,\y]", and three nops.
 .macro t_S_x_y S insn x y=none
 movem [r7],r6
 .ifc \y,none
  ; One-operand form.
  .ifc \S,none
 \insn \x
  .else
 \insn\S \x
  .endif
 .else
  ; Two-operand form.
  .ifc \S,none
 \insn \x,\y
  .else
 \insn\S \x,\y
  .endif
 .endif
 nop
 nop
 nop
 .endm

; An insn-type that has a single register operand.  The register
; may or may not be a source register for the insn.
; Expands the helper twice: once with r3 and once with r8.
 .macro t_r insn
 t_S_x_y none,\insn,r3
 t_S_x_y none,\insn,r8
 .endm

; An insn-type that jumps to the destination of the register.
; t_r_j insn
;   Points r7 at a private, 32-byte-aligned table in .rodata (local
;   label 0) whose seven dwords all hold the address of local label 1,
;   loads that same address into r8 and r9, and then expands
;   "t_r \insn".  Local label 1 is placed right after that expansion.
;   r7 is not restored afterwards, so it keeps pointing at this
;   table until something else reloads it.
 .macro t_r_j insn
 move.d 0f,r7
 move.d 1f,r8
 move.d r8,r9
 nop
 nop
 nop
 .section ".rodata"
 .p2align 5
0:
 .dword 1f
 .dword 1f
 .dword 1f
 .dword 1f
 .dword 1f
 .dword 1f
 .dword 1f
 .previous
 t_r \insn
1:
 .endm

; An insn-type that has a size-modifier and two register
; operands.
; Expands the helper for the operand pairs r3,r8 / r8,r3 / r4,r3 /
; r8,r9, with a "move.d r3,r9" between the second and third case.
 .macro t_xr_r S insn
 t_S_x_y \S \insn r3 r8
 t_S_x_y \S \insn r8 r3
 move.d r3,r9
 t_S_x_y \S \insn r4 r3
 t_S_x_y \S \insn r8 r9
 .endm

; An insn-type that has two register operands (no size modifier).
 .macro t_r_r insn
 t_xr_r none \insn
 .endm

; A t_xr_r insn with a byte or word-size modifier.
 .macro t_wbr_r insn
 t_xr_r .b,\insn
 t_xr_r .w,\insn
 .endm

; Ditto with a dword-size modifier (expands .d, then .b and .w).
 .macro t_dwbr_r insn
 t_xr_r .d,\insn
 t_wbr_r \insn
 .endm

; An insn-type that has a size-modifier, a constant and a
; register operand.
; The constant is always 24; the register is r3, then (after a
; "move.d r3,r9") r8.
 .macro t_xc_r S insn
 t_S_x_y \S \insn 24 r3
 move.d r3,r9
 t_S_x_y \S \insn 24 r8
 .endm

; An insn-type that has a constant and a register operand.
 .macro t_c_r insn
 t_xc_r none \insn
 .endm

; A t_xc_r insn with a byte or word-size modifier.
 .macro t_wbc_r insn
 t_xc_r .b,\insn
 t_xc_r .w,\insn
 .endm

; Ditto with a dword-size modifier (expands .d, then .b and .w).
 .macro t_dwbc_r insn
 t_xc_r .d,\insn
 t_wbc_r \insn
 .endm

; An insn-type that has size-modifier, a memory operand and a
; register operand.
; r8 is first loaded with the address of the nearest preceding
; local label 9.  The operands are comma-delimited here because
; they contain "[" and "]" (see the note on macro parameters at
; the top of the file).
 .macro t_xm_r S insn
 move.d 9b,r8
 t_S_x_y \S,\insn,[r4],r3
 move.d r3,r9
 t_S_x_y \S,\insn,[r8],r5
 move.d r5,r9
 t_S_x_y \S,\insn,[r3],r9
 t_S_x_y \S,\insn,[r8],r9
 .endm

; Ditto, to memory.
 .macro t_xr_m S insn
 move.d 9b,r8
 t_S_x_y \S,\insn,r3,[r4]
 t_S_x_y \S,\insn,r8,[r3]
 t_S_x_y \S,\insn,r3,[r8]
 t_S_x_y \S,\insn,r9,[r8]
 .endm

; An insn-type that has a memory operand and a register operand.
; t_m_r insn: the memory-operand/register-operand cases of t_xm_r
; with no size modifier.
 .macro t_m_r insn
 t_xm_r none \insn
 .endm

; A t_xm_r insn with a byte or word-size modifier.
 .macro t_wbm_r insn
 t_xm_r .b,\insn
 t_xm_r .w,\insn
 .endm

; Ditto with a dword-size modifier (expands .d, then .b and .w).
 .macro t_dwbm_r insn
 t_xm_r .d,\insn
 t_wbm_r \insn
 .endm

; Insn types of the regular type (r, c, m, size d w b).
 .macro t_dwb insn
 t_dwbr_r \insn
 t_dwbc_r \insn
 t_dwbm_r \insn
 .endm

; Similar, sizes w b.
 .macro t_wb insn
 t_wbr_r \insn
 t_wbc_r \insn
 t_wbm_r \insn
 .endm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 startnostack

; Initialize registers so they don't contain unknowns.
; r7 and r8 get the address of the movem source area (local
; label 9 below); r9 is cleared.

 move.d 9f,r7
 move.d r7,r8
 moveq 0,r9

; Movem source area.  Register contents must be valid
; addresses, aligned on a cache boundary.
; Ten dwords, each holding the address of the area itself.
 .section ".rodata"
 .p2align 5
9:
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .dword 9b
 .previous

; The actual tests.  The numbers in the comments specify the
; number of movem destination stall cycles.  Some of them may be
; filed as memory source address stalls, multiplication source
; stalls or jump source stalls, duly marked so.

 t_r_r abs ; 3+3

 t_dwb add ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)

 t_r_r addc ; (3+3+3)
 t_c_r addc ; 3
 t_m_r addc ; (3+3+3) (2 mem src)

 t_dwb move ; (3+3)+(3+3+3)*2+3*2+(3+3+3)*3 (6 mem src)
 t_xr_m .b move ; 3+3+3 (2 mem src)
 t_xr_m .w move ; 3+3+3 (2 mem src)
 t_xr_m .d move ; 3+3+3 (2 mem src)

 t_S_x_y none addi r3.b r8 ; 3
 t_S_x_y none addi r8.w r3 ; 3
 t_S_x_y none addi r4.d r3 ; 3
 t_S_x_y none addi r8.w r9

 ; Addi with an acr destination and addo have three-operand
 ; syntax, so we have to expand (a useful subset of) "t_dwb" by
 ; hand, quoting the last two operands so that they are passed
 ; as a single macro argument.
234 t_S_x_y none addi r3.b "r8,acr" ; 3 235 t_S_x_y none addi r8.w "r3,acr" ; 3 236 t_S_x_y none addi r4.d "r3,acr" ; 3 237 t_S_x_y none addi r8.w "r9,acr" 238 239 t_S_x_y .b addo 42 "r8,acr" 240 t_S_x_y .w addo 4200 "r3,acr" ; 3 241 t_S_x_y .d addo 420000 "r3,acr" ; 3 242 243 move.d 9b,r8 244 t_S_x_y .d,addo,[r4],"r3,acr" ; 3 (1 mem src) 245 t_S_x_y .b,addo,[r3],"r8,acr" ; 3 (1 mem src) 246 t_S_x_y .w,addo,[r8],"r3,acr" ; 3 247 t_S_x_y .w,addo,[r8],"r9,acr" 248 249 ; Similar for addoq. 250 t_S_x_y none addoq 42 "r8,acr" 251 t_S_x_y none addoq 42 "r3,acr" ; 3 252 253 t_c_r addq ; 3 254 255 t_wb adds ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src) 256 t_wb addu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src) 257 258 t_dwb and ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src) 259 t_c_r andq ; 3 260 261 t_dwbr_r asr ; (3+3+3)*3 262 t_c_r asrq ; 3 263 264 t_dwbr_r bound ; (3+3+3)*3 265 t_dwbc_r bound ; 3*3 266 267 t_r_r btst ; (3+3+3) 268 t_c_r btstq ; 3 269 270 t_dwb cmp ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src) 271 t_c_r cmpq ; 3 272 273 t_wbc_r cmps ; 3*2 274 t_wbc_r cmpu ; 3*2 275 t_wbm_r cmps ; (3+3+3)*2 (4 mem src) 276 t_wbm_r cmpu ; (3+3+3)*2 (4 mem src) 277 278 t_r_r dstep ; (3+3+3) 279 280 ; FIXME: idxd, fidxi, ftagd, ftagi when supported. 281 282 t_r_j jsr ; 3 (2 jump src) 283 t_r_j jump ; 3 (2 jump src) 284 285 t_c_r lapc.d 286 287; The "quick operand" must be in range [. to .+15*2] so we can't 288; use t_c_r. 289 t_S_x_y none lapcq .+4 r3 290 t_S_x_y none lapcq .+4 r8 291 292 t_dwbr_r lsl ; (3+3+3)*3 293 t_c_r lslq ; 3 294 295 t_dwbr_r lsr ; (3+3+3)*3 296 t_c_r lsrq ; 3 297 298 t_r_r lz ; 3+3 299 300 t_S_x_y none mcp srp r3 ; 3 301 t_S_x_y none mcp srp r8 302 303 t_c_r moveq 304 305 t_S_x_y none move srp r8 306 t_S_x_y none move srp r3 307 t_S_x_y none move r8 srp 308 t_S_x_y none move r3 srp ; 3 309 310; FIXME: move supreg,Rd and move Rs,supreg when supported. 
311 312 t_wb movs ; (3+3)*2+0+(3+3)*2 (4 mem src) 313 t_wb movu ; (3+3)*2+0+(3+3)*2 (4 mem src) 314 315 t_dwbr_r muls ; (3+3+3)*3 (9 mul src) 316 t_dwbr_r mulu ; (3+3+3)*3 (9 mul src) 317 318 t_dwbr_r neg ; (3+3)*3 319 320 t_r not ; 3 cycles. 321 322 t_dwb or ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src) 323 t_c_r orq ; 3 324 325 t_r seq 326 327 t_dwb sub ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src) 328 t_c_r subq ; 3 329 330 t_wb subs ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src) 331 t_wb subu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src) 332 333 t_r swapw ; 3 cycles. 334 t_r swapnwbr ; 3 cycles. 335 336 t_r_j jsrc ; 3 (2 jump src) 337 338 t_r_r xor ; (3+3+3) 339 340 move.d 9b,r7 341 nop 342 nop 343 nop 344 t_xm_r none movem ; (3+3) (2 mem src, 1+1 movem addr) 345 ; As implied by the comment, all movem destination penalty 346 ; cycles (but one) are accounted for as memory source address 347 ; and movem source penalties. There are also two movem address 348 ; cache-line straddle penalties. 349 t_xr_m none movem ; (3+3+2+2) (2 mem, 6 movem src, +2 movem addr) 350 351 break 15 352