4130.md revision 169689
;;
;; Pipeline description for the VR4130 family.
;;
;; The processor issues each 8-byte aligned pair of instructions together,
;; stalling the second instruction if it depends on the first.  Thus, if we
;; want two instructions to issue in parallel, we need to make sure that the
;; first one is 8-byte aligned.
;;
;; For the purposes of this pipeline description, we treat the processor
;; like a standard two-way superscalar architecture.  If scheduling were
;; the last pass to run, we could use the scheduler hooks to vary the
;; issue rate depending on whether an instruction is at an aligned or
;; unaligned address.  Unfortunately, delayed branch scheduling and
;; hazard avoidance are done after the final scheduling pass, and they
;; can change the addresses of many instructions.
;;
;; We get around this in two ways:
;;
;;   (1) By running an extra pass at the end of compilation.  This pass goes
;;       through the function looking for pairs of instructions that could
;;       execute in parallel.  It makes sure that the first instruction in
;;       each pair is suitably aligned, inserting nops if necessary.  Doing
;;       this gives the same kind of pipeline behavior we would see on a
;;       normal superscalar target.
;;
;;       This pass is generally a speed improvement, but the extra nops will
;;       obviously make the program bigger.  It is therefore unsuitable for
;;       -Os (at the very least).
;;
;;   (2) By modifying the scheduler hooks so that, where possible:
;;
;;       (a) dependent instructions are separated by a non-dependent
;;           instruction;
;;
;;       (b) instructions that use the multiplication unit are separated
;;           by non-multiplication instructions; and
;;
;;       (c) memory access instructions are separated by non-memory
;;           instructions.
;;
;;       The idea is to keep conflicting instructions apart wherever possible
;;       and thus make the schedule less dependent on alignment.

;; Three automata are declared; every unit below is bound to exactly one
;; of them.
(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")

;; The two ALUs and the data cache port belong to the main automaton;
;; the multiply/divide unit has an automaton of its own.
(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")
(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")

;; This is a fake unit for pre-reload scheduling of multiplications.
;; It enforces the true post-reload repeat rate.
(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")

;; The scheduling hooks use this attribute for (b) above.
;; Classify each instruction by its "type" attribute:
;;   "mem" for loads and stores,
;;   "mul" for HI/LO moves, multiplies, multiply-adds and divides,
;;   "alu" for everything else.
(define_attr "vr4130_class" "mul,mem,alu"
  (cond [(eq_attr "type" "load,store")
         (const_string "mem")

         (eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")
         (const_string "mul")]
        (const_string "alu")))

;; "multi" and "unknown" instructions claim both ALUs, the data cache and
;; the multiply/divide unit in the same cycle.
(define_insn_reservation "vr4130_multi" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "multi,unknown"))
  "vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")

;; Simple integer operations have a latency of 1 and may use either ALU.
(define_insn_reservation "vr4130_int" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "const,arith,shift,slt,nop"))
  "vr4130_alu1 | vr4130_alu2")

;; Loads have a latency of 3 and occupy the data cache for one cycle.
(define_insn_reservation "vr4130_load" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "load"))
  "vr4130_dcache")

;; Stores have a latency of 1 and occupy the data cache for one cycle.
(define_insn_reservation "vr4130_store" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "store"))
  "vr4130_dcache")

;; Moves from HI/LO have a latency of 3 and use the multiply/divide unit.
(define_insn_reservation "vr4130_mfhilo" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "mfhilo"))
  "vr4130_muldiv")

;; Moves to HI/LO have a latency of 1 and use the multiply/divide unit.
(define_insn_reservation "vr4130_mthilo" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "mthilo"))
  "vr4130_muldiv")

;; The product is available in LO & HI after one cycle.  Moving the result
;; into an integer register will take an additional three cycles, see mflo
;; & mfhi above.  Note that the same latencies and repeat rates apply if we
;; use "mtlo; macc" instead of "mult; mflo".
;; SImode multiply: default latency 4.  It holds the multiply/divide unit
;; for one cycle and the fake pre-reload unit for two cycles.
(define_insn_reservation "vr4130_mulsi" 4
  (and (eq_attr "cpu" "r4130")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "SI")))
  "vr4130_muldiv + (vr4130_mulpre * 2)")

;; As for vr4130_mulsi, but the product is available in LO and HI
;; after 3 cycles.  The multiply/divide unit is held for three cycles
;; and the fake pre-reload unit for four.
(define_insn_reservation "vr4130_muldi" 6
  (and (eq_attr "cpu" "r4130")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "DI")))
  "(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")

;; maccs can execute in consecutive cycles without stalling, but it
;; is 3 cycles before the integer destination can be read.
(define_insn_reservation "vr4130_macc" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "imadd"))
  "vr4130_muldiv")

;; Bypasses replace the default latencies above for specific
;; producer/consumer pairs:
;;   - an SImode multiply or macc feeding a macc has latency 1, but only
;;     when the guard mips_linked_madd_p accepts the pair;
;;   - an SImode multiply or macc feeding a move from HI/LO has latency 1;
;;   - a DImode multiply feeding a move from HI/LO has latency 3.
(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")
(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")
(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")

;; Divides hold the multiply/divide unit for their entire latency:
;; 36 cycles in SImode and 72 cycles in DImode.
(define_insn_reservation "vr4130_divsi" 36
  (and (eq_attr "cpu" "r4130")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "SI")))
  "vr4130_muldiv * 36")

(define_insn_reservation "vr4130_divdi" 72
  (and (eq_attr "cpu" "r4130")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "DI")))
  "vr4130_muldiv * 72")

;; Branches, jumps and calls have a latency of 0 and may use either ALU.
(define_insn_reservation "vr4130_branch" 0
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "branch,jump,call"))
  "vr4130_alu1 | vr4130_alu2")