;; Copyright (C) 2004-2015 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; Pipeline description for the VR4130 family.
;;
;; The processor issues each 8-byte aligned pair of instructions together,
;; stalling the second instruction if it depends on the first.  Thus, if we
;; want two instructions to issue in parallel, we need to make sure that the
;; first one is 8-byte aligned.
;;
;; For the purposes of this pipeline description, we treat the processor
;; like a standard two-way superscalar architecture.  If scheduling were
;; the last pass to run, we could use the scheduler hooks to vary the
;; issue rate depending on whether an instruction is at an aligned or
;; unaligned address.  Unfortunately, delayed branch scheduling and
;; hazard avoidance are done after the final scheduling pass, and they
;; can change the addresses of many instructions.
;;
;; We get around this in two ways:
;;
;;   (1) By running an extra pass at the end of compilation.  This pass goes
;;       through the function looking for pairs of instructions that could
;;       execute in parallel.  It makes sure that the first instruction in
;;       each pair is suitably aligned, inserting nops if necessary.
;;       Doing this gives the same kind of pipeline behavior we would see on
;;       a normal superscalar target.
;;
;;       This pass is generally a speed improvement, but the extra nops will
;;       obviously make the program bigger.  It is therefore unsuitable for
;;       -Os (at the very least).
;;
;;   (2) By modifying the scheduler hooks so that, where possible:
;;
;;       (a) dependent instructions are separated by a non-dependent
;;           instruction;
;;
;;       (b) instructions that use the multiplication unit are separated
;;           by non-multiplication instructions; and
;;
;;       (c) memory access instructions are separated by non-memory
;;           instructions.
;;
;;       The idea is to keep conflicting instructions apart wherever possible
;;       and thus make the schedule less dependent on alignment.

;; Three automata are declared; every CPU unit below names the one it
;; belongs to as its second operand.
(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")

;; The two ALU units and the data-cache unit share the main automaton.
(define_cpu_unit
  "vr4130_alu1, vr4130_alu2, vr4130_dcache"
  "vr4130_main")

;; The multiply/divide unit is tracked by an automaton of its own.
(define_cpu_unit
  "vr4130_muldiv"
  "vr4130_muldiv")

;; This is a fake unit for pre-reload scheduling of multiplications.
;; It enforces the true post-reload repeat rate.
(define_cpu_unit
  "vr4130_mulpre"
  "vr4130_mulpre")

;; The scheduling hooks use this attribute for (b) above.
;; NOTE(review): its "mem" class looks like the counterpart for (c) --
;; confirm against the hook implementation.
(define_attr "vr4130_class" "mul,mem,alu"
  (cond [
         ;; Loads and stores form the "mem" class.
         (eq_attr "type" "load,store")
         (const_string "mem")

         ;; These are the types whose reservations in this file occupy
         ;; vr4130_muldiv; they form the "mul" class.
         (eq_attr "type" "mfhi,mflo,mthi,mtlo,imul,imul3,imadd,idiv")
         (const_string "mul")]
        ;; Any other type falls through to "alu".
        (const_string "alu")))

;; These types reserve both ALUs, the data cache and the multiply/divide
;; unit in the same cycle, so no other reservation in this file can start
;; alongside them.
(define_insn_reservation "vr4130_multi" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "multi,unknown,atomic,syncloop"))
  "vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")

;; Simple integer operations: latency 1, issued on either ALU.
(define_insn_reservation "vr4130_int" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt"))
  "vr4130_alu1 | vr4130_alu2")

;; Loads: latency 3, reserving the data-cache unit.
(define_insn_reservation "vr4130_load" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "load"))
  "vr4130_dcache")

;; Stores: latency 1, reserving the data-cache unit.
(define_insn_reservation "vr4130_store" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "store"))
  "vr4130_dcache")

;; mfhi/mflo: latency 3, reserving the multiply/divide unit.
(define_insn_reservation "vr4130_mfhilo" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "mfhi,mflo"))
  "vr4130_muldiv")

;; mthi/mtlo: latency 1, reserving the multiply/divide unit.
(define_insn_reservation "vr4130_mthilo" 1
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "mthi,mtlo"))
  "vr4130_muldiv")

;; The product is available in LO & HI after one cycle.  Moving the result
;; into an integer register will take an additional three cycles, see mflo
;; & mfhi above.  Note that the same latencies and repeat rates apply if we
;; use "mtlo; macc" instead of "mult; mflo".
;;
;; The reservation holds vr4130_muldiv for one cycle and, alongside it,
;; the fake vr4130_mulpre unit for two cycles.
(define_insn_reservation "vr4130_mulsi" 4
  (and (and (eq_attr "cpu" "r4130")
            (eq_attr "type" "imul,imul3"))
       (eq_attr "mode" "SI"))
  "vr4130_muldiv + (vr4130_mulpre * 2)")

;; As for vr4130_mulsi, but the product is available in LO and HI
;; after 3 cycles.
(define_insn_reservation "vr4130_muldi" 6
  (and (and (eq_attr "cpu" "r4130")
            (eq_attr "type" "imul,imul3"))
       (eq_attr "mode" "DI"))
  "(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")

;; maccs can execute in consecutive cycles without stalling, but it
;; is 3 cycles before the integer destination can be read.
(define_insn_reservation "vr4130_macc" 3
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "imadd"))
  "vr4130_muldiv")

;; Shorter latencies for specific producer/consumer pairs:
;;   - SImode multiply or macc feeding a macc: 1 cycle, but only when the
;;     guard mips_linked_madd_p (defined outside this file) accepts the pair;
;;   - SImode multiply or macc feeding mfhi/mflo: 1 cycle;
;;   - DImode multiply feeding mfhi/mflo: 3 cycles.
(define_bypass 1
  "vr4130_mulsi,vr4130_macc"
  "vr4130_macc"
  "mips_linked_madd_p")
(define_bypass 1
  "vr4130_mulsi,vr4130_macc"
  "vr4130_mfhilo")
(define_bypass 3
  "vr4130_muldi"
  "vr4130_mfhilo")

;; SImode divide: latency 36, holding the multiply/divide unit for all
;; 36 cycles.
(define_insn_reservation "vr4130_divsi" 36
  (and (and (eq_attr "cpu" "r4130")
            (eq_attr "type" "idiv"))
       (eq_attr "mode" "SI"))
  "vr4130_muldiv * 36")

;; DImode divide: latency 72, holding the multiply/divide unit for all
;; 72 cycles.
(define_insn_reservation "vr4130_divdi" 72
  (and (and (eq_attr "cpu" "r4130")
            (eq_attr "type" "idiv"))
       (eq_attr "mode" "DI"))
  "vr4130_muldiv * 72")

;; Branches, jumps and calls: latency 0, issued on either ALU.
(define_insn_reservation "vr4130_branch" 0
  (and (eq_attr "cpu" "r4130")
       (eq_attr "type" "branch,jump,call"))
  "vr4130_alu1 | vr4130_alu2")