;; DFA-based pipeline description for the VR1x000.
;;   Copyright (C) 2005-2015 Free Software Foundation, Inc.
;;
;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
;; until specific tuning for each is added.

;; R10000 has an int queue, fp queue, address queue.
;; The int queue feeds ALU1 and ALU2.
;; The fp queue feeds the fp-adder and fp-multiplier.
;; The addr queue feeds the Load/Store unit.
;;
;; However, we define the fp-adder and fp-multiplier as
;; separate automata, because the fp-multiplier is
;; divided into fp-multiplier, fp-division, and
;; fp-squareroot units, all of which share the same
;; issue and completion logic, yet can operate in
;; parallel.
;;
;; This is based on the model described in the R10K Manual
;; and it helps to reduce the size of the automata.
;;
;; Six automata are declared here: one for the integer side, one
;; for the address (load/store) side, and one each for the fp adder,
;; multiplier, divider and square-root units.
(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
                   r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")

;; Functional units.  The second operand names the automaton each
;; unit belongs to: the two integer ALUs share r10k_a_int, while
;; every other unit sits alone in its own automaton.
(define_cpu_unit "r10k_alu1" "r10k_a_int")
(define_cpu_unit "r10k_alu2" "r10k_a_int")
(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")
(define_cpu_unit "r10k_loadstore" "r10k_a_addr")


;; R10k Loads and Stores.
;;
;; Integer loads and both prefetch types: default latency 2, issued
;; through the single load/store unit.
(define_insn_reservation "r10k_load" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "load,prefetch,prefetchx"))
  "r10k_loadstore")

;; Integer and fp stores (plain and indexed): default latency 0,
;; but they still occupy the load/store unit when issued.
(define_insn_reservation "r10k_store" 0
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "store,fpstore,fpidxstore"))
  "r10k_loadstore")

;; Fp loads (plain and indexed): default latency 3, one cycle more
;; than the integer loads, through the same load/store unit.
(define_insn_reservation "r10k_fpload" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fpload,fpidxload"))
  "r10k_loadstore")


;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
;; Miscellaneous arith goes here too (this is a guess).
;;
;; Default latency 1.  The "|" in the reservation lets the scheduler
;; pick whichever of the two ALUs is free.
(define_insn_reservation "r10k_arith" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "arith,mthi,mtlo,slt,clz,const,nop,trap,logical"))
  "r10k_alu1 | r10k_alu2")

;; We treat mfhilo differently, because we need to know when
;; it's HI and when it's LO.
;;
;; Reads of HI: default latency 1, either ALU.  Keeping this as its
;; own reservation gives define_bypass a name to refer to.
(define_insn_reservation "r10k_mfhi" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mfhi"))
  "r10k_alu1 | r10k_alu2")

;; Reads of LO: default latency 1, either ALU.
(define_insn_reservation "r10k_mflo" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mflo"))
  "r10k_alu1 | r10k_alu2")


;; ALU1 handles shifts, branch eval, and condmove.
;;
;; The brancher is a separate unit that is part of ALU1, and it can
;; only do one branch per cycle (is this even implementable?).
;;
;; Unsure if the brancher handles jumps and calls as well, but since
;; they're related, we'll add them here for now.
;;
;; Default latency 1; restricted to ALU1 (no ALU2 alternative).
(define_insn_reservation "r10k_brancher" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "shift,branch,jump,call"))
  "r10k_alu1")

;; Integer conditional moves (condmove in SI or DI mode): default
;; latency 1, ALU1 only.
(define_insn_reservation "r10k_int_cmove" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SI,DI")))
  "r10k_alu1")


;; Coprocessor Moves.
;; mtc1/dmtc1 are handled by ALU1.
;; mfc1/dmfc1 are handled by the fp-multiplier.
;;
;; Moves to the coprocessor: default latency 3, issued on ALU1.
(define_insn_reservation "r10k_mt_xfer" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mtc"))
  "r10k_alu1")

;; Moves from the coprocessor: default latency 2, issued on the
;; fp-multiplier rather than on an integer ALU.
(define_insn_reservation "r10k_mf_xfer" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mfc"))
  "r10k_fpmpy")


;; Only ALU2 does int multiplications and divisions.
;;
;; According to the Vr10000 series user manual,
;; integer mult and div insns can be issued one
;; cycle earlier if using register Lo.  We model
;; this by using the Lo value by default, as it
;; is the more common value, and use a bypass
;; for the Hi value when needed.
;;
;; Also of note, there are different latencies
;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
;; However, gcc does not have separate types
;; for these insns.  Thus to strike a balance,
;; we use the Hi latency value for imul
;; operations until the imul type can be split.
;;
;; SImode multiplies: default latency 6, with ALU2 held for all
;; 6 cycles ("* 6" repeats the unit reservation).
(define_insn_reservation "r10k_imul_single" 6
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 6")

;; DImode multiplies: default latency 10, with ALU2 held for all
;; 10 cycles.
(define_insn_reservation "r10k_imul_double" 10
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 10")

;; Divides keep ALU2 busy.
;;
;; SImode divides: default latency 34, while ALU2 stays reserved
;; for 35 cycles, i.e. one cycle beyond the default latency.
(define_insn_reservation "r10k_idiv_single" 34
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 35")

;; DImode divides: default latency 66, while ALU2 stays reserved
;; for 67 cycles, i.e. one cycle beyond the default latency.
(define_insn_reservation "r10k_idiv_double" 66
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 67")

;; When the consumer of a divide is an mfhi, use a latency one cycle
;; longer than the divide's default (35 vs. 34 for SImode, 67 vs. 66
;; for DImode).
(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")


;; Floating point add/sub, mul, abs value, neg, comp, & moves.
;;
;; Add, abs, neg and compare: default latency 2, on the fp-adder.
(define_insn_reservation "r10k_fp_miscadd" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fadd,fabs,fneg,fcmp"))
  "r10k_fpadd")

;; Fp multiplies and fp register moves: default latency 2, on the
;; fp-multiplier.
(define_insn_reservation "r10k_fp_miscmul" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmul,fmove"))
  "r10k_fpmpy")

;; Fp conditional moves (condmove in SF or DF mode): default
;; latency 2, on the fp-multiplier.  The SI/DI condmoves are matched
;; by the separate r10k_int_cmove reservation.
(define_insn_reservation "r10k_fp_cmove" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SF,DF")))
  "r10k_fpmpy")


;; The fcvt.s.[wl] insn has latency 4, repeat 2.
;; All other fcvt insns have latency 2, repeat 1.
;;
;; Conversions with cnv_mode I2S: the repeat rate of 2 is modelled
;; by holding the fp-adder for two cycles.
(define_insn_reservation "r10k_fcvt_single" 4
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "I2S")))
  "r10k_fpadd * 2")

;; Every fcvt whose cnv_mode is not I2S: default latency 2, with the
;; fp-adder held for a single cycle.
(define_insn_reservation "r10k_fcvt_other" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "!I2S")))
  "r10k_fpadd")


;; Run the fmadd insn through fp-adder first, then fp-multiplier.
;;
;; The latency for fmadd is 2 cycles if the result is used
;; by another fmadd instruction.
;;
;; Default latency 4.  The "," in the reservation advances one
;; cycle: the fp-adder is reserved first and the fp-multiplier on
;; the cycle after it.
(define_insn_reservation "r10k_fmadd" 4
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmadd"))
  "r10k_fpadd, r10k_fpmpy")

;; An fmadd feeding another fmadd sees a latency of 2 instead of the
;; default 4.
(define_bypass 2 "r10k_fmadd" "r10k_fmadd")


;; Floating point Divisions & square roots.
;;
;; SFmode divide and reciprocal: default latency 12, while the fp
;; divider stays reserved for 14 cycles.
(define_insn_reservation "r10k_fdiv_single" 12
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "SF")))
  "r10k_fpdiv * 14")

;; DFmode divide and reciprocal: default latency 19, while the fp
;; divider stays reserved for 21 cycles.
(define_insn_reservation "r10k_fdiv_double" 19
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "DF")))
  "r10k_fpdiv * 21")

;; SFmode square root: default latency 18, while the fp square-root
;; unit stays reserved for 20 cycles.
(define_insn_reservation "r10k_fsqrt_single" 18
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")

;; DFmode square root: default latency 33, while the fp square-root
;; unit stays reserved for 35 cycles.
(define_insn_reservation "r10k_fsqrt_double" 33
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")

;; SFmode reciprocal square root: default latency 30.  The fp
;; square-root unit is reserved for 20 cycles, the same span as the
;; SFmode fsqrt, so here the reservation is shorter than the latency.
(define_insn_reservation "r10k_frsqrt_single" 30
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")

;; DFmode reciprocal square root: default latency 52.  The fp
;; square-root unit is reserved for 35 cycles, the same span as the
;; DFmode fsqrt, so here the reservation is shorter than the latency.
(define_insn_reservation "r10k_frsqrt_double" 52
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")


;; Handle unknown/multi insns here (this is a guess).
;;
;; Default latency 1.  The "+" in the reservation claims both
;; integer ALUs in the same cycle, so nothing else can issue to
;; either ALU alongside one of these insns.
(define_insn_reservation "r10k_unknown" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "unknown,multi,atomic,syncloop"))
  "r10k_alu1 + r10k_alu2")
