1260401Sscottl;;
2260401Sscottl;; DFA-based pipeline description for Broadcom SB-1
3260401Sscottl;;
4260401Sscottl
5260401Sscottl;; The Broadcom SB-1 core is 4-way superscalar, in-order.  It has 2 load/store
6260401Sscottl;; pipes (one of which can support some ALU operations), 2 alu pipes, 2 FP
7260401Sscottl;; pipes, and 1 MDMX pipe.  It can issue 2 ls insns and 2 exe/fpu/mdmx insns
8260401Sscottl;; each cycle.
9260401Sscottl
10260401Sscottl;; We model the 4-way issue by ordering unit choices.  The possible choices are
11260401Sscottl;; {ex1,fp1}|{ex0,fp0}|ls1|ls0.  Instructions issue to the first eligible unit
12260401Sscottl;; in the list in most cases.  Non-indexed load/stores issue to ls0 first.
13260401Sscottl;; Simple alu operations issue to ls1 if it is still available, and their
14260401Sscottl;; operands are ready (no co-issue with loads), otherwise to the first
15260401Sscottl;; available ex unit.
16260401Sscottl
17260401Sscottl;; When exceptions are enabled, can only issue FP insns to fp1.  This is
18260401Sscottl;; to ensure that instructions complete in order.  The -mfp-exceptions option
19260401Sscottl;; can be used to specify whether the system has FP exceptions enabled or not.
20260401Sscottl
21260401Sscottl;; In 32-bit mode, dependent FP can't co-issue with load, and only one FP exe
22260401Sscottl;; insn can issue per cycle (fp1).
23260401Sscottl
24260401Sscottl;; The A1 MDMX pipe is separate from the FP pipes, but uses the same register
25260401Sscottl;; file.  As a result, once an MDMX insn is issued, no FP insns can be issued
26260401Sscottl;; for 3 cycles.  When an FP insn is issued, no MDMX insn can be issued for
27260401Sscottl;; 5 cycles.  This is currently not handled because there is no MDMX insn
28260401Sscottl;; support as yet.
29260401Sscottl
30260401Sscottl;;
31260401Sscottl;; We use two automata.  sb1_cpu_div is for the integer divides, which are
32260401Sscottl;; not pipelined.  sb1_cpu is for everything else.
33260401Sscottl;;
;; Both automata are declared in one define_automaton.  The second argument
;; of each define_cpu_unit below names the automaton that unit belongs to.
34260401Sscottl(define_automaton "sb1_cpu, sb1_cpu_div")
35260401Sscottl
36260401Sscottl;; Load/store function units.
37260401Sscottl(define_cpu_unit "sb1_ls0" "sb1_cpu")
38260401Sscottl(define_cpu_unit "sb1_ls1" "sb1_cpu")
39260401Sscottl
40260401Sscottl;; CPU function units.
41260401Sscottl(define_cpu_unit "sb1_ex0" "sb1_cpu")
42260401Sscottl(define_cpu_unit "sb1_ex1" "sb1_cpu")
43260401Sscottl
44260401Sscottl;; The divide unit is not pipelined, and blocks hi/lo reads and writes.
;; sb1_div is the only unit in this file assigned to the sb1_cpu_div automaton.
45260401Sscottl(define_cpu_unit "sb1_div" "sb1_cpu_div")
46260401Sscottl;; DMULT blocks any multiply from issuing in the next cycle.
;; sb1_mul is a bookkeeping unit in the sb1_cpu automaton.  The multiply
;; reservations (ir_sb1_mulsi, ir_sb1_muldi) claim it together with sb1_ex1.
47260401Sscottl(define_cpu_unit "sb1_mul" "sb1_cpu")
48260401Sscottl
49260401Sscottl;; Floating-point units.
50260401Sscottl(define_cpu_unit "sb1_fp0" "sb1_cpu")
51260401Sscottl(define_cpu_unit "sb1_fp1" "sb1_cpu")
52260401Sscottl
53260401Sscottl;; Can only issue to one of the ex and fp pipes at a time.
;; The exclusion sets pair ex0 with fp0 and ex1 with fp1: the two units of a
;; pair cannot both be reserved in the same cycle.
54260401Sscottl(exclusion_set "sb1_ex0" "sb1_fp0")
55260401Sscottl(exclusion_set "sb1_ex1" "sb1_fp1")
56260401Sscottl
57260401Sscottl;; Define an SB-1 specific attribute to simplify some FP descriptions.
58260401Sscottl;; We can use 2 FP pipes only if we have 64-bit FP code, and exceptions are
59260401Sscottl;; disabled.
60260401Sscottl
;; Attribute values are "one" and "two".  The cond yields "two" only when
;; TARGET_FLOAT64 is nonzero AND TARGET_FP_EXCEPTIONS is zero; in every other
;; case it falls through to the default "one".
61260401Sscottl(define_attr "sb1_fp_pipes" "one,two"
62260401Sscottl  (cond [(and (ne (symbol_ref "TARGET_FLOAT64") (const_int 0))
63260401Sscottl	      (eq (symbol_ref "TARGET_FP_EXCEPTIONS") (const_int 0)))
64260401Sscottl	 (const_string "two")]
65260401Sscottl	(const_string "one")))
66260401Sscottl
67260401Sscottl;; Define reservations for common combinations.
68260401Sscottl
69260401Sscottl;; For long cycle operations, the FPU has a 4 cycle pipeline that repeats,
70260401Sscottl;; effectively re-issuing the operation every 4 cycles.  This means that we
71260401Sscottl;; can have at most 4 long-cycle operations per pipe.
72260401Sscottl
73260401Sscottl;; ??? The fdiv operations should be e.g.
74260401Sscottl;; "sb1_fp1_4cycles*7 | sb1_fp0_4cycles*7"
75260401Sscottl;; but the DFA is too large when we do that.  Perhaps have to use scheduler
76260401Sscottl;; hooks here.
77260401Sscottl
78260401Sscottl;; ??? Try limiting scheduler to 2 long latency operations, and see if this
79260401Sscottl;; results in a usable DFA, and whether it helps code performance.
80260401Sscottl
81260401Sscottl;;(define_reservation "sb1_fp0_4cycles" "sb1_fp0, nothing*3")
82260401Sscottl;;(define_reservation "sb1_fp1_4cycles" "sb1_fp1, nothing*3")
83260401Sscottl
84260401Sscottl;;
85260401Sscottl;; The ordering of the instruction-execution-path/resource-usage
86260401Sscottl;; descriptions (also known as reservation RTL) is roughly ordered
87260401Sscottl;; based on the define attribute RTL for the "type" classification.
88260401Sscottl;; When modifying, remember that the first test that matches is the
89260401Sscottl;; reservation used!
90260401Sscottl;;
91260401Sscottl
;; Insn types "unknown" and "multi" on sb1 and sb1a.  The reservation joins
;; all six ls/ex/fp units with '+', so every one of them is claimed in the
;; same cycle.  Default latency is 1.
92260401Sscottl(define_insn_reservation "ir_sb1_unknown" 1
93260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
94260401Sscottl       (eq_attr "type" "unknown,multi"))
95260401Sscottl  "sb1_ls0+sb1_ls1+sb1_ex0+sb1_ex1+sb1_fp0+sb1_fp1")
96260401Sscottl
97260401Sscottl;; predicted taken branch causes 2 cycle ifetch bubble.  predicted not
98260401Sscottl;; taken branch causes 0 cycle ifetch bubble.  mispredicted branch causes 8
99260401Sscottl;; cycle ifetch bubble.  We assume all branches predicted not taken.
100260401Sscottl
101260401Sscottl;; ??? This assumption that branches are predicted not taken should be
102260401Sscottl;; investigated.  Maybe using 2 here will give better results.
103260401Sscottl
;; Insn types "branch", "jump" and "call" on sb1 and sb1a: default latency 0,
;; reserved on sb1_ex0 only (no sb1_ex1 alternative).
104260401Sscottl(define_insn_reservation "ir_sb1_branch" 0
105260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
106260401Sscottl       (eq_attr "type" "branch,jump,call"))
107260401Sscottl  "sb1_ex0")
108260401Sscottl
109260401Sscottl;; ??? This is 1 cycle for ldl/ldr to ldl/ldr when they use the same data
110260401Sscottl;; register as destination.
111260401Sscottl
112260401Sscottl;; ??? SB-1 can co-issue a load with a dependent arith insn if it executes on
113260401Sscottl;; an EX unit.  Can not co-issue if the dependent insn executes on an LS unit.
114260401Sscottl;; SB-1A can always co-issue here.
115260401Sscottl
116260401Sscottl;; A load normally has a latency of zero cycles.  In some cases, dependent
117260401Sscottl;; insns can be issued in the same cycle.  However, a value of 1 gives
118260401Sscottl;; better performance in empirical testing.
119260401Sscottl
;; Insn types "load" and "prefetch".  Either LS pipe may be used, with sb1_ls0
;; listed first.  The two reservations differ only in the cpu they match and
;; in default latency: 1 for sb1, 0 for sb1a.
120260401Sscottl(define_insn_reservation "ir_sb1_load" 1
121260401Sscottl  (and (eq_attr "cpu" "sb1")
122260401Sscottl       (eq_attr "type" "load,prefetch"))
123260401Sscottl  "sb1_ls0 | sb1_ls1")
124260401Sscottl
125260401Sscottl(define_insn_reservation "ir_sb1a_load" 0
126260401Sscottl  (and (eq_attr "cpu" "sb1a")
127260401Sscottl       (eq_attr "type" "load,prefetch"))
128260401Sscottl  "sb1_ls0 | sb1_ls1")
129260401Sscottl
130260401Sscottl;; Can not co-issue fpload with fp exe when in 32-bit mode.
131260401Sscottl
;; Insn type "fpload" on sb1 and sb1a; either LS pipe, sb1_ls0 listed first.
;; The pair is split on TARGET_FLOAT64: nonzero selects the latency-0 form,
;; zero selects the latency-1 "_32bitfp" form.
132260401Sscottl(define_insn_reservation "ir_sb1_fpload" 0
133260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
134260401Sscottl       (and (eq_attr "type" "fpload")
135260401Sscottl	    (ne (symbol_ref "TARGET_FLOAT64")
136260401Sscottl		(const_int 0))))
137260401Sscottl  "sb1_ls0 | sb1_ls1")
138260401Sscottl
139260401Sscottl(define_insn_reservation "ir_sb1_fpload_32bitfp" 1
140260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
141260401Sscottl       (and (eq_attr "type" "fpload")
142260401Sscottl	    (eq (symbol_ref "TARGET_FLOAT64")
143260401Sscottl		(const_int 0))))
144260401Sscottl  "sb1_ls0 | sb1_ls1")
145260401Sscottl
146260401Sscottl;; Indexed loads can only execute on LS1 pipe.
147260401Sscottl
;; Insn type "fpidxload" on sb1 and sb1a; sb1_ls1 is the only unit offered.
;; As with the plain FP loads, TARGET_FLOAT64 nonzero selects latency 0 and
;; TARGET_FLOAT64 zero selects the latency-1 "_32bitfp" form.
148260401Sscottl(define_insn_reservation "ir_sb1_fpidxload" 0
149260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
150260401Sscottl       (and (eq_attr "type" "fpidxload")
151260401Sscottl	    (ne (symbol_ref "TARGET_FLOAT64")
152260401Sscottl		(const_int 0))))
153260401Sscottl  "sb1_ls1")
154260401Sscottl
155260401Sscottl(define_insn_reservation "ir_sb1_fpidxload_32bitfp" 1
156260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
157260401Sscottl       (and (eq_attr "type" "fpidxload")
158260401Sscottl	    (eq (symbol_ref "TARGET_FLOAT64")
159260401Sscottl		(const_int 0))))
160260401Sscottl  "sb1_ls1")
161260401Sscottl
162260401Sscottl;; prefx can only execute on the ls1 pipe.
163260401Sscottl
;; Insn type "prefetchx" on sb1 and sb1a: default latency 0, sb1_ls1 only.
164260401Sscottl(define_insn_reservation "ir_sb1_prefetchx" 0
165260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
166260401Sscottl       (eq_attr "type" "prefetchx"))
167260401Sscottl  "sb1_ls1")
168260401Sscottl
169260401Sscottl;; ??? There is a 4.5 cycle latency if a store is followed by a load, and
170260401Sscottl;; there is a RAW dependency.
171260401Sscottl
;; An integer store claims one LS pipe and one EX pipe in the same cycle ('+').
;; The four alternatives list sb1_ls0 before sb1_ls1 and, for each LS pipe,
;; sb1_ex1 before sb1_ex0.  Default latency is 1.
172260401Sscottl(define_insn_reservation "ir_sb1_store" 1
173260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
174260401Sscottl       (eq_attr "type" "store"))
175260401Sscottl  "sb1_ls0+sb1_ex1 | sb1_ls0+sb1_ex0 | sb1_ls1+sb1_ex1 | sb1_ls1+sb1_ex0")
176260401Sscottl
;; An FP store has the same shape, but pairs the LS pipe with an FP pipe
;; (sb1_fp1 listed before sb1_fp0) instead of an EX pipe.
177260401Sscottl(define_insn_reservation "ir_sb1_fpstore" 1
178260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
179260401Sscottl       (eq_attr "type" "fpstore"))
180260401Sscottl  "sb1_ls0+sb1_fp1 | sb1_ls0+sb1_fp0 | sb1_ls1+sb1_fp1 | sb1_ls1+sb1_fp0")
181260401Sscottl
182260401Sscottl;; Indexed stores can only execute on LS1 pipe.
183260401Sscottl
;; Insn type "fpidxstore": claims sb1_ls1 together with one FP pipe (sb1_fp1
;; listed before sb1_fp0).  There is no sb1_ls0 alternative.  Latency 1.
184260401Sscottl(define_insn_reservation "ir_sb1_fpidxstore" 1
185260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
186260401Sscottl       (eq_attr "type" "fpidxstore"))
187260401Sscottl  "sb1_ls1+sb1_fp1 | sb1_ls1+sb1_fp0")
188260401Sscottl
189260401Sscottl;; Load latencies are 3 cycles for one load to another load or store (address
190260401Sscottl;; only).  This is 0 cycles for one load to a store using it as the data
191260401Sscottl;; written.
192260401Sscottl
193260401Sscottl;; This assumes that if a load is dependent on a previous insn, then it must
194260401Sscottl;; be an address dependence.
195260401Sscottl
;; Load -> load: a 3-cycle latency from any of the six load reservations to a
;; dependent load reservation or to ir_sb1_prefetchx.  No guard function.
196260401Sscottl(define_bypass 3
197260401Sscottl  "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
198260401Sscottl   ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp"
199260401Sscottl  "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
200260401Sscottl   ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
201260401Sscottl
;; Load -> store: the same 3-cycle latency, applied only when the guard
;; mips_store_data_bypass_p holds for the producer/consumer pair.
;; NOTE(review): the guard is defined outside this file.  Going by the comments
;; here it presumably selects the address-dependence case rather than the
;; store-data case -- confirm against its definition.
202260401Sscottl(define_bypass 3
203260401Sscottl  "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
204260401Sscottl   ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp"
205260401Sscottl  "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
206260401Sscottl  "mips_store_data_bypass_p")
207260401Sscottl
208260401Sscottl;; On SB-1, simple alu instructions can execute on the LS1 unit.
209260401Sscottl
210260401Sscottl;; ??? A simple alu insn issued on an LS unit has 0 cycle latency to an EX
211260401Sscottl;; insn, to a store (for data), and to an xfer insn.  It has 1 cycle latency to
212260401Sscottl;; another LS insn (excluding store data).  A simple alu insn issued on an EX
213260401Sscottl;; unit has a latency of 5 cycles when the result goes to a LS unit (excluding
214260401Sscottl;; store data), otherwise a latency of 1 cycle.
215260401Sscottl
216260401Sscottl;; ??? We cannot handle latencies properly for simple alu instructions
217260401Sscottl;; within the DFA pipeline model.  Latencies can be defined only from one
218260401Sscottl;; insn reservation to another.  We can't make them depend on which function
219260401Sscottl;; unit was used.  This isn't a DFA flaw.  There is a conflict here, as we
220260401Sscottl;; need to know the latency before we can determine which unit will be
221260401Sscottl;; available, but we need to know which unit it is issued to before we can
222260401Sscottl;; compute the latency.  Perhaps this can be handled via scheduler hooks.
223260401Sscottl;; This needs to be investigated.
224260401Sscottl
225260401Sscottl;; ??? Optimal scheduling taking the LS units into account seems to require
226260401Sscottl;; a pre-scheduling pass.  We need to determine which instructions feed results
227260401Sscottl;; into store/load addresses, and thus benefit most from being issued to the
228260401Sscottl;; LS unit.  Also, we need to prune the list to ensure we don't overschedule
229260401Sscottl;; insns to the LS unit, and that we don't conflict with insns that need LS1
230260401Sscottl;; such as indexed loads.  We then need to emit nops to ensure that simple
231260401Sscottl;; alu instructions that are not supposed to be scheduled to LS1 don't
232260401Sscottl;; accidentally end up there because LS1 is free when they are issued.  This
233260401Sscottl;; will be a lot of work, and it isn't clear how useful it will be.
234260401Sscottl
235260401Sscottl;; Empirical testing shows that 2 gives the best result.
236260401Sscottl
;; sb1 only (not sb1a): insn types "const" and "arith".  Alternatives are
;; listed as sb1_ls1, then sb1_ex1, then sb1_ex0.  Default latency is 2.
237260401Sscottl(define_insn_reservation "ir_sb1_simple_alu" 2
238260401Sscottl  (and (eq_attr "cpu" "sb1")
239260401Sscottl       (eq_attr "type" "const,arith"))
240260401Sscottl  "sb1_ls1 | sb1_ex1 | sb1_ex0")
241260401Sscottl
242260401Sscottl;; On SB-1A, simple alu instructions can not execute on the LS1 unit, and we
243260401Sscottl;; have none of the above problems.
244260401Sscottl
;; sb1a only: the same insn types ("const", "arith") with no LS alternative;
;; sb1_ex1 is listed before sb1_ex0.  Default latency is 1.
245260401Sscottl(define_insn_reservation "ir_sb1a_simple_alu" 1
246260401Sscottl  (and (eq_attr "cpu" "sb1a")
247260401Sscottl       (eq_attr "type" "const,arith"))
248260401Sscottl  "sb1_ex1 | sb1_ex0")
249260401Sscottl
250260401Sscottl;; ??? condmove also includes some FP instructions that execute on the FP
251260401Sscottl;; units.  This needs to be clarified.
252260401Sscottl
;; Insn types "condmove", "nop" and "shift" on sb1 and sb1a: either EX pipe
;; (sb1_ex1 listed first), default latency 1.
253260401Sscottl(define_insn_reservation "ir_sb1_alu" 1
254260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
255260401Sscottl       (eq_attr "type" "condmove,nop,shift"))
256260401Sscottl  "sb1_ex1 | sb1_ex0")
257260401Sscottl
258260401Sscottl;; These are type arith/darith that only execute on the EX0 unit.
259260401Sscottl
;; Insn types "slt", "clz" and "trap" on sb1 and sb1a: sb1_ex0 only, default
;; latency 1.
;; NOTE(review): the condition matches types slt/clz/trap, whereas the
;; existing comment speaks of "arith/darith" -- confirm the comment is current.
260260401Sscottl(define_insn_reservation "ir_sb1_alu_0" 1
261260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
262260401Sscottl       (eq_attr "type" "slt,clz,trap"))
263260401Sscottl  "sb1_ex0")
264260401Sscottl
265260401Sscottl;; An alu insn issued on an EX unit has a latency of 5 cycles when the
266260401Sscottl;; result goes to a LS unit (excluding store data).
267260401Sscottl
268260401Sscottl;; This assumes that if a load is dependent on a previous insn, then it must
269260401Sscottl;; be an address dependence.
270260401Sscottl
;; ALU -> load: 5-cycle latency.  The producer list holds the EX-only ALU
;; reservations plus ir_sb1_mfhi/ir_sb1_mflo; ir_sb1_simple_alu (the sb1 form
;; that may issue on sb1_ls1) is not a producer here.
271260401Sscottl(define_bypass 5
272260401Sscottl  "ir_sb1a_simple_alu,ir_sb1_alu,ir_sb1_alu_0,ir_sb1_mfhi,ir_sb1_mflo"
273260401Sscottl  "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
274260401Sscottl   ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
275260401Sscottl
;; ALU -> store: same producers and latency, applied only when the guard
;; mips_store_data_bypass_p holds.
;; NOTE(review): the guard is defined outside this file; presumably it selects
;; the non-store-data (address) dependence -- confirm.
276260401Sscottl(define_bypass 5
277260401Sscottl  "ir_sb1a_simple_alu,ir_sb1_alu,ir_sb1_alu_0,ir_sb1_mfhi,ir_sb1_mflo"
278260401Sscottl  "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
279260401Sscottl  "mips_store_data_bypass_p")
280260401Sscottl
281260401Sscottl;; mf{hi,lo} is 1 cycle.  
282260401Sscottl
;; Insn type "mfhilo" is split on whether operand 1 matches lo_operand:
;; ir_sb1_mflo when it does, ir_sb1_mfhi when it does not.  Both use sb1_ex1
;; with default latency 1.  The separate names allow a bypass to target only
;; one of them (see the ir_sb1_muldi -> ir_sb1_mflo bypass).
283260401Sscottl(define_insn_reservation "ir_sb1_mfhi" 1
284260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
285260401Sscottl       (and (eq_attr "type" "mfhilo")
286260401Sscottl	    (not (match_operand 1 "lo_operand"))))
287260401Sscottl  "sb1_ex1")
288260401Sscottl
289260401Sscottl(define_insn_reservation "ir_sb1_mflo" 1
290260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
291260401Sscottl       (and (eq_attr "type" "mfhilo")
292260401Sscottl	    (match_operand 1 "lo_operand")))
293260401Sscottl  "sb1_ex1")
294260401Sscottl
295260401Sscottl;; mt{hi,lo} to mul/div is 4 cycles.
296260401Sscottl
;; Insn type "mthilo" on sb1 and sb1a: sb1_ex1, default latency 4.  The bypass
;; that follows lowers the latency to 3 when the consumer is ir_sb1_mfhi or
;; ir_sb1_mflo.
297260401Sscottl(define_insn_reservation "ir_sb1_mthilo" 4
298260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
299260401Sscottl       (eq_attr "type" "mthilo"))
300260401Sscottl  "sb1_ex1")
301260401Sscottl
302260401Sscottl;; mt{hi,lo} to mf{hi,lo} is 3 cycles.
303260401Sscottl
304260401Sscottl(define_bypass 3 "ir_sb1_mthilo" "ir_sb1_mfhi,ir_sb1_mflo")
305260401Sscottl
306260401Sscottl;; multiply latency to an EX operation is 3 cycles.
307260401Sscottl
308260401Sscottl;; ??? Should check whether we need to make multiply conflict with moves
309260401Sscottl;; to/from hilo registers.
310260401Sscottl
;; Insn types "imul", "imul3" and "imadd" with mode SI: claims sb1_ex1 and
;; sb1_mul together for the issue cycle.  Default latency is 3.
311260401Sscottl(define_insn_reservation "ir_sb1_mulsi" 3
312260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
313260401Sscottl       (and (eq_attr "type" "imul,imul3,imadd")
314260401Sscottl	    (eq_attr "mode" "SI")))
315260401Sscottl  "sb1_ex1+sb1_mul")
316260401Sscottl
317260401Sscottl;; muldi to mfhi is 4 cycles.
318260401Sscottl;; Blocks any other multiply insn issue for 1 cycle.
319260401Sscottl
;; Insn types "imul" and "imul3" with mode DI: sb1_ex1+sb1_mul in the issue
;; cycle, then sb1_mul alone for one further cycle.  Default latency is 4;
;; the bypass that follows lowers it to 3 when the consumer is ir_sb1_mflo.
320260401Sscottl(define_insn_reservation "ir_sb1_muldi" 4
321260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
322260401Sscottl       (and (eq_attr "type" "imul,imul3")
323260401Sscottl	    (eq_attr "mode" "DI")))
324260401Sscottl  "sb1_ex1+sb1_mul, sb1_mul")
325260401Sscottl
326260401Sscottl;; muldi to mflo is 3 cycles.
327260401Sscottl
328260401Sscottl(define_bypass 3 "ir_sb1_muldi" "ir_sb1_mflo")
329260401Sscottl
330260401Sscottl;;  mul latency is 7 cycles if the result is used by any LS insn.
331260401Sscottl
332260401Sscottl;; This assumes that if a load is dependent on a previous insn, then it must
333260401Sscottl;; be an address dependence.
334260401Sscottl
;; Multiply -> load: 7-cycle latency from ir_sb1_mulsi or ir_sb1_muldi to any
;; load reservation or ir_sb1_prefetchx.  No guard function.
335260401Sscottl(define_bypass 7
336260401Sscottl  "ir_sb1_mulsi,ir_sb1_muldi"
337260401Sscottl  "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
338260401Sscottl   ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
339260401Sscottl
;; Multiply -> store: same latency, applied only when the guard
;; mips_store_data_bypass_p holds.
;; NOTE(review): the guard is defined outside this file; presumably it selects
;; the non-store-data (address) dependence -- confirm.
340260401Sscottl(define_bypass 7
341260401Sscottl  "ir_sb1_mulsi,ir_sb1_muldi"
342260401Sscottl  "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
343260401Sscottl  "mips_store_data_bypass_p")
344260401Sscottl
345260401Sscottl;; The divide unit is not pipelined.  Divide busy is asserted in the 4th
346260401Sscottl;; cycle, and then deasserted on the latency cycle.  So only one divide at
347260401Sscottl;; a time, but the first/last 4 cycles can overlap.
348260401Sscottl
349260401Sscottl;; ??? All divides block writes to hi/lo regs.  hi/lo regs are written 4 cycles
350260401Sscottl;; after the latency cycle for divides (e.g. 40/72).  dmult writes lo in
351260401Sscottl;; cycle 7, and hi in cycle 8.  All other insns write hi/lo regs in cycle 7.
352260401Sscottl;; Default for output dependencies is the difference in latencies, which is
353260401Sscottl;; only 1 cycle off here, e.g. div to mtlo stalls for 32 cycles, but should
354260401Sscottl;; stall for 33 cycles.  This does not seem significant enough to worry about.
355260401Sscottl
;; Insn type "idiv", split by mode.  Reservation shape: sb1_ex1 in the issue
;; cycle, three cycles holding no unit, then sb1_div for 32 (SI) or 64 (DI)
;; consecutive cycles.  Default latency equals that total: 1 + 3 + 32 = 36 for
;; SI and 1 + 3 + 64 = 68 for DI.
356260401Sscottl(define_insn_reservation "ir_sb1_divsi" 36
357260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
358260401Sscottl       (and (eq_attr "type" "idiv")
359260401Sscottl	    (eq_attr "mode" "SI")))
360260401Sscottl  "sb1_ex1, nothing*3, sb1_div*32")
361260401Sscottl
362260401Sscottl(define_insn_reservation "ir_sb1_divdi" 68
363260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
364260401Sscottl       (and (eq_attr "type" "idiv")
365260401Sscottl	    (eq_attr "mode" "DI")))
366260401Sscottl  "sb1_ex1, nothing*3, sb1_div*64")
367260401Sscottl
;; Insn types fmove, fadd, fmul, fabs, fneg, fcvt, frdiv1 and frsqrt1: default
;; latency 4.  When sb1_fp_pipes is "two" either FP pipe may be used (sb1_fp1
;; listed first); when it is "one" only sb1_fp1 is offered.
368260401Sscottl(define_insn_reservation "ir_sb1_fpu_2pipes" 4
369260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
370260401Sscottl       (and (eq_attr "type" "fmove,fadd,fmul,fabs,fneg,fcvt,frdiv1,frsqrt1")
371260401Sscottl	    (eq_attr "sb1_fp_pipes" "two")))
372260401Sscottl  "sb1_fp1 | sb1_fp0")
373260401Sscottl
374260401Sscottl(define_insn_reservation "ir_sb1_fpu_1pipe" 4
375260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
376260401Sscottl       (and (eq_attr "type" "fmove,fadd,fmul,fabs,fneg,fcvt,frdiv1,frsqrt1")
377260401Sscottl	    (eq_attr "sb1_fp_pipes" "one")))
378260401Sscottl  "sb1_fp1")
379260401Sscottl
;; Insn types frdiv2 and frsqrt2: default latency 8.  Unit choice follows
;; sb1_fp_pipes: "two" offers sb1_fp1 then sb1_fp0, "one" offers sb1_fp1 only.
380260401Sscottl(define_insn_reservation "ir_sb1_fpu_step2_2pipes" 8
381260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
382260401Sscottl       (and (eq_attr "type" "frdiv2,frsqrt2")
383260401Sscottl	    (eq_attr "sb1_fp_pipes" "two")))
384260401Sscottl  "sb1_fp1 | sb1_fp0")
385260401Sscottl
386260401Sscottl(define_insn_reservation "ir_sb1_fpu_step2_1pipe" 8
387260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
388260401Sscottl       (and (eq_attr "type" "frdiv2,frsqrt2")
389260401Sscottl	    (eq_attr "sb1_fp_pipes" "one")))
390260401Sscottl  "sb1_fp1")
391260401Sscottl
392260401Sscottl;; ??? madd/msub 4-cycle latency to itself (same fr?), but 8 cycle latency
393260401Sscottl;; otherwise.
394260401Sscottl
395260401Sscottl;; ??? Blocks issue of another non-madd/msub after 4 cycles.
396260401Sscottl
;; Insn type "fmadd": default latency 8 in both forms.  Unit choice follows
;; sb1_fp_pipes: "two" offers sb1_fp1 then sb1_fp0, "one" offers sb1_fp1 only.
;; The reservation holds the pipe for the issue cycle only.
397260401Sscottl(define_insn_reservation "ir_sb1_fmadd_2pipes" 8
398260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
399260401Sscottl       (and (eq_attr "type" "fmadd")
400260401Sscottl	    (eq_attr "sb1_fp_pipes" "two")))
401260401Sscottl  "sb1_fp1 | sb1_fp0")
402260401Sscottl
403260401Sscottl(define_insn_reservation "ir_sb1_fmadd_1pipe" 8
404260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
405260401Sscottl       (and (eq_attr "type" "fmadd")
406260401Sscottl	    (eq_attr "sb1_fp_pipes" "one")))
407260401Sscottl  "sb1_fp1")
408260401Sscottl
;; Insn type "fcmp" on sb1 and sb1a: default latency 4, sb1_fp1 only.  Unlike
;; the other FP reservations, the condition does not test sb1_fp_pipes.
409260401Sscottl(define_insn_reservation "ir_sb1_fcmp" 4
410260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
411260401Sscottl       (eq_attr "type" "fcmp"))
412260401Sscottl  "sb1_fp1")
413260401Sscottl
414260401Sscottl;; mtc1 latency 5 cycles.
415260401Sscottl
;; Insn type "xfer" whose operand 0 matches fpr_operand: reserved on sb1_fp0
;; with default latency 5.
416260401Sscottl(define_insn_reservation "ir_sb1_mtxfer" 5
417260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
418260401Sscottl       (and (eq_attr "type" "xfer")
419260401Sscottl	    (match_operand 0 "fpr_operand")))
420260401Sscottl  "sb1_fp0")
421260401Sscottl
422260401Sscottl;; mfc1 latency 1 cycle.  
423260401Sscottl
;; Insn type "xfer" whose operand 0 does NOT match fpr_operand: reserved on
;; sb1_fp0 with default latency 1.
424260401Sscottl(define_insn_reservation "ir_sb1_mfxfer" 1
425260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
426260401Sscottl       (and (eq_attr "type" "xfer")
427260401Sscottl	    (not (match_operand 0 "fpr_operand"))))
428260401Sscottl  "sb1_fp0")
429260401Sscottl
430260401Sscottl;; ??? Can deliver at most 1 result per every 6 cycles because of issue
431260401Sscottl;; restrictions.
432260401Sscottl
;; Insn type "fdiv", mode SF: default latency 24.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
433260401Sscottl(define_insn_reservation "ir_sb1_divsf_2pipes" 24
434260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
435260401Sscottl       (and (eq_attr "type" "fdiv")
436260401Sscottl	    (and (eq_attr "mode" "SF")
437260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
438260401Sscottl  "sb1_fp1 | sb1_fp0")
439260401Sscottl
440260401Sscottl(define_insn_reservation "ir_sb1_divsf_1pipe" 24
441260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
442260401Sscottl       (and (eq_attr "type" "fdiv")
443260401Sscottl	    (and (eq_attr "mode" "SF")
444260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
445260401Sscottl  "sb1_fp1")
446260401Sscottl
447260401Sscottl;; ??? Can deliver at most 1 result per every 8 cycles because of issue
448260401Sscottl;; restrictions.
449260401Sscottl
;; Insn type "fdiv", mode DF: default latency 32.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
450260401Sscottl(define_insn_reservation "ir_sb1_divdf_2pipes" 32
451260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
452260401Sscottl       (and (eq_attr "type" "fdiv")
453260401Sscottl	    (and (eq_attr "mode" "DF")
454260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
455260401Sscottl  "sb1_fp1 | sb1_fp0")
456260401Sscottl
457260401Sscottl(define_insn_reservation "ir_sb1_divdf_1pipe" 32
458260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
459260401Sscottl       (and (eq_attr "type" "fdiv")
460260401Sscottl	    (and (eq_attr "mode" "DF")
461260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
462260401Sscottl  "sb1_fp1")
463260401Sscottl
464260401Sscottl;; ??? Can deliver at most 1 result per every 3 cycles because of issue
465260401Sscottl;; restrictions.
466260401Sscottl
;; Insn type "frdiv", mode SF: default latency 12.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
467260401Sscottl(define_insn_reservation "ir_sb1_recipsf_2pipes" 12
468260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
469260401Sscottl       (and (eq_attr "type" "frdiv")
470260401Sscottl	    (and (eq_attr "mode" "SF")
471260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
472260401Sscottl  "sb1_fp1 | sb1_fp0")
473260401Sscottl
474260401Sscottl(define_insn_reservation "ir_sb1_recipsf_1pipe" 12
475260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
476260401Sscottl       (and (eq_attr "type" "frdiv")
477260401Sscottl	    (and (eq_attr "mode" "SF")
478260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
479260401Sscottl  "sb1_fp1")
480260401Sscottl
481260401Sscottl;; ??? Can deliver at most 1 result per every 5 cycles because of issue
482260401Sscottl;; restrictions.
483260401Sscottl
;; Insn type "frdiv", mode DF: default latency 20.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
484260401Sscottl(define_insn_reservation "ir_sb1_recipdf_2pipes" 20
485260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
486260401Sscottl       (and (eq_attr "type" "frdiv")
487260401Sscottl	    (and (eq_attr "mode" "DF")
488260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
489260401Sscottl  "sb1_fp1 | sb1_fp0")
490260401Sscottl
491260401Sscottl(define_insn_reservation "ir_sb1_recipdf_1pipe" 20
492260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
493260401Sscottl       (and (eq_attr "type" "frdiv")
494260401Sscottl	    (and (eq_attr "mode" "DF")
495260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
496260401Sscottl  "sb1_fp1")
497260401Sscottl
498260401Sscottl;; ??? Can deliver at most 1 result per every 7 cycles because of issue
499260401Sscottl;; restrictions.
500260401Sscottl
;; Insn type "fsqrt", mode SF: default latency 28.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
501260401Sscottl(define_insn_reservation "ir_sb1_sqrtsf_2pipes" 28
502260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
503260401Sscottl       (and (eq_attr "type" "fsqrt")
504260401Sscottl	    (and (eq_attr "mode" "SF")
505260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
506260401Sscottl  "sb1_fp1 | sb1_fp0")
507260401Sscottl
508260401Sscottl(define_insn_reservation "ir_sb1_sqrtsf_1pipe" 28
509260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
510260401Sscottl       (and (eq_attr "type" "fsqrt")
511260401Sscottl	    (and (eq_attr "mode" "SF")
512260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
513260401Sscottl  "sb1_fp1")
514260401Sscottl
515260401Sscottl;; ??? Can deliver at most 1 result per every 10 cycles because of issue
516260401Sscottl;; restrictions.
517260401Sscottl
;; Insn type "fsqrt", mode DF: default latency 40.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
518260401Sscottl(define_insn_reservation "ir_sb1_sqrtdf_2pipes" 40
519260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
520260401Sscottl       (and (eq_attr "type" "fsqrt")
521260401Sscottl	    (and (eq_attr "mode" "DF")
522260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
523260401Sscottl  "sb1_fp1 | sb1_fp0")
524260401Sscottl
525260401Sscottl(define_insn_reservation "ir_sb1_sqrtdf_1pipe" 40
526260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
527260401Sscottl       (and (eq_attr "type" "fsqrt")
528260401Sscottl	    (and (eq_attr "mode" "DF")
529260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
530260401Sscottl  "sb1_fp1")
531260401Sscottl
532260401Sscottl;; ??? Can deliver at most 1 result per every 4 cycles because of issue
533260401Sscottl;; restrictions.
534260401Sscottl
;; Insn type "frsqrt", mode SF: default latency 16.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
535260401Sscottl(define_insn_reservation "ir_sb1_rsqrtsf_2pipes" 16
536260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
537260401Sscottl       (and (eq_attr "type" "frsqrt")
538260401Sscottl	    (and (eq_attr "mode" "SF")
539260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
540260401Sscottl  "sb1_fp1 | sb1_fp0")
541260401Sscottl
542260401Sscottl(define_insn_reservation "ir_sb1_rsqrtsf_1pipe" 16
543260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
544260401Sscottl       (and (eq_attr "type" "frsqrt")
545260401Sscottl	    (and (eq_attr "mode" "SF")
546260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
547260401Sscottl  "sb1_fp1")
548260401Sscottl
549260401Sscottl;; ??? Can deliver at most 1 result per every 7 cycles because of issue
550260401Sscottl;; restrictions.
551260401Sscottl
;; Insn type "frsqrt", mode DF: default latency 28.  The reservation holds one
;; FP pipe for the issue cycle only.  sb1_fp_pipes "two" offers sb1_fp1 then
;; sb1_fp0; "one" offers sb1_fp1 only.
552260401Sscottl(define_insn_reservation "ir_sb1_rsqrtdf_2pipes" 28
553260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
554260401Sscottl       (and (eq_attr "type" "frsqrt")
555260401Sscottl	    (and (eq_attr "mode" "DF")
556260401Sscottl		 (eq_attr "sb1_fp_pipes" "two"))))
557260401Sscottl  "sb1_fp1 | sb1_fp0")
558260401Sscottl
559260401Sscottl(define_insn_reservation "ir_sb1_rsqrtdf_1pipe" 28
560260401Sscottl  (and (eq_attr "cpu" "sb1,sb1a")
561260401Sscottl       (and (eq_attr "type" "frsqrt")
562260401Sscottl	    (and (eq_attr "mode" "DF")
563260401Sscottl		 (eq_attr "sb1_fp_pipes" "one"))))
564260401Sscottl  "sb1_fp1")
565260401Sscottl