1169689Skan;; ARM 926EJ-S Pipeline Description
2169689Skan;; Copyright (C) 2003 Free Software Foundation, Inc.
3169689Skan;; Written by CodeSourcery, LLC.
4169689Skan;;
5169689Skan;; This file is part of GCC.
6169689Skan;;
7169689Skan;; GCC is free software; you can redistribute it and/or modify it
8169689Skan;; under the terms of the GNU General Public License as published by
9169689Skan;; the Free Software Foundation; either version 2, or (at your option)
10169689Skan;; any later version.
11169689Skan;;
12169689Skan;; GCC is distributed in the hope that it will be useful, but
13169689Skan;; WITHOUT ANY WARRANTY; without even the implied warranty of
14169689Skan;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15169689Skan;; General Public License for more details.
16169689Skan;;
17169689Skan;; You should have received a copy of the GNU General Public License
18169689Skan;; along with GCC; see the file COPYING.  If not, write to the Free
19169689Skan;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan;; 02110-1301, USA.
21169689Skan
22169689Skan;; These descriptions are based on the information contained in the
23169689Skan;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
24169689Skan;; Limited.
25169689Skan;;
26169689Skan
27169689Skan;; This automaton provides a pipeline description for the ARM
28169689Skan;; 926EJ-S core.
29169689Skan;;
30169689Skan;; The model given here assumes that the condition for all conditional
31169689Skan;; instructions is "true", i.e., that all of the instructions are
32169689Skan;; actually executed.
33169689Skan
34169689Skan(define_automaton "arm926ejs") ; automaton that the e/m/w units below are declared in
35169689Skan
36169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37169689Skan;; Pipelines
38169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39169689Skan
40169689Skan;; There is a single pipeline
41169689Skan;;
42169689Skan;;   The ALU pipeline has fetch, decode, execute, memory, and
43169689Skan;;   write stages. We only need to model the execute, memory and write
44169689Skan;;   stages.
45169689Skan
46169689Skan(define_cpu_unit "e,m,w" "arm926ejs") ; e = execute, m = memory, w = write stage
47169689Skan
48169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49169689Skan;; ALU Instructions
50169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
51169689Skan
52169689Skan;; ALU instructions require three cycles to execute, and use the ALU
53169689Skan;; pipeline in each of the three stages.  The results are available
54169689Skan;; after the execute stage has finished.
55169689Skan;;
56169689Skan;; If the destination register is the PC, the pipelines are stalled
57169689Skan;; for several cycles.  That case is not modeled here.
58169689Skan
59169689Skan;; ALU operations of type "alu" and "alu_shift" (shift-by-register is separate)
60169689Skan(define_insn_reservation "9_alu_op" 1 ; default latency 1
61169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
62169689Skan      (eq_attr "type" "alu,alu_shift"))
63169689Skan "e,m,w") ; one cycle in e, then one in m, then one in w
64169689Skan
65169689Skan;; ALU operations with a shift-by-register operand.
66169689Skan;; These really stall in the decoder, in order to read
67169689Skan;; the shift value in a second cycle. Pretend we take two cycles in
68169689Skan;; the execute stage.
69169689Skan(define_insn_reservation "9_alu_shift_reg_op" 2 ; default latency 2
70169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
71169689Skan      (eq_attr "type" "alu_shift_reg"))
72169689Skan "e*2,m,w") ; e for two consecutive cycles (the modeled stall), then m, then w
73169689Skan
74169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
75169689Skan;; Multiplication Instructions
76169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
77169689Skan
78169689Skan;; Multiplication instructions loop in the execute stage until the
79169689Skan;; instruction has been passed through the multiplier array enough
80169689Skan;; times. Multiply operations occur in both the execute and memory
81169689Skan;; stages of the pipeline.
82169689Skan
83169689Skan(define_insn_reservation "9_mult1" 3 ; default latency 3
84169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
85169689Skan      (eq_attr "insn" "mul,mla")) ; smlalxy is handled by 9_mult6 (same latency and units)
86169689Skan "e*2,m,w") ; e for two consecutive cycles, then m, then w
87169689Skan
88169689Skan(define_insn_reservation "9_mult2" 4 ; default latency 4
89169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
90169689Skan      (eq_attr "insn" "muls,mlas"))
91169689Skan "e*3,m,w") ; e for three consecutive cycles, then m, then w
92169689Skan
93169689Skan(define_insn_reservation "9_mult3" 4 ; default latency 4
94169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
95169689Skan      (eq_attr "insn" "umull,umlal,smull,smlal"))
96169689Skan "e*3,m,w") ; e for three consecutive cycles, then m, then w
97169689Skan
98169689Skan(define_insn_reservation "9_mult4" 5 ; default latency 5
99169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
100169689Skan      (eq_attr "insn" "umulls,umlals,smulls,smlals"))
101169689Skan "e*4,m,w") ; e for four consecutive cycles, then m, then w
102169689Skan
103169689Skan(define_insn_reservation "9_mult5" 2 ; default latency 2
104169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
105169689Skan      (eq_attr "insn" "smulxy,smlaxy,smlawx"))
106169689Skan "e,m,w") ; one cycle in e, then one in m, then one in w
107169689Skan
108169689Skan(define_insn_reservation "9_mult6" 3 ; default latency 3
109169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
110169689Skan      (eq_attr "insn" "smlalxy"))
111169689Skan "e*2,m,w") ; e for two consecutive cycles, then m, then w
112169689Skan
113169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
114169689Skan;; Load/Store Instructions
115169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
116169689Skan
117169689Skan;; The models for load/store instructions do not accurately describe
118169689Skan;; the difference between operations with and without a base register
119169689Skan;; writeback (such as "ldm!").  These models assume that all memory
120169689Skan;; references hit in dcache.
121169689Skan
122169689Skan;; Loads with a shifted offset take 3 cycles; model all load1/load_byte that way:
123169689Skan;; (a) they are probably the most common, (b) pessimism leads to fewer stalls.
124169689Skan(define_insn_reservation "9_load1_op" 3 ; default latency 3
125169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
126169689Skan      (eq_attr "type" "load1,load_byte"))
127169689Skan "e*2,m,w") ; e for two consecutive cycles, then m, then w
128169689Skan
129169689Skan(define_insn_reservation "9_store1_op" 0 ; default latency 0
130169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
131169689Skan      (eq_attr "type" "store1"))
132169689Skan "e,m,w") ; one cycle in e, then one in m, then one in w
133169689Skan
134169689Skan;; multiple word loads and stores
135169689Skan(define_insn_reservation "9_load2_op" 3 ; default latency 3
136169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
137169689Skan      (eq_attr "type" "load2"))
138169689Skan "e,m*2,w") ; one cycle in e, two consecutive cycles in m, then w
139169689Skan
140169689Skan(define_insn_reservation "9_load3_op" 4 ; default latency 4
141169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
142169689Skan      (eq_attr "type" "load3"))
143169689Skan "e,m*3,w") ; one cycle in e, three consecutive cycles in m, then w
144169689Skan
145169689Skan(define_insn_reservation "9_load4_op" 5 ; default latency 5
146169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
147169689Skan      (eq_attr "type" "load4"))
148169689Skan "e,m*4,w") ; one cycle in e, four consecutive cycles in m, then w
149169689Skan
150169689Skan(define_insn_reservation "9_store2_op" 0 ; default latency 0
151169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
152169689Skan      (eq_attr "type" "store2"))
153169689Skan "e,m*2,w") ; one cycle in e, two consecutive cycles in m, then w
154169689Skan
155169689Skan(define_insn_reservation "9_store3_op" 0 ; default latency 0
156169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
157169689Skan      (eq_attr "type" "store3"))
158169689Skan "e,m*3,w") ; one cycle in e, three consecutive cycles in m, then w
159169689Skan
160169689Skan(define_insn_reservation "9_store4_op" 0 ; default latency 0
161169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
162169689Skan      (eq_attr "type" "store4"))
163169689Skan "e,m*4,w") ; one cycle in e, four consecutive cycles in m, then w
164169689Skan
165169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166169689Skan;; Branch and Call Instructions
167169689Skan;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
168169689Skan
169169689Skan;; Branch instructions are difficult to model accurately.  The ARM
170169689Skan;; core can predict most branches.  If the branch is predicted
171169689Skan;; correctly, and predicted early enough, the branch can be completely
172169689Skan;; eliminated from the instruction stream.  Some branches can
173169689Skan;; therefore appear to require zero cycles to execute.  We assume that
174169689Skan;; all branches are predicted correctly, and that the latency is
175169689Skan;; therefore the minimum value.
176169689Skan
177169689Skan(define_insn_reservation "9_branch_op" 0 ; default latency 0: branches assumed correctly predicted
178169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
179169689Skan      (eq_attr "type" "branch"))
180169689Skan "nothing") ; reserves none of the e/m/w units
181169689Skan
182169689Skan;; The latency for a call is not predictable.  Therefore, we use 32 as
183169689Skan;; roughly equivalent to positive infinity.
184169689Skan
185169689Skan(define_insn_reservation "9_call_op" 32 ; default latency 32, a stand-in for an unpredictable latency
186169689Skan (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs
187169689Skan      (eq_attr "type" "call"))
188169689Skan "nothing") ; reserves none of the e/m/w units
189