arm926ejs.md revision 169690
1272106Sgavin;; ARM 926EJ-S Pipeline Description
2272106Sgavin;; Copyright (C) 2003 Free Software Foundation, Inc.
3272106Sgavin;; Written by CodeSourcery, LLC.
4272106Sgavin;;
5272106Sgavin;; This file is part of GCC.
6272106Sgavin;;
7272106Sgavin;; GCC is free software; you can redistribute it and/or modify it
8272106Sgavin;; under the terms of the GNU General Public License as published by
9272106Sgavin;; the Free Software Foundation; either version 2, or (at your option)
10272106Sgavin;; any later version.
11272106Sgavin;;
12272106Sgavin;; GCC is distributed in the hope that it will be useful, but
13272106Sgavin;; WITHOUT ANY WARRANTY; without even the implied warranty of
14272106Sgavin;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15272106Sgavin;; General Public License for more details.
16272106Sgavin;;
17272106Sgavin;; You should have received a copy of the GNU General Public License
18272106Sgavin;; along with GCC; see the file COPYING.  If not, write to the Free
19272106Sgavin;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20272106Sgavin;; 02110-1301, USA.  */
21272106Sgavin
22272106Sgavin;; These descriptions are based on the information contained in the
23272106Sgavin;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
24272106Sgavin;; Limited.
25272106Sgavin;;
26272106Sgavin
27272106Sgavin;; This automaton provides a pipeline description for the ARM
28272106Sgavin;; 926EJ-S core.
29272106Sgavin;;
30277755Strasz;; The model given here assumes that the condition for all conditional
31272106Sgavin;; instructions is "true", i.e., that all of the instructions are
32272106Sgavin;; actually executed.
33272106Sgavin
;; The single DFA automaton to which the ARM926EJ-S pipeline units
;; are assigned.
(define_automaton "arm926ejs")
35277755Strasz
36272106Sgavin;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37272106Sgavin;; Pipelines
38272106Sgavin;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39272106Sgavin
40272106Sgavin;; There is a single pipeline
41272106Sgavin;;
42272106Sgavin;;   The ALU pipeline has fetch, decode, execute, memory, and
43272106Sgavin;;   write stages. We only need to model the execute, memory and write
44272106Sgavin;;   stages.
45272106Sgavin
;; Functional units of the "arm926ejs" automaton, one per modelled
;; pipeline stage: e (execute), m (memory) and w (write).
(define_cpu_unit "e,m,w" "arm926ejs")
47272106Sgavin
48272106Sgavin;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49272106Sgavin;; ALU Instructions
50272106Sgavin;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
51272106Sgavin
52272106Sgavin;; ALU instructions require three cycles to execute, and use the ALU
53272106Sgavin;; pipeline in each of the three stages.  The results are available
54272106Sgavin;; after the execute stage stage has finished.
55272106Sgavin;;
56272106Sgavin;; If the destination register is the PC, the pipelines are stalled
57272106Sgavin;; for several cycles.  That case is not modeled here.
58272106Sgavin
;; ALU operations without a shift-by-register operand (insn types
;; "alu" and "alu_shift").  Default latency is 1 cycle; the
;; instruction occupies e, m and w on three successive cycles.
(define_insn_reservation "9_alu_op" 1
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "alu,alu_shift"))
 "e,m,w")
64277755Strasz
;; ALU operations with a shift-by-register operand.
;; These really stall in the decoder, in order to read the shift
;; value in a second cycle.  The decoder is not one of the modelled
;; units, so pretend the instruction takes two cycles in the execute
;; stage instead.  Default latency is 2 cycles.
(define_insn_reservation "9_alu_shift_reg_op" 2
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "alu_shift_reg"))
 "e*2,m,w")
73272106Sgavin
74277755Strasz;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
75272106Sgavin;; Multiplication Instructions
76272106Sgavin;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
77272106Sgavin
78272106Sgavin;; Multiplication instructions loop in the execute stage until the
79272106Sgavin;; instruction has been passed through the multiplier array enough
80272106Sgavin;; times. Multiply operations occur in both the execute and memory
81272106Sgavin;; stages of the pipeline
82272106Sgavin
;; mul and mla: e is held for two cycles, then m and w; default
;; latency is 3 cycles.  "smlalxy" is intentionally not listed here:
;; it has its own reservation, "9_mult6", with the same latency and
;; unit usage, so listing it in both places would be redundant.
(define_insn_reservation "9_mult1" 3
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "mul,mla"))
 "e*2,m,w")
87272106Sgavin
;; muls and mlas: e is held for three cycles, then m and w; default
;; latency is 4 cycles.
(define_insn_reservation "9_mult2" 4
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "muls,mlas"))
 "e*3,m,w")
92272106Sgavin
;; umull, umlal, smull and smlal: e is held for three cycles, then m
;; and w; default latency is 4 cycles.
(define_insn_reservation "9_mult3" 4
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "umull,umlal,smull,smlal"))
 "e*3,m,w")
97277755Strasz
;; umulls, umlals, smulls and smlals: e is held for four cycles, then
;; m and w; default latency is 5 cycles.
(define_insn_reservation "9_mult4" 5
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "umulls,umlals,smulls,smlals"))
 "e*4,m,w")
102277755Strasz
;; smulxy, smlaxy and smlawx: a single cycle in each of e, m and w;
;; default latency is 2 cycles.
(define_insn_reservation "9_mult5" 2
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "smulxy,smlaxy,smlawx"))
 "e,m,w")
107
;; smlalxy: e is held for two cycles, then m and w; default latency
;; is 3 cycles.
(define_insn_reservation "9_mult6" 3
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "insn" "smlalxy"))
 "e*2,m,w")
112
113;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
114;; Load/Store Instructions
115;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
116
117;; The models for load/store instructions do not accurately describe
118;; the difference between operations with a base register writeback
119;; (such as "ldm!").  These models assume that all memory references
120;; hit in dcache.
121
;; Loads of type "load1" and "load_byte".
;; Loads with a shifted offset take 3 cycles.  All of these loads are
;; modelled that way because (a) such loads are probably the most
;; common and (b) the pessimistic assumption will lead to fewer
;; stalls.
(define_insn_reservation "9_load1_op" 3
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "load1,load_byte"))
 "e*2,m,w")
128
;; Stores of type "store1": a single cycle in each of e, m and w;
;; default latency is 0.
(define_insn_reservation "9_store1_op" 0
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "store1"))
 "e,m,w")
133
;; Multiple word loads and stores.

;; load2: one cycle in e, two cycles in m, then w; default latency is
;; 3 cycles.
(define_insn_reservation "9_load2_op" 3
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "load2"))
 "e,m*2,w")
139
;; load3: one cycle in e, three cycles in m, then w; default latency
;; is 4 cycles.
(define_insn_reservation "9_load3_op" 4
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "load3"))
 "e,m*3,w")
144
;; load4: one cycle in e, four cycles in m, then w; default latency
;; is 5 cycles.
(define_insn_reservation "9_load4_op" 5
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "load4"))
 "e,m*4,w")
149
;; store2: one cycle in e, two cycles in m, then w; default latency
;; is 0.
(define_insn_reservation "9_store2_op" 0
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "store2"))
 "e,m*2,w")
154
;; store3: one cycle in e, three cycles in m, then w; default latency
;; is 0.
(define_insn_reservation "9_store3_op" 0
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "store3"))
 "e,m*3,w")
159
;; store4: one cycle in e, four cycles in m, then w; default latency
;; is 0.
(define_insn_reservation "9_store4_op" 0
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "store4"))
 "e,m*4,w")
164
165;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166;; Branch and Call Instructions
167;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
168
169;; Branch instructions are difficult to model accurately.  The ARM
170;; core can predict most branches.  If the branch is predicted
171;; correctly, and predicted early enough, the branch can be completely
172;; eliminated from the instruction stream.  Some branches can
173;; therefore appear to require zero cycles to execute.  We assume that
174;; all branches are predicted correctly, and that the latency is
175;; therefore the minimum value.
176
;; Branches are assumed to be predicted correctly: they reserve no
;; pipeline unit and have a default latency of 0.
(define_insn_reservation "9_branch_op" 0
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "branch"))
 "nothing")
181
182;; The latency for a call is not predictable.  Therefore, we use 32 as
183;; roughly equivalent to positive infinity.
184
;; Calls reserve no pipeline unit.  The default latency of 32 is a
;; stand-in for "unpredictable", not a measured value.
(define_insn_reservation "9_call_op" 32
 (and (eq_attr "tune" "arm926ejs")
      (eq_attr "type" "call"))
 "nothing")
189