1;; ARM 926EJ-S Pipeline Description
2;; Copyright (C) 2003 Free Software Foundation, Inc.
3;; Written by CodeSourcery, LLC.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 2, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING.  If not, write to the Free
19;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20;; 02110-1301, USA.  */
21
22;; These descriptions are based on the information contained in the
23;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
24;; Limited.
25;;
26
;; Declare the automaton that carries the pipeline description for
;; the ARM 926EJ-S core.
;;
;; The model assumes that the condition of every conditional
;; instruction is "true", i.e., that every instruction is actually
;; executed.

(define_automaton "arm926ejs")
35
36;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37;; Pipelines
38;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39
;; There is a single pipeline.
;;
;;   The ALU pipeline has fetch, decode, execute, memory, and write
;;   stages.  Only the last three need to be modelled; they are the
;;   units "e" (execute), "m" (memory) and "w" (write) below.

(define_cpu_unit
  "e,m,w"
  "arm926ejs")
47
48;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49;; ALU Instructions
50;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
51
;; ALU instructions require three cycles to execute, and use the ALU
;; pipeline in each of the three stages.  The results are available
;; after the execute stage has finished.
55;;
56;; If the destination register is the PC, the pipelines are stalled
57;; for several cycles.  That case is not modeled here.
58
;; ALU operations of type "alu" or "alu_shift" (the shift-by-register
;; type, "alu_shift_reg", is not matched here).  One cycle in each of
;; e, m and w; default latency 1.
(define_insn_reservation "9_alu_op" 1
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "alu,alu_shift"))
  "e,m,w")
64
;; ALU operations with a shift-by-register operand.
;; In reality these stall in the decoder while the shift value is
;; read in a second cycle.  The decoder is not modelled, so pretend
;; instead that the instruction spends two cycles in the execute
;; stage.  Default latency 2.
(define_insn_reservation "9_alu_shift_reg_op" 2
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "alu_shift_reg"))
  "e*2,m,w")
73
74;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
75;; Multiplication Instructions
76;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
77
;; Multiplication instructions loop in the execute stage until the
;; instruction has been passed through the multiplier array enough
;; times.  Multiply operations occur in both the execute and memory
;; stages of the pipeline.
82
;; "mul" and "mla": two cycles in e, then m and w; default latency 3.
;;
;; "smlalxy" is intentionally not listed here.  It is matched by its
;; own reservation, 9_mult6, which has the same latency and the same
;; unit usage.  Listing it in both places made two reservation
;; conditions true for the same insn, which leaves the choice of
;; reservation undefined.
(define_insn_reservation "9_mult1" 3
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "mul,mla"))
  "e*2,m,w")
87
;; "muls" and "mlas": three cycles in e, then m and w; default
;; latency 4.
(define_insn_reservation "9_mult2" 4
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "muls,mlas"))
  "e*3,m,w")
92
;; "umull", "umlal", "smull" and "smlal": three cycles in e, then m
;; and w; default latency 4.
(define_insn_reservation "9_mult3" 4
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "umull,umlal,smull,smlal"))
  "e*3,m,w")
97
;; "umulls", "umlals", "smulls" and "smlals": four cycles in e, then
;; m and w; default latency 5.
(define_insn_reservation "9_mult4" 5
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "umulls,umlals,smulls,smlals"))
  "e*4,m,w")
102
;; "smulxy", "smlaxy" and "smlawx": a single cycle in each of e, m
;; and w; default latency 2.
(define_insn_reservation "9_mult5" 2
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "smulxy,smlaxy,smlawx"))
  "e,m,w")
107
;; "smlalxy": two cycles in e, then m and w; default latency 3.
(define_insn_reservation "9_mult6" 3
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "insn" "smlalxy"))
  "e*2,m,w")
112
113;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
114;; Load/Store Instructions
115;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
116
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!") and those without.  These models assume that all
;; memory references hit in the dcache.
121
;; Loads of type "load1" and "load_byte".  All of them are modelled
;; as the shifted-offset form, which takes 3 cycles: that form is
;; probably the most common one, and making the pessimistic
;; assumption will lead to fewer stalls.
(define_insn_reservation "9_load1_op" 3
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "load1,load_byte"))
  "e*2,m,w")
128
;; Stores of type "store1": one cycle in each of e, m and w; default
;; latency 0.
(define_insn_reservation "9_store1_op" 0
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "store1"))
  "e,m,w")
133
;; Multiple word loads and stores.

;; Loads of type "load2": one cycle in e, two in m, one in w; default
;; latency 3.
(define_insn_reservation "9_load2_op" 3
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "load2"))
  "e,m*2,w")
139
;; Loads of type "load3": one cycle in e, three in m, one in w;
;; default latency 4.
(define_insn_reservation "9_load3_op" 4
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "load3"))
  "e,m*3,w")
144
;; Loads of type "load4": one cycle in e, four in m, one in w;
;; default latency 5.
(define_insn_reservation "9_load4_op" 5
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "load4"))
  "e,m*4,w")
149
;; Stores of type "store2": one cycle in e, two in m, one in w;
;; default latency 0.
(define_insn_reservation "9_store2_op" 0
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "store2"))
  "e,m*2,w")
154
;; Stores of type "store3": one cycle in e, three in m, one in w;
;; default latency 0.
(define_insn_reservation "9_store3_op" 0
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "store3"))
  "e,m*3,w")
159
;; Stores of type "store4": one cycle in e, four in m, one in w;
;; default latency 0.
(define_insn_reservation "9_store4_op" 0
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "store4"))
  "e,m*4,w")
164
165;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166;; Branch and Call Instructions
167;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
168
169;; Branch instructions are difficult to model accurately.  The ARM
170;; core can predict most branches.  If the branch is predicted
171;; correctly, and predicted early enough, the branch can be completely
172;; eliminated from the instruction stream.  Some branches can
173;; therefore appear to require zero cycles to execute.  We assume that
174;; all branches are predicted correctly, and that the latency is
175;; therefore the minimum value.
176
;; Branches reserve no pipeline unit and have default latency 0.
(define_insn_reservation "9_branch_op" 0
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "branch"))
  "nothing")
181
;; The latency of a call cannot be predicted, so 32 is used as a
;; rough stand-in for positive infinity.  Calls reserve no pipeline
;; unit.

(define_insn_reservation "9_call_op" 32
  (and
    (eq_attr "tune" "arm926ejs")
    (eq_attr "type" "call"))
  "nothing")
189