ppro.md revision 132718
;; Pentium Pro/PII Scheduling
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
2083098Smp
;; Categorize how many uops an ia32 instruction evaluates to:
;;   one --  an instruction with 1 uop can be decoded by any of the
;;           three decoders.
;;   few --  an instruction with 2 to 4 uops can be decoded only by
;;           decoder 0.
;;   many -- a complex instruction may take an unspecified number of
;;           cycles to decode in decoder 0.
2883098Smp
;; ppro_uops: decoder-cost class of an insn ("one", "few" or "many").
;; The cond arms are tested in order and the first match wins:
;;   - types other, multi, call, callv, fpspc and str are "many";
;;   - types icmov, fcmov, cld and leave are "few";
;;   - an imov is "few" when its "memory" attribute is store or both,
;;     otherwise "one";
;;   - any remaining insn whose "memory" attribute is not none is "few";
;;   - everything else is "one".
;; "str" is deliberately listed only in the "many" arm: a second mention
;; in the "few" arm could never match, because the earlier arm claims it.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
	   (const_string "many")
	 (eq_attr "type" "icmov,fcmov,cld,leave")
	   (const_string "few")
	 (eq_attr "type" "imov")
	   (if_then_else (eq_attr "memory" "store,both")
	     (const_string "few")
	     (const_string "one"))
	 (eq_attr "memory" "!none")
	   (const_string "few")
	]
	(const_string "one")))
42
;;
;; The PPro has an out-of-order core, but the instruction decoders are
;; naturally in-order and asymmetric.  We get best performance by scheduling
;; for the decoders, for in doing so we give the out-of-order execution
;; unit the most choices.
;;
;; Rough readiness numbers.  Fine tuning happens in i386.c.
;;
;; p0	describes port 0.
;; p01	describes ports 0 and 1 as a pair; alu insns can issue to either.
;; p2	describes port 2 for loads.
;; p34	describes ports 3 and 4 for stores.
;; fpu	describes the fpu accessed via port 0.
;;	??? It is less than clear if there are separate fadd and fmul units
;;	that could operate in parallel.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
60
;; Unit "ppro_p0": insns of type ishift, rotate, ishift1, rotate1, lea,
;; ibr or cld, when the "cpu" attribute is pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "ishift,rotate,ishift1,rotate1,lea,ibr,cld")
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
65
;; Unit "ppro_p0": insns of type imul, when the "cpu" attribute is
;; pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "imul")
       (eq_attr "cpu" "pentiumpro"))
  4	; ready-delay
  1)	; issue-delay
70
;; Unit "ppro_p0": insns of type idiv, when the "cpu" attribute is
;; pentiumpro.
;; ??? It is not known whether the divider locks out the pipe for as long
;; as it is working, or whether it is a disconnected unit.  The equal
;; ready-delay and issue-delay below describe the former.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "idiv")
       (eq_attr "cpu" "pentiumpro"))
  17	; ready-delay
  17)	; issue-delay
77
;; Unit "ppro_p0": insns of type fop, fsgn or fistp, when the "cpu"
;; attribute is pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fop,fsgn,fistp")
       (eq_attr "cpu" "pentiumpro"))
  3	; ready-delay
  1)	; issue-delay
82
;; Unit "ppro_p0": insns of type fcmov, when the "cpu" attribute is
;; pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fcmov")
       (eq_attr "cpu" "pentiumpro"))
  2	; ready-delay
  1)	; issue-delay
87
;; Unit "ppro_p0": insns of type fcmp, when the "cpu" attribute is
;; pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fcmp")
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
92
;; Unit "ppro_p0": insns of type fmov, when the "cpu" attribute is
;; pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fmov")
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
97
;; Unit "ppro_p0": insns of type fmul, when the "cpu" attribute is
;; pentiumpro.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fmul")
       (eq_attr "cpu" "pentiumpro"))
  5	; ready-delay
  1)	; issue-delay
102
;; Unit "ppro_p0": insns of type fdiv or fpspc, when the "cpu" attribute
;; is pentiumpro.  The issue-delay here is only 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fdiv,fpspc")
       (eq_attr "cpu" "pentiumpro"))
  56	; ready-delay
  1)	; issue-delay
107
;; Unit "ppro_p01" (multiplicity 2): every insn whose type is neither
;; imov nor fmov, when the "cpu" attribute is pentiumpro.  The leading
;; "!" negates the whole comma-separated list.
(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "type" "!imov,fmov")
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
112
;; Unit "ppro_p01" (multiplicity 2): insns of type imov or fmov whose
;; "memory" attribute is none, when the "cpu" attribute is pentiumpro.
(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (and (eq_attr "memory" "none")
	    (eq_attr "type" "imov,fmov")))
  1	; ready-delay
  1)	; issue-delay
118
;; Unit "ppro_p2": insns whose "memory" attribute is load or both, plus
;; all insns of type pop or leave, when the "cpu" attribute is pentiumpro.
(define_function_unit "ppro_p2" 1 0
  (and (ior (eq_attr "memory" "load,both")
	    (eq_attr "type" "pop,leave"))
       (eq_attr "cpu" "pentiumpro"))
  3	; ready-delay
  1)	; issue-delay
124
;; Unit "ppro_p34": insns whose "memory" attribute is store or both, plus
;; all insns of type push, when the "cpu" attribute is pentiumpro.
(define_function_unit "ppro_p34" 1 0
  (and (ior (eq_attr "memory" "store,both")
	    (eq_attr "type" "push"))
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
130
;; Unit "fpu": insns of type fop, fsgn, fmov, fcmp, fcmov or fistp, when
;; the "cpu" attribute is pentiumpro.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp")
       (eq_attr "cpu" "pentiumpro"))
  1	; ready-delay
  1)	; issue-delay
135
;; Unit "fpu": insns of type fmul, when the "cpu" attribute is pentiumpro.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fmul")
       (eq_attr "cpu" "pentiumpro"))
  5	; ready-delay
  2)	; issue-delay
140
;; Unit "fpu": insns of type fdiv or fpspc, when the "cpu" attribute is
;; pentiumpro.  Ready-delay and issue-delay are both 56.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fdiv,fpspc")
       (eq_attr "cpu" "pentiumpro"))
  56	; ready-delay
  56)	; issue-delay
145
;; Unit "fpu": insns of type imul, when the "cpu" attribute is pentiumpro.
;; imul uses the fpu.
;; ??? It is an open question whether it has the same throughput as fmul.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "imul")
       (eq_attr "cpu" "pentiumpro"))
  4	; ready-delay
  1)	; issue-delay
151