;; ppro.md revision 117395
;; Pentium Pro/PII Scheduling
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; Categorize how many uops an ia32 instruction evaluates to:
;;   one --  an instruction with 1 uop can be decoded by any of the
;;           three decoders.
;;   few --  an instruction with 1 to 4 uops can be decoded only by
;;           decoder 0.
;;   many -- a complex instruction may take an unspecified number of
;;           cycles to decode in decoder 0.

;; "ppro_uops" classifies an insn by decode cost as "one", "few" or "many".
;; The arms of the `cond' are tried in order and the first test that holds
;; supplies the value:
;;   - type other, multi, call, callv, fpspc or str  -> "many"
;;   - type icmov, fcmov or cld                      -> "few"
;;   - type imov -> "few" when its "memory" attribute is "store" or
;;                  "both", otherwise "one"
;;   - any remaining insn whose "memory" attribute is not "none" -> "few"
;;   - everything else                               -> "one"
;; "str" was previously repeated in the "few" arm; that entry could never
;; match because the "many" arm already claims it, so it has been dropped.
;; The computed attribute values are unchanged.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         (eq_attr "type" "icmov,fcmov,cld")
           (const_string "few")
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))

;;
;; The PPro has an out-of-order core, but the instruction decoders are
;; naturally in-order and asymmetric.  We get best performance by scheduling
;; for the decoders, because doing so gives the out-of-order execution unit
;; the most choices.
;;
;; Rough readiness numbers.  Fine tuning happens in i386.c.
;;
;; p0	describes port 0.
;; p01	describes ports 0 and 1 as a pair; alu insns can issue to either.
;; p2	describes port 2 for loads.
;; p34	describes ports 3 and 4 for stores.
;; fpu	describes the fpu accessed via port 0.
;;	??? It is less than clear if there are separate fadd and fmul units
;;	that could operate in parallel.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

;; Function unit "ppro_p0": a single unit (multiplicity 1) with no limit on
;; the number of insns in flight (simultaneity 0).  Every entry applies only
;; when the "cpu" attribute is "pentiumpro".  The two trailing numbers of
;; each entry are READY-DELAY (cycles until the result is available) and
;; ISSUE-DELAY (cycles before the unit accepts another insn).

;; Shifts, rotates, lea, branches and cld: ready after 1, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "ishift,rotate,ishift1,rotate1,lea,ibr,cld"))
  1 1)

;; Integer multiply: ready after 4, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)

;; ??? Does the divider lock out the pipe while it works,
;; or is there a disconnected unit?
;; As written, integer divide holds the unit for its whole 17-cycle latency.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "idiv"))
  17 17)

;; FP add-class ops, sign ops and fistp: ready after 3, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fsgn,fistp"))
  3 1)

;; FP conditional move: ready after 2, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmov"))
  2 1)

;; FP compare: ready after 1, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmp"))
  1 1)

;; FP move: ready after 1, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmov"))
  1 1)

;; FP multiply: ready after 5, issue every cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 1)

;; FP divide and special FP ops: ready after 56; the port itself is free
;; again after 1 cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 1)

;; Function unit "ppro_p01": two identical instances (multiplicity 2) with
;; no limit on insns in flight (simultaneity 0).  Both entries use a
;; ready-delay of 1 and an issue-delay of 1.

;; Any pentiumpro insn whose type is neither imov nor fmov.  The leading
;; `!' negates the whole comma-separated list, not just its first element.
(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "!imov,fmov"))
  1 1)

;; imov/fmov insns occupy this unit only when they do not touch memory
;; ("memory" attribute is "none").
(define_function_unit "ppro_p01" 2 0
  (and (and (eq_attr "cpu" "pentiumpro")
            (eq_attr "type" "imov,fmov"))
       (eq_attr "memory" "none"))
  1 1)

;; Function unit "ppro_p2": a single unit (multiplicity 1, simultaneity 0)
;; used by pentiumpro insns that are of type pop or whose "memory"
;; attribute is "load" or "both".  Ready-delay 3, issue-delay 1.
(define_function_unit "ppro_p2" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  3 1)

;; Function unit "ppro_p34": a single unit (multiplicity 1, simultaneity 0)
;; used by pentiumpro insns that are of type push or whose "memory"
;; attribute is "store" or "both".  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p34" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

;; Function unit "fpu": a single unit (multiplicity 1) with no limit on
;; insns in flight (simultaneity 0).  Every entry applies only when the
;; "cpu" attribute is "pentiumpro".  The two trailing numbers of each entry
;; are READY-DELAY and ISSUE-DELAY, in cycles.

;; Simple FP ops, moves, compares, conditional moves and fistp:
;; ready after 1, issue every cycle.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp"))
  1 1)

;; FP multiply: ready after 5, a new one may start every 2 cycles.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 2)

;; FP divide and special FP ops hold the unit for the full 56 cycles.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

;; imul uses the fpu.  ??? does it have the same throughput as fmul?
;; As written: ready after 4, issue every cycle.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)
