;; k6.md revision 132718
;; AMD K6/K6-2 Scheduling
;; Copyright (C) 2002
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;
;; The K6 has a similar architecture to the PPro.  The important difference
;; is that there are only two decoders and they seem to be much slower than
;; the execution units.  So we have to pay much more attention to proper
;; decoding for schedulers.  We share most of the scheduler code for PPro in
;; i386.c.
;;
;; The fp unit is not pipelined and does one operation per two cycles,
;; including the FXCH.
;;
;; alu    describes both ALU units (ALU-X and ALU-Y).
;; alux   describes the X ALU unit.
;; fpu    describes the FPU unit.
;; load   describes the load unit.
;; branch describes the branch unit.
;; store  describes the store unit.  This unit is not modelled completely and
;;        is only used to model the lea operation.  Otherwise it lies outside
;;        of the critical path.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

;; The decoder specification is in the PPro section above!

;; Shift instructions and certain arithmetic are issued only to the X pipe.
;;
;; define_function_unit operands, in order: unit name, multiplicity (number
;; of identical units), simultaneity (0 means no limit on insns in flight),
;; the attribute test selecting the insns, ready-delay and issue-delay.
;; Here: one "k6_alux" unit, ready-delay 1, issue-delay 1.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld"))
  1 1)

;; The QI mode arithmetic is issued to the X pipe only.
;; Same unit and delays as above, restricted to insns whose "mode"
;; attribute is QI.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
            (eq_attr "mode" "QI")))
  1 1)

;; "k6_alu" is declared with multiplicity 2 (the two ALU pipes) and
;; simultaneity 0.  The integer operations listed below have ready-delay 1
;; and issue-delay 1 on it.
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
  1 1)

;; Integer moves whose "memory" attribute is "none" use the same unit with
;; the same delays.
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "none")))
  1 1)

;; Insns of type call, callv and ibr use the single "k6_branch" unit
;; (multiplicity 1, simultaneity 0) with ready-delay 1 and issue-delay 1.
(define_function_unit "k6_branch" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "call,callv,ibr"))
  1 1)

;; The load unit has a two-cycle latency, but we take care of it in
;; adjust_cost, so only ready-delay 1 / issue-delay 1 is declared here.
;; The test matches pop and leave, plus any insn whose "memory" attribute
;; is "load" or "both".
(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "pop,leave")
            (eq_attr "memory" "load,both")))
  1 1)

;; String insns (type "str") that read memory are given ready-delay 10 and
;; issue-delay 10 on the load unit.
(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both")))
  10 10)

;; Lea has two instructions, so latency is probably 2.
;; Declared as ready-delay 2, issue-delay 1 on the single "k6_store" unit.
(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "lea"))
  2 1)

;; String insns (type "str") are given ready-delay 10 and issue-delay 10 on
;; the store unit, regardless of their "memory" attribute.
(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "str"))
  10 10)

;; Pushes, and any insn whose "memory" attribute is "store" or "both", use
;; the store unit with ready-delay 1 and issue-delay 1.
(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

;; "k6_fpu" is declared with multiplicity 1 and simultaneity 1, i.e. a single
;; unit with at most one insn executing in it at a time.  Ordinary FP
;; operations have ready-delay 2 and issue-delay 2.
(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fop,fmov,fcmp,fistp"))
  2 2)

;; FP multiply is described separately but with the same 2/2 delays.
(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fmul"))
  2 2)

;; ??? Guess
;; FP divide and the "fpspc" type: ready-delay 56, issue-delay 56.
(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

;; Integer multiply is described on both "k6_alu" and "k6_alux", each with
;; ready-delay 2 and issue-delay 2.
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

;; ??? Guess
;; Integer divide is described on both "k6_alu" and "k6_alux", each with
;; ready-delay 17 and issue-delay 17.
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "idiv"))
  17 17)

(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "idiv"))
  17 17)
