P9InstrResources.td revision 363496
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two cycle ALU vector operation that uses an entire superslice.
34// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
// Members are given either as exact opcode names or as instregex patterns.
// Every pattern in this list ends in `$`, so a name must end where the
// pattern ends in order to match.
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLEQVOnes,
68    XXLNAND,
69    XXLNOR,
70    XXLOR,
71    XXLORf,
72    XXLORC,
73    XXLXOR,
74    XXLXORdpz,
75    XXLXORspz,
76    XXLXORz,
77    XXSEL,
78    XSABSQP,
79    XSCPSGNQP,
80    XSIEXPQP,
81    XSNABSQP,
82    XSNEGQP,
83    XSXEXPQP
84)>;
85
86// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
87// single slice. However, since it is Restricted, it requires all 3 dispatches
88// (DISP) for that superslice.
// The "all 3 dispatches" requirement is expressed by DISP_3SLOTS_1C in the
// resource list below.
89def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
90      (instrs
91    (instregex "TABORT(D|W)C(I)?$"),
92    (instregex "MTFSB(0|1)$"),
93    (instregex "MFFSC(D)?RN(I)?$"),
94    (instregex "CMPRB(8)?$"),
95    (instregex "TD(I)?$"),
96    (instregex "TW(I)?$"),
97    (instregex "FCMPU(S|D)$"),
98    (instregex "XSTSTDC(S|D)P$"),
99    FTDIV,
100    FTSQRT,
101    CMPEQB
102)>;
103
104// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
// Resource list: one 3 cycle ALU, one EXEC pipeline and a single dispatch
// (DISP_1C rather than the DISP_3SLOTS_1C used by restricted operations).
105def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
106      (instrs
107    (instregex "XSMAX(C|J)?DP$"),
108    (instregex "XSMIN(C|J)?DP$"),
109    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
110    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
111    (instregex "POPCNT(D|W)$"),
112    (instregex "CMPB(8)?$"),
113    (instregex "SETB(8)?$"),
114    XSTDIVDP,
115    XSTSQRTDP,
116    XSXSIGDP,
117    XSCVSPDPN,
118    BPERMD
119)>;
120
121// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
// Resource list: one 2 cycle ALU, one EXEC pipeline and a single dispatch.
// Several patterns below overlap each other; see the NOTE(review) markers.
122def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
123      (instrs
124    (instregex "S(L|R)D$"),
125    (instregex "SRAD(I)?$"),
126    (instregex "EXTSWSLI_32_64$"),
127    (instregex "MFV(S)?RD$"),
128    (instregex "MTV(S)?RD$"),
129    (instregex "MTV(S)?RW(A|Z)$"),
130    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
131    (instregex "CMP(L)?D(I)?$"),
132    (instregex "SUBF(I)?C(8)?(O)?$"),
133    (instregex "ANDI(S)?(8)?(_rec)?$"),
134    (instregex "ADDC(8)?(O)?$"),
135    (instregex "ADDIC(8)?(_rec)?$"),
136    (instregex "ADD(8|4)(O)?(_rec)?$"),
137    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
138    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
139    (instregex "NEG(8)?(O)?(_rec)?$"),
140    (instregex "POPCNTB$"),
141    (instregex "ADD(I|IS)?(8)?$"),
142    (instregex "LI(S)?(8)?$"),
143    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
144    (instregex "NAND(8)?(_rec)?$"),
145    (instregex "AND(C)?(8)?(_rec)?$"),
146    (instregex "NOR(8)?(_rec)?$"),
    // NOTE(review): overlaps the (X)?OR(I|IS)?(8)?(_rec)? pattern above on
    // OR, OR8 and their _rec forms; only the ORC variants are new here.
147    (instregex "OR(C)?(8)?(_rec)?$"),
148    (instregex "EQV(8)?(_rec)?$"),
149    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    // NOTE(review): ADD4 and ADD8 are also matched by ADD(8|4)(O)?(_rec)? above;
    // the TLS / trailing-underscore variants are what this pattern adds.
150    (instregex "ADD(4|8)(TLS)?(_)?$"),
    // NOTE(review): every name this can match is already matched by the
    // NEG(8)?(O)?(_rec)? pattern earlier in this list.
151    (instregex "NEG(8)?(O)?$"),
152    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
153    COPY,
154    MCRF,
155    MCRXRX,
156    XSNABSDP,
157    XSXEXPDP,
158    XSABSDP,
159    XSNEGDP,
160    XSCPSGNDP,
161    MFVSRWZ,
162    MFVRWZ,
163    EXTSWSLI,
164    SRADI_32,
165    RLDIC,
166    RFEBB,
167    LA,
168    TBEGIN,
169    TRECHKPT,
170    NOP,
171    WAIT
172)>;
173
174// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
175// single slice. However, since it is Restricted, it requires all 3 dispatches
176// (DISP) for that superslice.
// Note that RLDIC(L|R)(_32)?(_64)? requires the L or R suffix, so plain RLDIC
// is not matched here (it is listed in the standard-dispatch 2 cycle entry).
177def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
178      (instrs
179    (instregex "RLDC(L|R)$"),
180    (instregex "RLWIMI(8)?$"),
181    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
182    (instregex "M(F|T)OCRF(8)?$"),
183    (instregex "CR(6)?(UN)?SET$"),
184    (instregex "CR(N)?(OR|AND)(C)?$"),
185    (instregex "S(L|R)W(8)?$"),
186    (instregex "RLW(INM|NM)(8)?$"),
187    (instregex "F(N)?ABS(D|S)$"),
188    (instregex "FNEG(D|S)$"),
189    (instregex "FCPSGN(D|S)$"),
190    (instregex "SRAW(I)?$"),
191    (instregex "ISEL(8)?$"),
192    RLDIMI,
193    XSIEXPDP,
194    FMR,
195    CREQV,
196    CRXOR,
197    TRECLAIM,
198    TSR,
199    TABORT
200)>;
201
202// Three cycle ALU vector operation that uses an entire superslice.
203// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
204// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
// The VCMPNE*, VCMPNEZ* and VCMPEQU* patterns carry no (_rec)? group, so the
// corresponding _rec opcodes are listed by name further down.
205def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
206      (instrs
207    (instregex "M(T|F)VSCR$"),
208    (instregex "VCMPNEZ(B|H|W)$"),
209    (instregex "VCMPEQU(B|H|W|D)$"),
210    (instregex "VCMPNE(B|H|W)$"),
211    (instregex "VABSDU(B|H|W)$"),
212    (instregex "VADDU(B|H|W)S$"),
213    (instregex "VAVG(S|U)(B|H|W)$"),
214    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
215    (instregex "VCMPBFP(_rec)?$"),
216    (instregex "VC(L|T)Z(B|H|W|D)$"),
217    (instregex "VADDS(B|H|W)S$"),
218    (instregex "V(MIN|MAX)FP$"),
219    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
220    VBPERMD,
221    VADDCUW,
222    VPOPCNTW,
223    VPOPCNTD,
224    VPRTYBD,
225    VPRTYBW,
226    VSHASIGMAD,
227    VSHASIGMAW,
228    VSUBSBS,
229    VSUBSHS,
230    VSUBSWS,
231    VSUBUBS,
232    VSUBUHS,
233    VSUBUWS,
234    VSUBCUW,
235    VCMPGTSB,
236    VCMPGTSB_rec,
237    VCMPGTSD,
238    VCMPGTSD_rec,
239    VCMPGTSH,
240    VCMPGTSH_rec,
241    VCMPGTSW,
242    VCMPGTSW_rec,
243    VCMPGTUB,
244    VCMPGTUB_rec,
245    VCMPGTUD,
246    VCMPGTUD_rec,
247    VCMPGTUH,
248    VCMPGTUH_rec,
249    VCMPGTUW,
250    VCMPGTUW_rec,
251    VCMPNEB_rec,
252    VCMPNEH_rec,
253    VCMPNEW_rec,
254    VCMPNEZB_rec,
255    VCMPNEZH_rec,
256    VCMPNEZW_rec,
257    VCMPEQUB_rec,
258    VCMPEQUD_rec,
259    VCMPEQUH_rec,
260    VCMPEQUW_rec,
261    XVCMPEQDP,
262    XVCMPEQDP_rec,
263    XVCMPEQSP,
264    XVCMPEQSP_rec,
265    XVCMPGEDP,
266    XVCMPGEDP_rec,
267    XVCMPGESP,
268    XVCMPGESP_rec,
269    XVCMPGTDP,
270    XVCMPGTDP_rec,
271    XVCMPGTSP,
272    XVCMPGTSP_rec,
273    XVMAXDP,
274    XVMAXSP,
275    XVMINDP,
276    XVMINSP,
277    XVTDIVDP,
278    XVTDIVSP,
279    XVTSQRTDP,
280    XVTSQRTSP,
281    XVTSTDCDP,
282    XVTSTDCSP,
283    XVXSIGDP,
284    XVXSIGSP
285)>;
286
287// 7 cycle DP vector operation that uses an entire superslice.
288// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
289// EXECO) and 1 dispatch (DISP) to the given superslice.
// (The resource list below carries a single DISP_1C, not DISP_3SLOTS_1C.)
290def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
291      (instrs
292    VADDFP,
293    VCTSXS,
294    VCTSXS_0,
295    VCTUXS,
296    VCTUXS_0,
297    VEXPTEFP,
298    VLOGEFP,
299    VMADDFP,
300    VMHADDSHS,
301    VNMSUBFP,
302    VREFP,
303    VRFIM,
304    VRFIN,
305    VRFIP,
306    VRFIZ,
307    VRSQRTEFP,
308    VSUBFP,
309    XVADDDP,
310    XVADDSP,
311    XVCVDPSP,
312    XVCVDPSXDS,
313    XVCVDPSXWS,
314    XVCVDPUXDS,
315    XVCVDPUXWS,
316    XVCVHPSP,
317    XVCVSPDP,
318    XVCVSPHP,
319    XVCVSPSXDS,
320    XVCVSPSXWS,
321    XVCVSPUXDS,
322    XVCVSPUXWS,
323    XVCVSXDDP,
324    XVCVSXDSP,
325    XVCVSXWDP,
326    XVCVSXWSP,
327    XVCVUXDDP,
328    XVCVUXDSP,
329    XVCVUXWDP,
330    XVCVUXWSP,
331    XVMADDADP,
332    XVMADDASP,
333    XVMADDMDP,
334    XVMADDMSP,
335    XVMSUBADP,
336    XVMSUBASP,
337    XVMSUBMDP,
338    XVMSUBMSP,
339    XVMULDP,
340    XVMULSP,
341    XVNMADDADP,
342    XVNMADDASP,
343    XVNMADDMDP,
344    XVNMADDMSP,
345    XVNMSUBADP,
346    XVNMSUBASP,
347    XVNMSUBMDP,
348    XVNMSUBMSP,
349    XVRDPI,
350    XVRDPIC,
351    XVRDPIM,
352    XVRDPIP,
353    XVRDPIZ,
354    XVREDP,
355    XVRESP,
356    XVRSPI,
357    XVRSPIC,
358    XVRSPIM,
359    XVRSPIP,
360    XVRSPIZ,
361    XVRSQRTEDP,
362    XVRSQRTESP,
363    XVSUBDP,
364    XVSUBSP,
365    VCFSX,
366    VCFSX_0,
367    VCFUX,
368    VCFUX_0,
369    VMHRADDSHS,
370    VMLADDUHM,
371    VMSUMMBM,
372    VMSUMSHM,
373    VMSUMSHS,
374    VMSUMUBM,
375    VMSUMUHM,
376    VMSUMUDM,
377    VMSUMUHS,
378    VMULESB,
379    VMULESH,
380    VMULESW,
381    VMULEUB,
382    VMULEUH,
383    VMULEUW,
384    VMULOSB,
385    VMULOSH,
386    VMULOSW,
387    VMULOUB,
388    VMULOUH,
389    VMULOUW,
390    VMULUWM,
391    VSUM2SWS,
392    VSUM4SBS,
393    VSUM4SHS,
394    VSUM4UBS,
395    VSUMSWS
396)>;
397
398// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
399// dispatch units for the superslice.
// Neither pattern has a _rec group; the MUL(H|L)(D|W)..._rec forms are mapped
// by a separate cracked InstRW entry in this file.
400def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
401      (instrs
402    (instregex "MADD(HD|HDU|LD|LD8)$"),
403    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
404)>;
405
406// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
407// dispatch units for the superslice.
// Only the non-record forms are listed here; the matching _rec opcodes (and
// FSEL(D|S)_rec) are mapped by separate cracked InstRW entries in this file.
408def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
409      (instrs
410    FRSP,
411    (instregex "FRI(N|P|Z|M)(D|S)$"),
412    (instregex "FRE(S)?$"),
413    (instregex "FADD(S)?$"),
414    (instregex "FMSUB(S)?$"),
415    (instregex "FMADD(S)?$"),
416    (instregex "FSUB(S)?$"),
417    (instregex "FCFID(U)?(S)?$"),
418    (instregex "FCTID(U)?(Z)?$"),
419    (instregex "FCTIW(U)?(Z)?$"),
420    (instregex "FRSQRTE(S)?$"),
421    FNMADDS,
422    FNMADD,
423    FNMSUBS,
424    FNMSUB,
425    FSELD,
426    FSELS,
427    FMULS,
428    FMUL,
429    XSMADDADP,
430    XSMADDASP,
431    XSMADDMDP,
432    XSMADDMSP,
433    XSMSUBADP,
434    XSMSUBASP,
435    XSMSUBMDP,
436    XSMSUBMSP,
437    XSMULDP,
438    XSMULSP,
439    XSNMADDADP,
440    XSNMADDASP,
441    XSNMADDMDP,
442    XSNMADDMSP,
443    XSNMSUBADP,
444    XSNMSUBASP,
445    XSNMSUBMDP,
446    XSNMSUBMSP
447)>;
448
449// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
450// These operations can be done in parallel. The DP is restricted so we need a
451// full 4 dispatches.
// The 4 dispatches are DISP_3SLOTS_1C (restricted DP piece) plus DISP_1C (ALU
// piece); each piece also gets its own IP_EXEC_1C.
452def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
453              DISP_3SLOTS_1C, DISP_1C],
454      (instrs
455    (instregex "FSEL(D|S)_rec$")
456)>;
457
458// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
// The two pieces share the combined P9_DPOpAndALUOp_7C resource (5 + 2 = 7,
// matching the 7C in its name); dispatch is DISP_3SLOTS_1C plus DISP_1C.
459def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
460              DISP_3SLOTS_1C, DISP_1C],
461      (instrs
462    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
463)>;
464
465// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
466// These operations must be done sequentially. The DP is restricted so we need
467// a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
468def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
469              DISP_3SLOTS_1C, DISP_1C],
470      (instrs
471    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
472    (instregex "FRE(S)?_rec$"),
473    (instregex "FADD(S)?_rec$"),
474    (instregex "FSUB(S)?_rec$"),
475    (instregex "F(N)?MSUB(S)?_rec$"),
476    (instregex "F(N)?MADD(S)?_rec$"),
477    (instregex "FCFID(U)?(S)?_rec$"),
478    (instregex "FCTID(U)?(Z)?_rec$"),
479    (instregex "FCTIW(U)?(Z)?_rec$"),
480    (instregex "FMUL(S)?_rec$"),
481    (instregex "FRSQRTE(S)?_rec$"),
482    FRSP_rec
483)>;
484
485// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
// Non-restricted counterpart of the 7 cycle restricted DP entry: same
// P9_DP_7C / IP_EXEC_1C resources, but DISP_1C instead of DISP_3SLOTS_1C.
486def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
487      (instrs
488    XSADDDP,
489    XSADDSP,
490    XSCVDPHP,
491    XSCVDPSP,
492    XSCVDPSXDS,
493    XSCVDPSXDSs,
494    XSCVDPSXWS,
495    XSCVDPUXDS,
496    XSCVDPUXDSs,
497    XSCVDPUXWS,
498    XSCVDPSXWSs,
499    XSCVDPUXWSs,
500    XSCVHPDP,
501    XSCVSPDP,
502    XSCVSXDDP,
503    XSCVSXDSP,
504    XSCVUXDDP,
505    XSCVUXDSP,
506    XSRDPI,
507    XSRDPIC,
508    XSRDPIM,
509    XSRDPIP,
510    XSRDPIZ,
511    XSREDP,
512    XSRESP,
513    XSRSQRTEDP,
514    XSRSQRTESP,
515    XSSUBDP,
516    XSSUBSP,
517    XSCVDPSPN,
518    XSRSP
519)>;
520
521// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
522// superslice. That includes both exec pipelines (EXECO, EXECE) and one
523// dispatch.
// Besides the permute/splat/pack/unpack opcodes, this list also places the
// V*UQM / V*CUQ quadword add-subtract, VMUL10*, XS*QP compare/test and
// BCD*_rec opcodes on the PM resource.
524def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
525      (instrs
526    (instregex "LVS(L|R)$"),
527    (instregex "VSPLTIS(W|H|B)$"),
528    (instregex "VSPLT(W|H|B)(s)?$"),
529    (instregex "V_SETALLONES(B|H)?$"),
530    (instregex "VEXTRACTU(B|H|W)$"),
531    (instregex "VINSERT(B|H|W|D)$"),
532    MFVSRLD,
533    MTVSRWS,
534    VBPERMQ,
535    VCLZLSBB,
536    VCTZLSBB,
537    VEXTRACTD,
538    VEXTUBLX,
539    VEXTUBRX,
540    VEXTUHLX,
541    VEXTUHRX,
542    VEXTUWLX,
543    VEXTUWRX,
544    VGBBD,
545    VMRGHB,
546    VMRGHH,
547    VMRGHW,
548    VMRGLB,
549    VMRGLH,
550    VMRGLW,
551    VPERM,
552    VPERMR,
553    VPERMXOR,
554    VPKPX,
555    VPKSDSS,
556    VPKSDUS,
557    VPKSHSS,
558    VPKSHUS,
559    VPKSWSS,
560    VPKSWUS,
561    VPKUDUM,
562    VPKUDUS,
563    VPKUHUM,
564    VPKUHUS,
565    VPKUWUM,
566    VPKUWUS,
567    VPRTYBQ,
568    VSL,
569    VSLDOI,
570    VSLO,
571    VSLV,
572    VSR,
573    VSRO,
574    VSRV,
575    VUPKHPX,
576    VUPKHSB,
577    VUPKHSH,
578    VUPKHSW,
579    VUPKLPX,
580    VUPKLSB,
581    VUPKLSH,
582    VUPKLSW,
583    XXBRD,
584    XXBRH,
585    XXBRQ,
586    XXBRW,
587    XXEXTRACTUW,
588    XXINSERTW,
589    XXMRGHW,
590    XXMRGLW,
591    XXPERM,
592    XXPERMR,
593    XXSLDWI,
594    XXSLDWIs,
595    XXSPLTIB,
596    XXSPLTW,
597    XXSPLTWs,
598    XXPERMDI,
599    XXPERMDIs,
600    VADDCUQ,
601    VADDECUQ,
602    VADDEUQM,
603    VADDUQM,
604    VMUL10CUQ,
605    VMUL10ECUQ,
606    VMUL10EUQ,
607    VMUL10UQ,
608    VSUBCUQ,
609    VSUBECUQ,
610    VSUBEUQM,
611    VSUBUQM,
612    XSCMPEXPQP,
613    XSCMPOQP,
614    XSCMPUQP,
615    XSTSTDCQP,
616    XSXSIGQP,
617    BCDCFN_rec,
618    BCDCFZ_rec,
619    BCDCPSGN_rec,
620    BCDCTN_rec,
621    BCDCTZ_rec,
622    BCDSETSGN_rec,
623    BCDS_rec,
624    BCDTRUNC_rec,
625    BCDUS_rec,
626    BCDUTRUNC_rec
627)>;
628
629// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
630// superslice. That includes both exec pipelines (EXECO, EXECE) and one
631// dispatch.
// Members are the quad-precision (XS*QP*) add/subtract/convert/round opcodes
// plus BCDSR_rec.
632def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
633      (instrs
634    BCDSR_rec,
635    XSADDQP,
636    XSADDQPO,
637    XSCVDPQP,
638    XSCVQPDP,
639    XSCVQPDPO,
640    XSCVQPSDZ,
641    XSCVQPSWZ,
642    XSCVQPUDZ,
643    XSCVQPUWZ,
644    XSCVSDQP,
645    XSCVUDQP,
646    XSRQPI,
647    XSRQPIX,
648    XSRQPXP,
649    XSSUBQP,
650    XSSUBQPO
651)>;
652
653// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
654// superslice. That includes both exec pipelines (EXECO, EXECE) and one
655// dispatch.
// Single member: BCDCTSQ_rec.
656def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
657      (instrs
658    BCDCTSQ_rec
659)>;
660
661// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
662// superslice. That includes both exec pipelines (EXECO, EXECE) and one
663// dispatch.
// Members are the quad-precision multiply and fused multiply-add/subtract
// opcodes, each in its plain and "O"-suffixed form.
664def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
665      (instrs
666    XSMADDQP,
667    XSMADDQPO,
668    XSMSUBQP,
669    XSMSUBQPO,
670    XSMULQP,
671    XSMULQPO,
672    XSNMADDQP,
673    XSNMADDQPO,
674    XSNMSUBQP,
675    XSNMSUBQPO
676)>;
677
678// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
679// superslice. That includes both exec pipelines (EXECO, EXECE) and one
680// dispatch.
// Single member: BCDCFSQ_rec.
681def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
682      (instrs
683    BCDCFSQ_rec
684)>;
685
686// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
687// superslice. That includes both exec pipelines (EXECO, EXECE) and one
688// dispatch.
// Members: XSDIVQP and its "O"-suffixed form.
689def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
690      (instrs
691    XSDIVQP,
692    XSDIVQPO
693)>;
694
695// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
696// superslice. That includes both exec pipelines (EXECO, EXECE) and one
697// dispatch.
// (The resource list uses DISP_1C, the same as the other DFU entries.)
698def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
699      (instrs
700    XSSQRTQP,
701    XSSQRTQPO
702)>;
703
704// 6 Cycle Load uses a single slice.
// NOTE(review): unlike most patterns in this file, "LXVL(L)?" has no trailing
// `$` anchor - confirm that the looser match is intended.
705def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
706      (instrs
707    (instregex "LXVL(L)?")
708)>;
709
710// 5 Cycle Load uses a single slice.
// Resource list: one load/store unit for 5 cycles, one AGEN pipeline and a
// single dispatch.
711def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
712      (instrs
713    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing `$` on this pattern, unlike its neighbours -
    // confirm that the looser match is intended.
714    (instregex "LVX(L)?"),
715    (instregex "LXSI(B|H)ZX$"),
716    LXSDX,
717    LXVB16X,
718    LXVD2X,
719    LXVWSX,
720    LXSIWZX,
721    LXV,
722    LXVX,
723    LXSD,
724    DFLOADf64,
725    XFLOADf64,
726    LIWZX
727)>;
728
729// 4 Cycle Load uses a single slice.
// Despite the "Load" wording, the list also maps the DCB*/ICB* opcodes,
// CP_COPY/CP_PASTE/CP_ABORT, DARN and the *SYNC-style opcodes onto the same
// P9_LS_4C / IP_AGEN_1C / DISP_1C resources.
730def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
731      (instrs
732    (instregex "DCB(F|T|ST)(EP)?$"),
733    (instregex "DCBZ(L)?(EP)?$"),
734    (instregex "DCBTST(EP)?$"),
735    (instregex "CP_COPY(8)?$"),
736    (instregex "CP_PASTE(8)?$"),
737    (instregex "ICBI(EP)?$"),
738    (instregex "ICBT(LS)?$"),
739    (instregex "LBARX(L)?$"),
740    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
741    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
742    (instregex "LH(A|B)RX(L)?(8)?$"),
743    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
744    (instregex "LWARX(L)?$"),
745    (instregex "LWBRX(8)?$"),
746    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
747    CP_ABORT,
748    DARN,
749    EnforceIEIO,
750    ISYNC,
751    MSGSYNC,
752    TLBSYNC,
753    SYNC,
754    LMW,
755    LSWI
756)>;
757
758// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
759// superslice.
// Whole-superslice dispatch is expressed as DISP_3SLOTS_1C.
760def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
761      (instrs
762    LFIWZX,
763    LFDX,
764    LFD
765)>;
766
767// Cracked Load Instructions.
768// Load instructions that can be done in parallel.
// Two P9_LS_4C / IP_AGEN_1C pieces are listed with one DISP_PAIR_1C.
// NOTE(review): DISP_PAIR_1C is defined outside this section; presumably one
// entry covers both halves of the cracked pair - confirm.
769def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
770              DISP_PAIR_1C],
771      (instrs
772    SLBIA,
773    SLBIE,
774    SLBMFEE,
775    SLBMFEV,
776    SLBMTE,
777    TLBIEL
778)>;
779
780// Cracked Load Instruction.
781// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
782// operations can be run in parallel.
// The load piece uses P9_LS_4C / IP_AGEN_1C and the ALU piece uses
// P9_ALU_2C / IP_EXEC_1C.
// NOTE(review): DISP_PAIR_1C appears twice here but only once in the cracked
// SLB*/TLBIEL entry - confirm which form is intended.
783def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
784              DISP_PAIR_1C, DISP_PAIR_1C],
785      (instrs
786    (instregex "L(W|H)ZU(X)?(8)?$")
787)>;
788
789// Cracked TEND Instruction.
790// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
791// operations can be run in parallel.
// Same LS/ALU resources as the cracked load-with-update entry, but each piece
// uses a plain DISP_1C dispatch.
792def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
793              DISP_1C, DISP_1C],
794      (instrs
795    TEND
796)>;
797
798
799// Cracked Store Instruction
800// Consecutive Store and ALU instructions. The store is restricted and requires
801// three dispatches.
// DISP_3SLOTS_1C is the restricted store piece; DISP_1C is the ALU piece.
// The pattern selects the ST(B|H|W|D)CX opcodes.
802def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
803              DISP_3SLOTS_1C, DISP_1C],
804      (instrs
805    (instregex "ST(B|H|W|D)CX$")
806)>;
807
808// Cracked Load Instruction.
809// Two consecutive load operations for a total of 8 cycles.
// Each load piece gets its own IP_AGEN_1C and DISP_1C.
810def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
811              DISP_1C, DISP_1C],
812      (instrs
813    LDMX
814)>;
815
816// Cracked Load instruction.
817// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
818// operations cannot be done at the same time and so their latencies are added.
// One IP_AGEN_1C (load piece) and one IP_EXEC_1C (ALU piece), each with a
// DISP_1C dispatch.
819def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
820              DISP_1C, DISP_1C],
821      (instrs
822    (instregex "LHA(X)?(8)?$"),
823    (instregex "CP_PASTE(8)?_rec$"),
824    (instregex "LWA(X)?(_32)?$"),
825    TCHECK
826)>;
827
828// Cracked Restricted Load instruction.
829// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
830// operations cannot be done at the same time and so their latencies are added.
831// Full 6 dispatches are required as this is both cracked and restricted.
// (6 = two DISP_3SLOTS_1C entries, one per cracked piece.)
832def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
833              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
834      (instrs
835    LFIWAX
836)>;
837
838// Cracked Load instruction.
839// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
840// operations cannot be done at the same time and so their latencies are added.
841// Full 4 dispatches are required as this is a cracked instruction.
// NOTE(review): the resource list carries two DISP_1C entries, which other
// comments in this file count as 2 dispatches - confirm the "4" above.
842def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
843      (instrs
844    LXSIWAX,
845    LIWAX
846)>;
847
848// Cracked Load instruction.
849// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
850// cycles. The Load and ALU operations cannot be done at the same time and so
851// their latencies are added.
852// Full 6 dispatches are required as this is a restricted instruction.
// (6 = two DISP_3SLOTS_1C entries, one per cracked piece.)
853def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
854              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
855      (instrs
856    LFSX,
857    LFS
858)>;
859
860// Cracked Load instruction.
861// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
862// operations cannot be done at the same time and so their latencies are added.
863// Full 4 dispatches are required as this is a cracked instruction.
// NOTE(review): the resource list carries two DISP_1C entries, which other
// comments in this file count as 2 dispatches - confirm the "4" above.
864def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
865      (instrs
866    LXSSP,
867    LXSSPX,
868    XFLOADf32,
869    DFLOADf32
870)>;
871
872// Cracked 3-Way Load Instruction
873// Load with two ALU operations that depend on each other
// Resources: combined load+ALU (P9_LoadAndALUOp_6C) plus a further 2 cycle ALU,
// one AGEN and two EXEC pipelines, and three dispatch entries
// (two DISP_PAIR_1C and one DISP_1C).
874def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
875              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
876      (instrs
877    (instregex "LHAU(X)?(8)?$"),
878    LWAUX
879)>;
880
881// Cracked Load that requires the PM resource.
882// Since the Load and the PM cannot be done at the same time the latencies are
883// added. Requires 8 cycles. Since the PM requires the full superslice we need
884// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
885// requires the remaining 1 dispatch.
// The load piece additionally uses IP_AGEN_1C.
886def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
887              DISP_1C, DISP_1C],
888      (instrs
889    LXVH8X,
890    LXVDSX,
891    LXVW4X
892)>;
893
894// Single slice Restricted store operation. The restricted operation requires
895// all three dispatches for the superslice.
// Unlike the load entries, the resource list names both an EXEC pipeline and
// an AGEN pipeline in addition to the load/store unit.
896def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
897      (instrs
898    (instregex "STF(S|D|IWX|SX|DX)$"),
899    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
900    (instregex "STW(8)?$"),
901    (instregex "(D|X)FSTORE(f32|f64)$"),
902    (instregex "ST(W|H|D)BRX$"),
903    (instregex "ST(B|H|D)(8)?$"),
904    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
905    STIWX,
906    SLBIEG,
907    STMW,
908    STSWI,
909    TLBIE
910)>;
911
912// Vector Store Instruction
913// Requires the whole superslice and therefore requires one dispatch
914// as well as both the Even and Odd exec pipelines.
// The trailing (...)? group in the STXV pattern is optional, so plain STXV is
// matched along with its suffixed forms.
915def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
916      (instrs
917    (instregex "STVE(B|H|W)X$"),
918    (instregex "STVX(L)?$"),
919    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
920)>;
921
922// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
923// superslice. That includes both exec pipelines (EXECO, EXECE) and two
924// dispatches.
// The members are the MTCTR*/MTLR* opcodes, which this model places on the
// DIV resource. Dispatch is expressed with DISP_EVEN_1C (defined outside this
// section).
925def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
926      (instrs
927    (instregex "MTCTR(8)?(loop)?$"),
928    (instregex "MTLR(8)?$")
929)>;
930
931// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
932// superslice. That includes both exec pipelines (EXECO, EXECE) and two
933// dispatches.
// The members are move-to/move-from opcodes (VRSAVE, PMR, TB, SPR, CTR, LR,
// MSR), which this model places on the DIV resource.
934def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
935      (instrs
936    (instregex "M(T|F)VRSAVE(v)?$"),
937    (instregex "M(T|F)PMR$"),
938    (instregex "M(T|F)TB(8)?$"),
939    (instregex "MF(SPR|CTR|LR)(8)?$"),
940    (instregex "M(T|F)MSR(D)?$"),
941    (instregex "MTSPR(8)?$")
942)>;
943
944// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
945// superslice. That includes both exec pipelines (EXECO, EXECE) and two
946// dispatches.
// Record (_rec) forms of DIVW* are mapped by a separate cracked entry.
947def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
948      (instrs
949    DIVW,
950    DIVWO,
951    DIVWU,
952    DIVWUO,
953    MODSW
954)>;
955
956// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
957// superslice. That includes both exec pipelines (EXECO, EXECE) and two
958// dispatches.
// Record (_rec) forms of the DIVD*/DIVWE* opcodes are mapped by a separate
// cracked entry.
959def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
960      (instrs
961    DIVWE,
962    DIVWEO,
963    DIVD,
964    DIVDO,
965    DIVWEU,
966    DIVWEUO,
967    DIVDU,
968    DIVDUO,
969    MODSD,
970    MODUD,
971    MODUW
972)>;
973
974// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
975// superslice. That includes both exec pipelines (EXECO, EXECE) and two
976// dispatches.
// (Uses DISP_EVEN_1C, the same dispatch entry as the other DIV mappings.)
977def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
978      (instrs
979    DIVDE,
980    DIVDEO,
981    DIVDEU,
982    DIVDEUO
983)>;
984
985// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
986// and one full superslice for the DIV operation since there is only one DIV per
987// superslice. Latency of DIV plus ALU is 18.
// (18 matches the P9_IntDivAndALUOp_18C_8 resource used below.)
988def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
989              DISP_EVEN_1C, DISP_1C],
990      (instrs
991    (instregex "DIVW(U)?(O)?_rec$")
992)>;
993
994// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
995// and one full superslice for the DIV operation since there is only one DIV per
996// superslice. Latency of DIV plus ALU is 26.
// DISP_EVEN_1C belongs to the DIV piece and DISP_1C to the ALU piece.
997def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
998              DISP_EVEN_1C, DISP_1C],
999      (instrs
1000    DIVD_rec,
1001    DIVDO_rec,
1002    DIVDU_rec,
1003    DIVDUO_rec,
1004    DIVWE_rec,
1005    DIVWEO_rec,
1006    DIVWEU_rec,
1007    DIVWEUO_rec
1008)>;
1009
1010// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
1011// and one full superslice for the DIV operation since there is only one DIV per
1012// superslice. Latency of DIV plus ALU is 42.
// DISP_EVEN_1C belongs to the DIV piece and DISP_1C to the ALU piece.
1013def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1014              DISP_EVEN_1C, DISP_1C],
1015      (instrs
1016    DIVDE_rec,
1017    DIVDEO_rec,
1018    DIVDEU_rec,
1019    DIVDEUO_rec
1020)>;
1021
1022// CR access instructions in IIC_BrMCR, IIC_BrMCRX.
1023
1024// Cracked, restricted, ALU operations.
1025// Here the two ALU ops can actually be done in parallel and therefore the
1026// latencies are not added together. Otherwise this is like having two
1027// instructions running together on two pipelines and 6 dispatches. ALU ops are
1028// 2 cycles each.
// (6 dispatches = two DISP_3SLOTS_1C entries.)
1029def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1030              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1031      (instrs
1032    MTCRF,
1033    MTCRF8
1034)>;
1035
1036// Cracked ALU operations.
1037// Here the two ALU ops can actually be done in parallel and therefore the
1038// latencies are not added together. Otherwise this is like having two
1039// instructions running together on two pipelines and 2 dispatches. ALU ops are
1040// 2 cycles each.
// Only the _rec forms are matched here; ADDC(8)?(O)? and SUBF(I)?C(8)?(O)?
// without _rec are in the standard-dispatch 2 cycle ALU entry.
1041def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1042              DISP_1C, DISP_1C],
1043      (instrs
1044    (instregex "ADDC(8)?(O)?_rec$"),
1045    (instregex "SUBFC(8)?(O)?_rec$")
1046)>;
1047
1048// Cracked ALU operations.
1049// Two ALU ops can be done in parallel.
1050// One is a three cycle ALU op, the other is a two cycle ALU op.
1051// One of the ALU ops is restricted, the other is not, so we have a total of
1052// 4 dispatches.
// (4 = DISP_3SLOTS_1C for the restricted op plus DISP_1C for the other, the
// same count other comments in this file give for that pair.)
1053def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1054              DISP_3SLOTS_1C, DISP_1C],
1055      (instrs
1056    (instregex "F(N)?ABS(D|S)_rec$"),
1057    (instregex "FCPSGN(D|S)_rec$"),
1058    (instregex "FNEG(D|S)_rec$"),
1059    FMR_rec
1060)>;
1061
1062// Cracked ALU operations.
1063// Here the two ALU ops can actually be done in parallel and therefore the
1064// latencies are not added together. Otherwise this is like having two
1065// instructions running together on two pipelines and 2 dispatches.
1066// ALU ops are 3 cycles each.
// Single member: MCRFS.
1067def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1068              DISP_1C, DISP_1C],
1069      (instrs
1070    MCRFS
1071)>;
1072
1073// Cracked Restricted ALU operations.
1074// Here the two ALU ops can actually be done in parallel and therefore the
1075// latencies are not added together. Otherwise this is like having two
1076// instructions running together on two pipelines and 6 dispatches.
1077// ALU ops are 3 cycles each.
// The first pattern matches MTFSF, MTFSFb and MTFSF_rec; the second matches
// MTFSFI and MTFSFI_rec.
1078def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1079              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1080      (instrs
1081    (instregex "MTFSF(b|_rec)?$"),
1082    (instregex "MTFSFI(_rec)?$")
1083)>;
1084
1085// Cracked instruction made of two ALU ops.
1086// The two ops cannot be done in parallel.
1087// One of the ALU ops is restricted and takes 3 dispatches.
// RLD(I)?C(R|L)_rec requires the R or L suffix, so RLDIC_rec is not matched
// here; it is listed in the non-restricted P9_ALUOpAndALUOp_4C entry.
1088def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1089              DISP_3SLOTS_1C, DISP_1C],
1090      (instrs
1091    (instregex "RLD(I)?C(R|L)_rec$"),
1092    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
1093    (instregex "SLW(8)?_rec$"),
1094    (instregex "SRAW(I)?_rec$"),
1095    (instregex "SRW(8)?_rec$"),
1096    RLDICL_32_rec,
1097    RLDIMI_rec
1098)>;
1099
1100// Cracked instruction made of two ALU ops.
1101// The two ops cannot be done in parallel.
1102// Both of the ALU ops are restricted and take 3 dispatches.
// The optional group means the pattern matches MFFS, MFFSL, MFFSCE and
// MFFS_rec.
1103def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1104              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1105      (instrs
1106    (instregex "MFFS(L|CE|_rec)?$")
1107)>;
1108
1109// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for
1110// a total of 6 cycles. All of the ALU operations are also restricted so each
1111// takes 3 dispatches for a total of 9.
// The pattern names MFCR and MFCR8.
1112def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1113              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1114      (instrs
1115    (instregex "MFCR(8)?$")
1116)>;
1117
1118// Cracked instruction made of two ALU ops.
1119// The two ops cannot be done in parallel.
// Neither op is restricted: each takes a single dispatch (DISP_1C).
1120def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1121      (instrs
1122    (instregex "EXTSWSLI_32_64_rec$"),
1123    (instregex "SRAD(I)?_rec$"),
1124    EXTSWSLI_rec,
1125    SLD_rec,
1126    SRD_rec,
1127    RLDIC_rec
1128)>;
1129
1130// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_33C_8 write resource as XSDIVDP, but dispatched with
// DISP_3SLOTS_1C instead of DISP_1C.
1131def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1132      (instrs
1133    FDIV
1134)>;
1135
1136// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The two ops are serialized, giving the 36 cycles in P9_DPOpAndALU2Op_36C_8.
// One op is dispatched restricted (DISP_3SLOTS_1C), the other with DISP_1C.
1137def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1138              DISP_3SLOTS_1C, DISP_1C],
1139      (instrs
1140    FDIV_rec
1141)>;
1142
1143// 36 Cycle DP Instruction.
1144// Instruction can be done on a single slice.
// Uses one EXEC pipeline and a single dispatch (DISP_1C).
1145def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1146      (instrs
1147    XSSQRTDP
1148)>;
1149
1150// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_36C_10 write resource as XSSQRTDP, but dispatched with
// DISP_3SLOTS_1C instead of DISP_1C.
1151def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1152      (instrs
1153    FSQRT
1154)>;
1155
1156// 36 Cycle DP Vector Instruction.
// Occupies both the even and odd DP units (P9_DPE_36C_10, P9_DPO_36C_10) and
// both the EXECE and EXECO pipelines, with 1 dispatch.
1157def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1158              DISP_1C],
1159      (instrs
1160    XVSQRTDP
1161)>;
1162
1163// 27 Cycle DP Vector Instruction.
// Occupies both the even and odd DP units (P9_DPE_27C_10, P9_DPO_27C_10) and
// both the EXECE and EXECO pipelines, with 1 dispatch.
1164def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1165              DISP_1C],
1166      (instrs
1167    XVSQRTSP
1168)>;
1169
1170// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The two ops are serialized, giving the 39 cycles in
// P9_DPOpAndALU2Op_39C_10. One op is dispatched restricted (DISP_3SLOTS_1C),
// the other with DISP_1C.
1171def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1172              DISP_3SLOTS_1C, DISP_1C],
1173      (instrs
1174    FSQRT_rec
1175)>;
1176
1177// 26 Cycle DP Instruction.
// Uses one EXEC pipeline and a single dispatch (DISP_1C).
1178def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1179      (instrs
1180    XSSQRTSP
1181)>;
1182
1183// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_26C_5 write resource as XSSQRTSP, but dispatched with
// DISP_3SLOTS_1C instead of DISP_1C.
1184def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1185      (instrs
1186    FSQRTS
1187)>;
1188
1189// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The two ops are serialized, giving the 29 cycles in P9_DPOpAndALU2Op_29C_5.
// One op is dispatched restricted (DISP_3SLOTS_1C), the other with DISP_1C.
1190def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1191              DISP_3SLOTS_1C, DISP_1C],
1192      (instrs
1193    FSQRTS_rec
1194)>;
1195
1196// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_33C_8 write resource as the restricted FDIV entry, but with a
// single DISP_1C dispatch.
1197def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1198      (instrs
1199    XSDIVDP
1200)>;
1201
1202// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_22C_5 write resource as XSDIVSP, but dispatched with
// DISP_3SLOTS_1C instead of DISP_1C.
1203def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1204      (instrs
1205    FDIVS
1206)>;
1207
1208// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The two ops are serialized, giving the 25 cycles in P9_DPOpAndALU2Op_25C_5.
// One op is dispatched restricted (DISP_3SLOTS_1C), the other with DISP_1C.
1209def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1210              DISP_3SLOTS_1C, DISP_1C],
1211      (instrs
1212    FDIVS_rec
1213)>;
1214
1215// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_22C_5 write resource as the restricted FDIVS entry, but with a
// single DISP_1C dispatch.
1216def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1217      (instrs
1218    XSDIVSP
1219)>;
1220
1221// 24 Cycle DP Vector Instruction. Takes one full superslice.
1222// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1223// superslice.
// The even and odd DP units are listed separately as P9_DPE_24C_8 and
// P9_DPO_24C_8.
1224def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1225              DISP_1C],
1226      (instrs
1227    XVDIVSP
1228)>;
1229
1230// 33 Cycle DP Vector Instruction. Takes one full superslice.
1231// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1232// superslice.
// The even and odd DP units are listed separately as P9_DPE_33C_8 and
// P9_DPO_33C_8.
1233def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1234              DISP_1C],
1235      (instrs
1236    XVDIVDP
1237)>;
1238
1239// Instruction cracked into three pieces. One Load and two ALU operations.
1240// The Load and one of the ALU ops cannot be run at the same time and so the
1241// latencies are added together for 7 cycles (P9_LoadAndALU2Op_7C). The
// remaining ALU is 2 cycles (P9_ALU_2C).
1242// Both the load and the ALU that depends on it are restricted and so they take
1243// a total of 6 dispatches (two DISP_3SLOTS_1C). The final dispatch (DISP_1C)
// comes from the second ALU op.
1244// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
// The pattern names LFSU and LFSUX.
1245def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1246              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1247              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1248      (instrs
1249    (instregex "LF(SU|SUX)$")
1250)>;
1251
1252// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1253// the store and so it can be run at the same time as the store. The store is
1254// also restricted.
// Dispatch entries: one DISP_3SLOTS_1C (restricted) and one DISP_1C.
// NOTE(review): two IP_EXEC_1C pipelines are listed although only one ALU op
// is described above -- confirm the second one is intended for the store.
1255def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1256              DISP_3SLOTS_1C, DISP_1C],
1257      (instrs
1258    (instregex "STF(S|D)U(X)?$"),
1259    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1260)>;
1261
1262// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1263// the load and so it can be run at the same time as the load.
// The AGEN pipeline is for the load and the EXEC pipeline is for the ALU.
// Both ops are dispatched with DISP_PAIR_1C.
// The patterns name LBZU, LBZUX, LBZU8, LBZUX8, LDU and LDUX.
1264def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1265              DISP_PAIR_1C, DISP_PAIR_1C],
1266      (instrs
1267    (instregex "LBZU(X)?(8)?$"),
1268    (instregex "LDU(X)?$")
1269)>;
1270
1271// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1272// the load and so it can be run at the same time as the load. The load is also
1273// restricted. 3 dispatches are from the restricted load (DISP_3SLOTS_1C) while
1274// the remaining one (DISP_1C) is from the ALU. The AGEN pipeline is from the
1275// load and the EXEC pipeline is required for the ALU.
// The pattern names LFDU and LFDUX.
1276def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1277              DISP_3SLOTS_1C, DISP_1C],
1278      (instrs
1279    (instregex "LF(DU|DUX)$")
1280)>;
1281
1282// Crypto Instructions
1283
1284// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1285// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1286// dispatch.
// The patterns name VPMSUMB/H/W/D, VCIPHER, VNCIPHER, VCIPHERLAST and
// VNCIPHERLAST; VSBOX is listed explicitly.
1287def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1288      (instrs
1289    (instregex "VPMSUM(B|H|W|D)$"),
1290    (instregex "V(N)?CIPHER(LAST)?$"),
1291    VSBOX
1292)>;
1293
1294// Branch Instructions
1295
1296// Two Cycle Branch
// Only the branch write resource (P9_BR_2C) and a branch dispatch
// (DISP_BR_1C) are listed; no EXEC or AGEN pipeline is used.
1297def : InstRW<[P9_BR_2C, DISP_BR_1C],
1298      (instrs
1299  (instregex "BCCCTR(L)?(8)?$"),
1300  (instregex "BCCL(A|R|RL)?$"),
1301  (instregex "BCCTR(L)?(8)?(n)?$"),
1302  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1303  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1304  (instregex "BL(_TLS|_NOP)?$"),
1305  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1306  (instregex "BLA(8|8_NOP)?$"),
1307  (instregex "BLR(8|L)?$"),
1308  (instregex "TAILB(A)?(8)?$"),
1309  (instregex "TAILBCTR(8)?$"),
1310  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1311  (instregex "BCLR(L)?(n)?$"),
1312  (instregex "BCTR(L)?(8)?$"),
1313  B,
1314  BA,
1315  BC,
1316  BCC,
1317  BCCA,
1318  BCL,
1319  BCLalways,
1320  BCLn,
1321  BCTRL8_LDinto_toc,
1322  BCTRL_LWZinto_toc,
1323  BCn,
1324  CTRL_DEP
1325)>;
1326
1327// Five Cycle Branch with a 2 Cycle ALU Op
1328// Operations must be done consecutively and not in parallel.
// The latencies therefore add up to the 7 cycles in P9_BROpAndALUOp_7C.
// Uses one EXEC pipeline, one branch dispatch (DISP_BR_1C) and one regular
// dispatch (DISP_1C).
1329def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1330      (instrs
1331    ADDPCIS
1332)>;
1333
1334// Special Extracted Instructions For Atomics
1335
1336// Atomic Load
// Resource list: five LS ops (two P9_LS_1C, three P9_LS_4C), two IP_EXEC_1C
// and five IP_AGEN_1C pipelines, and six dispatch entries (four DISP_1C and
// two DISP_3SLOTS_1C).
// The pattern names LDAT and LWAT.
1337def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1338              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1339              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, 
1340              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1341      (instrs
1342    (instregex "L(D|W)AT$")
1343)>;
1344
1345// Atomic Store
// Resource list: three LS ops (one P9_LS_1C, two P9_LS_4C), one IP_EXEC_1C
// and three IP_AGEN_1C pipelines, and three dispatch entries (two DISP_1C and
// one DISP_3SLOTS_1C).
// The pattern names STDAT and STWAT.
1346def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1347              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1348      (instrs
1349    (instregex "ST(D|W)AT$")
1350)>;
1351
1352// Signal Processing Engine (SPE) Instructions
1353// These instructions are not supported on Power 9
// The resource list is intentionally empty and the record sets
// Unsupported = 1, so no P9 scheduling resources are attached to these
// opcodes.
1354def : InstRW<[],
1355    (instrs
1356  BRINC,
1357  EVABS,
1358  EVEQV,
1359  EVMRA,
1360  EVNAND,
1361  EVNEG,
1362  (instregex "EVADD(I)?W$"),
1363  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1364  (instregex "EVAND(C)?$"),
1365  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1366  (instregex "EVCNTL(S|Z)W$"),
1367  (instregex "EVDIVW(S|U)$"),
1368  (instregex "EVEXTS(B|H)$"),
1369  (instregex "EVLD(H|W|D)(X)?$"),
1370  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1371  (instregex "EVLWHE(X)?$"),
1372  (instregex "EVLWHO(S|U)(X)?$"),
1373  (instregex "EVLW(H|W)SPLAT(X)?$"),
1374  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1375  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1376  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1377  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1378  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1379  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1380  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1381  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1382  (instregex "EVMWHUMI(A)?$"),
1383  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1384  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1385  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1386  (instregex "EVMWSSF(A|AA|AN)?$"),
1387  (instregex "EVMWUMI(A|AA|AN)?$"),
1388  (instregex "EV(N|X)?OR(C)?$"),
1389  (instregex "EVR(LW|LWI|NDW)$"),
1390  (instregex "EVSLW(I)?$"),
1391  (instregex "EVSPLAT(F)?I$"),
1392  (instregex "EVSRW(I)?(S|U)$"),
1393  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1394  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1395  (instregex "EVSUB(I)?FW$")
1396)> { let Unsupported = 1; }
1397
1398// General Instructions without scheduling support.
// The resource list is intentionally empty and the record sets
// Unsupported = 1, so no P9 scheduling resources are attached to these
// opcodes.
1399def : InstRW<[],
1400    (instrs
1401  (instregex "(H)?RFI(D)?$"),
1402  (instregex "DSS(ALL)?$"),
1403  (instregex "DST(ST)?(T)?(64)?$"),
1404  (instregex "ICBL(C|Q)$"),
1405  (instregex "L(W|H|B)EPX$"),
1406  (instregex "ST(W|H|B)EPX$"),
1407  (instregex "(L|ST)FDEPX$"),
1408  (instregex "M(T|F)SR(IN)?$"),
1409  (instregex "M(T|F)DCR$"),
1410  (instregex "NOP_GT_PWR(6|7)$"),
1411  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1412  (instregex "WRTEE(I)?$"),
1413  ATTN,
1414  CLRBHRB,
1415  MFBHRBE,
1416  MBAR,
1417  MSYNC,
1418  SLBSYNC,
1419  SLBFEE_rec,
1420  NAP,
1421  STOP,
1422  TRAP,
1423  RFCI,
1424  RFDI,
1425  RFMCI,
1426  SC,
1427  DCBA,
1428  DCBI,
1429  DCCCI,
1430  ICCCI
1431)> { let Unsupported = 1; }
1432