P9InstrResources.td revision 360661
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two cycle ALU vector operation that uses an entire superslice.
34// Uses both ALU units (even P9_ALUE_2C and odd P9_ALUO_2C), both pipelines
35// (IP_EXECE_1C, IP_EXECO_1C) and 1 dispatch (DISP_1C) in the given superslice.
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLNAND,
68    XXLNOR,
69    XXLOR,
70    XXLORf,
71    XXLORC,
72    XXLXOR,
73    XXLXORdpz,
74    XXLXORspz,
75    XXLXORz,
76    XXSEL,
77    XSABSQP,
78    XSCPSGNQP,
79    XSIEXPQP,
80    XSNABSQP,
81    XSNEGQP,
82    XSXEXPQP
83)>;
84
85// Restricted Dispatch ALU operation for 3 cycles (P9_ALU_3C). The operation
86// runs on a single slice (IP_EXEC_1C). However, since it is Restricted, it
87// requires all 3 dispatches (DISP_3SLOTS_1C) for that superslice.
88def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
89      (instrs
90    (instregex "TABORT(D|W)C(I)?$"),
91    (instregex "MTFSB(0|1)$"),
92    (instregex "MFFSC(D)?RN(I)?$"),
93    (instregex "CMPRB(8)?$"),
94    (instregex "TD(I)?$"),
95    (instregex "TW(I)?$"),
96    (instregex "FCMPU(S|D)$"),
97    (instregex "XSTSTDC(S|D)P$"),
98    FTDIV,
99    FTSQRT,
100    CMPEQB
101)>;
102
103// Standard Dispatch (DISP_1C) ALU operation for 3 cycles (P9_ALU_3C). Only one slice used.
104def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
105      (instrs
106    (instregex "XSMAX(C|J)?DP$"),
107    (instregex "XSMIN(C|J)?DP$"),
108    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
109    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
110    (instregex "POPCNT(D|W)$"),
111    (instregex "CMPB(8)?$"),
112    (instregex "SETB(8)?$"),
113    XSTDIVDP,
114    XSTSQRTDP,
115    XSXSIGDP,
116    XSCVSPDPN,
117    BPERMD
118)>;
119
120// Standard Dispatch (DISP_1C) ALU operation for 2 cycles (P9_ALU_2C). Only one slice used.
121def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
122      (instrs
123    (instregex "S(L|R)D$"),
124    (instregex "SRAD(I)?$"),
125    (instregex "EXTSWSLI_32_64$"),
126    (instregex "MFV(S)?RD$"),
127    (instregex "MTVSRD$"),
128    (instregex "MTVSRW(A|Z)$"),
129    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
130    (instregex "CMP(L)?D(I)?$"),
131    (instregex "SUBF(I)?C(8)?(O)?$"),
132    (instregex "ANDI(S)?o(8)?$"),
133    (instregex "ADDC(8)?(O)?$"),
134    (instregex "ADDIC(8)?(o)?$"),
135    (instregex "ADD(8|4)(O)?(o)?$"),
136    (instregex "ADD(E|ME|ZE)(8)?(O)?(o)?$"),
137    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(o)?$"),
138    (instregex "NEG(8)?(O)?(o)?$"),
139    (instregex "POPCNTB$"),
140    (instregex "ADD(I|IS)?(8)?$"),
141    (instregex "LI(S)?(8)?$"),
142    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
143    (instregex "NAND(8)?(o)?$"),
144    (instregex "AND(C)?(8)?(o)?$"),
145    (instregex "NOR(8)?(o)?$"),
146    (instregex "OR(C)?(8)?(o)?$"),
147    (instregex "EQV(8)?(o)?$"),
148    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
149    (instregex "ADD(4|8)(TLS)?(_)?$"),
150    (instregex "NEG(8)?(O)?$"), // NOTE(review): subsumed by the "NEG(8)?(O)?(o)?$" pattern above.
151    (instregex "ADDI(S)?toc(HA|L)$"),
152    COPY,
153    MCRF,
154    MCRXRX,
155    XSNABSDP,
156    XSXEXPDP,
157    XSABSDP,
158    XSNEGDP,
159    XSCPSGNDP,
160    MFVSRWZ,
161    EXTSWSLI,
162    SRADI_32,
163    RLDIC,
164    RFEBB,
165    LA,
166    TBEGIN,
167    TRECHKPT,
168    NOP,
169    WAIT
170)>;
171
172// Restricted Dispatch ALU operation for 2 cycles (P9_ALU_2C). The operation
173// runs on a single slice (IP_EXEC_1C). However, since it is Restricted, it
174// requires all 3 dispatches (DISP_3SLOTS_1C) for that superslice.
175def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
176      (instrs
177    (instregex "RLDC(L|R)$"),
178    (instregex "RLWIMI(8)?$"),
179    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
180    (instregex "M(F|T)OCRF(8)?$"),
181    (instregex "CR(6)?(UN)?SET$"),
182    (instregex "CR(N)?(OR|AND)(C)?$"),
183    (instregex "S(L|R)W(8)?$"),
184    (instregex "RLW(INM|NM)(8)?$"),
185    (instregex "F(N)?ABS(D|S)$"),
186    (instregex "FNEG(D|S)$"),
187    (instregex "FCPSGN(D|S)$"),
188    (instregex "SRAW(I)?$"),
189    (instregex "ISEL(8)?$"),
190    RLDIMI,
191    XSIEXPDP,
192    FMR,
193    CREQV,
194    CRXOR,
195    TRECLAIM,
196    TSR,
197    TABORT
198)>;
199
200// Three cycle ALU vector operation that uses an entire superslice.
201// Uses both ALU units (even P9_ALUE_3C and odd P9_ALUO_3C), both pipelines
202// (IP_EXECE_1C, IP_EXECO_1C) and 1 dispatch (DISP_1C) in the given superslice.
203def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
204      (instrs
205    (instregex "M(T|F)VSCR$"),
206    (instregex "VCMPNEZ(B|H|W)$"),
207    (instregex "VCMPEQU(B|H|W|D)$"),
208    (instregex "VCMPNE(B|H|W)$"),
209    (instregex "VABSDU(B|H|W)$"),
210    (instregex "VADDU(B|H|W)S$"),
211    (instregex "VAVG(S|U)(B|H|W)$"),
212    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
213    (instregex "VCMPBFP(o)?$"),
214    (instregex "VC(L|T)Z(B|H|W|D)$"),
215    (instregex "VADDS(B|H|W)S$"),
216    (instregex "V(MIN|MAX)FP$"),
217    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
218    VBPERMD,
219    VADDCUW,
220    VPOPCNTW,
221    VPOPCNTD,
222    VPRTYBD,
223    VPRTYBW,
224    VSHASIGMAD,
225    VSHASIGMAW,
226    VSUBSBS,
227    VSUBSHS,
228    VSUBSWS,
229    VSUBUBS,
230    VSUBUHS,
231    VSUBUWS,
232    VSUBCUW,
233    VCMPGTSB,
234    VCMPGTSBo,
235    VCMPGTSD,
236    VCMPGTSDo,
237    VCMPGTSH,
238    VCMPGTSHo,
239    VCMPGTSW,
240    VCMPGTSWo,
241    VCMPGTUB,
242    VCMPGTUBo,
243    VCMPGTUD,
244    VCMPGTUDo,
245    VCMPGTUH,
246    VCMPGTUHo,
247    VCMPGTUW,
248    VCMPGTUWo,
249    VCMPNEBo,
250    VCMPNEHo,
251    VCMPNEWo,
252    VCMPNEZBo,
253    VCMPNEZHo,
254    VCMPNEZWo,
255    VCMPEQUBo,
256    VCMPEQUDo,
257    VCMPEQUHo,
258    VCMPEQUWo,
259    XVCMPEQDP,
260    XVCMPEQDPo,
261    XVCMPEQSP,
262    XVCMPEQSPo,
263    XVCMPGEDP,
264    XVCMPGEDPo,
265    XVCMPGESP,
266    XVCMPGESPo,
267    XVCMPGTDP,
268    XVCMPGTDPo,
269    XVCMPGTSP,
270    XVCMPGTSPo,
271    XVMAXDP,
272    XVMAXSP,
273    XVMINDP,
274    XVMINSP,
275    XVTDIVDP,
276    XVTDIVSP,
277    XVTSQRTDP,
278    XVTSQRTSP,
279    XVTSTDCDP,
280    XVTSTDCSP,
281    XVXSIGDP,
282    XVXSIGSP
283)>;
284
285// 7 cycle DP vector operation that uses an entire superslice.
286// Uses both DP units (even P9_DPE_7C and odd P9_DPO_7C), both pipelines
287// (IP_EXECE_1C, IP_EXECO_1C) and 1 dispatch (DISP_1C) in the given superslice.
288def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
289      (instrs
290    VADDFP,
291    VCTSXS,
292    VCTSXS_0,
293    VCTUXS,
294    VCTUXS_0,
295    VEXPTEFP,
296    VLOGEFP,
297    VMADDFP,
298    VMHADDSHS,
299    VNMSUBFP,
300    VREFP,
301    VRFIM,
302    VRFIN,
303    VRFIP,
304    VRFIZ,
305    VRSQRTEFP,
306    VSUBFP,
307    XVADDDP,
308    XVADDSP,
309    XVCVDPSP,
310    XVCVDPSXDS,
311    XVCVDPSXWS,
312    XVCVDPUXDS,
313    XVCVDPUXWS,
314    XVCVHPSP,
315    XVCVSPDP,
316    XVCVSPHP,
317    XVCVSPSXDS,
318    XVCVSPSXWS,
319    XVCVSPUXDS,
320    XVCVSPUXWS,
321    XVCVSXDDP,
322    XVCVSXDSP,
323    XVCVSXWDP,
324    XVCVSXWSP,
325    XVCVUXDDP,
326    XVCVUXDSP,
327    XVCVUXWDP,
328    XVCVUXWSP,
329    XVMADDADP,
330    XVMADDASP,
331    XVMADDMDP,
332    XVMADDMSP,
333    XVMSUBADP,
334    XVMSUBASP,
335    XVMSUBMDP,
336    XVMSUBMSP,
337    XVMULDP,
338    XVMULSP,
339    XVNMADDADP,
340    XVNMADDASP,
341    XVNMADDMDP,
342    XVNMADDMSP,
343    XVNMSUBADP,
344    XVNMSUBASP,
345    XVNMSUBMDP,
346    XVNMSUBMSP,
347    XVRDPI,
348    XVRDPIC,
349    XVRDPIM,
350    XVRDPIP,
351    XVRDPIZ,
352    XVREDP,
353    XVRESP,
354    XVRSPI,
355    XVRSPIC,
356    XVRSPIM,
357    XVRSPIP,
358    XVRSPIZ,
359    XVRSQRTEDP,
360    XVRSQRTESP,
361    XVSUBDP,
362    XVSUBSP,
363    VCFSX,
364    VCFSX_0,
365    VCFUX,
366    VCFUX_0,
367    VMHRADDSHS,
368    VMLADDUHM,
369    VMSUMMBM,
370    VMSUMSHM,
371    VMSUMSHS,
372    VMSUMUBM,
373    VMSUMUHM,
374    VMSUMUHS,
375    VMULESB,
376    VMULESH,
377    VMULESW,
378    VMULEUB,
379    VMULEUH,
380    VMULEUW,
381    VMULOSB,
382    VMULOSH,
383    VMULOSW,
384    VMULOUB,
385    VMULOUH,
386    VMULOUW,
387    VMULUWM,
388    VSUM2SWS,
389    VSUM4SBS,
390    VSUM4SHS,
391    VSUM4UBS,
392    VSUMSWS
393)>;
394
395// 5 cycle Restricted DP operation. One DP unit (P9_DP_5C), one EXEC pipeline
396// (IP_EXEC_1C) and all three dispatch units for the superslice (DISP_3SLOTS_1C).
397def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
398      (instrs
399    (instregex "MADD(HD|HDU|LD|LD8)$"),
400    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
401)>;
402
403// 7 cycle Restricted DP operation. One DP unit (P9_DP_7C), one EXEC pipeline
404// (IP_EXEC_1C) and all three dispatch units for the superslice (DISP_3SLOTS_1C).
405def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
406      (instrs
407    FRSP,
408    (instregex "FRI(N|P|Z|M)(D|S)$"),
409    (instregex "FRE(S)?$"),
410    (instregex "FADD(S)?$"),
411    (instregex "FMSUB(S)?$"),
412    (instregex "FMADD(S)?$"),
413    (instregex "FSUB(S)?$"),
414    (instregex "FCFID(U)?(S)?$"),
415    (instregex "FCTID(U)?(Z)?$"),
416    (instregex "FCTIW(U)?(Z)?$"),
417    (instregex "FRSQRTE(S)?$"),
418    FNMADDS,
419    FNMADD,
420    FNMSUBS,
421    FNMSUB,
422    FSELD,
423    FSELS,
424    FMULS,
425    FMUL,
426    XSMADDADP,
427    XSMADDASP,
428    XSMADDMDP,
429    XSMADDMSP,
430    XSMSUBADP,
431    XSMSUBASP,
432    XSMSUBMDP,
433    XSMSUBMSP,
434    XSMULDP,
435    XSMULSP,
436    XSNMADDADP,
437    XSNMADDASP,
438    XSNMADDMDP,
439    XSNMADDMSP,
440    XSNMSUBADP,
441    XSNMSUBASP,
442    XSNMSUBMDP,
443    XSNMSUBMSP
444)>;
445
446// 7 cycle Restricted DP operation (P9_DP_7C) and one 3 cycle ALU operation
447// (P9_ALU_3C). These operations can be done in parallel. The DP is restricted
448// so we need a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
449def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
450              DISP_3SLOTS_1C, DISP_1C],
451      (instrs
452    (instregex "FSEL(D|S)o$")
453)>;
454
455// 5 Cycle Restricted DP operation and one 2 cycle ALU operation, modeled with the combined P9_DPOpAndALUOp_7C resource.
456def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
457              DISP_3SLOTS_1C, DISP_1C],
458      (instrs
459    (instregex "MUL(H|L)(D|W)(U)?(O)?o$")
460)>;
461
462// 7 cycle Restricted DP operation and one 3 cycle ALU operation
463// (P9_DPOpAndALU2Op_10C). These operations must be done sequentially. The DP is
464// restricted so we need a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
465def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
466              DISP_3SLOTS_1C, DISP_1C],
467      (instrs
468    (instregex "FRI(N|P|Z|M)(D|S)o$"),
469    (instregex "FRE(S)?o$"),
470    (instregex "FADD(S)?o$"),
471    (instregex "FSUB(S)?o$"),
472    (instregex "F(N)?MSUB(S)?o$"),
473    (instregex "F(N)?MADD(S)?o$"),
474    (instregex "FCFID(U)?(S)?o$"),
475    (instregex "FCTID(U)?(Z)?o$"),
476    (instregex "FCTIW(U)?(Z)?o$"),
477    (instregex "FMUL(S)?o$"),
478    (instregex "FRSQRTE(S)?o$"),
479    FRSPo
480)>;
481
482// 7 cycle DP operation. One DP unit (P9_DP_7C), one EXEC pipeline (IP_EXEC_1C) and 1 dispatch unit (DISP_1C).
483def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
484      (instrs
485    XSADDDP,
486    XSADDSP,
487    XSCVDPHP,
488    XSCVDPSP,
489    XSCVDPSXDS,
490    XSCVDPSXDSs,
491    XSCVDPSXWS,
492    XSCVDPUXDS,
493    XSCVDPUXDSs,
494    XSCVDPUXWS,
495    XSCVDPSXWSs,
496    XSCVDPUXWSs,
497    XSCVHPDP,
498    XSCVSPDP,
499    XSCVSXDDP,
500    XSCVSXDSP,
501    XSCVUXDDP,
502    XSCVUXDSP,
503    XSRDPI,
504    XSRDPIC,
505    XSRDPIM,
506    XSRDPIP,
507    XSRDPIZ,
508    XSREDP,
509    XSRESP,
510    XSRSQRTEDP,
511    XSRSQRTESP,
512    XSSUBDP,
513    XSSUBSP,
514    XSCVDPSPN,
515    XSRSP
516)>;
517
518// Three Cycle PM operation (P9_PM_3C). Only one PM unit per superslice so we
519// use the whole superslice. That includes both exec pipelines (IP_EXECO_1C,
520// IP_EXECE_1C) and one dispatch (DISP_1C).
521def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
522      (instrs
523    (instregex "LVS(L|R)$"),
524    (instregex "VSPLTIS(W|H|B)$"),
525    (instregex "VSPLT(W|H|B)(s)?$"),
526    (instregex "V_SETALLONES(B|H)?$"),
527    (instregex "VEXTRACTU(B|H|W)$"),
528    (instregex "VINSERT(B|H|W|D)$"),
529    MFVSRLD,
530    MTVSRWS,
531    VBPERMQ,
532    VCLZLSBB,
533    VCTZLSBB,
534    VEXTRACTD,
535    VEXTUBLX,
536    VEXTUBRX,
537    VEXTUHLX,
538    VEXTUHRX,
539    VEXTUWLX,
540    VEXTUWRX,
541    VGBBD,
542    VMRGHB,
543    VMRGHH,
544    VMRGHW,
545    VMRGLB,
546    VMRGLH,
547    VMRGLW,
548    VPERM,
549    VPERMR,
550    VPERMXOR,
551    VPKPX,
552    VPKSDSS,
553    VPKSDUS,
554    VPKSHSS,
555    VPKSHUS,
556    VPKSWSS,
557    VPKSWUS,
558    VPKUDUM,
559    VPKUDUS,
560    VPKUHUM,
561    VPKUHUS,
562    VPKUWUM,
563    VPKUWUS,
564    VPRTYBQ,
565    VSL,
566    VSLDOI,
567    VSLO,
568    VSLV,
569    VSR,
570    VSRO,
571    VSRV,
572    VUPKHPX,
573    VUPKHSB,
574    VUPKHSH,
575    VUPKHSW,
576    VUPKLPX,
577    VUPKLSB,
578    VUPKLSH,
579    VUPKLSW,
580    XXBRD,
581    XXBRH,
582    XXBRQ,
583    XXBRW,
584    XXEXTRACTUW,
585    XXINSERTW,
586    XXMRGHW,
587    XXMRGLW,
588    XXPERM,
589    XXPERMR,
590    XXSLDWI,
591    XXSLDWIs,
592    XXSPLTIB,
593    XXSPLTW,
594    XXSPLTWs,
595    XXPERMDI,
596    XXPERMDIs,
597    VADDCUQ,
598    VADDECUQ,
599    VADDEUQM,
600    VADDUQM,
601    VMUL10CUQ,
602    VMUL10ECUQ,
603    VMUL10EUQ,
604    VMUL10UQ,
605    VSUBCUQ,
606    VSUBECUQ,
607    VSUBEUQM,
608    VSUBUQM,
609    XSCMPEXPQP,
610    XSCMPOQP,
611    XSCMPUQP,
612    XSTSTDCQP,
613    XSXSIGQP,
614    BCDCFNo,
615    BCDCFZo,
616    BCDCPSGNo,
617    BCDCTNo,
618    BCDCTZo,
619    BCDSETSGNo,
620    BCDSo,
621    BCDTRUNCo,
622    BCDUSo,
623    BCDUTRUNCo
624)>;
625
626// 12 Cycle DFU operation (P9_DFU_12C). Only one DFU unit per CPU so we use a
627// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
628// IP_EXECO_1C) and one dispatch (DISP_1C).
629def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
630      (instrs
631    BCDSRo,
632    XSADDQP,
633    XSADDQPO,
634    XSCVDPQP,
635    XSCVQPDP,
636    XSCVQPDPO,
637    XSCVQPSDZ,
638    XSCVQPSWZ,
639    XSCVQPUDZ,
640    XSCVQPUWZ,
641    XSCVSDQP,
642    XSCVUDQP,
643    XSRQPI,
644    XSRQPIX,
645    XSRQPXP,
646    XSSUBQP,
647    XSSUBQPO
648)>;
649
650// 23 Cycle DFU operation (P9_DFU_23C). Only one DFU unit per CPU so we use a
651// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
652// IP_EXECO_1C) and one dispatch (DISP_1C).
653def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
654      (instrs
655    BCDCTSQo
656)>;
657
658// 24 Cycle DFU operation (P9_DFU_24C). Only one DFU unit per CPU so we use a
659// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
660// IP_EXECO_1C) and one dispatch (DISP_1C).
661def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
662      (instrs
663    XSMADDQP,
664    XSMADDQPO,
665    XSMSUBQP,
666    XSMSUBQPO,
667    XSMULQP,
668    XSMULQPO,
669    XSNMADDQP,
670    XSNMADDQPO,
671    XSNMSUBQP,
672    XSNMSUBQPO
673)>;
674
675// 37 Cycle DFU operation (P9_DFU_37C). Only one DFU unit per CPU so we use a
676// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
677// IP_EXECO_1C) and one dispatch (DISP_1C).
678def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
679      (instrs
680    BCDCFSQo
681)>;
682
683// 58 Cycle DFU operation (P9_DFU_58C). Only one DFU unit per CPU so we use a
684// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
685// IP_EXECO_1C) and one dispatch (DISP_1C).
686def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
687      (instrs
688    XSDIVQP,
689    XSDIVQPO
690)>;
691
692// 76 Cycle DFU operation (P9_DFU_76C). Only one DFU unit per CPU so we use a
693// whole superslice. That includes both exec pipelines (IP_EXECE_1C,
694// IP_EXECO_1C) and one dispatch (DISP_1C).
695def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
696      (instrs
697    XSSQRTQP,
698    XSSQRTQPO
699)>;
700
701// 6 Cycle Load (P9_LS_6C) uses a single slice: one AGEN (IP_AGEN_1C) and one dispatch (DISP_1C).
702def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
703      (instrs
704    (instregex "LXVL(L)?") // NOTE(review): no trailing '$', unlike most patterns in this file; confirm intended.
705)>;
706
707// 5 Cycle Load (P9_LS_5C) uses a single slice: one AGEN (IP_AGEN_1C) and one dispatch (DISP_1C).
708def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
709      (instrs
710    (instregex "LVE(B|H|W)X$"),
711    (instregex "LVX(L)?"), // NOTE(review): no trailing '$', unlike the neighbouring patterns; confirm intended.
712    (instregex "LXSI(B|H)ZX$"),
713    LXSDX,
714    LXVB16X,
715    LXVD2X,
716    LXVWSX,
717    LXSIWZX,
718    LXV,
719    LXVX,
720    LXSD,
721    DFLOADf64,
722    XFLOADf64,
723    LIWZX
724)>;
725
726// 4 Cycle Load (P9_LS_4C) uses a single slice: one AGEN (IP_AGEN_1C) and one dispatch (DISP_1C).
727def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
728      (instrs
729    (instregex "DCB(F|T|ST)(EP)?$"),
730    (instregex "DCBZ(L)?(EP)?$"),
731    (instregex "DCBTST(EP)?$"),
732    (instregex "CP_COPY(8)?$"),
733    (instregex "CP_PASTE(8)?$"),
734    (instregex "ICBI(EP)?$"),
735    (instregex "ICBT(LS)?$"),
736    (instregex "LBARX(L)?$"),
737    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
738    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
739    (instregex "LH(A|B)RX(L)?(8)?$"),
740    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
741    (instregex "LWARX(L)?$"),
742    (instregex "LWBRX(8)?$"),
743    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
744    CP_ABORT,
745    DARN,
746    EnforceIEIO,
747    ISYNC,
748    MSGSYNC,
749    TLBSYNC,
750    SYNC,
751    LMW,
752    LSWI
753)>;
754
755// 4 Cycle Restricted load (P9_LS_4C) uses a single slice (IP_AGEN_1C) but the
756// dispatch for the whole superslice (DISP_3SLOTS_1C).
757def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
758      (instrs
759    LFIWZX,
760    LFDX,
761    LFD
762)>;
763
764// Cracked Load Instructions.
765// Two 4 cycle loads (P9_LS_4C twice, IP_AGEN_1C twice) that can be done in parallel; dispatch uses DISP_PAIR_1C.
766def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
767              DISP_PAIR_1C],
768      (instrs
769    SLBIA,
770    SLBIE,
771    SLBMFEE,
772    SLBMFEV,
773    SLBMTE,
774    TLBIEL
775)>;
776
776// Cracked Load Instruction.
777// Requires Load (P9_LS_4C) and ALU (P9_ALU_2C) pieces totaling 6 cycles. The
778// Load and ALU operations can be run in parallel. Dispatch uses two DISP_PAIR_1C.
779def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
780              DISP_PAIR_1C, DISP_PAIR_1C],
781      (instrs
782    (instregex "L(W|H)ZU(X)?(8)?$")
783)>;
785
785// Cracked TEND Instruction.
786// Requires Load (P9_LS_4C) and ALU (P9_ALU_2C) pieces totaling 6 cycles. The
787// Load and ALU operations can be run in parallel. Dispatch uses two DISP_1C.
788def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
789              DISP_1C, DISP_1C],
790      (instrs
791    TEND
792)>;
794
795
796// Cracked Store Instruction.
797// Consecutive Store and ALU operations (P9_StoreAndALUOp_3C). The store is
798// restricted and requires three dispatches (DISP_3SLOTS_1C), plus one more (DISP_1C).
799def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
800              DISP_3SLOTS_1C, DISP_1C],
801      (instrs
802    (instregex "ST(B|H|W|D)CX$")
803)>;
804
805// Cracked Load Instruction.
806// Two consecutive load operations for a total of 8 cycles (P9_LoadAndLoadOp_8C).
807def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
808              DISP_1C, DISP_1C],
809      (instrs
810    LDMX
811)>;
812
813// Cracked Load instruction (P9_LoadAndALUOp_6C).
814// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
815// operations cannot be done at the same time and so their latencies are added.
816def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
817              DISP_1C, DISP_1C],
818      (instrs
819    (instregex "LHA(X)?(8)?$"),
820    (instregex "CP_PASTE(8)?o$"),
821    (instregex "LWA(X)?(_32)?$"),
822    TCHECK
823)>;
824
825// Cracked Restricted Load instruction (P9_LoadAndALUOp_6C).
826// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
827// operations cannot be done at the same time and so their latencies are added.
828// Full 6 dispatches (two DISP_3SLOTS_1C) are required as this is both cracked and restricted.
829def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
830              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
831      (instrs
832    LFIWAX
833)>;
834
835// Cracked Load instruction (P9_LoadAndALUOp_7C).
836// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
837// operations cannot be done at the same time and so their latencies are added.
838// Two dispatches (DISP_1C, DISP_1C) are required as this is a cracked instruction.
839def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
840      (instrs
841    LXSIWAX,
842    LIWAX
843)>;
844
845// Cracked Load instruction (P9_LoadAndALU2Op_7C).
846// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
847// cycles. The Load and ALU operations cannot be done at the same time and so
848// their latencies are added.
849// Full 6 dispatches (two DISP_3SLOTS_1C) are required as this is a restricted instruction.
850def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
851              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
852      (instrs
853    LFSX,
854    LFS
855)>;
856
857// Cracked Load instruction (P9_LoadAndALU2Op_8C).
858// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
859// operations cannot be done at the same time and so their latencies are added.
860// Two dispatches (DISP_1C, DISP_1C) are required as this is a cracked instruction.
861def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
862      (instrs
863    LXSSP,
864    LXSSPX,
865    XFLOADf32,
866    DFLOADf32
867)>;
868
869// Cracked 3-Way Load Instruction.
870// Load with two ALU operations that depend on each other, modeled as P9_LoadAndALUOp_6C plus P9_ALU_2C.
871def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
872              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
873      (instrs
874    (instregex "LHAU(X)?(8)?$"),
875    LWAUX
876)>;
877
878// Cracked Load that requires the PM resource (P9_LoadAndPMOp_8C).
879// Since the Load and the PM cannot be done at the same time the latencies are
880// added. Requires 8 cycles. Since the PM requires the full superslice we need
881// both IP_EXECE_1C and IP_EXECO_1C pipelines as well as 1 dispatch (DISP_1C)
882// for the PM. The Load requires the remaining 1 dispatch (DISP_1C).
883def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
884              DISP_1C, DISP_1C],
885      (instrs
886    LXVH8X,
887    LXVDSX,
888    LXVW4X
889)>;
890
891// Single slice Restricted store operation (P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C).
892// The restricted operation requires all three dispatches for the superslice (DISP_3SLOTS_1C).
893def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
894      (instrs
895    (instregex "STF(S|D|IWX|SX|DX)$"),
896    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
897    (instregex "STW(8)?$"),
898    (instregex "(D|X)FSTORE(f32|f64)$"),
899    (instregex "ST(W|H|D)BRX$"),
900    (instregex "ST(B|H|D)(8)?$"),
901    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
902    STIWX,
903    SLBIEG,
904    STMW,
905    STSWI,
906    TLBIE
907)>;
908
909// Vector Store Instruction (P9_LS_1C, IP_AGEN_1C).
910// Requires the whole superslice and therefore requires one dispatch (DISP_1C)
911// as well as both the Even and Odd exec pipelines (IP_EXECE_1C, IP_EXECO_1C).
912def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
913      (instrs
914    (instregex "STVE(B|H|W)X$"),
915    (instregex "STVX(L)?$"),
916    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
917)>;
918
919// 5 Cycle DIV operation (P9_DIV_5C). Only one DIV unit per superslice so we
920// use the whole superslice. That includes both exec pipelines (IP_EXECE_1C,
921// IP_EXECO_1C) and two dispatches (modeled by DISP_EVEN_1C).
922def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
923      (instrs
924    (instregex "MTCTR(8)?(loop)?$"),
925    (instregex "MTLR(8)?$")
926)>;
927
928// 12 Cycle DIV operation (P9_DIV_12C). Only one DIV unit per superslice so we
929// use the whole superslice. That includes both exec pipelines (IP_EXECE_1C,
930// IP_EXECO_1C) and two dispatches (modeled by DISP_EVEN_1C).
931def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
932      (instrs
933    (instregex "M(T|F)VRSAVE(v)?$"),
934    (instregex "M(T|F)PMR$"),
935    (instregex "M(T|F)TB(8)?$"),
936    (instregex "MF(SPR|CTR|LR)(8)?$"),
937    (instregex "M(T|F)MSR(D)?$"),
938    (instregex "MTSPR(8)?$")
939)>;
940
941// 16 Cycle DIV operation (P9_DIV_16C_8). Only one DIV unit per superslice so
942// we use the whole superslice. That includes both exec pipelines (IP_EXECO_1C,
943// IP_EXECE_1C) and two dispatches (modeled by DISP_EVEN_1C).
944def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
945      (instrs
946    DIVW,
947    DIVWO,
948    DIVWU,
949    DIVWUO,
950    MODSW
951)>;
952
953// 24 Cycle DIV operation (P9_DIV_24C_8). Only one DIV unit per superslice so
954// we use the whole superslice. That includes both exec pipelines (IP_EXECO_1C,
955// IP_EXECE_1C) and two dispatches (modeled by DISP_EVEN_1C).
956def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
957      (instrs
958    DIVWE,
959    DIVWEO,
960    DIVD,
961    DIVDO,
962    DIVWEU,
963    DIVWEUO,
964    DIVDU,
965    DIVDUO,
966    MODSD,
967    MODUD,
968    MODUW
969)>;
970
971// 40 Cycle DIV operation (P9_DIV_40C_8). Only one DIV unit per superslice so
972// we use the whole superslice. That includes both exec pipelines (IP_EXECO_1C,
973// IP_EXECE_1C) and two dispatches (modeled by DISP_EVEN_1C).
974def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
975      (instrs
976    DIVDE,
977    DIVDEO,
978    DIVDEU,
979    DIVDEUO
980)>;
981
982// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
983// and one full superslice for the DIV operation since there is only one DIV per
984// superslice. Latency of DIV plus ALU is 18.
985def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
986              DISP_EVEN_1C, DISP_1C],
987      (instrs
988    (instregex "DIVW(U)?(O)?o$")
989)>;
990
991// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
992// and one full superslice for the DIV operation since there is only one DIV per
993// superslice. Latency of DIV plus ALU is 26 (P9_IntDivAndALUOp_26C_8).
994def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
995              DISP_EVEN_1C, DISP_1C],
996      (instrs
997    DIVDo,
998    DIVDOo,
999    DIVDUo,
1000    DIVDUOo,
1001    DIVWEo,
1002    DIVWEOo,
1003    DIVWEUo,
1004    DIVWEUOo
1005)>;
1006
1007// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
1008// and one full superslice for the DIV operation since there is only one DIV per
1009// superslice. Latency of DIV plus ALU is 42 (P9_IntDivAndALUOp_42C_8).
1010def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1011              DISP_EVEN_1C, DISP_1C],
1012      (instrs
1013    DIVDEo,
1014    DIVDEOo,
1015    DIVDEUo,
1016    DIVDEUOo
1017)>;
1018
1019// CR access instructions in _BrMCR, IIC_BrMCRX.
1020
1021// Cracked, restricted, ALU operations.
1022// Here the two ALU ops can actually be done in parallel and therefore the
1023// latencies are not added together. Otherwise this is like having two
1024// instructions running together on two pipelines and 6 dispatches (two
1025// DISP_3SLOTS_1C). ALU ops are 2 cycles each (P9_ALU_2C).
1026def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1027              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1028      (instrs
1029    MTCRF,
1030    MTCRF8
1031)>;
1032
1033// Cracked ALU operations.
1034// Here the two ALU ops can actually be done in parallel and therefore the
1035// latencies are not added together. Otherwise this is like having two
1036// instructions running together on two pipelines and 2 dispatches (two
1037// DISP_1C). ALU ops are 2 cycles each (P9_ALU_2C).
1038def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1039              DISP_1C, DISP_1C],
1040      (instrs
1041    (instregex "ADDC(8)?(O)?o$"),
1042    (instregex "SUBFC(8)?(O)?o$")
1043)>;
1044
1045// Cracked ALU operations.
1046// Two ALU ops can be done in parallel.
1047// One is a three cycle ALU (P9_ALU_3C), the other is a two cycle ALU (P9_ALU_2C).
1048// One of the ALU ops is restricted, the other is not, so we have a total of
1049// 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
1050def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1051              DISP_3SLOTS_1C, DISP_1C],
1052      (instrs
1053    (instregex "F(N)?ABS(D|S)o$"),
1054    (instregex "FCPSGN(D|S)o$"),
1055    (instregex "FNEG(D|S)o$"),
1056    FMRo
1057)>;
1058
1059// Cracked ALU operations.
1060// Here the two ALU ops can actually be done in parallel and therefore the
1061// latencies are not added together. Otherwise this is like having two
1062// instructions running together on two pipelines and 2 dispatches (two DISP_1C).
1063// ALU ops are 3 cycles each (P9_ALU_3C).
1064def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1065              DISP_1C, DISP_1C],
1066      (instrs
1067    MCRFS
1068)>;
1069
1070// Cracked Restricted ALU operations.
1071// Here the two ALU ops can actually be done in parallel and therefore the
1072// latencies are not added together. Otherwise this is like having two
1073// instructions running together on two pipelines and 6 dispatches (two DISP_3SLOTS_1C).
1074// ALU ops are 3 cycles each (P9_ALU_3C).
1075def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1076              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1077      (instrs
1078    (instregex "MTFSF(b|o)?$"),
1079    (instregex "MTFSFI(o)?$")
1080)>;
1081
1082// Cracked instruction made of two ALU ops (P9_ALUOpAndALUOp_4C).
1083// The two ops cannot be done in parallel.
1084// One of the ALU ops is restricted and takes 3 dispatches (DISP_3SLOTS_1C); the list also has one DISP_1C.
1085def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1086              DISP_3SLOTS_1C, DISP_1C],
1087      (instrs
1088    (instregex "RLD(I)?C(R|L)o$"),
1089    (instregex "RLW(IMI|INM|NM)(8)?o$"),
1090    (instregex "SLW(8)?o$"),
1091    (instregex "SRAW(I)?o$"),
1092    (instregex "SRW(8)?o$"),
1093    RLDICL_32o,
1094    RLDIMIo
1095)>;
1096
1096// Cracked instruction made of two ALU ops (P9_ALU2OpAndALU2Op_6C).
1097// The two ops cannot be done in parallel.
1098// Both of the ALU ops are restricted and take 3 dispatches each (two DISP_3SLOTS_1C).
1099def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1100              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1101      (instrs
1102    (instregex "MFFS(L|CE|o)?$")
1103)>;
1105
1105// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
1106// for a total of 6 cycles. All of the ALU operations are also restricted so
1107// each takes 3 dispatches (DISP_3SLOTS_1C) for a total of 9.
1108def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1109              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1110      (instrs
1111    (instregex "MFCR(8)?$")
1112)>;
1114
1115// Cracked instruction made of two ALU ops.
1116// The two ops cannot be done in parallel. Each op takes a single dispatch.
1117def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1118      (instrs
1119    (instregex "EXTSWSLI_32_64o$"),
1120    (instregex "SRAD(I)?o$"),
1121    EXTSWSLIo,
1122    SLDo,
1123    SRDo,
1124    RLDICo
1125)>;
1126
1127// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatch slots.
1128def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1129      (instrs
1130    FDIV
1131)>;
1132
1133// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (36 total).
1134def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1135              DISP_3SLOTS_1C, DISP_1C],
1136      (instrs
1137    FDIVo
1138)>;
1139
1140// 36 Cycle DP Instruction.
1141// Instruction can be done on a single slice with a single dispatch.
1142def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1143      (instrs
1144    XSSQRTDP
1145)>;
1146
1147// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatch slots.
1148def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1149      (instrs
1150    FSQRT
1151)>;
1152
1153// 36 Cycle DP Vector Instruction. Takes one full superslice and 1 dispatch.
1154def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1155              DISP_1C],
1156      (instrs
1157    XVSQRTDP
1158)>;
1159
1160// 27 Cycle DP Vector Instruction. Takes one full superslice and 1 dispatch.
1161def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1162              DISP_1C],
1163      (instrs
1164    XVSQRTSP
1165)>;
1166
1167// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (39 total).
1168def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1169              DISP_3SLOTS_1C, DISP_1C],
1170      (instrs
1171    FSQRTo
1172)>;
1173
1174// 26 Cycle DP Instruction. Takes one slice and 1 dispatch.
1175def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1176      (instrs
1177    XSSQRTSP
1178)>;
1179
1180// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatch slots.
1181def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1182      (instrs
1183    FSQRTS
1184)>;
1185
1186// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (29 total).
1187def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1188              DISP_3SLOTS_1C, DISP_1C],
1189      (instrs
1190    FSQRTSo
1191)>;
1192
1193// 33 Cycle DP Instruction (not restricted). Takes one slice and 1 dispatch.
1194def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1195      (instrs
1196    XSDIVDP
1197)>;
1198
1199// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatch slots.
1200def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1201      (instrs
1202    FDIVS
1203)>;
1204
1205// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU (25 total).
1206def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1207              DISP_3SLOTS_1C, DISP_1C],
1208      (instrs
1209    FDIVSo
1210)>;
1211
1212// 22 Cycle DP Instruction (not restricted). Takes one slice and 1 dispatch.
1213def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1214      (instrs
1215    XSDIVSP
1216)>;
1217
1218// 24 Cycle DP Vector Instruction. Takes one full superslice.
1219// Includes the DPE and DPO units, both EXECE, EXECO pipelines and 1 dispatch
1220// for the given superslice.
1221def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1222              DISP_1C],
1223      (instrs
1224    XVDIVSP
1225)>;
1226
1227// 33 Cycle DP Vector Instruction. Takes one full superslice.
1228// Includes the DPE and DPO units, both EXECE, EXECO pipelines and 1 dispatch
1229// for the given superslice.
1230def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1231              DISP_1C],
1232      (instrs
1233    XVDIVDP
1234)>;
1235
1236// Instruction cracked into three pieces. One Load and two ALU operations.
1237// The Load and one of the ALU ops cannot be run at the same time and so the
1238// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1239// Both the load and the ALU that depends on it are restricted and so they take
1240// a total of 6 dispatches. The final dispatch comes from the second ALU op.
1241// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1242def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1243              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1244              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1245      (instrs
1246    (instregex "LF(SU|SUX)$")
1247)>;
1248
1249// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1250// the store and so it can be run at the same time as the store. The store is
1251// also restricted (3 dispatches) while the ALU op takes 1 dispatch.
1252def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1253              DISP_3SLOTS_1C, DISP_1C],
1254      (instrs
1255    (instregex "STF(S|D)U(X)?$"),
1256    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1257)>;
1258
1259// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1260// the load and so it can be run at the same time (4 cycle load, 2 cycle ALU).
1261def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1262              DISP_PAIR_1C, DISP_PAIR_1C],
1263      (instrs
1264    (instregex "LBZU(X)?(8)?$"),
1265    (instregex "LDU(X)?$")
1266)>;
1267
1268// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1269// the load and so it can be run at the same time as the load. The load is also
1270// restricted. 3 dispatches are from the restricted load while the other one
1271// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1272// is required for the ALU.
1273def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1274              DISP_3SLOTS_1C, DISP_1C],
1275      (instrs
1276    (instregex "LF(DU|DUX)$")
1277)>;
1278
1279// Crypto Instructions
1280
1281// 6 Cycle CY operation. There is only one CY unit per CPU, so a whole
1282// superslice is used. That includes both exec pipelines (EXECO, EXECE) and
1283// one dispatch.
1284def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1285      (instrs
1286    (instregex "VPMSUM(B|H|W|D)$"),
1287    (instregex "V(N)?CIPHER(LAST)?$"),
1288    VSBOX
1289)>;
1290
1291// Branch Instructions
1292
1293// Two Cycle Branch. Uses only a branch resource and a branch dispatch.
1294def : InstRW<[P9_BR_2C, DISP_BR_1C],
1295      (instrs
1296  (instregex "BCCCTR(L)?(8)?$"),
1297  (instregex "BCCL(A|R|RL)?$"),
1298  (instregex "BCCTR(L)?(8)?(n)?$"),
1299  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1300  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1301  (instregex "BL(_TLS|_NOP)?$"),
1302  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1303  (instregex "BLA(8|8_NOP)?$"),
1304  (instregex "BLR(8|L)?$"),
1305  (instregex "TAILB(A)?(8)?$"),
1306  (instregex "TAILBCTR(8)?$"),
1307  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1308  (instregex "BCLR(L)?(n)?$"),
1309  (instregex "BCTR(L)?(8)?$"),
1310  B,
1311  BA,
1312  BC,
1313  BCC,
1314  BCCA,
1315  BCL,
1316  BCLalways,
1317  BCLn,
1318  BCTRL8_LDinto_toc,
1319  BCn,
1320  CTRL_DEP
1321)>;
1322
1323// Five Cycle Branch with a 2 Cycle ALU Op
1324// Operations must be done consecutively and not in parallel (7 cycles total).
1325def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1326      (instrs
1327    ADDPCIS
1328)>;
1329
1330// Special Extracted Instructions For Atomics
1331
1332// Atomic Load. Modeled with five LS resources (two 1 cycle, three 4 cycle).
1333def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1334              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1335              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, 
1336              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1337      (instrs
1338    (instregex "L(D|W)AT$")
1339)>;
1340
1341// Atomic Store. Modeled with three LS resources (one 1 cycle, two 4 cycle).
1342def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1343              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1344      (instrs
1345    (instregex "ST(D|W)AT$")
1346)>;
1347
1348// Signal Processing Engine (SPE) Instructions
1349// These instructions are not supported on Power 9 (no resources, Unsupported).
1350def : InstRW<[],
1351    (instrs
1352  BRINC,
1353  EVABS,
1354  EVEQV,
1355  EVMRA,
1356  EVNAND,
1357  EVNEG,
1358  (instregex "EVADD(I)?W$"),
1359  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1360  (instregex "EVAND(C)?$"),
1361  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1362  (instregex "EVCNTL(S|Z)W$"),
1363  (instregex "EVDIVW(S|U)$"),
1364  (instregex "EVEXTS(B|H)$"),
1365  (instregex "EVLD(H|W|D)(X)?$"),
1366  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1367  (instregex "EVLWHE(X)?$"),
1368  (instregex "EVLWHO(S|U)(X)?$"),
1369  (instregex "EVLW(H|W)SPLAT(X)?$"),
1370  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1371  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1372  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1373  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1374  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1375  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1376  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1377  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1378  (instregex "EVMWHUMI(A)?$"),
1379  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1380  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1381  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1382  (instregex "EVMWSSF(A|AA|AN)?$"),
1383  (instregex "EVMWUMI(A|AA|AN)?$"),
1384  (instregex "EV(N|X)?OR(C)?$"),
1385  (instregex "EVR(LW|LWI|NDW)$"),
1386  (instregex "EVSLW(I)?$"),
1387  (instregex "EVSPLAT(F)?I$"),
1388  (instregex "EVSRW(I)?(S|U)$"),
1389  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1390  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1391  (instregex "EVSUB(I)?FW$")
1392)> { let Unsupported = 1; }
1393
1394// General instructions without scheduling support (marked Unsupported).
1395def : InstRW<[],
1396    (instrs
1397  (instregex "(H)?RFI(D)?$"),
1398  (instregex "DSS(ALL)?$"),
1399  (instregex "DST(ST)?(T)?(64)?$"),
1400  (instregex "ICBL(C|Q)$"),
1401  (instregex "L(W|H|B)EPX$"),
1402  (instregex "ST(W|H|B)EPX$"),
1403  (instregex "(L|ST)FDEPX$"),
1404  (instregex "M(T|F)SR(IN)?$"),
1405  (instregex "M(T|F)DCR$"),
1406  (instregex "NOP_GT_PWR(6|7)$"),
1407  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1408  (instregex "WRTEE(I)?$"),
1409  ATTN,
1410  CLRBHRB,
1411  MFBHRBE,
1412  MBAR,
1413  MSYNC,
1414  SLBSYNC,
1415  SLBFEEo,
1416  NAP,
1417  STOP,
1418  TRAP,
1419  RFCI,
1420  RFDI,
1421  RFMCI,
1422  SC,
1423  DCBA,
1424  DCBI,
1425  DCCCI,
1426  ICCCI
1427)> { let Unsupported = 1; }
1428