1//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18#include "SIInstrInfo.h"
19#include "llvm/CodeGen/MachineScheduler.h"
20#include "llvm/CodeGen/RegisterPressure.h"
21
22using namespace llvm;
23
24namespace llvm {
25
26enum SIScheduleCandReason {
27  NoCand,
28  RegUsage,
29  Latency,
30  Successor,
31  Depth,
32  NodeOrder
33};
34
35struct SISchedulerCandidate {
36  // The reason for this candidate.
37  SIScheduleCandReason Reason;
38
39  // Set of reasons that apply to multiple candidates.
40  uint32_t RepeatReasonSet;
41
42  SISchedulerCandidate()
43    :  Reason(NoCand), RepeatReasonSet(0) {}
44
45  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
46  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
47};
48
49class SIScheduleDAGMI;
50class SIScheduleBlockCreator;
51
52class SIScheduleBlock {
53  SIScheduleDAGMI *DAG;
54  SIScheduleBlockCreator *BC;
55
56  std::vector<SUnit*> SUnits;
57  std::map<unsigned, unsigned> NodeNum2Index;
58  std::vector<SUnit*> TopReadySUs;
59  std::vector<SUnit*> ScheduledSUnits;
60
61  /// The top of the unscheduled zone.
62  IntervalPressure TopPressure;
63  RegPressureTracker TopRPTracker;
64
65  // Pressure: number of said class of registers needed to
66  // store the live virtual and real registers.
67  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
68  // Pressure of additional registers required inside the block.
69  std::vector<unsigned> InternalAdditionnalPressure;
70  // Pressure of input and output registers
71  std::vector<unsigned> LiveInPressure;
72  std::vector<unsigned> LiveOutPressure;
73  // Registers required by the block, and outputs.
74  // We do track only virtual registers.
75  // Note that some registers are not 32 bits,
76  // and thus the pressure is not equal
77  // to the number of live registers.
78  std::set<unsigned> LiveInRegs;
79  std::set<unsigned> LiveOutRegs;
80
81  bool Scheduled;
82  bool HighLatencyBlock;
83
84  std::vector<unsigned> HasLowLatencyNonWaitedParent;
85
86  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
87  unsigned ID;
88
89  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
90  std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
91  unsigned NumHighLatencySuccessors;
92
93public:
94  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
95                  unsigned ID):
96    DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
97    TopRPTracker(TopPressure), Scheduled(false),
98    HighLatencyBlock(false), ID(ID),
99    Preds(), Succs(), NumHighLatencySuccessors(0) {};
100
101  ~SIScheduleBlock() {};
102
103  unsigned getID() const { return ID; }
104
105  /// Functions for Block construction.
106  void addUnit(SUnit *SU);
107
108  // When all SUs have been added.
109  void finalizeUnits();
110
111  // Add block pred, which has instruction predecessor of SU.
112  void addPred(SIScheduleBlock *Pred);
113  void addSucc(SIScheduleBlock *Succ);
114
115  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
116  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
117
118  unsigned Height;  // Maximum topdown path length to block without outputs
119  unsigned Depth;   // Maximum bottomup path length to block without inputs
120
121  unsigned getNumHighLatencySuccessors() const {
122    return NumHighLatencySuccessors;
123  }
124
125  bool isHighLatencyBlock() { return HighLatencyBlock; }
126
127  // This is approximative.
128  // Ideally should take into accounts some instructions (rcp, etc)
129  // are 4 times slower.
130  int getCost() { return SUnits.size(); }
131
132  // The block Predecessors and Successors must be all registered
133  // before fastSchedule().
134  // Fast schedule with no particular requirement.
135  void fastSchedule();
136
137  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
138
139  // Complete schedule that will try to minimize reg pressure and
140  // low latencies, and will fill liveins and liveouts.
141  // Needs all MIs to be grouped between BeginBlock and EndBlock.
142  // The MIs can be moved after the scheduling,
143  // it is just used to allow correct track of live registers.
144  void schedule(MachineBasicBlock::iterator BeginBlock,
145                MachineBasicBlock::iterator EndBlock);
146
147  bool isScheduled() { return Scheduled; }
148
149
150  // Needs the block to be scheduled inside
151  // TODO: find a way to compute it.
152  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
153    return InternalAdditionnalPressure;
154  }
155
156  std::set<unsigned> &getInRegs() { return LiveInRegs; }
157  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
158
159  void printDebug(bool Full);
160
161private:
162  struct SISchedCandidate : SISchedulerCandidate {
163    // The best SUnit candidate.
164    SUnit *SU;
165
166    unsigned SGPRUsage;
167    unsigned VGPRUsage;
168    bool IsLowLatency;
169    unsigned LowLatencyOffset;
170    bool HasLowLatencyNonWaitedParent;
171
172    SISchedCandidate()
173      : SU(nullptr) {}
174
175    bool isValid() const { return SU; }
176
177    // Copy the status of another candidate without changing policy.
178    void setBest(SISchedCandidate &Best) {
179      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
180      SU = Best.SU;
181      Reason = Best.Reason;
182      SGPRUsage = Best.SGPRUsage;
183      VGPRUsage = Best.VGPRUsage;
184      IsLowLatency = Best.IsLowLatency;
185      LowLatencyOffset = Best.LowLatencyOffset;
186      HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
187    }
188  };
189
190  void undoSchedule();
191
192  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
193  void releaseSucc(SUnit *SU, SDep *SuccEdge);
194  // InOrOutBlock: restrict to links pointing inside the block (true),
195  // or restrict to links pointing outside the block (false).
196  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
197
198  void nodeScheduled(SUnit *SU);
199  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
200  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
201  SUnit* pickNode();
202  void traceCandidate(const SISchedCandidate &Cand);
203  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
204                       MachineBasicBlock::iterator EndBlock);
205};
206
207struct SIScheduleBlocks {
208  std::vector<SIScheduleBlock*> Blocks;
209  std::vector<int> TopDownIndex2Block;
210  std::vector<int> TopDownBlock2Index;
211};
212
213enum SISchedulerBlockCreatorVariant {
214    LatenciesAlone,
215    LatenciesGrouped,
216    LatenciesAlonePlusConsecutive
217};
218
219class SIScheduleBlockCreator {
220  SIScheduleDAGMI *DAG;
221  // unique_ptr handles freeing memory for us.
222  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
223  std::map<SISchedulerBlockCreatorVariant,
224           SIScheduleBlocks> Blocks;
225  std::vector<SIScheduleBlock*> CurrentBlocks;
226  std::vector<int> Node2CurrentBlock;
227
228  // Topological sort
229  // Maps topological index to the node number.
230  std::vector<int> TopDownIndex2Block;
231  std::vector<int> TopDownBlock2Index;
232  std::vector<int> BottomUpIndex2Block;
233
234  // 0 -> Color not given.
235  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
236  // Above -> Other groups.
237  int NextReservedID;
238  int NextNonReservedID;
239  std::vector<int> CurrentColoring;
240  std::vector<int> CurrentTopDownReservedDependencyColoring;
241  std::vector<int> CurrentBottomUpReservedDependencyColoring;
242
243public:
244  SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
245  ~SIScheduleBlockCreator();
246
247  SIScheduleBlocks
248  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
249
250  bool isSUInBlock(SUnit *SU, unsigned ID);
251
252private:
253  // Give a Reserved color to every high latency.
254  void colorHighLatenciesAlone();
255
256  // Create groups of high latencies with a Reserved color.
257  void colorHighLatenciesGroups();
258
259  // Compute coloring for topdown and bottom traversals with
260  // different colors depending on dependencies on Reserved colors.
261  void colorComputeReservedDependencies();
262
263  // Give color to all non-colored SUs according to Reserved groups dependencies.
264  void colorAccordingToReservedDependencies();
265
266  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
267  // The new colors are computed according to the dependencies on the other blocks
268  // formed with colorAccordingToReservedDependencies.
269  void colorEndsAccordingToDependencies();
270
271  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
272  void colorForceConsecutiveOrderInGroup();
273
274  // Merge Constant loads that have all their users into another group to the group.
275  // (TODO: else if all their users depend on the same group, put them there)
276  void colorMergeConstantLoadsNextGroup();
277
278  // Merge SUs that have all their users into another group to the group
279  void colorMergeIfPossibleNextGroup();
280
281  // Merge SUs that have all their users into another group to the group,
282  // but only for Reserved groups.
283  void colorMergeIfPossibleNextGroupOnlyForReserved();
284
285  // Merge SUs that have all their users into another group to the group,
286  // but only if the group is no more than a few SUs.
287  void colorMergeIfPossibleSmallGroupsToNextGroup();
288
289  // Divides Blocks with important size.
290  // Idea of implementation: attribute new colors depending on topdown and
291  // bottom up links to other blocks.
292  void cutHugeBlocks();
293
294  // Put in one group all instructions with no users in this scheduling region
295  // (we'd want these groups be at the end).
296  void regroupNoUserInstructions();
297
298  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
299
300  void topologicalSort();
301
302  void scheduleInsideBlocks();
303
304  void fillStats();
305};
306
307enum SISchedulerBlockSchedulerVariant {
308  BlockLatencyRegUsage,
309  BlockRegUsageLatency,
310  BlockRegUsage
311};
312
313class SIScheduleBlockScheduler {
314  SIScheduleDAGMI *DAG;
315  SISchedulerBlockSchedulerVariant Variant;
316  std::vector<SIScheduleBlock*> Blocks;
317
318  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
319  std::set<unsigned> LiveRegs;
320  // Num of schedulable unscheduled blocks reading the register.
321  std::map<unsigned, unsigned> LiveRegsConsumers;
322
323  std::vector<unsigned> LastPosHighLatencyParentScheduled;
324  int LastPosWaitedHighLatency;
325
326  std::vector<SIScheduleBlock*> BlocksScheduled;
327  unsigned NumBlockScheduled;
328  std::vector<SIScheduleBlock*> ReadyBlocks;
329
330  unsigned VregCurrentUsage;
331  unsigned SregCurrentUsage;
332
333  // Currently is only approximation.
334  unsigned maxVregUsage;
335  unsigned maxSregUsage;
336
337  std::vector<unsigned> BlockNumPredsLeft;
338  std::vector<unsigned> BlockNumSuccsLeft;
339
340public:
341  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
342                           SISchedulerBlockSchedulerVariant Variant,
343                           SIScheduleBlocks BlocksStruct);
344  ~SIScheduleBlockScheduler() {};
345
346  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
347
348  unsigned getVGPRUsage() { return maxVregUsage; };
349  unsigned getSGPRUsage() { return maxSregUsage; };
350
351private:
352  struct SIBlockSchedCandidate : SISchedulerCandidate {
353    // The best Block candidate.
354    SIScheduleBlock *Block;
355
356    bool IsHighLatency;
357    int VGPRUsageDiff;
358    unsigned NumSuccessors;
359    unsigned NumHighLatencySuccessors;
360    unsigned LastPosHighLatParentScheduled;
361    unsigned Height;
362
363    SIBlockSchedCandidate()
364      : Block(nullptr) {}
365
366    bool isValid() const { return Block; }
367
368    // Copy the status of another candidate without changing policy.
369    void setBest(SIBlockSchedCandidate &Best) {
370      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
371      Block = Best.Block;
372      Reason = Best.Reason;
373      IsHighLatency = Best.IsHighLatency;
374      VGPRUsageDiff = Best.VGPRUsageDiff;
375      NumSuccessors = Best.NumSuccessors;
376      NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
377      LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
378      Height = Best.Height;
379    }
380  };
381
382  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
383                           SIBlockSchedCandidate &TryCand);
384  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
385                            SIBlockSchedCandidate &TryCand);
386  SIScheduleBlock *pickBlock();
387
388  void addLiveRegs(std::set<unsigned> &Regs);
389  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
390  void releaseBlockSuccs(SIScheduleBlock *Parent);
391  void blockScheduled(SIScheduleBlock *Block);
392
393  // Check register pressure change
394  // by scheduling a block with these LiveIn and LiveOut.
395  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
396                                       std::set<unsigned> &OutRegs);
397
398  void schedule();
399};
400
401struct SIScheduleBlockResult {
402  std::vector<unsigned> SUs;
403  unsigned MaxSGPRUsage;
404  unsigned MaxVGPRUsage;
405};
406
407class SIScheduler {
408  SIScheduleDAGMI *DAG;
409  SIScheduleBlockCreator BlockCreator;
410
411public:
412  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
413
414  ~SIScheduler() {};
415
416  struct SIScheduleBlockResult
417  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
418                  SISchedulerBlockSchedulerVariant ScheduleVariant);
419};
420
421class SIScheduleDAGMI : public ScheduleDAGMILive {
422  const SIInstrInfo *SITII;
423  const SIRegisterInfo *SITRI;
424
425  std::vector<SUnit> SUnitsLinksBackup;
426
427  // For moveLowLatencies. After all Scheduling variants are tested.
428  std::vector<unsigned> ScheduledSUnits;
429  std::vector<unsigned> ScheduledSUnitsInv;
430
431  unsigned VGPRSetID;
432  unsigned SGPRSetID;
433
434public:
435  SIScheduleDAGMI(MachineSchedContext *C);
436
437  ~SIScheduleDAGMI() override;
438
439  // Entry point for the schedule.
440  void schedule() override;
441
442  // To init Block's RPTracker.
443  void initRPTracker(RegPressureTracker &RPTracker) {
444    RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
445  }
446
447  MachineBasicBlock *getBB() { return BB; }
448  MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
449  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
450  LiveIntervals *getLIS() { return LIS; }
451  MachineRegisterInfo *getMRI() { return &MRI; }
452  const TargetRegisterInfo *getTRI() { return TRI; }
453  SUnit& getEntrySU() { return EntrySU; };
454  SUnit& getExitSU() { return ExitSU; };
455
456  void restoreSULinksLeft();
457
458  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
459                                                     _Iterator End,
460                                                     unsigned &VgprUsage,
461                                                     unsigned &SgprUsage);
462  std::set<unsigned> getInRegs() {
463    std::set<unsigned> InRegs (RPTracker.getPressure().LiveInRegs.begin(),
464                               RPTracker.getPressure().LiveInRegs.end());
465    return InRegs;
466  };
467
468  unsigned getVGPRSetID() const { return VGPRSetID; }
469  unsigned getSGPRSetID() const { return SGPRSetID; }
470
471private:
472  void topologicalSort();
473  // After scheduling is done, improve low latency placements.
474  void moveLowLatencies();
475
476public:
477  // Some stats for scheduling inside blocks.
478  std::vector<unsigned> IsLowLatencySU;
479  std::vector<unsigned> LowLatencyOffset;
480  std::vector<unsigned> IsHighLatencySU;
481  // Topological sort
482  // Maps topological index to the node number.
483  std::vector<int> TopDownIndex2SU;
484  std::vector<int> BottomUpIndex2SU;
485};
486
487} // namespace llvm
488
489#endif /* SIMACHINESCHEDULER_H_ */
490