//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a DAG scheduling mutation to cluster shader
10///       exports.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUExportClustering.h"
15#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16#include "SIInstrInfo.h"
17#include "llvm/CodeGen/ScheduleDAGInstrs.h"
18
19using namespace llvm;
20
21namespace {
22
23class ExportClustering : public ScheduleDAGMutation {
24public:
25  ExportClustering() = default;
26  void apply(ScheduleDAGInstrs *DAG) override;
27};
28
29static bool isExport(const SUnit &SU) {
30  return SIInstrInfo::isEXP(*SU.getInstr());
31}
32
33static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
34  const MachineInstr *MI = SU->getInstr();
35  unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
36  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
37}
38
39static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
40                      unsigned PosCount) {
41  if (!PosCount || PosCount == Chain.size())
42    return;
43
44  // Position exports should occur as soon as possible in the shader
45  // for optimal performance.  This moves position exports before
46  // other exports while preserving the order within different export
47  // types (pos or other).
48  SmallVector<SUnit *, 8> Copy(Chain);
49  unsigned PosIdx = 0;
50  unsigned OtherIdx = PosCount;
51  for (SUnit *SU : Copy) {
52    if (isPositionExport(TII, SU))
53      Chain[PosIdx++] = SU;
54    else
55      Chain[OtherIdx++] = SU;
56  }
57}
58
59static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
60  SUnit *ChainHead = Exports.front();
61
62  // Now construct cluster from chain by adding new edges.
63  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
64    SUnit *SUa = Exports[Idx];
65    SUnit *SUb = Exports[Idx + 1];
66
67    // Copy all dependencies to the head of the chain to avoid any
68    // computation being inserted into the chain.
69    for (const SDep &Pred : SUb->Preds) {
70      SUnit *PredSU = Pred.getSUnit();
71      if (!isExport(*PredSU) && !Pred.isWeak())
72        DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
73    }
74
75    // New barrier edge ordering exports
76    DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
77    // Also add cluster edge
78    DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
79  }
80}
81
82static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
83  SmallVector<SDep, 2> ToAdd, ToRemove;
84
85  for (const SDep &Pred : SU.Preds) {
86    SUnit *PredSU = Pred.getSUnit();
87    if (Pred.isBarrier() && isExport(*PredSU)) {
88      ToRemove.push_back(Pred);
89      if (isExport(SU))
90        continue;
91
92      // If we remove a barrier we need to copy dependencies
93      // from the predecessor to maintain order.
94      for (const SDep &ExportPred : PredSU->Preds) {
95        SUnit *ExportPredSU = ExportPred.getSUnit();
96        if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
97          ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
98      }
99    }
100  }
101
102  for (SDep Pred : ToRemove)
103    SU.removePred(Pred);
104  for (SDep Pred : ToAdd)
105    DAG->addEdge(&SU, Pred);
106}
107
108void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
109  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
110
111  SmallVector<SUnit *, 8> Chain;
112
113  // Pass through DAG gathering a list of exports and removing barrier edges
114  // creating dependencies on exports. Freeing exports of successor edges
115  // allows more scheduling freedom, and nothing should be order dependent
116  // on exports.  Edges will be added later to order the exports.
117  unsigned PosCount = 0;
118  for (SUnit &SU : DAG->SUnits) {
119    if (!isExport(SU))
120      continue;
121
122    Chain.push_back(&SU);
123    if (isPositionExport(TII, &SU))
124      PosCount++;
125
126    removeExportDependencies(DAG, SU);
127
128    SmallVector<SDep, 4> Succs(SU.Succs);
129    for (SDep Succ : Succs)
130      removeExportDependencies(DAG, *Succ.getSUnit());
131  }
132
133  // Apply clustering if there are multiple exports
134  if (Chain.size() > 1) {
135    sortChain(TII, Chain, PosCount);
136    buildCluster(Chain, DAG);
137  }
138}
139
140} // end namespace
141
142namespace llvm {
143
144std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
145  return std::make_unique<ExportClustering>();
146}
147
148} // end namespace llvm
149