1//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions post RA. With this approach, each LL/SC loop is treated
// as a single blob until after register allocation, which makes spilling
// inside the LL/SC loop unlikely.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCInstrInfo.h"
18#include "PPCTargetMachine.h"
19
20#include "llvm/CodeGen/LivePhysRegs.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "ppc-atomic-expand"
27
28namespace {
29
30class PPCExpandAtomicPseudo : public MachineFunctionPass {
31public:
32  const PPCInstrInfo *TII;
33  const PPCRegisterInfo *TRI;
34  static char ID;
35
36  PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
37    initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38  }
39
40  bool runOnMachineFunction(MachineFunction &MF) override;
41
42private:
43  bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
44                MachineBasicBlock::iterator &NMBBI);
45  bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
46                          MachineBasicBlock::iterator &NMBBI);
47  bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
48                              MachineBasicBlock::iterator &NMBBI);
49};
50
51static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
52                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
53                       Register Dest0, Register Dest1, Register Src0,
54                       Register Src1) {
55  const MCInstrDesc &OR = TII->get(PPC::OR8);
56  const MCInstrDesc &XOR = TII->get(PPC::XOR8);
57  if (Dest0 == Src1 && Dest1 == Src0) {
58    // The most tricky case, swapping values.
59    BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
60    BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
61    BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
62  } else if (Dest0 != Src0 || Dest1 != Src1) {
63    if (Dest0 == Src1 || Dest1 != Src0) {
64      BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
65      BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
66    } else {
67      BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
68      BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
69    }
70  }
71}
72
73bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
74  bool Changed = false;
75  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
76  TRI = &TII->getRegisterInfo();
77  for (MachineBasicBlock &MBB : MF) {
78    for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
79         MBBI != MBBE;) {
80      MachineInstr &MI = *MBBI;
81      MachineBasicBlock::iterator NMBBI = std::next(MBBI);
82      Changed |= expandMI(MBB, MI, NMBBI);
83      MBBI = NMBBI;
84    }
85  }
86  if (Changed)
87    MF.RenumberBlocks();
88  return Changed;
89}
90
91bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
92                                     MachineBasicBlock::iterator &NMBBI) {
93  switch (MI.getOpcode()) {
94  case PPC::ATOMIC_SWAP_I128:
95  case PPC::ATOMIC_LOAD_ADD_I128:
96  case PPC::ATOMIC_LOAD_SUB_I128:
97  case PPC::ATOMIC_LOAD_XOR_I128:
98  case PPC::ATOMIC_LOAD_NAND_I128:
99  case PPC::ATOMIC_LOAD_AND_I128:
100  case PPC::ATOMIC_LOAD_OR_I128:
101    return expandAtomicRMW128(MBB, MI, NMBBI);
102  case PPC::ATOMIC_CMP_SWAP_I128:
103    return expandAtomicCmpSwap128(MBB, MI, NMBBI);
104  case PPC::BUILD_QUADWORD: {
105    Register Dst = MI.getOperand(0).getReg();
106    Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
107    Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
108    Register Lo = MI.getOperand(1).getReg();
109    Register Hi = MI.getOperand(2).getReg();
110    PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
111    MI.eraseFromParent();
112    return true;
113  }
114  default:
115    return false;
116  }
117}
118
119bool PPCExpandAtomicPseudo::expandAtomicRMW128(
120    MachineBasicBlock &MBB, MachineInstr &MI,
121    MachineBasicBlock::iterator &NMBBI) {
122  const MCInstrDesc &LL = TII->get(PPC::LQARX);
123  const MCInstrDesc &SC = TII->get(PPC::STQCX);
124  DebugLoc DL = MI.getDebugLoc();
125  MachineFunction *MF = MBB.getParent();
126  const BasicBlock *BB = MBB.getBasicBlock();
127  // Create layout of control flow.
128  MachineFunction::iterator MFI = ++MBB.getIterator();
129  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
130  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
131  MF->insert(MFI, LoopMBB);
132  MF->insert(MFI, ExitMBB);
133  ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
134                  MBB.end());
135  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
136  MBB.addSuccessor(LoopMBB);
137
138  // For non-min/max operations, control flow is kinda like:
139  // MBB:
140  //   ...
141  // LoopMBB:
142  //   lqarx in, ptr
143  //   addc out.sub_x1, in.sub_x1, op.sub_x1
144  //   adde out.sub_x0, in.sub_x0, op.sub_x0
145  //   stqcx out, ptr
146  //   bne- LoopMBB
147  // ExitMBB:
148  //   ...
149  Register Old = MI.getOperand(0).getReg();
150  Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
151  Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
152  Register Scratch = MI.getOperand(1).getReg();
153  Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
154  Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
155  Register RA = MI.getOperand(2).getReg();
156  Register RB = MI.getOperand(3).getReg();
157  Register IncrLo = MI.getOperand(4).getReg();
158  Register IncrHi = MI.getOperand(5).getReg();
159  unsigned RMWOpcode = MI.getOpcode();
160
161  MachineBasicBlock *CurrentMBB = LoopMBB;
162  BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
163
164  switch (RMWOpcode) {
165  case PPC::ATOMIC_SWAP_I128:
166    PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
167               IncrHi, IncrLo);
168    break;
169  case PPC::ATOMIC_LOAD_ADD_I128:
170    BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
171        .addReg(IncrLo)
172        .addReg(OldLo);
173    BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
174        .addReg(IncrHi)
175        .addReg(OldHi);
176    break;
177  case PPC::ATOMIC_LOAD_SUB_I128:
178    BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
179        .addReg(IncrLo)
180        .addReg(OldLo);
181    BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
182        .addReg(IncrHi)
183        .addReg(OldHi);
184    break;
185
186#define TRIVIAL_ATOMICRMW(Opcode, Instr)                                       \
187  case Opcode:                                                                 \
188    BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo)                      \
189        .addReg(IncrLo)                                                        \
190        .addReg(OldLo);                                                        \
191    BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi)                      \
192        .addReg(IncrHi)                                                        \
193        .addReg(OldHi);                                                        \
194    break
195
196    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
197    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
198    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
199    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
200#undef TRIVIAL_ATOMICRMW
201  default:
202    llvm_unreachable("Unhandled atomic RMW operation");
203  }
204  BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
205  BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
206      .addImm(PPC::PRED_NE)
207      .addReg(PPC::CR0)
208      .addMBB(LoopMBB);
209  CurrentMBB->addSuccessor(LoopMBB);
210  CurrentMBB->addSuccessor(ExitMBB);
211  bool anyChange = false;
212  do {
213    anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
214  } while (anyChange);
215  NMBBI = MBB.end();
216  MI.eraseFromParent();
217  return true;
218}
219
220bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
221    MachineBasicBlock &MBB, MachineInstr &MI,
222    MachineBasicBlock::iterator &NMBBI) {
223  const MCInstrDesc &LL = TII->get(PPC::LQARX);
224  const MCInstrDesc &SC = TII->get(PPC::STQCX);
225  DebugLoc DL = MI.getDebugLoc();
226  MachineFunction *MF = MBB.getParent();
227  const BasicBlock *BB = MBB.getBasicBlock();
228  Register Old = MI.getOperand(0).getReg();
229  Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
230  Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
231  Register Scratch = MI.getOperand(1).getReg();
232  Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
233  Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
234  Register RA = MI.getOperand(2).getReg();
235  Register RB = MI.getOperand(3).getReg();
236  Register CmpLo = MI.getOperand(4).getReg();
237  Register CmpHi = MI.getOperand(5).getReg();
238  Register NewLo = MI.getOperand(6).getReg();
239  Register NewHi = MI.getOperand(7).getReg();
240  // Create layout of control flow.
241  // loop:
242  //   old = lqarx ptr
243  //   <compare old, cmp>
244  //   bne 0, exit
245  // succ:
246  //   stqcx new ptr
247  //   bne 0, loop
248  // exit:
249  //   ....
250  MachineFunction::iterator MFI = ++MBB.getIterator();
251  MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
252  MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
253  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
254  MF->insert(MFI, LoopCmpMBB);
255  MF->insert(MFI, CmpSuccMBB);
256  MF->insert(MFI, ExitMBB);
257  ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
258                  MBB.end());
259  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
260  MBB.addSuccessor(LoopCmpMBB);
261  // Build loop.
262  MachineBasicBlock *CurrentMBB = LoopCmpMBB;
263  BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
264  BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo)
265      .addReg(OldLo)
266      .addReg(CmpLo);
267  BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi)
268      .addReg(OldHi)
269      .addReg(CmpHi);
270  BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo)
271      .addReg(ScratchLo)
272      .addReg(ScratchHi);
273  BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
274      .addImm(PPC::PRED_NE)
275      .addReg(PPC::CR0)
276      .addMBB(ExitMBB);
277  CurrentMBB->addSuccessor(CmpSuccMBB);
278  CurrentMBB->addSuccessor(ExitMBB);
279  // Build succ.
280  CurrentMBB = CmpSuccMBB;
281  PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
282             NewHi, NewLo);
283  BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
284  BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
285      .addImm(PPC::PRED_NE)
286      .addReg(PPC::CR0)
287      .addMBB(LoopCmpMBB);
288  CurrentMBB->addSuccessor(LoopCmpMBB);
289  CurrentMBB->addSuccessor(ExitMBB);
290
291  bool anyChange = false;
292  do {
293    anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*CmpSuccMBB) ||
294                recomputeLiveIns(*LoopCmpMBB);
295  } while (anyChange);
296  NMBBI = MBB.end();
297  MI.eraseFromParent();
298  return true;
299}
300
301} // namespace
302
303INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
304                false, false)
305
306char PPCExpandAtomicPseudo::ID = 0;
307FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
308  return new PPCExpandAtomicPseudo();
309}
310