1//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to cleanup ugly code sequences at
10// MachineInstruction layer.
11//
12// Currently, there are two optimizations implemented:
13//  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14//    zero extend 32-bit subregisters to 64-bit registers, if the compiler
15//    could prove the subregisters is defined by 32-bit operations in which
16//    case the upper half of the underlying 64-bit registers were zeroed
17//    implicitly.
18//
19//  - One post-RA PreEmit pass to do final cleanup on some redundant
20//    instructions generated due to bad RA on subregister.
21//===----------------------------------------------------------------------===//
22
23#include "BPF.h"
24#include "BPFInstrInfo.h"
25#include "BPFTargetMachine.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "bpf-mi-zext-elim"
34
35STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
36
37namespace {
38
39struct BPFMIPeephole : public MachineFunctionPass {
40
41  static char ID;
42  const BPFInstrInfo *TII;
43  MachineFunction *MF;
44  MachineRegisterInfo *MRI;
45
46  BPFMIPeephole() : MachineFunctionPass(ID) {
47    initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
48  }
49
50private:
51  // Initialize class variables.
52  void initialize(MachineFunction &MFParm);
53
54  bool isCopyFrom32Def(MachineInstr *CopyMI);
55  bool isInsnFrom32Def(MachineInstr *DefInsn);
56  bool isPhiFrom32Def(MachineInstr *MovMI);
57  bool isMovFrom32Def(MachineInstr *MovMI);
58  bool eliminateZExtSeq(void);
59
60  std::set<MachineInstr *> PhiInsns;
61
62public:
63
64  // Main entry point for this pass.
65  bool runOnMachineFunction(MachineFunction &MF) override {
66    if (skipFunction(MF.getFunction()))
67      return false;
68
69    initialize(MF);
70
71    return eliminateZExtSeq();
72  }
73};
74
75// Initialize class variables.
76void BPFMIPeephole::initialize(MachineFunction &MFParm) {
77  MF = &MFParm;
78  MRI = &MF->getRegInfo();
79  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
80  LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
81}
82
83bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
84{
85  MachineOperand &opnd = CopyMI->getOperand(1);
86
87  if (!opnd.isReg())
88    return false;
89
90  // Return false if getting value from a 32bit physical register.
91  // Most likely, this physical register is aliased to
92  // function call return value or current function parameters.
93  Register Reg = opnd.getReg();
94  if (!Register::isVirtualRegister(Reg))
95    return false;
96
97  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
98    return false;
99
100  MachineInstr *DefInsn = MRI->getVRegDef(Reg);
101  if (!isInsnFrom32Def(DefInsn))
102    return false;
103
104  return true;
105}
106
107bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
108{
109  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
110    MachineOperand &opnd = PhiMI->getOperand(i);
111
112    if (!opnd.isReg())
113      return false;
114
115    MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
116    if (!PhiDef)
117      return false;
118    if (PhiDef->isPHI()) {
119      if (PhiInsns.find(PhiDef) != PhiInsns.end())
120        return false;
121      PhiInsns.insert(PhiDef);
122      if (!isPhiFrom32Def(PhiDef))
123        return false;
124    }
125    if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
126      return false;
127  }
128
129  return true;
130}
131
132// The \p DefInsn instruction defines a virtual register.
133bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
134{
135  if (!DefInsn)
136    return false;
137
138  if (DefInsn->isPHI()) {
139    if (PhiInsns.find(DefInsn) != PhiInsns.end())
140      return false;
141    PhiInsns.insert(DefInsn);
142    if (!isPhiFrom32Def(DefInsn))
143      return false;
144  } else if (DefInsn->getOpcode() == BPF::COPY) {
145    if (!isCopyFrom32Def(DefInsn))
146      return false;
147  }
148
149  return true;
150}
151
152bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
153{
154  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
155
156  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
157  LLVM_DEBUG(DefInsn->dump());
158
159  PhiInsns.clear();
160  if (!isInsnFrom32Def(DefInsn))
161    return false;
162
163  LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
164
165  return true;
166}
167
168bool BPFMIPeephole::eliminateZExtSeq(void) {
169  MachineInstr* ToErase = nullptr;
170  bool Eliminated = false;
171
172  for (MachineBasicBlock &MBB : *MF) {
173    for (MachineInstr &MI : MBB) {
174      // If the previous instruction was marked for elimination, remove it now.
175      if (ToErase) {
176        ToErase->eraseFromParent();
177        ToErase = nullptr;
178      }
179
180      // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
181      //
182      //   MOV_32_64 rB, wA
183      //   SLL_ri    rB, rB, 32
184      //   SRL_ri    rB, rB, 32
185      if (MI.getOpcode() == BPF::SRL_ri &&
186          MI.getOperand(2).getImm() == 32) {
187        Register DstReg = MI.getOperand(0).getReg();
188        Register ShfReg = MI.getOperand(1).getReg();
189        MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
190
191        LLVM_DEBUG(dbgs() << "Starting SRL found:");
192        LLVM_DEBUG(MI.dump());
193
194        if (!SllMI ||
195            SllMI->isPHI() ||
196            SllMI->getOpcode() != BPF::SLL_ri ||
197            SllMI->getOperand(2).getImm() != 32)
198          continue;
199
200        LLVM_DEBUG(dbgs() << "  SLL found:");
201        LLVM_DEBUG(SllMI->dump());
202
203        MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
204        if (!MovMI ||
205            MovMI->isPHI() ||
206            MovMI->getOpcode() != BPF::MOV_32_64)
207          continue;
208
209        LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
210        LLVM_DEBUG(MovMI->dump());
211
212        Register SubReg = MovMI->getOperand(1).getReg();
213        if (!isMovFrom32Def(MovMI)) {
214          LLVM_DEBUG(dbgs()
215                     << "  One ZExt elim sequence failed qualifying elim.\n");
216          continue;
217        }
218
219        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
220          .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
221
222        SllMI->eraseFromParent();
223        MovMI->eraseFromParent();
224        // MI is the right shift, we can't erase it in it's own iteration.
225        // Mark it to ToErase, and erase in the next iteration.
226        ToErase = &MI;
227        ZExtElemNum++;
228        Eliminated = true;
229      }
230    }
231  }
232
233  return Eliminated;
234}
235
236} // end default namespace
237
238INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
239                "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
240                false, false)
241
242char BPFMIPeephole::ID = 0;
243FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
244
245STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
246
247namespace {
248
249struct BPFMIPreEmitPeephole : public MachineFunctionPass {
250
251  static char ID;
252  MachineFunction *MF;
253  const TargetRegisterInfo *TRI;
254
255  BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
256    initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
257  }
258
259private:
260  // Initialize class variables.
261  void initialize(MachineFunction &MFParm);
262
263  bool eliminateRedundantMov(void);
264
265public:
266
267  // Main entry point for this pass.
268  bool runOnMachineFunction(MachineFunction &MF) override {
269    if (skipFunction(MF.getFunction()))
270      return false;
271
272    initialize(MF);
273
274    return eliminateRedundantMov();
275  }
276};
277
278// Initialize class variables.
279void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
280  MF = &MFParm;
281  TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
282  LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
283}
284
285bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
286  MachineInstr* ToErase = nullptr;
287  bool Eliminated = false;
288
289  for (MachineBasicBlock &MBB : *MF) {
290    for (MachineInstr &MI : MBB) {
291      // If the previous instruction was marked for elimination, remove it now.
292      if (ToErase) {
293        LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
294        LLVM_DEBUG(ToErase->dump());
295        ToErase->eraseFromParent();
296        ToErase = nullptr;
297      }
298
299      // Eliminate identical move:
300      //
301      //   MOV rA, rA
302      //
303      // This is particularly possible to happen when sub-register support
304      // enabled. The special type cast insn MOV_32_64 involves different
305      // register class on src (i32) and dst (i64), RA could generate useless
306      // instruction due to this.
307      unsigned Opcode = MI.getOpcode();
308      if (Opcode == BPF::MOV_32_64 ||
309          Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
310        Register dst = MI.getOperand(0).getReg();
311        Register src = MI.getOperand(1).getReg();
312
313        if (Opcode == BPF::MOV_32_64)
314          dst = TRI->getSubReg(dst, BPF::sub_32);
315
316        if (dst != src)
317          continue;
318
319        ToErase = &MI;
320        RedundantMovElemNum++;
321        Eliminated = true;
322      }
323    }
324  }
325
326  return Eliminated;
327}
328
329} // end default namespace
330
331INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
332                "BPF PreEmit Peephole Optimization", false, false)
333
334char BPFMIPreEmitPeephole::ID = 0;
335FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
336{
337  return new BPFMIPreEmitPeephole();
338}
339
340STATISTIC(TruncElemNum, "Number of truncation eliminated");
341
342namespace {
343
344struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
345
346  static char ID;
347  const BPFInstrInfo *TII;
348  MachineFunction *MF;
349  MachineRegisterInfo *MRI;
350
351  BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
352    initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
353  }
354
355private:
356  // Initialize class variables.
357  void initialize(MachineFunction &MFParm);
358
359  bool eliminateTruncSeq(void);
360
361public:
362
363  // Main entry point for this pass.
364  bool runOnMachineFunction(MachineFunction &MF) override {
365    if (skipFunction(MF.getFunction()))
366      return false;
367
368    initialize(MF);
369
370    return eliminateTruncSeq();
371  }
372};
373
374static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
375{
376  if (TruncSize == 1)
377    return opcode == BPF::LDB || opcode == BPF::LDB32;
378
379  if (TruncSize == 2)
380    return opcode == BPF::LDH || opcode == BPF::LDH32;
381
382  if (TruncSize == 4)
383    return opcode == BPF::LDW || opcode == BPF::LDW32;
384
385  return false;
386}
387
388// Initialize class variables.
389void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
390  MF = &MFParm;
391  MRI = &MF->getRegInfo();
392  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
393  LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
394}
395
396// Reg truncating is often the result of 8/16/32bit->64bit or
397// 8/16bit->32bit conversion. If the reg value is loaded with
398// masked byte width, the AND operation can be removed since
399// BPF LOAD already has zero extension.
400//
401// This also solved a correctness issue.
402// In BPF socket-related program, e.g., __sk_buff->{data, data_end}
403// are 32-bit registers, but later on, kernel verifier will rewrite
404// it with 64-bit value. Therefore, truncating the value after the
405// load will result in incorrect code.
406bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
407  MachineInstr* ToErase = nullptr;
408  bool Eliminated = false;
409
410  for (MachineBasicBlock &MBB : *MF) {
411    for (MachineInstr &MI : MBB) {
412      // The second insn to remove if the eliminate candidate is a pair.
413      MachineInstr *MI2 = nullptr;
414      Register DstReg, SrcReg;
415      MachineInstr *DefMI;
416      int TruncSize = -1;
417
418      // If the previous instruction was marked for elimination, remove it now.
419      if (ToErase) {
420        ToErase->eraseFromParent();
421        ToErase = nullptr;
422      }
423
424      // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
425      // for BPF ANDI is i32, and this case only happens on ALU64.
426      if (MI.getOpcode() == BPF::SRL_ri &&
427          MI.getOperand(2).getImm() == 32) {
428        SrcReg = MI.getOperand(1).getReg();
429        MI2 = MRI->getVRegDef(SrcReg);
430        DstReg = MI.getOperand(0).getReg();
431
432        if (!MI2 ||
433            MI2->getOpcode() != BPF::SLL_ri ||
434            MI2->getOperand(2).getImm() != 32)
435          continue;
436
437        // Update SrcReg.
438        SrcReg = MI2->getOperand(1).getReg();
439        DefMI = MRI->getVRegDef(SrcReg);
440        if (DefMI)
441          TruncSize = 4;
442      } else if (MI.getOpcode() == BPF::AND_ri ||
443                 MI.getOpcode() == BPF::AND_ri_32) {
444        SrcReg = MI.getOperand(1).getReg();
445        DstReg = MI.getOperand(0).getReg();
446        DefMI = MRI->getVRegDef(SrcReg);
447
448        if (!DefMI)
449          continue;
450
451        int64_t imm = MI.getOperand(2).getImm();
452        if (imm == 0xff)
453          TruncSize = 1;
454        else if (imm == 0xffff)
455          TruncSize = 2;
456      }
457
458      if (TruncSize == -1)
459        continue;
460
461      // The definition is PHI node, check all inputs.
462      if (DefMI->isPHI()) {
463        bool CheckFail = false;
464
465        for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
466          MachineOperand &opnd = DefMI->getOperand(i);
467          if (!opnd.isReg()) {
468            CheckFail = true;
469            break;
470          }
471
472          MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
473          if (!PhiDef || PhiDef->isPHI() ||
474              !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
475            CheckFail = true;
476            break;
477          }
478        }
479
480        if (CheckFail)
481          continue;
482      } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
483        continue;
484      }
485
486      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
487              .addReg(SrcReg);
488
489      if (MI2)
490        MI2->eraseFromParent();
491
492      // Mark it to ToErase, and erase in the next iteration.
493      ToErase = &MI;
494      TruncElemNum++;
495      Eliminated = true;
496    }
497  }
498
499  return Eliminated;
500}
501
502} // end default namespace
503
504INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
505                "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
506                false, false)
507
508char BPFMIPeepholeTruncElim::ID = 0;
509FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
510{
511  return new BPFMIPeepholeTruncElim();
512}
513