//===- X86FixupSetCC.cpp - Fix zero-extension of setcc patterns -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that fixes zero-extension of setcc patterns.
// X86 setcc instructions are modeled to have no input arguments, and a single
// GR8 output argument. This is consistent with other similar instructions
// (e.g. movb), but means it is impossible to directly generate a setcc into
// the lower GR8 of a specified GR32.
// This means that ISel must select (zext (setcc)) into something like
//   seta %al; movzbl %al, %eax.
// Unfortunately, this can cause a stall due to the partial register write
// performed by the setcc. Instead, we can use:
//   xor %eax, %eax; seta %al
// This both avoids the stall and has a shorter encoding.
20//===----------------------------------------------------------------------===// 21 22#include "X86.h" 23#include "X86InstrInfo.h" 24#include "X86Subtarget.h" 25#include "llvm/ADT/Statistic.h" 26#include "llvm/CodeGen/MachineFunctionPass.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/MachineRegisterInfo.h" 29 30using namespace llvm; 31 32#define DEBUG_TYPE "x86-fixup-setcc" 33 34STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); 35 36namespace { 37class X86FixupSetCCPass : public MachineFunctionPass { 38public: 39 static char ID; 40 41 X86FixupSetCCPass() : MachineFunctionPass(ID) {} 42 43 StringRef getPassName() const override { return "X86 Fixup SetCC"; } 44 45 bool runOnMachineFunction(MachineFunction &MF) override; 46 47private: 48 MachineRegisterInfo *MRI = nullptr; 49 const X86InstrInfo *TII = nullptr; 50 51 enum { SearchBound = 16 }; 52}; 53} // end anonymous namespace 54 55char X86FixupSetCCPass::ID = 0; 56 57INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 58 59FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } 60 61bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { 62 bool Changed = false; 63 MRI = &MF.getRegInfo(); 64 TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); 65 66 SmallVector<MachineInstr*, 4> ToErase; 67 68 for (auto &MBB : MF) { 69 MachineInstr *FlagsDefMI = nullptr; 70 for (auto &MI : MBB) { 71 // Remember the most recent preceding eflags defining instruction. 72 if (MI.definesRegister(X86::EFLAGS)) 73 FlagsDefMI = &MI; 74 75 // Find a setcc that is used by a zext. 76 // This doesn't have to be the only use, the transformation is safe 77 // regardless. 
78 if (MI.getOpcode() != X86::SETCCr) 79 continue; 80 81 MachineInstr *ZExt = nullptr; 82 for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) 83 if (Use.getOpcode() == X86::MOVZX32rr8) 84 ZExt = &Use; 85 86 if (!ZExt) 87 continue; 88 89 if (!FlagsDefMI) 90 continue; 91 92 // We'd like to put something that clobbers eflags directly before 93 // FlagsDefMI. This can't hurt anything after FlagsDefMI, because 94 // it, itself, by definition, clobbers eflags. But it may happen that 95 // FlagsDefMI also *uses* eflags, in which case the transformation is 96 // invalid. 97 if (FlagsDefMI->readsRegister(X86::EFLAGS)) 98 continue; 99 100 // On 32-bit, we need to be careful to force an ABCD register. 101 const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit() 102 ? &X86::GR32RegClass 103 : &X86::GR32_ABCDRegClass; 104 if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) { 105 // If we cannot constrain the register, we would need an additional copy 106 // and are better off keeping the MOVZX32rr8 we have now. 107 continue; 108 } 109 110 ++NumSubstZexts; 111 Changed = true; 112 113 // Initialize a register with 0. This must go before the eflags def 114 Register ZeroReg = MRI->createVirtualRegister(RC); 115 BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), 116 ZeroReg); 117 118 // X86 setcc only takes an output GR8, so fake a GR32 input by inserting 119 // the setcc result into the low byte of the zeroed register. 120 BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), 121 TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg()) 122 .addReg(ZeroReg) 123 .addReg(MI.getOperand(0).getReg()) 124 .addImm(X86::sub_8bit); 125 ToErase.push_back(ZExt); 126 } 127 } 128 129 for (auto &I : ToErase) 130 I->eraseFromParent(); 131 132 return Changed; 133} 134