1277323Sdim//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=// 2277323Sdim// 3277323Sdim// The LLVM Compiler Infrastructure 4277323Sdim// 5277323Sdim// This file is distributed under the University of Illinois Open Source 6277323Sdim// License. See LICENSE.TXT for details. 7277323Sdim// 8277323Sdim//===----------------------------------------------------------------------===// 9277323Sdim// 10277323Sdim// This pass tries to make consecutive compares of values use same operands to 11277323Sdim// allow CSE pass to remove duplicated instructions. For this it analyzes 12277323Sdim// branches and adjusts comparisons with immediate values by converting: 13277323Sdim// * GE -> GT 14277323Sdim// * GT -> GE 15277323Sdim// * LT -> LE 16277323Sdim// * LE -> LT 17277323Sdim// and adjusting immediate values appropriately. It basically corrects two 18277323Sdim// immediate values towards each other to make them equal. 19277323Sdim// 20277323Sdim// Consider the following example in C: 21277323Sdim// 22277323Sdim// if ((a < 5 && ...) || (a > 5 && ...)) { 23277323Sdim// ~~~~~ ~~~~~ 24277323Sdim// ^ ^ 25277323Sdim// x y 26277323Sdim// 27277323Sdim// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates 28277323Sdim// to "false", "y" can just check flags set by the first comparison. As a 29277323Sdim// result of the canonicalization employed by 30277323Sdim// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific 31277323Sdim// code, assembly ends up in the form that is not CSE friendly: 32277323Sdim// 33277323Sdim// ... 34277323Sdim// cmp w8, #4 35277323Sdim// b.gt .LBB0_3 36277323Sdim// ... 37277323Sdim// .LBB0_3: 38277323Sdim// cmp w8, #6 39277323Sdim// b.lt .LBB0_6 40277323Sdim// ... 41277323Sdim// 42277323Sdim// Same assembly after the pass: 43277323Sdim// 44277323Sdim// ... 45277323Sdim// cmp w8, #5 46277323Sdim// b.ge .LBB0_3 47277323Sdim// ... 48277323Sdim// .LBB0_3: 49277323Sdim// cmp w8, #5 // <-- CSE pass removes this instruction 50277323Sdim// b.le .LBB0_6 51277323Sdim// ... 52277323Sdim// 53277323Sdim// Currently only SUBS and ADDS followed by b.?? are supported. 54277323Sdim// 55277323Sdim// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0" 56277323Sdim// TODO: handle other conditional instructions (e.g. CSET) 57277323Sdim// TODO: allow second branching to be anything if it doesn't require adjusting 58277323Sdim// 59277323Sdim//===----------------------------------------------------------------------===// 60277323Sdim 61277323Sdim#include "AArch64.h" 62296417Sdim#include "MCTargetDesc/AArch64AddressingModes.h" 63277323Sdim#include "llvm/ADT/DepthFirstIterator.h" 64277323Sdim#include "llvm/ADT/SmallVector.h" 65277323Sdim#include "llvm/ADT/Statistic.h" 66277323Sdim#include "llvm/CodeGen/LiveIntervalAnalysis.h" 67277323Sdim#include "llvm/CodeGen/MachineDominators.h" 68277323Sdim#include "llvm/CodeGen/MachineFunction.h" 69277323Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 70277323Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 71288943Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 72277323Sdim#include "llvm/CodeGen/Passes.h" 73277323Sdim#include "llvm/Support/CommandLine.h" 74277323Sdim#include "llvm/Support/Debug.h" 75277323Sdim#include "llvm/Support/raw_ostream.h" 76277323Sdim#include "llvm/Target/TargetInstrInfo.h" 77277323Sdim#include "llvm/Target/TargetSubtargetInfo.h" 78277323Sdim#include <cstdlib> 79277323Sdim#include <tuple> 80277323Sdim 81277323Sdimusing namespace llvm; 82277323Sdim 83277323Sdim#define DEBUG_TYPE "aarch64-condopt" 84277323Sdim 85277323SdimSTATISTIC(NumConditionsAdjusted, "Number of conditions adjusted"); 86277323Sdim 87277323Sdimnamespace { 88277323Sdimclass AArch64ConditionOptimizer : public MachineFunctionPass { 89277323Sdim const TargetInstrInfo *TII; 90277323Sdim MachineDominatorTree *DomTree; 91288943Sdim const MachineRegisterInfo *MRI; 92277323Sdim 93277323Sdimpublic: 94277323Sdim // Stores immediate, compare instruction opcode and branch condition (in this 95277323Sdim // order) of adjusted comparison. 96288943Sdim typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo; 97277323Sdim 98277323Sdim static char ID; 99277323Sdim AArch64ConditionOptimizer() : MachineFunctionPass(ID) {} 100277323Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 101277323Sdim MachineInstr *findSuitableCompare(MachineBasicBlock *MBB); 102277323Sdim CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp); 103277323Sdim void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info); 104277323Sdim bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To, 105277323Sdim int ToImm); 106277323Sdim bool runOnMachineFunction(MachineFunction &MF) override; 107277323Sdim const char *getPassName() const override { 108277323Sdim return "AArch64 Condition Optimizer"; 109277323Sdim } 110277323Sdim}; 111277323Sdim} // end anonymous namespace 112277323Sdim 113277323Sdimchar AArch64ConditionOptimizer::ID = 0; 114277323Sdim 115277323Sdimnamespace llvm { 116277323Sdimvoid initializeAArch64ConditionOptimizerPass(PassRegistry &); 117277323Sdim} 118277323Sdim 119277323SdimINITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt", 120277323Sdim "AArch64 CondOpt Pass", false, false) 121277323SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 122277323SdimINITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt", 123277323Sdim "AArch64 CondOpt Pass", false, false) 124277323Sdim 125277323SdimFunctionPass *llvm::createAArch64ConditionOptimizerPass() { 126277323Sdim return new AArch64ConditionOptimizer(); 127277323Sdim} 128277323Sdim 129277323Sdimvoid AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const { 130277323Sdim AU.addRequired<MachineDominatorTree>(); 131277323Sdim AU.addPreserved<MachineDominatorTree>(); 132277323Sdim MachineFunctionPass::getAnalysisUsage(AU); 133277323Sdim} 134277323Sdim 135277323Sdim// Finds compare instruction that corresponds to supported types of branching. 136277323Sdim// Returns the instruction or nullptr on failures or detecting unsupported 137277323Sdim// instructions. 138277323SdimMachineInstr *AArch64ConditionOptimizer::findSuitableCompare( 139277323Sdim MachineBasicBlock *MBB) { 140277323Sdim MachineBasicBlock::iterator I = MBB->getFirstTerminator(); 141277323Sdim if (I == MBB->end()) 142277323Sdim return nullptr; 143277323Sdim 144277323Sdim if (I->getOpcode() != AArch64::Bcc) 145277323Sdim return nullptr; 146277323Sdim 147277323Sdim // Now find the instruction controlling the terminator. 148277323Sdim for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { 149277323Sdim --I; 150277323Sdim assert(!I->isTerminator() && "Spurious terminator"); 151277323Sdim switch (I->getOpcode()) { 152277323Sdim // cmp is an alias for subs with a dead destination register. 153277323Sdim case AArch64::SUBSWri: 154277323Sdim case AArch64::SUBSXri: 155277323Sdim // cmn is an alias for adds with a dead destination register. 156277323Sdim case AArch64::ADDSWri: 157296417Sdim case AArch64::ADDSXri: { 158296417Sdim unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm()); 159296417Sdim if (!I->getOperand(2).isImm()) { 160296417Sdim DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n'); 161296417Sdim return nullptr; 162296417Sdim } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) { 163296417Sdim DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n'); 164296417Sdim return nullptr; 165296417Sdim } else if (!MRI->use_empty(I->getOperand(0).getReg())) { 166296417Sdim DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n'); 167296417Sdim return nullptr; 168296417Sdim } 169296417Sdim return I; 170296417Sdim } 171277323Sdim // Prevent false positive case like: 172277323Sdim // cmp w19, #0 173277323Sdim // cinc w0, w19, gt 174277323Sdim // ... 175277323Sdim // fcmp d8, #0.0 176277323Sdim // b.gt .LBB0_5 177277323Sdim case AArch64::FCMPDri: 178277323Sdim case AArch64::FCMPSri: 179277323Sdim case AArch64::FCMPESri: 180277323Sdim case AArch64::FCMPEDri: 181277323Sdim 182277323Sdim case AArch64::SUBSWrr: 183277323Sdim case AArch64::SUBSXrr: 184277323Sdim case AArch64::ADDSWrr: 185277323Sdim case AArch64::ADDSXrr: 186277323Sdim case AArch64::FCMPSrr: 187277323Sdim case AArch64::FCMPDrr: 188277323Sdim case AArch64::FCMPESrr: 189277323Sdim case AArch64::FCMPEDrr: 190277323Sdim // Skip comparison instructions without immediate operands. 191277323Sdim return nullptr; 192277323Sdim } 193277323Sdim } 194277323Sdim DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); 195277323Sdim return nullptr; 196277323Sdim} 197277323Sdim 198277323Sdim// Changes opcode adds <-> subs considering register operand width. 199277323Sdimstatic int getComplementOpc(int Opc) { 200277323Sdim switch (Opc) { 201277323Sdim case AArch64::ADDSWri: return AArch64::SUBSWri; 202277323Sdim case AArch64::ADDSXri: return AArch64::SUBSXri; 203277323Sdim case AArch64::SUBSWri: return AArch64::ADDSWri; 204277323Sdim case AArch64::SUBSXri: return AArch64::ADDSXri; 205277323Sdim default: 206277323Sdim llvm_unreachable("Unexpected opcode"); 207277323Sdim } 208277323Sdim} 209277323Sdim 210277323Sdim// Changes form of comparison inclusive <-> exclusive. 211277323Sdimstatic AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) { 212277323Sdim switch (Cmp) { 213277323Sdim case AArch64CC::GT: return AArch64CC::GE; 214277323Sdim case AArch64CC::GE: return AArch64CC::GT; 215277323Sdim case AArch64CC::LT: return AArch64CC::LE; 216277323Sdim case AArch64CC::LE: return AArch64CC::LT; 217277323Sdim default: 218277323Sdim llvm_unreachable("Unexpected condition code"); 219277323Sdim } 220277323Sdim} 221277323Sdim 222277323Sdim// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison 223277323Sdim// operator and condition code. 224277323SdimAArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp( 225277323Sdim MachineInstr *CmpMI, AArch64CC::CondCode Cmp) { 226288943Sdim unsigned Opc = CmpMI->getOpcode(); 227277323Sdim 228277323Sdim // CMN (compare with negative immediate) is an alias to ADDS (as 229277323Sdim // "operand - negative" == "operand + positive") 230277323Sdim bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri); 231277323Sdim 232277323Sdim int Correction = (Cmp == AArch64CC::GT) ? 1 : -1; 233277323Sdim // Negate Correction value for comparison with negative immediate (CMN). 234277323Sdim if (Negative) { 235277323Sdim Correction = -Correction; 236277323Sdim } 237277323Sdim 238277323Sdim const int OldImm = (int)CmpMI->getOperand(2).getImm(); 239277323Sdim const int NewImm = std::abs(OldImm + Correction); 240277323Sdim 241277323Sdim // Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by 242277323Sdim // adjusting compare instruction opcode. 243277323Sdim if (OldImm == 0 && ((Negative && Correction == 1) || 244277323Sdim (!Negative && Correction == -1))) { 245277323Sdim Opc = getComplementOpc(Opc); 246277323Sdim } 247277323Sdim 248277323Sdim return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp)); 249277323Sdim} 250277323Sdim 251277323Sdim// Applies changes to comparison instruction suggested by adjustCmp(). 252277323Sdimvoid AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI, 253277323Sdim const CmpInfo &Info) { 254277323Sdim int Imm; 255288943Sdim unsigned Opc; 256277323Sdim AArch64CC::CondCode Cmp; 257277323Sdim std::tie(Imm, Opc, Cmp) = Info; 258277323Sdim 259277323Sdim MachineBasicBlock *const MBB = CmpMI->getParent(); 260277323Sdim 261277323Sdim // Change immediate in comparison instruction (ADDS or SUBS). 262277323Sdim BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc)) 263277323Sdim .addOperand(CmpMI->getOperand(0)) 264277323Sdim .addOperand(CmpMI->getOperand(1)) 265277323Sdim .addImm(Imm) 266277323Sdim .addOperand(CmpMI->getOperand(3)); 267277323Sdim CmpMI->eraseFromParent(); 268277323Sdim 269277323Sdim // The fact that this comparison was picked ensures that it's related to the 270277323Sdim // first terminator instruction. 271277323Sdim MachineInstr *BrMI = MBB->getFirstTerminator(); 272277323Sdim 273277323Sdim // Change condition in branch instruction. 274277323Sdim BuildMI(*MBB, BrMI, BrMI->getDebugLoc(), TII->get(AArch64::Bcc)) 275277323Sdim .addImm(Cmp) 276277323Sdim .addOperand(BrMI->getOperand(1)); 277277323Sdim BrMI->eraseFromParent(); 278277323Sdim 279277323Sdim MBB->updateTerminator(); 280277323Sdim 281277323Sdim ++NumConditionsAdjusted; 282277323Sdim} 283277323Sdim 284277323Sdim// Parse a condition code returned by AnalyzeBranch, and compute the CondCode 285277323Sdim// corresponding to TBB. 286277323Sdim// Returns true if parsing was successful, otherwise false is returned. 287277323Sdimstatic bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) { 288277323Sdim // A normal br.cond simply has the condition code. 289277323Sdim if (Cond[0].getImm() != -1) { 290277323Sdim assert(Cond.size() == 1 && "Unknown Cond array format"); 291277323Sdim CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 292277323Sdim return true; 293277323Sdim } 294277323Sdim return false; 295277323Sdim} 296277323Sdim 297277323Sdim// Adjusts one cmp instruction to another one if result of adjustment will allow 298277323Sdim// CSE. Returns true if compare instruction was changed, otherwise false is 299277323Sdim// returned. 300277323Sdimbool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI, 301277323Sdim AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm) 302277323Sdim{ 303277323Sdim CmpInfo Info = adjustCmp(CmpMI, Cmp); 304277323Sdim if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) { 305277323Sdim modifyCmp(CmpMI, Info); 306277323Sdim return true; 307277323Sdim } 308277323Sdim return false; 309277323Sdim} 310277323Sdim 311277323Sdimbool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) { 312277323Sdim DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" 313277323Sdim << "********** Function: " << MF.getName() << '\n'); 314288943Sdim TII = MF.getSubtarget().getInstrInfo(); 315277323Sdim DomTree = &getAnalysis<MachineDominatorTree>(); 316288943Sdim MRI = &MF.getRegInfo(); 317277323Sdim 318277323Sdim bool Changed = false; 319277323Sdim 320277323Sdim // Visit blocks in dominator tree pre-order. The pre-order enables multiple 321277323Sdim // cmp-conversions from the same head block. 322277323Sdim // Note that updateDomTree() modifies the children of the DomTree node 323277323Sdim // currently being visited. The df_iterator supports that; it doesn't look at 324277323Sdim // child_begin() / child_end() until after a node has been visited. 325277323Sdim for (MachineDomTreeNode *I : depth_first(DomTree)) { 326277323Sdim MachineBasicBlock *HBB = I->getBlock(); 327277323Sdim 328277323Sdim SmallVector<MachineOperand, 4> HeadCond; 329277323Sdim MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 330277323Sdim if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) { 331277323Sdim continue; 332277323Sdim } 333277323Sdim 334277323Sdim // Equivalence check is to skip loops. 335277323Sdim if (!TBB || TBB == HBB) { 336277323Sdim continue; 337277323Sdim } 338277323Sdim 339277323Sdim SmallVector<MachineOperand, 4> TrueCond; 340277323Sdim MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr; 341277323Sdim if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) { 342277323Sdim continue; 343277323Sdim } 344277323Sdim 345277323Sdim MachineInstr *HeadCmpMI = findSuitableCompare(HBB); 346277323Sdim if (!HeadCmpMI) { 347277323Sdim continue; 348277323Sdim } 349277323Sdim 350277323Sdim MachineInstr *TrueCmpMI = findSuitableCompare(TBB); 351277323Sdim if (!TrueCmpMI) { 352277323Sdim continue; 353277323Sdim } 354277323Sdim 355277323Sdim AArch64CC::CondCode HeadCmp; 356277323Sdim if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) { 357277323Sdim continue; 358277323Sdim } 359277323Sdim 360277323Sdim AArch64CC::CondCode TrueCmp; 361277323Sdim if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) { 362277323Sdim continue; 363277323Sdim } 364277323Sdim 365277323Sdim const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm(); 366277323Sdim const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm(); 367277323Sdim 368277323Sdim DEBUG(dbgs() << "Head branch:\n"); 369277323Sdim DEBUG(dbgs() << "\tcondition: " 370277323Sdim << AArch64CC::getCondCodeName(HeadCmp) << '\n'); 371277323Sdim DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n'); 372277323Sdim 373277323Sdim DEBUG(dbgs() << "True branch:\n"); 374277323Sdim DEBUG(dbgs() << "\tcondition: " 375277323Sdim << AArch64CC::getCondCodeName(TrueCmp) << '\n'); 376277323Sdim DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n'); 377277323Sdim 378277323Sdim if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) || 379277323Sdim (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) && 380277323Sdim std::abs(TrueImm - HeadImm) == 2) { 381277323Sdim // This branch transforms machine instructions that correspond to 382277323Sdim // 383277323Sdim // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...) 384277323Sdim // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...) 385277323Sdim // 386277323Sdim // into 387277323Sdim // 388277323Sdim // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...) 389277323Sdim // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...) 390277323Sdim 391277323Sdim CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp); 392277323Sdim CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp); 393277323Sdim if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) && 394277323Sdim std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) { 395277323Sdim modifyCmp(HeadCmpMI, HeadCmpInfo); 396277323Sdim modifyCmp(TrueCmpMI, TrueCmpInfo); 397277323Sdim Changed = true; 398277323Sdim } 399277323Sdim } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) || 400277323Sdim (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) && 401277323Sdim std::abs(TrueImm - HeadImm) == 1) { 402277323Sdim // This branch transforms machine instructions that correspond to 403277323Sdim // 404277323Sdim // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...) 405277323Sdim // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...) 406277323Sdim // 407277323Sdim // into 408277323Sdim // 409277323Sdim // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...) 410277323Sdim // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...) 411277323Sdim 412277323Sdim // GT -> GE transformation increases immediate value, so picking the 413277323Sdim // smaller one; LT -> LE decreases immediate value so invert the choice. 414277323Sdim bool adjustHeadCond = (HeadImm < TrueImm); 415277323Sdim if (HeadCmp == AArch64CC::LT) { 416277323Sdim adjustHeadCond = !adjustHeadCond; 417277323Sdim } 418277323Sdim 419277323Sdim if (adjustHeadCond) { 420277323Sdim Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm); 421277323Sdim } else { 422277323Sdim Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm); 423277323Sdim } 424277323Sdim } 425277323Sdim // Other transformation cases almost never occur due to generation of < or > 426277323Sdim // comparisons instead of <= and >=. 427277323Sdim } 428277323Sdim 429277323Sdim return Changed; 430277323Sdim} 431