//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file This pass does a few optimisations related to MVE VPT blocks before /// register allocation is performed. The goal is to maximize the sizes of the /// blocks that will be created by the MVE VPT Block Insertion pass (which runs /// after register allocation). The first optimisation done by this pass is the /// replacement of "opposite" VCMPs with VPNOTs, so the Block Insertion pass /// can delete them later to create larger VPT blocks. /// The second optimisation replaces re-uses of old VCCR values with VPNOTs when /// inside a block of predicated instructions. This is done to avoid /// spill/reloads of VPR in the middle of a block, which prevents the Block /// Insertion pass from creating large blocks. // //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/Debug.h" #include using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt-opts" namespace { class MVEVPTOptimisations : public MachineFunctionPass { public: static char ID; const Thumb2InstrInfo *TII; MachineRegisterInfo *MRI; MVEVPTOptimisations() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return "ARM MVE VPT Optimisation Pass"; } private: MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, Register Target); bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); }; char MVEVPTOptimisations::ID = 0; } // end anonymous namespace INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE, "ARM MVE VPT Optimisations pass", false, false) // Returns true if Opcode is any VCMP Opcode. static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } // Returns true if a VCMP with this Opcode can have its operands swapped. // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, // and VCMPr instructions (since the r is always on the right). static bool CanHaveSwappedOperands(unsigned Opcode) { switch (Opcode) { default: return true; case ARM::MVE_VCMPf32: case ARM::MVE_VCMPf16: case ARM::MVE_VCMPf32r: case ARM::MVE_VCMPf16r: case ARM::MVE_VCMPi8r: case ARM::MVE_VCMPi16r: case ARM::MVE_VCMPi32r: case ARM::MVE_VCMPu8r: case ARM::MVE_VCMPu16r: case ARM::MVE_VCMPu32r: case ARM::MVE_VCMPs8r: case ARM::MVE_VCMPs16r: case ARM::MVE_VCMPs32r: return false; } } // Returns the CondCode of a VCMP Instruction. static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); return ARMCC::CondCodes(Instr.getOperand(3).getImm()); } // Returns true if Cond is equivalent to a VPNOT instruction on the result of // Prev. Cond and Prev must be VCMPs. static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); // Opcodes must match. if (Cond.getOpcode() != Prev.getOpcode()) return false; MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2); MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2); // If the VCMP has the opposite condition with the same operands, we can // replace it with a VPNOT ARMCC::CondCodes ExpectedCode = GetCondCode(Cond); ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode); if (ExpectedCode == GetCondCode(Prev)) if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2)) return true; // Check again with operands swapped if possible if (!CanHaveSwappedOperands(Cond.getOpcode())) return false; ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode); return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) && CondOP2.isIdenticalTo(PrevOP1); } // Returns true if Instr writes to VCCR. static bool IsWritingToVCCR(MachineInstr &Instr) { if (Instr.getNumOperands() == 0) return false; MachineOperand &Dst = Instr.getOperand(0); if (!Dst.isReg()) return false; Register DstReg = Dst.getReg(); if (!DstReg.isVirtual()) return false; MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg); return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); } // Transforms // // Into // %K = VPNOT %Target // // And returns the newly inserted VPNOT. // This optimization is done in the hopes of preventing spills/reloads of VPR by // reducing the number of VCCR values with overlapping lifetimes. MachineInstr &MVEVPTOptimisations::ReplaceRegisterUseWithVPNOT( MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, Register Target) { Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target)); MachineInstrBuilder MIBuilder = BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) .addDef(NewResult) .addReg(Target); addUnpredicatedMveVpredNOp(MIBuilder); // Make the user use NewResult instead, and clear its kill flag. User.setReg(NewResult); User.setIsKill(false); LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): "; MIBuilder.getInstr()->dump()); return *MIBuilder.getInstr(); } // Moves a VPNOT before its first user if an instruction that uses Reg is found // in-between the VPNOT and its user. // Returns true if there is at least one user of the VPNOT in the block. static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg) { assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && "The VPNOT cannot be predicated"); MachineInstr &VPNOT = *Iter; Register VPNOTResult = VPNOT.getOperand(0).getReg(); Register VPNOTOperand = VPNOT.getOperand(1).getReg(); // Whether the VPNOT will need to be moved, and whether we found a user of the // VPNOT. bool MustMove = false, HasUser = false; MachineOperand *VPNOTOperandKiller = nullptr; for (; Iter != MBB.end(); ++Iter) { if (MachineOperand *MO = Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) { // If we find the operand that kills the VPNOTOperand's result, save it. VPNOTOperandKiller = MO; } if (Iter->findRegisterUseOperandIdx(Reg) != -1) { MustMove = true; continue; } if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1) continue; HasUser = true; if (!MustMove) break; // Move the VPNOT right before Iter LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: "; Iter->dump()); MBB.splice(Iter, &MBB, VPNOT.getIterator()); // If we move the instr, and its operand was killed earlier, remove the kill // flag. if (VPNOTOperandKiller) VPNOTOperandKiller->setIsKill(false); break; } return HasUser; } // This optimisation attempts to reduce the number of overlapping lifetimes of // VCCR values by replacing uses of old VCCR values with VPNOTs. For example, // this replaces // %A:vccr = (something) // %B:vccr = VPNOT %A // %Foo = (some op that uses %B) // %Bar = (some op that uses %A) // With // %A:vccr = (something) // %B:vccr = VPNOT %A // %Foo = (some op that uses %B) // %TMP2:vccr = VPNOT %B // %Bar = (some op that uses %A) bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); SmallVector DeadInstructions; bool Modified = false; while (Iter != End) { Register VCCRValue, OppositeVCCRValue; // The first loop looks for 2 unpredicated instructions: // %A:vccr = (instr) ; A is stored in VCCRValue // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue for (; Iter != End; ++Iter) { // We're only interested in unpredicated instructions that write to VCCR. if (!IsWritingToVCCR(*Iter) || getVPTInstrPredicate(*Iter) != ARMVCC::None) continue; Register Dst = Iter->getOperand(0).getReg(); // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've // found what we were looking for. if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && Iter->findRegisterUseOperandIdx(VCCRValue) != -1) { // Move the VPNOT closer to its first user if needed, and ignore if it // has no users. if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue)) continue; OppositeVCCRValue = Dst; ++Iter; break; } // Else, just set VCCRValue. VCCRValue = Dst; } // If the first inner loop didn't find anything, stop here. if (Iter == End) break; assert(VCCRValue && OppositeVCCRValue && "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " "stopped before the end of the block!"); assert(VCCRValue != OppositeVCCRValue && "VCCRValue should not be equal to OppositeVCCRValue!"); // LastVPNOTResult always contains the same value as OppositeVCCRValue. Register LastVPNOTResult = OppositeVCCRValue; // This second loop tries to optimize the remaining instructions. for (; Iter != End; ++Iter) { bool IsInteresting = false; if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) { IsInteresting = true; // - If the instruction is a VPNOT, it can be removed, and we can just // replace its uses with LastVPNOTResult. // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. if (Iter->getOpcode() == ARM::MVE_VPNOT) { Register Result = Iter->getOperand(0).getReg(); MRI->replaceRegWith(Result, LastVPNOTResult); DeadInstructions.push_back(&*Iter); Modified = true; LLVM_DEBUG(dbgs() << "Replacing all uses of '" << printReg(Result) << "' with '" << printReg(LastVPNOTResult) << "'\n"); } else { MachineInstr &VPNOT = ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult); Modified = true; LastVPNOTResult = VPNOT.getOperand(0).getReg(); std::swap(VCCRValue, OppositeVCCRValue); LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue) << "' with '" << printReg(LastVPNOTResult) << "' in instr: " << *Iter); } } else { // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult // instead as they contain the same value. if (MachineOperand *MO = Iter->findRegisterUseOperand(OppositeVCCRValue)) { IsInteresting = true; // This is pointless if LastVPNOTResult == OppositeVCCRValue. if (LastVPNOTResult != OppositeVCCRValue) { LLVM_DEBUG(dbgs() << "Replacing usage of '" << printReg(OppositeVCCRValue) << "' with '" << printReg(LastVPNOTResult) << " for instr: "; Iter->dump()); MO->setReg(LastVPNOTResult); Modified = true; } MO->setIsKill(false); } // If this is an unpredicated VPNOT on // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it. if (Iter->getOpcode() == ARM::MVE_VPNOT && getVPTInstrPredicate(*Iter) == ARMVCC::None) { Register VPNOTOperand = Iter->getOperand(1).getReg(); if (VPNOTOperand == LastVPNOTResult || VPNOTOperand == OppositeVCCRValue) { IsInteresting = true; std::swap(VCCRValue, OppositeVCCRValue); LastVPNOTResult = Iter->getOperand(0).getReg(); } } } // If this instruction was not interesting, and it writes to VCCR, stop. if (!IsInteresting && IsWritingToVCCR(*Iter)) break; } } for (MachineInstr *DeadInstruction : DeadInstructions) DeadInstruction->removeFromParent(); return Modified; } // This optimisation replaces VCMPs with VPNOTs when they are equivalent. bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { SmallVector DeadInstructions; // The last VCMP that we have seen and that couldn't be replaced. // This is reset when an instruction that writes to VCCR/VPR is found, or when // a VCMP is replaced with a VPNOT. // We'll only replace VCMPs with VPNOTs when this is not null, and when the // current VCMP is the opposite of PrevVCMP. MachineInstr *PrevVCMP = nullptr; // If we find an instruction that kills the result of PrevVCMP, we save the // operand here to remove the kill flag in case we need to use PrevVCMP's // result. MachineOperand *PrevVCMPResultKiller = nullptr; for (MachineInstr &Instr : MBB.instrs()) { if (PrevVCMP) { if (MachineOperand *MO = Instr.findRegisterUseOperand( PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) { // If we come accross the instr that kills PrevVCMP's result, record it // so we can remove the kill flag later if we need to. PrevVCMPResultKiller = MO; } } // Ignore predicated instructions. if (getVPTInstrPredicate(Instr) != ARMVCC::None) continue; // Only look at VCMPs if (!IsVCMP(Instr.getOpcode())) { // If the instruction writes to VCCR, forget the previous VCMP. if (IsWritingToVCCR(Instr)) PrevVCMP = nullptr; continue; } if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) { PrevVCMP = &Instr; continue; } // The register containing the result of the VCMP that we're going to // replace. Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg(); // Build a VPNOT to replace the VCMP, reusing its operands. MachineInstrBuilder MIBuilder = BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) .add(Instr.getOperand(0)) .addReg(PrevVCMPResultReg); addUnpredicatedMveVpredNOp(MIBuilder); LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): "; MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: "; Instr.dump()); // If we found an instruction that uses, and kills PrevVCMP's result, // remove the kill flag. if (PrevVCMPResultKiller) PrevVCMPResultKiller->setIsKill(false); // Finally, mark the old VCMP for removal and reset // PrevVCMP/PrevVCMPResultKiller. DeadInstructions.push_back(&Instr); PrevVCMP = nullptr; PrevVCMPResultKiller = nullptr; } for (MachineInstr *DeadInstruction : DeadInstructions) DeadInstruction->removeFromParent(); return !DeadInstructions.empty(); } bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { const ARMSubtarget &STI = static_cast(Fn.getSubtarget()); if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) return false; TII = static_cast(STI.getInstrInfo()); MRI = &Fn.getRegInfo(); LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" << "********** Function: " << Fn.getName() << '\n'); bool Modified = false; for (MachineBasicBlock &MBB : Fn) { Modified |= ReplaceVCMPsByVPNOTs(MBB); Modified |= ReduceOldVCCRValueUses(MBB); } LLVM_DEBUG(dbgs() << "**************************************\n"); return Modified; } /// createMVEVPTOptimisationsPass FunctionPass *llvm::createMVEVPTOptimisationsPass() { return new MVEVPTOptimisations(); }