1170 // Add all the dag nodes to the worklist. 1171 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 1172 E = DAG.allnodes_end(); I != E; ++I) 1173 AddToWorklist(I); 1174 1175 // Create a dummy node (which is not added to allnodes), that adds a reference 1176 // to the root node, preventing it from being deleted, and tracking any 1177 // changes of the root. 1178 HandleSDNode Dummy(DAG.getRoot()); 1179 1180 // while the worklist isn't empty, find a node and 1181 // try and combine it. 1182 while (!WorklistMap.empty()) { 1183 SDNode *N; 1184 // The Worklist holds the SDNodes in order, but it may contain null entries. 1185 do { 1186 N = Worklist.pop_back_val(); 1187 } while (!N); 1188 1189 bool GoodWorklistEntry = WorklistMap.erase(N); 1190 (void)GoodWorklistEntry; 1191 assert(GoodWorklistEntry && 1192 "Found a worklist entry without a corresponding map entry!"); 1193 1194 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1195 // N is deleted from the DAG, since they too may now be dead or may have a 1196 // reduced number of uses, allowing other xforms. 1197 if (recursivelyDeleteUnusedNodes(N)) 1198 continue; 1199 1200 WorklistRemover DeadNodes(*this); 1201 1202 // If this combine is running after legalizing the DAG, re-legalize any 1203 // nodes pulled off the worklist. 1204 if (Level == AfterLegalizeDAG) { 1205 SmallSetVector<SDNode *, 16> UpdatedNodes; 1206 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1207 1208 for (SDNode *LN : UpdatedNodes) { 1209 AddToWorklist(LN); 1210 AddUsersToWorklist(LN); 1211 } 1212 if (!NIsValid) 1213 continue; 1214 } 1215 1216 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1217 1218 // Add any operands of the new node which have not yet been combined to the 1219 // worklist as well. Because the worklist uniques things already, this 1220 // won't repeatedly process the same operand. 
1221 CombinedNodes.insert(N); 1222 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1223 if (!CombinedNodes.count(N->getOperand(i).getNode())) 1224 AddToWorklist(N->getOperand(i).getNode()); 1225 1226 SDValue RV = combine(N); 1227 1228 if (!RV.getNode()) 1229 continue; 1230 1231 ++NodesCombined; 1232 1233 // If we get back the same node we passed in, rather than a new node or 1234 // zero, we know that the node must have defined multiple values and 1235 // CombineTo was used. Since CombineTo takes care of the worklist 1236 // mechanics for us, we have no work to do in this case. 1237 if (RV.getNode() == N) 1238 continue; 1239 1240 assert(N->getOpcode() != ISD::DELETED_NODE && 1241 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1242 "Node was deleted but visit returned new node!"); 1243 1244 DEBUG(dbgs() << " ... into: "; 1245 RV.getNode()->dump(&DAG)); 1246 1247 // Transfer debug value. 1248 DAG.TransferDbgValues(SDValue(N, 0), RV); 1249 if (N->getNumValues() == RV.getNode()->getNumValues()) 1250 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1251 else { 1252 assert(N->getValueType(0) == RV.getValueType() && 1253 N->getNumValues() == 1 && "Type mismatch"); 1254 SDValue OpV = RV; 1255 DAG.ReplaceAllUsesWith(N, &OpV); 1256 } 1257 1258 // Push the new node and any users onto the worklist 1259 AddToWorklist(RV.getNode()); 1260 AddUsersToWorklist(RV.getNode()); 1261 1262 // Finally, if the node is now dead, remove it from the graph. The node 1263 // may not be dead if the replacement process recursively simplified to 1264 // something else needing this node. This will also take care of adding any 1265 // operands which have lost a user to the worklist. 1266 recursivelyDeleteUnusedNodes(N); 1267 } 1268 1269 // If the root changed (e.g. it was a dead load, update the root). 
  // Commit the root captured by the dummy handle (it may have been replaced
  // while combining) and delete any nodes that became dead.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch to the opcode-specific combine routine for N.
/// Returns the replacement value, or a null SDValue when there is no
/// handler for this opcode (the default case).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  // Both rotate directions share one handler.
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  }
  return SDValue();
}

/// Try every way this combiner knows to simplify N, in order:
///  1. the opcode-specific visit routine,
///  2. the target's PerformDAGCombine hook,
///  3. integer-operation promotion,
///  4. CSE against an already-existing commuted form of the node.
/// Returns the replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target-specific opcodes (>= BUILTIN_OP_END) always go to the target;
    // generic opcodes only if the target registered a combine for them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad rewrites N in place; signal success by returning N.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode;
      // Nodes carrying arithmetic flags must be CSE'd with matching flags.
      if (const BinaryWithFlagsSDNode *BinNode =
              dyn_cast<BinaryWithFlagsSDNode>(N)) {
        CSENode = DAG.getNodeIfExists(
            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
            BinNode->hasNoSignedWrap(), BinNode->isExact());
      } else {
        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
      }
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.  Checks the first and last operands before scanning the middle,
/// since chains conventionally sit at one end of the operand list.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: drop redundant chains, inline single-use nested
/// TokenFactors, and deduplicate operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  // Worklist scan: TFs may grow while iterating, so index by position.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    // NOTE: the inner loop deliberately reuses the name 'i', shadowing the
    // outer index for the remainder of this scope.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru: a multi-use or already-seen TokenFactor is kept as a
        // plain operand.

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  // Result is null here when nothing changed.
  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward each result value directly to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// Constant-fold and algebraically simplify an integer ADD.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // (N0C here intentionally shadows the outer N0C: it is the constant inside
  // the SUB, not the whole LHS.)
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  if (RADD.getNode())
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // SimplifyDemandedBits rewrites N in place on success; return N itself to
  // signal the change.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // Same fold with the shl on the LHS.
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-ones or all-zeros.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Simplify ADDC (add producing a carry flag as result 1).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Simplify ADDE (add with carry-in operand).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.  Returns a zero constant, or a null SDValue
// when a vector BUILD_VECTOR of zero would be illegal.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  // Scalar zero is always fine.
  if (!VT.isVector())
    return DAG.getConstant(0, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, VT);
  return SDValue();
}

/// Constant-fold and algebraically simplify an integer SUB.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // Constant on the RHS of an (add ?, c) RHS, used by the C2-(A+C1) fold.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Simplify SUBC (sub producing a borrow flag as result 1).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

/// Simplify SUBE (sub with borrow-in operand).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Constant-fold and algebraically simplify an integer MUL, including
/// strength reduction of multiplies by (negated) powers of two to shifts.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a constant means a splat of one constant value.
    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
    ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
                            : APInt();
    N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
    ConstValue1 = N1IsConst ?
                  (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                            : APInt();
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());

  // canonicalize constant to RHS
  if (N0IsConst && !N1IsConst)
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
  if (RMUL.getNode())
    return RMUL;

  return SDValue();
}

/// Simplify SDIV: constant folding, strength reduction to UDIV or shifts,
/// and target/library expansion of divides by constants.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
                                     (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0; biases negative dividends so the final
    // arithmetic shift rounds toward zero as sdiv requires.
    SDValue SRL =
        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Simplify UDIV: constant folding and strength reduction of divides by
/// powers of two to logical right shifts.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType()))); 2169 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 2170 if (N1.getOpcode() == ISD::SHL) { 2171 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2172 if (SHC->getAPIntValue().isPowerOf2()) { 2173 EVT ADDVT = N1.getOperand(1).getValueType(); 2174 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, 2175 N1.getOperand(1), 2176 DAG.getConstant(SHC->getAPIntValue() 2177 .logBase2(), 2178 ADDVT)); 2179 AddToWorklist(Add.getNode()); 2180 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); 2181 } 2182 } 2183 } 2184 // fold (udiv x, c) -> alternate 2185 if (N1C && !TLI.isIntDivCheap()) { 2186 SDValue Op = BuildUDIV(N); 2187 if (Op.getNode()) return Op; 2188 } 2189 2190 // undef / X -> 0 2191 if (N0.getOpcode() == ISD::UNDEF) 2192 return DAG.getConstant(0, VT); 2193 // X / undef -> undef 2194 if (N1.getOpcode() == ISD::UNDEF) 2195 return N1; 2196 2197 return SDValue(); 2198} 2199 2200SDValue DAGCombiner::visitSREM(SDNode *N) { 2201 SDValue N0 = N->getOperand(0); 2202 SDValue N1 = N->getOperand(1); 2203 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2204 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2205 EVT VT = N->getValueType(0); 2206 2207 // fold (srem c1, c2) -> c1%c2 2208 if (N0C && N1C && !N1C->isNullValue()) 2209 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 2210 // If we know the sign bits of both operands are zero, strength reduce to a 2211 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 2212 if (!VT.isVector()) { 2213 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2214 return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); 2215 } 2216 2217 // If X/C can be simplified by the division-by-constant logic, lower 2218 // X%C to the equivalent of X-X/C*C. 
2219 if (N1C && !N1C->isNullValue()) { 2220 SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); 2221 AddToWorklist(Div.getNode()); 2222 SDValue OptimizedDiv = combine(Div.getNode()); 2223 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2224 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2225 OptimizedDiv, N1); 2226 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2227 AddToWorklist(Mul.getNode()); 2228 return Sub; 2229 } 2230 } 2231 2232 // undef % X -> 0 2233 if (N0.getOpcode() == ISD::UNDEF) 2234 return DAG.getConstant(0, VT); 2235 // X % undef -> undef 2236 if (N1.getOpcode() == ISD::UNDEF) 2237 return N1; 2238 2239 return SDValue(); 2240} 2241 2242SDValue DAGCombiner::visitUREM(SDNode *N) { 2243 SDValue N0 = N->getOperand(0); 2244 SDValue N1 = N->getOperand(1); 2245 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2246 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2247 EVT VT = N->getValueType(0); 2248 2249 // fold (urem c1, c2) -> c1%c2 2250 if (N0C && N1C && !N1C->isNullValue()) 2251 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); 2252 // fold (urem x, pow2) -> (and x, pow2-1) 2253 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) 2254 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, 2255 DAG.getConstant(N1C->getAPIntValue()-1,VT)); 2256 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 2257 if (N1.getOpcode() == ISD::SHL) { 2258 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2259 if (SHC->getAPIntValue().isPowerOf2()) { 2260 SDValue Add = 2261 DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, 2262 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), 2263 VT)); 2264 AddToWorklist(Add.getNode()); 2265 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); 2266 } 2267 } 2268 } 2269 2270 // If X/C can be simplified by the division-by-constant logic, lower 2271 // X%C to the equivalent of X-X/C*C. 
2272 if (N1C && !N1C->isNullValue()) { 2273 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 2274 AddToWorklist(Div.getNode()); 2275 SDValue OptimizedDiv = combine(Div.getNode()); 2276 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2277 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2278 OptimizedDiv, N1); 2279 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2280 AddToWorklist(Mul.getNode()); 2281 return Sub; 2282 } 2283 } 2284 2285 // undef % X -> 0 2286 if (N0.getOpcode() == ISD::UNDEF) 2287 return DAG.getConstant(0, VT); 2288 // X % undef -> undef 2289 if (N1.getOpcode() == ISD::UNDEF) 2290 return N1; 2291 2292 return SDValue(); 2293} 2294 2295SDValue DAGCombiner::visitMULHS(SDNode *N) { 2296 SDValue N0 = N->getOperand(0); 2297 SDValue N1 = N->getOperand(1); 2298 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2299 EVT VT = N->getValueType(0); 2300 SDLoc DL(N); 2301 2302 // fold (mulhs x, 0) -> 0 2303 if (N1C && N1C->isNullValue()) 2304 return N1; 2305 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2306 if (N1C && N1C->getAPIntValue() == 1) 2307 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 2308 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2309 getShiftAmountTy(N0.getValueType()))); 2310 // fold (mulhs x, undef) -> 0 2311 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2312 return DAG.getConstant(0, VT); 2313 2314 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2315 // plus a shift. 
2316 if (VT.isSimple() && !VT.isVector()) { 2317 MVT Simple = VT.getSimpleVT(); 2318 unsigned SimpleSize = Simple.getSizeInBits(); 2319 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2320 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2321 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2322 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2323 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2324 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2325 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2326 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2327 } 2328 } 2329 2330 return SDValue(); 2331} 2332 2333SDValue DAGCombiner::visitMULHU(SDNode *N) { 2334 SDValue N0 = N->getOperand(0); 2335 SDValue N1 = N->getOperand(1); 2336 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2337 EVT VT = N->getValueType(0); 2338 SDLoc DL(N); 2339 2340 // fold (mulhu x, 0) -> 0 2341 if (N1C && N1C->isNullValue()) 2342 return N1; 2343 // fold (mulhu x, 1) -> 0 2344 if (N1C && N1C->getAPIntValue() == 1) 2345 return DAG.getConstant(0, N0.getValueType()); 2346 // fold (mulhu x, undef) -> 0 2347 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2348 return DAG.getConstant(0, VT); 2349 2350 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2351 // plus a shift. 
2352 if (VT.isSimple() && !VT.isVector()) { 2353 MVT Simple = VT.getSimpleVT(); 2354 unsigned SimpleSize = Simple.getSizeInBits(); 2355 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2356 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2357 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2358 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2359 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2360 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2361 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2362 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2363 } 2364 } 2365 2366 return SDValue(); 2367} 2368 2369/// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2370/// give the opcodes for the two computations that are being performed. Return 2371/// true if a simplification was made. 2372SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2373 unsigned HiOp) { 2374 // If the high half is not needed, just compute the low half. 2375 bool HiExists = N->hasAnyUseOfValue(1); 2376 if (!HiExists && 2377 (!LegalOperations || 2378 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2379 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2380 return CombineTo(N, Res, Res); 2381 } 2382 2383 // If the low half is not needed, just compute the high half. 2384 bool LoExists = N->hasAnyUseOfValue(0); 2385 if (!LoExists && 2386 (!LegalOperations || 2387 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2388 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2389 return CombineTo(N, Res, Res); 2390 } 2391 2392 // If both halves are used, return as it is. 2393 if (LoExists && HiExists) 2394 return SDValue(); 2395 2396 // If the two computed results can be simplified separately, separate them. 
2397 if (LoExists) { 2398 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2399 AddToWorklist(Lo.getNode()); 2400 SDValue LoOpt = combine(Lo.getNode()); 2401 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2402 (!LegalOperations || 2403 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2404 return CombineTo(N, LoOpt, LoOpt); 2405 } 2406 2407 if (HiExists) { 2408 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2409 AddToWorklist(Hi.getNode()); 2410 SDValue HiOpt = combine(Hi.getNode()); 2411 if (HiOpt.getNode() && HiOpt != Hi && 2412 (!LegalOperations || 2413 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2414 return CombineTo(N, HiOpt, HiOpt); 2415 } 2416 2417 return SDValue(); 2418} 2419 2420SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2421 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2422 if (Res.getNode()) return Res; 2423 2424 EVT VT = N->getValueType(0); 2425 SDLoc DL(N); 2426 2427 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2428 // plus a shift. 2429 if (VT.isSimple() && !VT.isVector()) { 2430 MVT Simple = VT.getSimpleVT(); 2431 unsigned SimpleSize = Simple.getSizeInBits(); 2432 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2433 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2434 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2435 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2436 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2437 // Compute the high part as N1. 2438 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2439 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2440 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2441 // Compute the low part as N0. 
2442 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2443 return CombineTo(N, Lo, Hi); 2444 } 2445 } 2446 2447 return SDValue(); 2448} 2449 2450SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2451 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2452 if (Res.getNode()) return Res; 2453 2454 EVT VT = N->getValueType(0); 2455 SDLoc DL(N); 2456 2457 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2458 // plus a shift. 2459 if (VT.isSimple() && !VT.isVector()) { 2460 MVT Simple = VT.getSimpleVT(); 2461 unsigned SimpleSize = Simple.getSizeInBits(); 2462 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2463 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2464 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2465 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2466 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2467 // Compute the high part as N1. 2468 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2469 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2470 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2471 // Compute the low part as N0. 
2472 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2473 return CombineTo(N, Lo, Hi); 2474 } 2475 } 2476 2477 return SDValue(); 2478} 2479 2480SDValue DAGCombiner::visitSMULO(SDNode *N) { 2481 // (smulo x, 2) -> (saddo x, x) 2482 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2483 if (C2->getAPIntValue() == 2) 2484 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2485 N->getOperand(0), N->getOperand(0)); 2486 2487 return SDValue(); 2488} 2489 2490SDValue DAGCombiner::visitUMULO(SDNode *N) { 2491 // (umulo x, 2) -> (uaddo x, x) 2492 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2493 if (C2->getAPIntValue() == 2) 2494 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2495 N->getOperand(0), N->getOperand(0)); 2496 2497 return SDValue(); 2498} 2499 2500SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2501 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2502 if (Res.getNode()) return Res; 2503 2504 return SDValue(); 2505} 2506 2507SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2508 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2509 if (Res.getNode()) return Res; 2510 2511 return SDValue(); 2512} 2513 2514/// If this is a binary operator with two operands of the same opcode, try to 2515/// simplify it. 2516SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2517 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2518 EVT VT = N0.getValueType(); 2519 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2520 2521 // Bail early if none of these transforms apply. 
2522 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2523 2524 // For each of OP in AND/OR/XOR: 2525 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2526 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2527 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2528 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2529 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2530 // 2531 // do not sink logical op inside of a vector extend, since it may combine 2532 // into a vsetcc. 2533 EVT Op0VT = N0.getOperand(0).getValueType(); 2534 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2535 N0.getOpcode() == ISD::SIGN_EXTEND || 2536 N0.getOpcode() == ISD::BSWAP || 2537 // Avoid infinite looping with PromoteIntBinOp. 2538 (N0.getOpcode() == ISD::ANY_EXTEND && 2539 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2540 (N0.getOpcode() == ISD::TRUNCATE && 2541 (!TLI.isZExtFree(VT, Op0VT) || 2542 !TLI.isTruncateFree(Op0VT, VT)) && 2543 TLI.isTypeLegal(Op0VT))) && 2544 !VT.isVector() && 2545 Op0VT == N1.getOperand(0).getValueType() && 2546 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2547 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2548 N0.getOperand(0).getValueType(), 2549 N0.getOperand(0), N1.getOperand(0)); 2550 AddToWorklist(ORNode.getNode()); 2551 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2552 } 2553 2554 // For each of OP in SHL/SRL/SRA/AND... 
2555 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2556 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2557 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2558 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2559 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2560 N0.getOperand(1) == N1.getOperand(1)) { 2561 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2562 N0.getOperand(0).getValueType(), 2563 N0.getOperand(0), N1.getOperand(0)); 2564 AddToWorklist(ORNode.getNode()); 2565 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2566 ORNode, N0.getOperand(1)); 2567 } 2568 2569 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2570 // Only perform this optimization after type legalization and before 2571 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2572 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2573 // we don't want to undo this promotion. 2574 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2575 // on scalars. 2576 if ((N0.getOpcode() == ISD::BITCAST || 2577 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2578 Level == AfterLegalizeTypes) { 2579 SDValue In0 = N0.getOperand(0); 2580 SDValue In1 = N1.getOperand(0); 2581 EVT In0Ty = In0.getValueType(); 2582 EVT In1Ty = In1.getValueType(); 2583 SDLoc DL(N); 2584 // If both incoming values are integers, and the original types are the 2585 // same. 2586 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2587 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2588 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2589 AddToWorklist(Op.getNode()); 2590 return BC; 2591 } 2592 } 2593 2594 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2595 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2596 // If both shuffles use the same mask, and both shuffle within a single 2597 // vector, then it is worthwhile to move the swizzle after the operation. 2598 // The type-legalizer generates this pattern when loading illegal 2599 // vector types from memory. In many cases this allows additional shuffle 2600 // optimizations. 2601 // There are other cases where moving the shuffle after the xor/and/or 2602 // is profitable even if shuffles don't perform a swizzle. 2603 // If both shuffles use the same mask, and both shuffles have the same first 2604 // or second operand, then it might still be profitable to move the shuffle 2605 // after the xor/and/or operation. 2606 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2607 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2608 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2609 2610 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2611 "Inputs to shuffles are not the same type"); 2612 2613 // Check that both shuffles use the same mask. The masks are known to be of 2614 // the same length because the result vector type is the same. 2615 // Check also that shuffles have only one use to avoid introducing extra 2616 // instructions. 2617 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2618 SVN0->getMask().equals(SVN1->getMask())) { 2619 SDValue ShOp = N0->getOperand(1); 2620 2621 // Don't try to fold this node if it requires introducing a 2622 // build vector of all zeros that might be illegal at this stage. 
2623 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2624 if (!LegalTypes) 2625 ShOp = DAG.getConstant(0, VT); 2626 else 2627 ShOp = SDValue(); 2628 } 2629 2630 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2631 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2632 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2633 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2634 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2635 N0->getOperand(0), N1->getOperand(0)); 2636 AddToWorklist(NewNode.getNode()); 2637 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2638 &SVN0->getMask()[0]); 2639 } 2640 2641 // Don't try to fold this node if it requires introducing a 2642 // build vector of all zeros that might be illegal at this stage. 2643 ShOp = N0->getOperand(0); 2644 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2645 if (!LegalTypes) 2646 ShOp = DAG.getConstant(0, VT); 2647 else 2648 ShOp = SDValue(); 2649 } 2650 2651 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2652 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2653 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2654 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2655 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2656 N0->getOperand(1), N1->getOperand(1)); 2657 AddToWorklist(NewNode.getNode()); 2658 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2659 &SVN0->getMask()[0]); 2660 } 2661 } 2662 } 2663 2664 return SDValue(); 2665} 2666 2667SDValue DAGCombiner::visitAND(SDNode *N) { 2668 SDValue N0 = N->getOperand(0); 2669 SDValue N1 = N->getOperand(1); 2670 SDValue LL, LR, RL, RR, CC0, CC1; 2671 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2672 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2673 EVT VT = N1.getValueType(); 2674 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2675 2676 // fold vector ops 2677 if 
(VT.isVector()) { 2678 SDValue FoldedVOp = SimplifyVBinOp(N); 2679 if (FoldedVOp.getNode()) return FoldedVOp; 2680 2681 // fold (and x, 0) -> 0, vector edition 2682 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2683 // do not return N0, because undef node may exist in N0 2684 return DAG.getConstant( 2685 APInt::getNullValue( 2686 N0.getValueType().getScalarType().getSizeInBits()), 2687 N0.getValueType()); 2688 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2689 // do not return N1, because undef node may exist in N1 2690 return DAG.getConstant( 2691 APInt::getNullValue( 2692 N1.getValueType().getScalarType().getSizeInBits()), 2693 N1.getValueType()); 2694 2695 // fold (and x, -1) -> x, vector edition 2696 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2697 return N1; 2698 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2699 return N0; 2700 } 2701 2702 // fold (and x, undef) -> 0 2703 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2704 return DAG.getConstant(0, VT); 2705 // fold (and c1, c2) -> c1&c2 2706 if (N0C && N1C) 2707 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2708 // canonicalize constant to RHS 2709 if (N0C && !N1C) 2710 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2711 // fold (and x, -1) -> x 2712 if (N1C && N1C->isAllOnesValue()) 2713 return N0; 2714 // if (and x, c) is known to be zero, return 0 2715 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2716 APInt::getAllOnesValue(BitWidth))) 2717 return DAG.getConstant(0, VT); 2718 // reassociate and 2719 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 2720 if (RAND.getNode()) 2721 return RAND; 2722 // fold (and (or x, C), D) -> D if (C & D) == D 2723 if (N1C && N0.getOpcode() == ISD::OR) 2724 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2725 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2726 return N1; 2727 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 
2728 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2729 SDValue N0Op0 = N0.getOperand(0); 2730 APInt Mask = ~N1C->getAPIntValue(); 2731 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2732 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2733 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2734 N0.getValueType(), N0Op0); 2735 2736 // Replace uses of the AND with uses of the Zero extend node. 2737 CombineTo(N, Zext); 2738 2739 // We actually want to replace all uses of the any_extend with the 2740 // zero_extend, to avoid duplicating things. This will later cause this 2741 // AND to be folded. 2742 CombineTo(N0.getNode(), Zext); 2743 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2744 } 2745 } 2746 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2747 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2748 // already be zero by virtue of the width of the base type of the load. 2749 // 2750 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2751 // more cases. 2752 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2753 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2754 N0.getOpcode() == ISD::LOAD) { 2755 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2756 N0 : N0.getOperand(0) ); 2757 2758 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2759 // This can be a pure constant or a vector splat, in which case we treat the 2760 // vector as a scalar and use the splat value. 
2761 APInt Constant = APInt::getNullValue(1); 2762 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2763 Constant = C->getAPIntValue(); 2764 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2765 APInt SplatValue, SplatUndef; 2766 unsigned SplatBitSize; 2767 bool HasAnyUndefs; 2768 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2769 SplatBitSize, HasAnyUndefs); 2770 if (IsSplat) { 2771 // Undef bits can contribute to a possible optimisation if set, so 2772 // set them. 2773 SplatValue |= SplatUndef; 2774 2775 // The splat value may be something like "0x00FFFFFF", which means 0 for 2776 // the first vector value and FF for the rest, repeating. We need a mask 2777 // that will apply equally to all members of the vector, so AND all the 2778 // lanes of the constant together. 2779 EVT VT = Vector->getValueType(0); 2780 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2781 2782 // If the splat value has been compressed to a bitlength lower 2783 // than the size of the vector lane, we need to re-expand it to 2784 // the lane size. 2785 if (BitWidth > SplatBitSize) 2786 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2787 SplatBitSize < BitWidth; 2788 SplatBitSize = SplatBitSize * 2) 2789 SplatValue |= SplatValue.shl(SplatBitSize); 2790
| 1163 // Add all the dag nodes to the worklist. 1164 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 1165 E = DAG.allnodes_end(); I != E; ++I) 1166 AddToWorklist(I); 1167 1168 // Create a dummy node (which is not added to allnodes), that adds a reference 1169 // to the root node, preventing it from being deleted, and tracking any 1170 // changes of the root. 1171 HandleSDNode Dummy(DAG.getRoot()); 1172 1173 // while the worklist isn't empty, find a node and 1174 // try and combine it. 1175 while (!WorklistMap.empty()) { 1176 SDNode *N; 1177 // The Worklist holds the SDNodes in order, but it may contain null entries. 1178 do { 1179 N = Worklist.pop_back_val(); 1180 } while (!N); 1181 1182 bool GoodWorklistEntry = WorklistMap.erase(N); 1183 (void)GoodWorklistEntry; 1184 assert(GoodWorklistEntry && 1185 "Found a worklist entry without a corresponding map entry!"); 1186 1187 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1188 // N is deleted from the DAG, since they too may now be dead or may have a 1189 // reduced number of uses, allowing other xforms. 1190 if (recursivelyDeleteUnusedNodes(N)) 1191 continue; 1192 1193 WorklistRemover DeadNodes(*this); 1194 1195 // If this combine is running after legalizing the DAG, re-legalize any 1196 // nodes pulled off the worklist. 1197 if (Level == AfterLegalizeDAG) { 1198 SmallSetVector<SDNode *, 16> UpdatedNodes; 1199 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1200 1201 for (SDNode *LN : UpdatedNodes) { 1202 AddToWorklist(LN); 1203 AddUsersToWorklist(LN); 1204 } 1205 if (!NIsValid) 1206 continue; 1207 } 1208 1209 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1210 1211 // Add any operands of the new node which have not yet been combined to the 1212 // worklist as well. Because the worklist uniques things already, this 1213 // won't repeatedly process the same operand. 
1214 CombinedNodes.insert(N); 1215 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1216 if (!CombinedNodes.count(N->getOperand(i).getNode())) 1217 AddToWorklist(N->getOperand(i).getNode()); 1218 1219 SDValue RV = combine(N); 1220 1221 if (!RV.getNode()) 1222 continue; 1223 1224 ++NodesCombined; 1225 1226 // If we get back the same node we passed in, rather than a new node or 1227 // zero, we know that the node must have defined multiple values and 1228 // CombineTo was used. Since CombineTo takes care of the worklist 1229 // mechanics for us, we have no work to do in this case. 1230 if (RV.getNode() == N) 1231 continue; 1232 1233 assert(N->getOpcode() != ISD::DELETED_NODE && 1234 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1235 "Node was deleted but visit returned new node!"); 1236 1237 DEBUG(dbgs() << " ... into: "; 1238 RV.getNode()->dump(&DAG)); 1239 1240 // Transfer debug value. 1241 DAG.TransferDbgValues(SDValue(N, 0), RV); 1242 if (N->getNumValues() == RV.getNode()->getNumValues()) 1243 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1244 else { 1245 assert(N->getValueType(0) == RV.getValueType() && 1246 N->getNumValues() == 1 && "Type mismatch"); 1247 SDValue OpV = RV; 1248 DAG.ReplaceAllUsesWith(N, &OpV); 1249 } 1250 1251 // Push the new node and any users onto the worklist 1252 AddToWorklist(RV.getNode()); 1253 AddUsersToWorklist(RV.getNode()); 1254 1255 // Finally, if the node is now dead, remove it from the graph. The node 1256 // may not be dead if the replacement process recursively simplified to 1257 // something else needing this node. This will also take care of adding any 1258 // operands which have lost a user to the worklist. 1259 recursivelyDeleteUnusedNodes(N); 1260 } 1261 1262 // If the root changed (e.g. it was a dead load, update the root). 
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch to the opcode-specific visit routine for N. Returns the
/// replacement value, or a null SDValue if no combine applied.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  }
  return SDValue();
}

/// Combine N: first run the generic per-opcode visit, then give the target a
/// chance via its DAG-combine hook, then try integer promotion, and finally
/// try CSE'ing against a commuted form of the node.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode;
      if (const BinaryWithFlagsSDNode *BinNode =
              dyn_cast<BinaryWithFlagsSDNode>(N)) {
        // Preserve the arithmetic flags when looking up the commuted node.
        CSENode = DAG.getNodeIfExists(
            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
            BinNode->hasNoSignedWrap(), BinNode->isExact());
      } else {
        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
      }
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains are usually operand 0 or the last operand; check those first
    // before scanning the middle operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: flatten nested single-use token factors and drop
/// entry-token and duplicate operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// Combine an integer ADD.  The folds are attempted in order, so earlier
/// (cheaper/more canonical) folds take precedence.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  if (RADD.getNode())
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // ... and the symmetric form with the shl on the LHS.
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 =
      dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine an ADDC (add producing a carry flag result).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Combine an ADDE (add with carry-in and carry-out).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, VT);
  // BUILD_VECTOR is not legal for this type after legalization; give up.
  return SDValue();
}

/// Combine an integer SUB.  Folds are attempted in order.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // N1C1 is the constant C1 when N1 has the shape (add A, C1).
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine a SUBC (subtract producing a borrow flag result).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

/// Combine a SUBE (subtract with borrow-in and borrow-out).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an integer MUL.  Handles both scalar constants and constant
/// splat vectors via isConstantSplatVector.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
    ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
                            : APInt();
    N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
    ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                            : APInt();
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());

  // canonicalize constant to RHS
  if (N0IsConst && !N1IsConst)
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
  if (RMUL.getNode())
    return RMUL;

  return SDValue();
}

/// Combine a signed integer divide.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
                                     (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an unsigned integer divide.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine a signed integer remainder.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an unsigned integer remainder.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDValue Add =
          DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
2265 if (N1C && !N1C->isNullValue()) { 2266 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 2267 AddToWorklist(Div.getNode()); 2268 SDValue OptimizedDiv = combine(Div.getNode()); 2269 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2270 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2271 OptimizedDiv, N1); 2272 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2273 AddToWorklist(Mul.getNode()); 2274 return Sub; 2275 } 2276 } 2277 2278 // undef % X -> 0 2279 if (N0.getOpcode() == ISD::UNDEF) 2280 return DAG.getConstant(0, VT); 2281 // X % undef -> undef 2282 if (N1.getOpcode() == ISD::UNDEF) 2283 return N1; 2284 2285 return SDValue(); 2286} 2287 2288SDValue DAGCombiner::visitMULHS(SDNode *N) { 2289 SDValue N0 = N->getOperand(0); 2290 SDValue N1 = N->getOperand(1); 2291 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2292 EVT VT = N->getValueType(0); 2293 SDLoc DL(N); 2294 2295 // fold (mulhs x, 0) -> 0 2296 if (N1C && N1C->isNullValue()) 2297 return N1; 2298 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2299 if (N1C && N1C->getAPIntValue() == 1) 2300 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 2301 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2302 getShiftAmountTy(N0.getValueType()))); 2303 // fold (mulhs x, undef) -> 0 2304 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2305 return DAG.getConstant(0, VT); 2306 2307 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2308 // plus a shift. 
2309 if (VT.isSimple() && !VT.isVector()) { 2310 MVT Simple = VT.getSimpleVT(); 2311 unsigned SimpleSize = Simple.getSizeInBits(); 2312 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2313 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2314 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2315 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2316 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2317 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2318 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2319 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2320 } 2321 } 2322 2323 return SDValue(); 2324} 2325 2326SDValue DAGCombiner::visitMULHU(SDNode *N) { 2327 SDValue N0 = N->getOperand(0); 2328 SDValue N1 = N->getOperand(1); 2329 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2330 EVT VT = N->getValueType(0); 2331 SDLoc DL(N); 2332 2333 // fold (mulhu x, 0) -> 0 2334 if (N1C && N1C->isNullValue()) 2335 return N1; 2336 // fold (mulhu x, 1) -> 0 2337 if (N1C && N1C->getAPIntValue() == 1) 2338 return DAG.getConstant(0, N0.getValueType()); 2339 // fold (mulhu x, undef) -> 0 2340 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2341 return DAG.getConstant(0, VT); 2342 2343 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2344 // plus a shift. 
2345 if (VT.isSimple() && !VT.isVector()) { 2346 MVT Simple = VT.getSimpleVT(); 2347 unsigned SimpleSize = Simple.getSizeInBits(); 2348 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2349 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2350 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2351 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2352 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2353 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2354 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2355 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2356 } 2357 } 2358 2359 return SDValue(); 2360} 2361 2362/// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2363/// give the opcodes for the two computations that are being performed. Return 2364/// true if a simplification was made. 2365SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2366 unsigned HiOp) { 2367 // If the high half is not needed, just compute the low half. 2368 bool HiExists = N->hasAnyUseOfValue(1); 2369 if (!HiExists && 2370 (!LegalOperations || 2371 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2372 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2373 return CombineTo(N, Res, Res); 2374 } 2375 2376 // If the low half is not needed, just compute the high half. 2377 bool LoExists = N->hasAnyUseOfValue(0); 2378 if (!LoExists && 2379 (!LegalOperations || 2380 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2381 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2382 return CombineTo(N, Res, Res); 2383 } 2384 2385 // If both halves are used, return as it is. 2386 if (LoExists && HiExists) 2387 return SDValue(); 2388 2389 // If the two computed results can be simplified separately, separate them. 
2390 if (LoExists) { 2391 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2392 AddToWorklist(Lo.getNode()); 2393 SDValue LoOpt = combine(Lo.getNode()); 2394 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2395 (!LegalOperations || 2396 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2397 return CombineTo(N, LoOpt, LoOpt); 2398 } 2399 2400 if (HiExists) { 2401 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2402 AddToWorklist(Hi.getNode()); 2403 SDValue HiOpt = combine(Hi.getNode()); 2404 if (HiOpt.getNode() && HiOpt != Hi && 2405 (!LegalOperations || 2406 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2407 return CombineTo(N, HiOpt, HiOpt); 2408 } 2409 2410 return SDValue(); 2411} 2412 2413SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2414 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2415 if (Res.getNode()) return Res; 2416 2417 EVT VT = N->getValueType(0); 2418 SDLoc DL(N); 2419 2420 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2421 // plus a shift. 2422 if (VT.isSimple() && !VT.isVector()) { 2423 MVT Simple = VT.getSimpleVT(); 2424 unsigned SimpleSize = Simple.getSizeInBits(); 2425 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2426 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2427 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2428 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2429 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2430 // Compute the high part as N1. 2431 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2432 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2433 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2434 // Compute the low part as N0. 
2435 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2436 return CombineTo(N, Lo, Hi); 2437 } 2438 } 2439 2440 return SDValue(); 2441} 2442 2443SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2444 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2445 if (Res.getNode()) return Res; 2446 2447 EVT VT = N->getValueType(0); 2448 SDLoc DL(N); 2449 2450 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2451 // plus a shift. 2452 if (VT.isSimple() && !VT.isVector()) { 2453 MVT Simple = VT.getSimpleVT(); 2454 unsigned SimpleSize = Simple.getSizeInBits(); 2455 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2456 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2457 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2458 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2459 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2460 // Compute the high part as N1. 2461 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2462 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2463 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2464 // Compute the low part as N0. 
2465 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2466 return CombineTo(N, Lo, Hi); 2467 } 2468 } 2469 2470 return SDValue(); 2471} 2472 2473SDValue DAGCombiner::visitSMULO(SDNode *N) { 2474 // (smulo x, 2) -> (saddo x, x) 2475 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2476 if (C2->getAPIntValue() == 2) 2477 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2478 N->getOperand(0), N->getOperand(0)); 2479 2480 return SDValue(); 2481} 2482 2483SDValue DAGCombiner::visitUMULO(SDNode *N) { 2484 // (umulo x, 2) -> (uaddo x, x) 2485 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2486 if (C2->getAPIntValue() == 2) 2487 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2488 N->getOperand(0), N->getOperand(0)); 2489 2490 return SDValue(); 2491} 2492 2493SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2494 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2495 if (Res.getNode()) return Res; 2496 2497 return SDValue(); 2498} 2499 2500SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2501 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2502 if (Res.getNode()) return Res; 2503 2504 return SDValue(); 2505} 2506 2507/// If this is a binary operator with two operands of the same opcode, try to 2508/// simplify it. 2509SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2510 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2511 EVT VT = N0.getValueType(); 2512 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2513 2514 // Bail early if none of these transforms apply. 
2515 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2516 2517 // For each of OP in AND/OR/XOR: 2518 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2519 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2520 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2521 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2522 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2523 // 2524 // do not sink logical op inside of a vector extend, since it may combine 2525 // into a vsetcc. 2526 EVT Op0VT = N0.getOperand(0).getValueType(); 2527 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2528 N0.getOpcode() == ISD::SIGN_EXTEND || 2529 N0.getOpcode() == ISD::BSWAP || 2530 // Avoid infinite looping with PromoteIntBinOp. 2531 (N0.getOpcode() == ISD::ANY_EXTEND && 2532 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2533 (N0.getOpcode() == ISD::TRUNCATE && 2534 (!TLI.isZExtFree(VT, Op0VT) || 2535 !TLI.isTruncateFree(Op0VT, VT)) && 2536 TLI.isTypeLegal(Op0VT))) && 2537 !VT.isVector() && 2538 Op0VT == N1.getOperand(0).getValueType() && 2539 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2540 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2541 N0.getOperand(0).getValueType(), 2542 N0.getOperand(0), N1.getOperand(0)); 2543 AddToWorklist(ORNode.getNode()); 2544 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2545 } 2546 2547 // For each of OP in SHL/SRL/SRA/AND... 
2548 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2549 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2550 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2551 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2552 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2553 N0.getOperand(1) == N1.getOperand(1)) { 2554 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2555 N0.getOperand(0).getValueType(), 2556 N0.getOperand(0), N1.getOperand(0)); 2557 AddToWorklist(ORNode.getNode()); 2558 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2559 ORNode, N0.getOperand(1)); 2560 } 2561 2562 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2563 // Only perform this optimization after type legalization and before 2564 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2565 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2566 // we don't want to undo this promotion. 2567 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2568 // on scalars. 2569 if ((N0.getOpcode() == ISD::BITCAST || 2570 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2571 Level == AfterLegalizeTypes) { 2572 SDValue In0 = N0.getOperand(0); 2573 SDValue In1 = N1.getOperand(0); 2574 EVT In0Ty = In0.getValueType(); 2575 EVT In1Ty = In1.getValueType(); 2576 SDLoc DL(N); 2577 // If both incoming values are integers, and the original types are the 2578 // same. 2579 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2580 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2581 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2582 AddToWorklist(Op.getNode()); 2583 return BC; 2584 } 2585 } 2586 2587 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2588 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2589 // If both shuffles use the same mask, and both shuffle within a single 2590 // vector, then it is worthwhile to move the swizzle after the operation. 2591 // The type-legalizer generates this pattern when loading illegal 2592 // vector types from memory. In many cases this allows additional shuffle 2593 // optimizations. 2594 // There are other cases where moving the shuffle after the xor/and/or 2595 // is profitable even if shuffles don't perform a swizzle. 2596 // If both shuffles use the same mask, and both shuffles have the same first 2597 // or second operand, then it might still be profitable to move the shuffle 2598 // after the xor/and/or operation. 2599 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2600 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2601 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2602 2603 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2604 "Inputs to shuffles are not the same type"); 2605 2606 // Check that both shuffles use the same mask. The masks are known to be of 2607 // the same length because the result vector type is the same. 2608 // Check also that shuffles have only one use to avoid introducing extra 2609 // instructions. 2610 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2611 SVN0->getMask().equals(SVN1->getMask())) { 2612 SDValue ShOp = N0->getOperand(1); 2613 2614 // Don't try to fold this node if it requires introducing a 2615 // build vector of all zeros that might be illegal at this stage. 
2616 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2617 if (!LegalTypes) 2618 ShOp = DAG.getConstant(0, VT); 2619 else 2620 ShOp = SDValue(); 2621 } 2622 2623 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2624 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2625 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2626 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2627 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2628 N0->getOperand(0), N1->getOperand(0)); 2629 AddToWorklist(NewNode.getNode()); 2630 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2631 &SVN0->getMask()[0]); 2632 } 2633 2634 // Don't try to fold this node if it requires introducing a 2635 // build vector of all zeros that might be illegal at this stage. 2636 ShOp = N0->getOperand(0); 2637 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2638 if (!LegalTypes) 2639 ShOp = DAG.getConstant(0, VT); 2640 else 2641 ShOp = SDValue(); 2642 } 2643 2644 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2645 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2646 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2647 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2648 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2649 N0->getOperand(1), N1->getOperand(1)); 2650 AddToWorklist(NewNode.getNode()); 2651 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2652 &SVN0->getMask()[0]); 2653 } 2654 } 2655 } 2656 2657 return SDValue(); 2658} 2659 2660SDValue DAGCombiner::visitAND(SDNode *N) { 2661 SDValue N0 = N->getOperand(0); 2662 SDValue N1 = N->getOperand(1); 2663 SDValue LL, LR, RL, RR, CC0, CC1; 2664 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2665 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2666 EVT VT = N1.getValueType(); 2667 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2668 2669 // fold vector ops 2670 if 
(VT.isVector()) { 2671 SDValue FoldedVOp = SimplifyVBinOp(N); 2672 if (FoldedVOp.getNode()) return FoldedVOp; 2673 2674 // fold (and x, 0) -> 0, vector edition 2675 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2676 // do not return N0, because undef node may exist in N0 2677 return DAG.getConstant( 2678 APInt::getNullValue( 2679 N0.getValueType().getScalarType().getSizeInBits()), 2680 N0.getValueType()); 2681 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2682 // do not return N1, because undef node may exist in N1 2683 return DAG.getConstant( 2684 APInt::getNullValue( 2685 N1.getValueType().getScalarType().getSizeInBits()), 2686 N1.getValueType()); 2687 2688 // fold (and x, -1) -> x, vector edition 2689 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2690 return N1; 2691 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2692 return N0; 2693 } 2694 2695 // fold (and x, undef) -> 0 2696 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2697 return DAG.getConstant(0, VT); 2698 // fold (and c1, c2) -> c1&c2 2699 if (N0C && N1C) 2700 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2701 // canonicalize constant to RHS 2702 if (N0C && !N1C) 2703 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2704 // fold (and x, -1) -> x 2705 if (N1C && N1C->isAllOnesValue()) 2706 return N0; 2707 // if (and x, c) is known to be zero, return 0 2708 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2709 APInt::getAllOnesValue(BitWidth))) 2710 return DAG.getConstant(0, VT); 2711 // reassociate and 2712 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 2713 if (RAND.getNode()) 2714 return RAND; 2715 // fold (and (or x, C), D) -> D if (C & D) == D 2716 if (N1C && N0.getOpcode() == ISD::OR) 2717 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2718 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2719 return N1; 2720 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 
2721 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2722 SDValue N0Op0 = N0.getOperand(0); 2723 APInt Mask = ~N1C->getAPIntValue(); 2724 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2725 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2726 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2727 N0.getValueType(), N0Op0); 2728 2729 // Replace uses of the AND with uses of the Zero extend node. 2730 CombineTo(N, Zext); 2731 2732 // We actually want to replace all uses of the any_extend with the 2733 // zero_extend, to avoid duplicating things. This will later cause this 2734 // AND to be folded. 2735 CombineTo(N0.getNode(), Zext); 2736 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2737 } 2738 } 2739 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2740 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2741 // already be zero by virtue of the width of the base type of the load. 2742 // 2743 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2744 // more cases. 2745 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2746 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2747 N0.getOpcode() == ISD::LOAD) { 2748 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2749 N0 : N0.getOperand(0) ); 2750 2751 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2752 // This can be a pure constant or a vector splat, in which case we treat the 2753 // vector as a scalar and use the splat value. 
2754 APInt Constant = APInt::getNullValue(1); 2755 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2756 Constant = C->getAPIntValue(); 2757 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2758 APInt SplatValue, SplatUndef; 2759 unsigned SplatBitSize; 2760 bool HasAnyUndefs; 2761 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2762 SplatBitSize, HasAnyUndefs); 2763 if (IsSplat) { 2764 // Undef bits can contribute to a possible optimisation if set, so 2765 // set them. 2766 SplatValue |= SplatUndef; 2767 2768 // The splat value may be something like "0x00FFFFFF", which means 0 for 2769 // the first vector value and FF for the rest, repeating. We need a mask 2770 // that will apply equally to all members of the vector, so AND all the 2771 // lanes of the constant together. 2772 EVT VT = Vector->getValueType(0); 2773 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2774 2775 // If the splat value has been compressed to a bitlength lower 2776 // than the size of the vector lane, we need to re-expand it to 2777 // the lane size. 2778 if (BitWidth > SplatBitSize) 2779 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2780 SplatBitSize < BitWidth; 2781 SplatBitSize = SplatBitSize * 2) 2782 SplatValue |= SplatValue.shl(SplatBitSize); 2783
// NOTE(review): the extraction is corrupted at this point — original lines
// 2784-2793 (the tail of the build-vector splat-constant handling in
// visitAND, which derives `Constant` from `SplatValue`) are missing and must
// be restored from the upstream source before this region can compile.
2794 } 2795 } 2796 2797 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2798 // actually legal and isn't going to get expanded, else this is a false 2799 // optimisation. 2800 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2801 Load->getValueType(0), 2802 Load->getMemoryVT()); 2803 2804 // Resize the constant to the same size as the original memory access before 2805 // extension. If it is still the AllOnesValue then this AND is completely 2806 // unneeded. 2807 Constant = 2808 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2809 2810 bool B; 2811 switch (Load->getExtensionType()) { 2812 default: B = false; break; 2813 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2814 case ISD::ZEXTLOAD: 2815 case ISD::NON_EXTLOAD: B = true; break; 2816 } 2817 2818 if (B && Constant.isAllOnesValue()) { 2819 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2820 // preserve semantics once we get rid of the AND. 2821 SDValue NewLoad(Load, 0); 2822 if (Load->getExtensionType() == ISD::EXTLOAD) { 2823 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2824 Load->getValueType(0), SDLoc(Load), 2825 Load->getChain(), Load->getBasePtr(), 2826 Load->getOffset(), Load->getMemoryVT(), 2827 Load->getMemOperand()); 2828 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2829 if (Load->getNumValues() == 3) { 2830 // PRE/POST_INC loads have 3 values. 2831 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2832 NewLoad.getValue(2) }; 2833 CombineTo(Load, To, 3, true); 2834 } else { 2835 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2836 } 2837 } 2838 2839 // Fold the AND away, taking care not to fold to the old load node if we 2840 // replaced it. 2841 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2842 2843 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2844 } 2845 } 2846 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2847 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2848 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2849 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2850 2851 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2852 LL.getValueType().isInteger()) { 2853 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2854 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2855 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2856 LR.getValueType(), LL, RL); 2857 AddToWorklist(ORNode.getNode()); 2858 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2859 } 2860 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2861 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2862 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2863 LR.getValueType(), LL, RL); 2864 AddToWorklist(ANDNode.getNode()); 2865 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 2866 } 2867 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2868 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2869 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2870 LR.getValueType(), LL, RL); 2871 AddToWorklist(ORNode.getNode()); 2872 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2873 } 2874 } 2875 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2876 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2877 Op0 == Op1 && LL.getValueType().isInteger() && 2878 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2879 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2880 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2881 cast<ConstantSDNode>(RR)->isNullValue()))) { 2882 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2883 LL, DAG.getConstant(1, LL.getValueType())); 2884 AddToWorklist(ADDNode.getNode()); 2885 
return DAG.getSetCC(SDLoc(N), VT, ADDNode, 2886 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2887 } 2888 // canonicalize equivalent to ll == rl 2889 if (LL == RR && LR == RL) { 2890 Op1 = ISD::getSetCCSwappedOperands(Op1); 2891 std::swap(RL, RR); 2892 } 2893 if (LL == RL && LR == RR) { 2894 bool isInteger = LL.getValueType().isInteger(); 2895 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2896 if (Result != ISD::SETCC_INVALID && 2897 (!LegalOperations || 2898 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2899 TLI.isOperationLegal(ISD::SETCC, 2900 getSetCCResultType(N0.getSimpleValueType()))))) 2901 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 2902 LL, LR, Result); 2903 } 2904 } 2905 2906 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2907 if (N0.getOpcode() == N1.getOpcode()) { 2908 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2909 if (Tmp.getNode()) return Tmp; 2910 } 2911 2912 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2913 // fold (and (sra)) -> (and (srl)) when possible. 2914 if (!VT.isVector() && 2915 SimplifyDemandedBits(SDValue(N, 0))) 2916 return SDValue(N, 0); 2917 2918 // fold (zext_inreg (extload x)) -> (zextload x) 2919 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2920 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2921 EVT MemVT = LN0->getMemoryVT(); 2922 // If we zero all the possible extended bits, then we can turn this into 2923 // a zextload if we are running before legalize or the operation is legal. 
2924 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2925 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2926 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2927 ((!LegalOperations && !LN0->isVolatile()) || 2928 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2929 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2930 LN0->getChain(), LN0->getBasePtr(), 2931 MemVT, LN0->getMemOperand()); 2932 AddToWorklist(N); 2933 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2934 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2935 } 2936 } 2937 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2938 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2939 N0.hasOneUse()) { 2940 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2941 EVT MemVT = LN0->getMemoryVT(); 2942 // If we zero all the possible extended bits, then we can turn this into 2943 // a zextload if we are running before legalize or the operation is legal. 2944 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2945 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2946 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2947 ((!LegalOperations && !LN0->isVolatile()) || 2948 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2949 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2950 LN0->getChain(), LN0->getBasePtr(), 2951 MemVT, LN0->getMemOperand()); 2952 AddToWorklist(N); 2953 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2954 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2955 } 2956 } 2957 2958 // fold (and (load x), 255) -> (zextload x, i8) 2959 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2960 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2961 if (N1C && (N0.getOpcode() == ISD::LOAD || 2962 (N0.getOpcode() == ISD::ANY_EXTEND && 2963 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2964 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2965 LoadSDNode *LN0 = HasAnyExt 2966 ? cast<LoadSDNode>(N0.getOperand(0)) 2967 : cast<LoadSDNode>(N0); 2968 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2969 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2970 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2971 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2972 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2973 EVT LoadedVT = LN0->getMemoryVT(); 2974 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2975 2976 if (ExtVT == LoadedVT && 2977 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2978 ExtVT))) { 2979 2980 SDValue NewLoad = 2981 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2982 LN0->getChain(), LN0->getBasePtr(), ExtVT, 2983 LN0->getMemOperand()); 2984 AddToWorklist(N); 2985 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2986 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2987 } 2988 2989 // Do not change the width of a volatile load. 2990 // Do not generate loads of non-round integer types since these can 2991 // be expensive (and would be wrong if the type is not byte sized). 
2992 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2993 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2994 ExtVT))) { 2995 EVT PtrType = LN0->getOperand(1).getValueType(); 2996 2997 unsigned Alignment = LN0->getAlignment(); 2998 SDValue NewPtr = LN0->getBasePtr(); 2999 3000 // For big endian targets, we need to add an offset to the pointer 3001 // to load the correct bytes. For little endian systems, we merely 3002 // need to read fewer bytes from the same pointer. 3003 if (TLI.isBigEndian()) { 3004 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3005 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3006 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3007 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 3008 NewPtr, DAG.getConstant(PtrOff, PtrType)); 3009 Alignment = MinAlign(Alignment, PtrOff); 3010 } 3011 3012 AddToWorklist(NewPtr.getNode()); 3013 3014 SDValue Load = 3015 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3016 LN0->getChain(), NewPtr, 3017 LN0->getPointerInfo(), 3018 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3019 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3020 AddToWorklist(N); 3021 CombineTo(LN0, Load, Load.getValue(1)); 3022 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3023 } 3024 } 3025 } 3026 } 3027 3028 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 3029 VT.getSizeInBits() <= 64) { 3030 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3031 APInt ADDC = ADDI->getAPIntValue(); 3032 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 3033 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 3034 // immediate for an add, but it is legal if its top c2 bits are set, 3035 // transform the ADD so the immediate doesn't need to be materialized 3036 // in a register. 
        // The SRL amount tells us which high bits of the OR's result are
        // guaranteed zero; we may fold that many high bits into the ADD
        // immediate without changing the AND's result.
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, VT));
              CombineTo(N0.getNode(), NewAdd);
              return SDValue(N, 0); // Return N so it doesn't get rechecked!
            }
          }
        }
      }
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// N is the OR node being combined; N0/N1 are its two operands (possibly
/// swapped below to canonicalize the SHL side into N0). If DemandHighBits is
/// true, all bits above the low 16 of the result must be provably zero for
/// the match to be valid.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // This transform only produces already-legal nodes, so it is restricted to
  // post-type-legalization combining.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00).
  // LookPassAnd0/1 record that the AND mask was matched (and stripped) on the
  // SHL / SRL side respectively, so the demanded-bits check below knows the
  // high bits were masked off.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so the SHL side ends up in N0 and the SRL side in N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) -- the masks
  // may also sit inside the shifts rather than outside.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must swap the bytes of the same value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the
  // SRL 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), shifted right so the swapped halfword lands in the low
  // 16 bits for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
///
/// On success, records the source node for byte lane Num in Parts[Num] (Num
/// derived from the mask constant) and returns true. A lane may only be
/// matched once; a second match for the same lane fails.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask constant to the byte lane it selects.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte lane may only be produced once.
  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap.
That is 3245/// ((x & 0x000000ff) << 8) | 3246/// ((x & 0x0000ff00) >> 8) | 3247/// ((x & 0x00ff0000) << 8) | 3248/// ((x & 0xff000000) >> 8) 3249/// => (rotl (bswap x), 16) 3250SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3251 if (!LegalOperations) 3252 return SDValue(); 3253 3254 EVT VT = N->getValueType(0); 3255 if (VT != MVT::i32) 3256 return SDValue(); 3257 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3258 return SDValue(); 3259 3260 // Look for either 3261 // (or (or (and), (and)), (or (and), (and))) 3262 // (or (or (or (and), (and)), (and)), (and)) 3263 if (N0.getOpcode() != ISD::OR) 3264 return SDValue(); 3265 SDValue N00 = N0.getOperand(0); 3266 SDValue N01 = N0.getOperand(1); 3267 SDNode *Parts[4] = {}; 3268 3269 if (N1.getOpcode() == ISD::OR && 3270 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3271 // (or (or (and), (and)), (or (and), (and))) 3272 SDValue N000 = N00.getOperand(0); 3273 if (!isBSwapHWordElement(N000, Parts)) 3274 return SDValue(); 3275 3276 SDValue N001 = N00.getOperand(1); 3277 if (!isBSwapHWordElement(N001, Parts)) 3278 return SDValue(); 3279 SDValue N010 = N01.getOperand(0); 3280 if (!isBSwapHWordElement(N010, Parts)) 3281 return SDValue(); 3282 SDValue N011 = N01.getOperand(1); 3283 if (!isBSwapHWordElement(N011, Parts)) 3284 return SDValue(); 3285 } else { 3286 // (or (or (or (and), (and)), (and)), (and)) 3287 if (!isBSwapHWordElement(N1, Parts)) 3288 return SDValue(); 3289 if (!isBSwapHWordElement(N01, Parts)) 3290 return SDValue(); 3291 if (N00.getOpcode() != ISD::OR) 3292 return SDValue(); 3293 SDValue N000 = N00.getOperand(0); 3294 if (!isBSwapHWordElement(N000, Parts)) 3295 return SDValue(); 3296 SDValue N001 = N00.getOperand(1); 3297 if (!isBSwapHWordElement(N001, Parts)) 3298 return SDValue(); 3299 } 3300 3301 // Make sure the parts are all coming from the same node. 
3302 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3303 return SDValue(); 3304 3305 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, 3306 SDValue(Parts[0],0)); 3307 3308 // Result of the bswap should be rotated by 16. If it's not legal, then 3309 // do (x << 16) | (x >> 16). 3310 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 3311 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3312 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); 3313 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3314 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); 3315 return DAG.getNode(ISD::OR, SDLoc(N), VT, 3316 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), 3317 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); 3318} 3319 3320SDValue DAGCombiner::visitOR(SDNode *N) { 3321 SDValue N0 = N->getOperand(0); 3322 SDValue N1 = N->getOperand(1); 3323 SDValue LL, LR, RL, RR, CC0, CC1; 3324 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3325 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3326 EVT VT = N1.getValueType(); 3327 3328 // fold vector ops 3329 if (VT.isVector()) { 3330 SDValue FoldedVOp = SimplifyVBinOp(N); 3331 if (FoldedVOp.getNode()) return FoldedVOp; 3332 3333 // fold (or x, 0) -> x, vector edition 3334 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3335 return N1; 3336 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3337 return N0; 3338 3339 // fold (or x, -1) -> -1, vector edition 3340 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3341 // do not return N0, because undef node may exist in N0 3342 return DAG.getConstant( 3343 APInt::getAllOnesValue( 3344 N0.getValueType().getScalarType().getSizeInBits()), 3345 N0.getValueType()); 3346 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3347 // do not return N1, because undef node may exist in N1 3348 return DAG.getConstant( 3349 APInt::getAllOnesValue( 3350 N1.getValueType().getScalarType().getSizeInBits()), 3351 N1.getValueType()); 3352 3353 // fold (or (shuf A, 
V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3354 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3355 // Do this only if the resulting shuffle is legal. 3356 if (isa<ShuffleVectorSDNode>(N0) && 3357 isa<ShuffleVectorSDNode>(N1) && 3358 // Avoid folding a node with illegal type. 3359 TLI.isTypeLegal(VT) && 3360 N0->getOperand(1) == N1->getOperand(1) && 3361 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3362 bool CanFold = true; 3363 unsigned NumElts = VT.getVectorNumElements(); 3364 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3365 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3366 // We construct two shuffle masks: 3367 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3368 // and N1 as the second operand. 3369 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3370 // and N0 as the second operand. 3371 // We do this because OR is commutable and therefore there might be 3372 // two ways to fold this node into a shuffle. 3373 SmallVector<int,4> Mask1; 3374 SmallVector<int,4> Mask2; 3375 3376 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3377 int M0 = SV0->getMaskElt(i); 3378 int M1 = SV1->getMaskElt(i); 3379 3380 // Both shuffle indexes are undef. Propagate Undef. 3381 if (M0 < 0 && M1 < 0) { 3382 Mask1.push_back(M0); 3383 Mask2.push_back(M0); 3384 continue; 3385 } 3386 3387 if (M0 < 0 || M1 < 0 || 3388 (M0 < (int)NumElts && M1 < (int)NumElts) || 3389 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3390 CanFold = false; 3391 break; 3392 } 3393 3394 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3395 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3396 } 3397 3398 if (CanFold) { 3399 // Fold this sequence only if the resulting shuffle is 'legal'. 
3400 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3401 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3402 N1->getOperand(0), &Mask1[0]); 3403 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3404 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3405 N0->getOperand(0), &Mask2[0]); 3406 } 3407 } 3408 } 3409 3410 // fold (or x, undef) -> -1 3411 if (!LegalOperations && 3412 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3413 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3414 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 3415 } 3416 // fold (or c1, c2) -> c1|c2 3417 if (N0C && N1C) 3418 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 3419 // canonicalize constant to RHS 3420 if (N0C && !N1C) 3421 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3422 // fold (or x, 0) -> x 3423 if (N1C && N1C->isNullValue()) 3424 return N0; 3425 // fold (or x, -1) -> -1 3426 if (N1C && N1C->isAllOnesValue()) 3427 return N1; 3428 // fold (or x, c) -> c iff (x & ~c) == 0 3429 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3430 return N1; 3431 3432 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3433 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3434 if (BSwap.getNode()) 3435 return BSwap; 3436 BSwap = MatchBSwapHWordLow(N, N0, N1); 3437 if (BSwap.getNode()) 3438 return BSwap; 3439 3440 // reassociate or 3441 SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); 3442 if (ROR.getNode()) 3443 return ROR; 3444 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3445 // iff (c1 & c2) == 0. 
3446 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3447 isa<ConstantSDNode>(N0.getOperand(1))) { 3448 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3449 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3450 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) 3451 return DAG.getNode( 3452 ISD::AND, SDLoc(N), VT, 3453 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3454 return SDValue(); 3455 } 3456 } 3457 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3458 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3459 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3460 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3461 3462 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 3463 LL.getValueType().isInteger()) { 3464 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3465 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3466 if (cast<ConstantSDNode>(LR)->isNullValue() && 3467 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3468 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3469 LR.getValueType(), LL, RL); 3470 AddToWorklist(ORNode.getNode()); 3471 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 3472 } 3473 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3474 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3475 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 3476 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3477 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3478 LR.getValueType(), LL, RL); 3479 AddToWorklist(ANDNode.getNode()); 3480 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 3481 } 3482 } 3483 // canonicalize equivalent to ll == rl 3484 if (LL == RR && LR == RL) { 3485 Op1 = ISD::getSetCCSwappedOperands(Op1); 3486 std::swap(RL, RR); 3487 } 3488 if (LL == RL && LR == RR) { 3489 bool isInteger = LL.getValueType().isInteger(); 3490 ISD::CondCode Result = 
ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3491 if (Result != ISD::SETCC_INVALID && 3492 (!LegalOperations || 3493 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3494 TLI.isOperationLegal(ISD::SETCC, 3495 getSetCCResultType(N0.getValueType()))))) 3496 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 3497 LL, LR, Result); 3498 } 3499 } 3500 3501 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3502 if (N0.getOpcode() == N1.getOpcode()) { 3503 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3504 if (Tmp.getNode()) return Tmp; 3505 } 3506 3507 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3508 if (N0.getOpcode() == ISD::AND && 3509 N1.getOpcode() == ISD::AND && 3510 N0.getOperand(1).getOpcode() == ISD::Constant && 3511 N1.getOperand(1).getOpcode() == ISD::Constant && 3512 // Don't increase # computations. 3513 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3514 // We can only do this xform if we know that bits from X that are set in C2 3515 // but not in C1 are already zero. Likewise for Y. 3516 const APInt &LHSMask = 3517 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 3518 const APInt &RHSMask = 3519 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 3520 3521 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3522 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3523 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3524 N0.getOperand(0), N1.getOperand(0)); 3525 return DAG.getNode(ISD::AND, SDLoc(N), VT, X, 3526 DAG.getConstant(LHSMask | RHSMask, VT)); 3527 } 3528 } 3529 3530 // See if this is some rotate idiom. 3531 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3532 return SDValue(Rot, 0); 3533 3534 // Simplify the operands using demanded-bits information. 3535 if (!VT.isVector() && 3536 SimplifyDemandedBits(SDValue(N, 0))) 3537 return SDValue(N, 0); 3538 3539 return SDValue(); 3540} 3541 3542/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
3543static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3544 if (Op.getOpcode() == ISD::AND) { 3545 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3546 Mask = Op.getOperand(1); 3547 Op = Op.getOperand(0); 3548 } else { 3549 return false; 3550 } 3551 } 3552 3553 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3554 Shift = Op; 3555 return true; 3556 } 3557 3558 return false; 3559} 3560 3561// Return true if we can prove that, whenever Neg and Pos are both in the 3562// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3563// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3564// 3565// (or (shift1 X, Neg), (shift2 X, Pos)) 3566// 3567// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3568// in direction shift1 by Neg. The range [0, OpSize) means that we only need 3569// to consider shift amounts with defined behavior. 3570static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3571 // If OpSize is a power of 2 then: 3572 // 3573 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3574 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3575 // 3576 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3577 // for the stronger condition: 3578 // 3579 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3580 // 3581 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3582 // we can just replace Neg with Neg' for the rest of the function. 3583 // 3584 // In other cases we check for the even stronger condition: 3585 // 3586 // Neg == OpSize - Pos [B] 3587 // 3588 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3589 // behavior if Pos == 0 (and consequently Neg == OpSize). 
3590 // 3591 // We could actually use [A] whenever OpSize is a power of 2, but the 3592 // only extra cases that it would match are those uninteresting ones 3593 // where Neg and Pos are never in range at the same time. E.g. for 3594 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3595 // as well as (sub 32, Pos), but: 3596 // 3597 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3598 // 3599 // always invokes undefined behavior for 32-bit X. 3600 // 3601 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3602 unsigned MaskLoBits = 0; 3603 if (Neg.getOpcode() == ISD::AND && 3604 isPowerOf2_64(OpSize) && 3605 Neg.getOperand(1).getOpcode() == ISD::Constant && 3606 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3607 Neg = Neg.getOperand(0); 3608 MaskLoBits = Log2_64(OpSize); 3609 } 3610 3611 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3612 if (Neg.getOpcode() != ISD::SUB) 3613 return 0; 3614 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3615 if (!NegC) 3616 return 0; 3617 SDValue NegOp1 = Neg.getOperand(1); 3618 3619 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3620 // Pos'. The truncation is redundant for the purpose of the equality. 3621 if (MaskLoBits && 3622 Pos.getOpcode() == ISD::AND && 3623 Pos.getOperand(1).getOpcode() == ISD::Constant && 3624 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3625 Pos = Pos.getOperand(0); 3626 3627 // The condition we need is now: 3628 // 3629 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3630 // 3631 // If NegOp1 == Pos then we need: 3632 // 3633 // OpSize & Mask == NegC & Mask 3634 // 3635 // (because "x & Mask" is a truncation and distributes through subtraction). 3636 APInt Width; 3637 if (Pos == NegOp1) 3638 Width = NegC->getAPIntValue(); 3639 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
3640 // Then the condition we want to prove becomes: 3641 // 3642 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3643 // 3644 // which, again because "x & Mask" is a truncation, becomes: 3645 // 3646 // NegC & Mask == (OpSize - PosC) & Mask 3647 // OpSize & Mask == (NegC + PosC) & Mask 3648 else if (Pos.getOpcode() == ISD::ADD && 3649 Pos.getOperand(0) == NegOp1 && 3650 Pos.getOperand(1).getOpcode() == ISD::Constant) 3651 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3652 NegC->getAPIntValue()); 3653 else 3654 return false; 3655 3656 // Now we just need to check that OpSize & Mask == Width & Mask. 3657 if (MaskLoBits) 3658 // Opsize & Mask is 0 since Mask is Opsize - 1. 3659 return Width.getLoBits(MaskLoBits) == 0; 3660 return Width == OpSize; 3661} 3662 3663// A subroutine of MatchRotate used once we have found an OR of two opposite 3664// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3665// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3666// former being preferred if supported. InnerPos and InnerNeg are Pos and 3667// Neg with outer conversions stripped away. 3668SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3669 SDValue Neg, SDValue InnerPos, 3670 SDValue InnerNeg, unsigned PosOpcode, 3671 unsigned NegOpcode, SDLoc DL) { 3672 // fold (or (shl x, (*ext y)), 3673 // (srl x, (*ext (sub 32, y)))) -> 3674 // (rotl x, y) or (rotr x, (sub 32, y)) 3675 // 3676 // fold (or (shl x, (*ext (sub 32, y))), 3677 // (srl x, (*ext y))) -> 3678 // (rotr x, y) or (rotl x, (sub 32, y)) 3679 EVT VT = Shifted.getValueType(); 3680 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3681 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3682 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3683 HasPos ? Pos : Neg).getNode(); 3684 } 3685 3686 return nullptr; 3687} 3688 3689// MatchRotate - Handle an 'or' of two operands. 
If this is one of the many 3690// idioms for rotate, and if the target supports rotation instructions, generate 3691// a rot[lr]. 3692SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 3693 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 3694 EVT VT = LHS.getValueType(); 3695 if (!TLI.isTypeLegal(VT)) return nullptr; 3696 3697 // The target must have at least one rotate flavor. 3698 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 3699 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 3700 if (!HasROTL && !HasROTR) return nullptr; 3701 3702 // Match "(X shl/srl V1) & V2" where V2 may not be present. 3703 SDValue LHSShift; // The shift. 3704 SDValue LHSMask; // AND value if any. 3705 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 3706 return nullptr; // Not part of a rotate. 3707 3708 SDValue RHSShift; // The shift. 3709 SDValue RHSMask; // AND value if any. 3710 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 3711 return nullptr; // Not part of a rotate. 3712 3713 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 3714 return nullptr; // Not shifting the same value. 3715 3716 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 3717 return nullptr; // Shifts must disagree. 3718 3719 // Canonicalize shl to left side in a shl/srl pair. 
3720 if (RHSShift.getOpcode() == ISD::SHL) { 3721 std::swap(LHS, RHS); 3722 std::swap(LHSShift, RHSShift); 3723 std::swap(LHSMask , RHSMask ); 3724 } 3725 3726 unsigned OpSizeInBits = VT.getSizeInBits(); 3727 SDValue LHSShiftArg = LHSShift.getOperand(0); 3728 SDValue LHSShiftAmt = LHSShift.getOperand(1); 3729 SDValue RHSShiftArg = RHSShift.getOperand(0); 3730 SDValue RHSShiftAmt = RHSShift.getOperand(1); 3731 3732 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 3733 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 3734 if (LHSShiftAmt.getOpcode() == ISD::Constant && 3735 RHSShiftAmt.getOpcode() == ISD::Constant) { 3736 uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); 3737 uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); 3738 if ((LShVal + RShVal) != OpSizeInBits) 3739 return nullptr; 3740 3741 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 3742 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 3743 3744 // If there is an AND of either shifted operand, apply it to the result. 3745 if (LHSMask.getNode() || RHSMask.getNode()) { 3746 APInt Mask = APInt::getAllOnesValue(OpSizeInBits); 3747 3748 if (LHSMask.getNode()) { 3749 APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); 3750 Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; 3751 } 3752 if (RHSMask.getNode()) { 3753 APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); 3754 Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; 3755 } 3756 3757 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); 3758 } 3759 3760 return Rot.getNode(); 3761 } 3762 3763 // If there is a mask here, and we have a variable shift, we can't be sure 3764 // that we're masking out the right stuff. 3765 if (LHSMask.getNode() || RHSMask.getNode()) 3766 return nullptr; 3767 3768 // If the shift amount is sign/zext/any-extended just peel it off. 
3769 SDValue LExtOp0 = LHSShiftAmt; 3770 SDValue RExtOp0 = RHSShiftAmt; 3771 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3772 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3773 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3774 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 3775 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3776 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3777 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3778 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 3779 LExtOp0 = LHSShiftAmt.getOperand(0); 3780 RExtOp0 = RHSShiftAmt.getOperand(0); 3781 } 3782 3783 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 3784 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 3785 if (TryL) 3786 return TryL; 3787 3788 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 3789 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 3790 if (TryR) 3791 return TryR; 3792 3793 return nullptr; 3794} 3795 3796SDValue DAGCombiner::visitXOR(SDNode *N) { 3797 SDValue N0 = N->getOperand(0); 3798 SDValue N1 = N->getOperand(1); 3799 SDValue LHS, RHS, CC; 3800 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3801 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3802 EVT VT = N0.getValueType(); 3803 3804 // fold vector ops 3805 if (VT.isVector()) { 3806 SDValue FoldedVOp = SimplifyVBinOp(N); 3807 if (FoldedVOp.getNode()) return FoldedVOp; 3808 3809 // fold (xor x, 0) -> x, vector edition 3810 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3811 return N1; 3812 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3813 return N0; 3814 } 3815 3816 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
  // NOTE(review): this is the interior of DAGCombiner::visitXOR; N0/N1 are the
  // xor's operands, N0C/N1C their ConstantSDNode forms, and LHS/RHS/CC are
  // presumably scratch locals declared at the top of the function (not visible
  // here) — confirm against the function head.
  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS so later N1C-based folds only need one form.
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode())
    return RXOR;

  // fold !(x cc y) -> (x !cc y): when xor'ing a setcc (or select_cc) result
  // with the target's "true" value, invert the condition code instead.
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only do this if the inverted condition is legal post-legalization.
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  // i.e. push the xor-with-1 inside the zext so the setcc inversion above can
  // fire on the narrower value.
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (De Morgan, i1 only; also handles the dual and->or case).
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    // Note: these LHS/RHS shadow the outer locals of the same name.
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (De Morgan again, for any width, keyed on xor with all-ones).
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// \param N    the SHL/SRA/SRL node being combined.
/// \param Amt  N's constant shift amount.
/// Pulls a binop (and/or/xor/add) through the shift when both the binop RHS
/// and the shift amount are non-opaque constants, producing
/// (binop (shift x, amt), (shift cst, amt)).
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // We can't and shouldn't fold opaque constants.
  if (Amt->isOpaque())
    return SDValue();

  // Only fold if the binop result has no other users, otherwise we duplicate
  // work.
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute (e.g. addressing modes).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Both operands are constants, so this is expected to constant-fold.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Distribute a truncate over an AND with a constant (or constant splat):
/// (truncate (and x, c)) -> (and (truncate x), trunc(c)).
/// Requires both the truncate and the AND to be single-use so no nodes are
/// duplicated. Returns a null SDValue when the pattern doesn't apply.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);

    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      APInt TruncC = N01C->getAPIntValue();
      // Narrow the mask constant to the truncated width.
      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());

      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
                         DAG.getConstant(TruncC, TruncVT));
    }
  }

  return SDValue();
}

/// Combine ROTL/ROTR nodes.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Combine SHL nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // Treat a constant-splat vector amount like a scalar constant so the
        // N1C-guarded folds below apply to vectors too.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= bitwidth shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // Outer shift must cover the bits added by the extension.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, VT);
        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only fold when the shl undoes exactly the srl; the result is a
        // mask of the low bits which the zext can then be hoisted over.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
                                       NewOp0, DAG.getConstant(c2, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of bits the srl left defined; adjusted below by the residual
        // shift direction.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                           DAG.getConstant(Mask, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()), VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // Last resort: the generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

/// Combine SRA nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Constant-splat vector amounts participate in the N1C folds below.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An arithmetic shift of >= bitwidth saturates to bitwidth-1 (keeps
      // replicating the sign bit) rather than producing undef.
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //   -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: the generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// Combine SRL nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Constant-splat vector amounts participate in the N1C folds below.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= bitwidth shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    // Build the low-bits mask as a 64-bit shift of all-ones; only valid for
    // scalar widths up to 64.
    if (BitSize <= 64) {
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the (undef) bits the any_extend brought in.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                         DAG.getConstant(Mask, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine CTLZ nodes: re-emitting the node on a constant operand lets the
/// DAG constant-fold it.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTLZ_ZERO_UNDEF nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTTZ nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTTZ_ZERO_UNDEF nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTPOP nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}
/// \brief Generate Min/Max node
/// Turns (select (setcc LHS, RHS, CC), True, False) into FMINNUM/FMAXNUM when
/// the select operands are the compare operands (in either order) and the
/// target supports the min/max opcode for \p VT. Returns null otherwise.
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
                                   SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must be choosing between exactly the compared values.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // "less-than" compare: taking the LHS on true means min, else max.
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // "greater-than" compare: taking the LHS on true means max, else min.
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// Combine SELECT nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor in the condition type,
    // then widen or narrow to VT.
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      SDValue FMinMax =
          combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                              N1, N2, CC, TLI, DAG);
      if (FMinMax)
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}

/// Split a vector setcc-style node (opcode taken from \p N) into lo/hi halves,
/// splitting both vector operands and the result type.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  // Operand 2 (the condition code) is shared by both halves.
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
4806 ConstantSDNode *BottomHalf = nullptr; 4807 for (int i = 0; i < NumElems / 2; ++i) { 4808 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4809 continue; 4810 4811 if (BottomHalf == nullptr) 4812 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4813 else if (Cond->getOperand(i).getNode() != BottomHalf) 4814 return SDValue(); 4815 } 4816 4817 // Do the same for the second half of the BuildVector 4818 ConstantSDNode *TopHalf = nullptr; 4819 for (int i = NumElems / 2; i < NumElems; ++i) { 4820 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4821 continue; 4822 4823 if (TopHalf == nullptr) 4824 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4825 else if (Cond->getOperand(i).getNode() != TopHalf) 4826 return SDValue(); 4827 } 4828 4829 assert(TopHalf && BottomHalf && 4830 "One half of the selector was all UNDEFs and the other was all the " 4831 "same value. This should have been addressed before this function."); 4832 return DAG.getNode( 4833 ISD::CONCAT_VECTORS, dl, VT, 4834 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 4835 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 4836} 4837 4838SDValue DAGCombiner::visitMSTORE(SDNode *N) { 4839 4840 if (Level >= AfterLegalizeTypes) 4841 return SDValue(); 4842 4843 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 4844 SDValue Mask = MST->getMask(); 4845 SDValue Data = MST->getValue(); 4846 SDLoc DL(N); 4847 4848 // If the MSTORE data type requires splitting and the mask is provided by a 4849 // SETCC, then split both nodes and its operands before legalization. This 4850 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4851 // and enables future optimizations (e.g. min/max pattern matching on X86). 4852 if (Mask.getOpcode() == ISD::SETCC) { 4853 4854 // Check if any splitting is required. 
4855 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 4856 TargetLowering::TypeSplitVector) 4857 return SDValue(); 4858 4859 SDValue MaskLo, MaskHi, Lo, Hi; 4860 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4861 4862 EVT LoVT, HiVT; 4863 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 4864 4865 SDValue Chain = MST->getChain(); 4866 SDValue Ptr = MST->getBasePtr(); 4867 4868 EVT MemoryVT = MST->getMemoryVT(); 4869 unsigned Alignment = MST->getOriginalAlignment(); 4870 4871 // if Alignment is equal to the vector size, 4872 // take the half of it for the second part 4873 unsigned SecondHalfAlignment = 4874 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 4875 Alignment/2 : Alignment; 4876 4877 EVT LoMemVT, HiMemVT; 4878 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4879 4880 SDValue DataLo, DataHi; 4881 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 4882 4883 MachineMemOperand *MMO = DAG.getMachineFunction(). 4884 getMachineMemOperand(MST->getPointerInfo(), 4885 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 4886 Alignment, MST->getAAInfo(), MST->getRanges()); 4887 4888 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 4889 MST->isTruncatingStore()); 4890 4891 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4892 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4893 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4894 4895 MMO = DAG.getMachineFunction(). 
4896 getMachineMemOperand(MST->getPointerInfo(), 4897 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 4898 SecondHalfAlignment, MST->getAAInfo(), 4899 MST->getRanges()); 4900 4901 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 4902 MST->isTruncatingStore()); 4903 4904 AddToWorklist(Lo.getNode()); 4905 AddToWorklist(Hi.getNode()); 4906 4907 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 4908 } 4909 return SDValue(); 4910} 4911 4912SDValue DAGCombiner::visitMLOAD(SDNode *N) { 4913 4914 if (Level >= AfterLegalizeTypes) 4915 return SDValue(); 4916 4917 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 4918 SDValue Mask = MLD->getMask(); 4919 SDLoc DL(N); 4920 4921 // If the MLOAD result requires splitting and the mask is provided by a 4922 // SETCC, then split both nodes and its operands before legalization. This 4923 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4924 // and enables future optimizations (e.g. min/max pattern matching on X86). 4925 4926 if (Mask.getOpcode() == ISD::SETCC) { 4927 EVT VT = N->getValueType(0); 4928 4929 // Check if any splitting is required. 4930 if (TLI.getTypeAction(*DAG.getContext(), VT) != 4931 TargetLowering::TypeSplitVector) 4932 return SDValue(); 4933 4934 SDValue MaskLo, MaskHi, Lo, Hi; 4935 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4936 4937 SDValue Src0 = MLD->getSrc0(); 4938 SDValue Src0Lo, Src0Hi; 4939 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 4940 4941 EVT LoVT, HiVT; 4942 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 4943 4944 SDValue Chain = MLD->getChain(); 4945 SDValue Ptr = MLD->getBasePtr(); 4946 EVT MemoryVT = MLD->getMemoryVT(); 4947 unsigned Alignment = MLD->getOriginalAlignment(); 4948 4949 // if Alignment is equal to the vector size, 4950 // take the half of it for the second part 4951 unsigned SecondHalfAlignment = 4952 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
/// Combine an ISD::VSELECT node: canonicalize the integer-abs idiom, split
/// wide vselect-of-setcc before type legalization, and fold away constant
/// all-ones/all-zeros selectors.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // selector
  SDValue N1 = N->getOperand(1);   // value if selector lane is true
  SDValue N2 = N->getOperand(2);   // value if selector lane is false
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match (x >= 0 ? x : -x) and (x > -1 ? x : -x): -x must be (sub 0, x).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // Match the mirrored form (x <= 0 ? -x : x).
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Emit the branch-free abs sequence: Y = X >> (bits-1) (arithmetic),
      // result = (X + Y) ^ Y.
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SDValue CV = ConvertSelectToConcatVector(N, DAG);
    if (CV.getNode())
      return CV;
  }

  return SDValue();
}
5073 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 5074 N2.getOpcode() == ISD::CONCAT_VECTORS && 5075 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 5076 SDValue CV = ConvertSelectToConcatVector(N, DAG); 5077 if (CV.getNode()) 5078 return CV; 5079 } 5080 5081 return SDValue(); 5082} 5083 5084SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 5085 SDValue N0 = N->getOperand(0); 5086 SDValue N1 = N->getOperand(1); 5087 SDValue N2 = N->getOperand(2); 5088 SDValue N3 = N->getOperand(3); 5089 SDValue N4 = N->getOperand(4); 5090 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 5091 5092 // fold select_cc lhs, rhs, x, x, cc -> x 5093 if (N2 == N3) 5094 return N2; 5095 5096 // Determine if the condition we're dealing with is constant 5097 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 5098 N0, N1, CC, SDLoc(N), false); 5099 if (SCC.getNode()) { 5100 AddToWorklist(SCC.getNode()); 5101 5102 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 5103 if (!SCCC->isNullValue()) 5104 return N2; // cond always true -> true val 5105 else 5106 return N3; // cond always false -> false val 5107 } else if (SCC->getOpcode() == ISD::UNDEF) { 5108 // When the condition is UNDEF, just return the first operand. This is 5109 // coherent the DAG creation, no setcc node is created in this case 5110 return N2; 5111 } else if (SCC.getOpcode() == ISD::SETCC) { 5112 // Fold to a simpler select_cc 5113 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 5114 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 5115 SCC.getOperand(2)); 5116 } 5117 } 5118 5119 // If we can fold this based on the true/false value, do so. 5120 if (SimplifySelectOps(N, N2, N3)) 5121 return SDValue(N, 0); // Don't revisit N. 
5122 5123 // fold select_cc into other things, such as min/max/abs 5124 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5125} 5126 5127SDValue DAGCombiner::visitSETCC(SDNode *N) { 5128 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5129 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5130 SDLoc(N)); 5131} 5132 5133// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext 5134// dag node into a ConstantSDNode or a build_vector of constants. 5135// This function is called by the DAGCombiner when visiting sext/zext/aext 5136// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5137// Vector extends are not folded if operations are legal; this is to 5138// avoid introducing illegal build_vector dag nodes. 5139static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5140 SelectionDAG &DAG, bool LegalTypes, 5141 bool LegalOperations) { 5142 unsigned Opcode = N->getOpcode(); 5143 SDValue N0 = N->getOperand(0); 5144 EVT VT = N->getValueType(0); 5145 5146 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5147 Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); 5148 5149 // fold (sext c1) -> c1 5150 // fold (zext c1) -> c1 5151 // fold (aext c1) -> c1 5152 if (isa<ConstantSDNode>(N0)) 5153 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5154 5155 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5156 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5157 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5158 EVT SVT = VT.getScalarType(); 5159 if (!(VT.isVector() && 5160 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5161 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5162 return nullptr; 5163 5164 // We can fold this node into a build_vector. 
5165 unsigned VTBits = SVT.getSizeInBits(); 5166 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5167 unsigned ShAmt = VTBits - EVTBits; 5168 SmallVector<SDValue, 8> Elts; 5169 unsigned NumElts = N0->getNumOperands(); 5170 SDLoc DL(N); 5171 5172 for (unsigned i=0; i != NumElts; ++i) { 5173 SDValue Op = N0->getOperand(i); 5174 if (Op->getOpcode() == ISD::UNDEF) { 5175 Elts.push_back(DAG.getUNDEF(SVT)); 5176 continue; 5177 } 5178 5179 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 5180 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 5181 if (Opcode == ISD::SIGN_EXTEND) 5182 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 5183 SVT)); 5184 else 5185 Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), 5186 SVT)); 5187 } 5188 5189 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5190} 5191 5192// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5193// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5194// transformation. Returns true if extension are possible and the above 5195// mentioned transformation is profitable. 5196static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5197 unsigned ExtOpc, 5198 SmallVectorImpl<SDNode *> &ExtendNodes, 5199 const TargetLowering &TLI) { 5200 bool HasCopyToRegUses = false; 5201 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5202 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5203 UE = N0.getNode()->use_end(); 5204 UI != UE; ++UI) { 5205 SDNode *User = *UI; 5206 if (User == N) 5207 continue; 5208 if (UI.getUse().getResNo() != N0.getResNo()) 5209 continue; 5210 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5211 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5212 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5213 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5214 // Sign bits will be lost after a zext. 5215 return false; 5216 bool Add = false; 5217 for (unsigned i = 0; i != 2; ++i) { 5218 SDValue UseOp = User->getOperand(i); 5219 if (UseOp == N0) 5220 continue; 5221 if (!isa<ConstantSDNode>(UseOp)) 5222 return false; 5223 Add = true; 5224 } 5225 if (Add) 5226 ExtendNodes.push_back(User); 5227 continue; 5228 } 5229 // If truncates aren't free and there are users we can't 5230 // extend, it isn't worthwhile. 5231 if (!isTruncFree) 5232 return false; 5233 // Remember if this value is live-out. 5234 if (User->getOpcode() == ISD::CopyToReg) 5235 HasCopyToRegUses = true; 5236 } 5237 5238 if (HasCopyToRegUses) { 5239 bool BothLiveOut = false; 5240 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5241 UI != UE; ++UI) { 5242 SDUse &Use = UI.getUse(); 5243 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5244 BothLiveOut = true; 5245 break; 5246 } 5247 } 5248 if (BothLiveOut) 5249 // Both unextended and extended values are live out. There had better be 5250 // a good reason for the transformation. 5251 return ExtendNodes.size(); 5252 } 5253 return true; 5254} 5255 5256void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5257 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5258 ISD::NodeType ExtType) { 5259 // Extend SetCC uses if necessary. 
/// Combine an ISD::SIGN_EXTEND node. Tries, in order: constant folding,
/// collapsing nested extends, trunc/sext elimination, turning extended
/// loads into sextloads (extending their setcc users when profitable),
/// setcc-based select forms, and sext->zext when the sign bit is known zero.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (sext constant / build_vector-of-constants) directly.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Resize Op to VT first, then sign-extend in-register from the
      // truncated width.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the setcc users can be
    // extended too (collected in SetCCs).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the `(!LegalOperations && TLI.isOperationLegal(...))`
  // conjunction differs from the usual `(!LegalOperations || ...)` pattern
  // used elsewhere in this function — confirm whether `&&` is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the logic-op constant to the wider type as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        // Rebuild the compare at the legal setcc type and select -1/0.
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}
TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { 5474 SDLoc DL(N); 5475 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5476 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, 5477 N0.getOperand(0), N0.getOperand(1), CC); 5478 return DAG.getSelect(DL, VT, SetCC, 5479 NegOne, DAG.getConstant(0, VT)); 5480 } 5481 } 5482 } 5483 5484 // fold (sext x) -> (zext x) if the sign bit is known zero. 5485 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 5486 DAG.SignBitIsZero(N0)) 5487 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 5488 5489 return SDValue(); 5490} 5491 5492// isTruncateOf - If N is a truncate of some other value, return true, record 5493// the value being truncated in Op and which of Op's bits are zero in KnownZero. 5494// This function computes KnownZero to avoid a duplicated call to 5495// computeKnownBits in the caller. 5496static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 5497 APInt &KnownZero) { 5498 APInt KnownOne; 5499 if (N->getOpcode() == ISD::TRUNCATE) { 5500 Op = N->getOperand(0); 5501 DAG.computeKnownBits(Op, KnownZero, KnownOne); 5502 return true; 5503 } 5504 5505 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 5506 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 5507 return false; 5508 5509 SDValue Op0 = N->getOperand(0); 5510 SDValue Op1 = N->getOperand(1); 5511 assert(Op0.getValueType() == Op1.getValueType()); 5512 5513 ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); 5514 ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); 5515 if (COp0 && COp0->isNullValue()) 5516 Op = Op1; 5517 else if (COp1 && COp1->isNullValue()) 5518 Op = Op0; 5519 else 5520 return false; 5521 5522 DAG.computeKnownBits(Op, KnownZero, KnownOne); 5523 5524 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 5525 return false; 5526 5527 return true; 5528} 5529 5530SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 5531 SDValue N0 = N->getOperand(0); 5532 EVT VT 
= N->getValueType(0); 5533 5534 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 5535 LegalOperations)) 5536 return SDValue(Res, 0); 5537 5538 // fold (zext (zext x)) -> (zext x) 5539 // fold (zext (aext x)) -> (zext x) 5540 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 5541 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, 5542 N0.getOperand(0)); 5543 5544 // fold (zext (truncate x)) -> (zext x) or 5545 // (zext (truncate x)) -> (truncate x) 5546 // This is valid when the truncated bits of x are already zero. 5547 // FIXME: We should extend this to work for vectors too. 5548 SDValue Op; 5549 APInt KnownZero; 5550 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { 5551 APInt TruncatedBits = 5552 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 5553 APInt(Op.getValueSizeInBits(), 0) : 5554 APInt::getBitsSet(Op.getValueSizeInBits(), 5555 N0.getValueSizeInBits(), 5556 std::min(Op.getValueSizeInBits(), 5557 VT.getSizeInBits())); 5558 if (TruncatedBits == (KnownZero & TruncatedBits)) { 5559 if (VT.bitsGT(Op.getValueType())) 5560 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); 5561 if (VT.bitsLT(Op.getValueType())) 5562 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 5563 5564 return Op; 5565 } 5566 } 5567 5568 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 5569 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 5570 if (N0.getOpcode() == ISD::TRUNCATE) { 5571 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 5572 if (NarrowLoad.getNode()) { 5573 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 5574 if (NarrowLoad.getNode() != N0.getNode()) { 5575 CombineTo(N0.getNode(), NarrowLoad); 5576 // CombineTo deleted the truncate, if needed, but not what's under it. 5577 AddToWorklist(oye); 5578 } 5579 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Widen (or narrow) the truncate's input to VT, then mask off the bits
    // above the originally-truncated width with a zero-extend-in-reg (AND).
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    // Zero-extending the mask keeps exactly the bits the zext would keep.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, the fold only pays off if the other users (setcc
    // nodes) can be extended as well.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the old load see a truncate of the new wide load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // NOTE(review): the '&&' below means this fires only *before* legalization
  // AND when the logic op is legal at VT; sibling folds gate with
  // '(!LegalOperations || isOperationLegal(...))' — confirm the conjunction
  // here is intentional.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A sextload can't be re-used as a zextload; indexed loads produce an
    // extra value we can't handle here.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // If the vsetcc already produces the natural result type there is
      // nothing to improve here.
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter). Check to see that they are the same size. If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        // Mask the all-ones/all-zeros vsetcc result down to 0/1 lanes.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // Sign-extend-or-truncate the lane-sized booleans, then AND with 1
      // so the final lanes are 0/1 as a zext requires.
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = how many high bits of the inner zext are known zero.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    // (Only very wide results, >= 256 bits, need the shift amount widened.)
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

/// Visit an ISD::ANY_EXTEND node, trying the aext-specific folds below.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    // The AND mask pins the high bits to zero, so the anyext bits are defined.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the old load see a truncate of the new wide load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the extending load at the wider result type, preserving
      // whatever extension kind (zext/sext/ext) it already had.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
5954SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 5955 switch (V.getOpcode()) { 5956 default: break; 5957 case ISD::Constant: { 5958 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 5959 assert(CV && "Const value should be ConstSDNode."); 5960 const APInt &CVal = CV->getAPIntValue(); 5961 APInt NewVal = CVal & Mask; 5962 if (NewVal != CVal) 5963 return DAG.getConstant(NewVal, V.getValueType()); 5964 break; 5965 } 5966 case ISD::OR: 5967 case ISD::XOR: 5968 // If the LHS or RHS don't contribute bits to the or, drop them. 5969 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 5970 return V.getOperand(1); 5971 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 5972 return V.getOperand(0); 5973 break; 5974 case ISD::SRL: 5975 // Only look at single-use SRLs. 5976 if (!V.getNode()->hasOneUse()) 5977 break; 5978 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 5979 // See if we can recursively simplify the LHS. 5980 unsigned Amt = RHSC->getZExtValue(); 5981 5982 // Watch out for shift count overflow though. 5983 if (Amt >= Mask.getBitWidth()) break; 5984 APInt NewMask = Mask << Amt; 5985 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 5986 if (SimplifyLHS.getNode()) 5987 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 5988 SimplifyLHS, V.getOperand(1)); 5989 } 5990 } 5991 return SDValue(); 5992} 5993 5994/// If the result of a wider load is shifted to right of N bits and then 5995/// truncated to a narrower type and where N is a multiple of number of bits of 5996/// the narrower type, transform it to a narrower load from address + N / num of 5997/// bits of new type. If the result is to be extended, also fold the extension 5998/// to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    // Re-point N0 at N itself so the SRL handling below inspects this
    // node's own operands.
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it. Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain). Don't transform a pre-increment
  // load, for example, which produces an extra value. Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Compute the byte offset and the (possibly reduced) alignment of the
  // narrower load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// Visit an ISD::SIGN_EXTEND_INREG node, trying the sext_inreg-specific
/// folds below. Returns the replacement value, or a null SDValue.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: the local 'EVT' shadows the type name EVT — it is the narrow type
  // the value is being sign-extended *from* (carried in the VTSDNode operand).
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      // shl then ashr by the same amount sign-extends the low EVTBits of
      // each constant element.
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

/// Visit an ISD::TRUNCATE node, trying the truncate-specific folds below.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the source vector with SizeRatio-times-more elements of the
    // truncated element type; total bit size must be unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy();
      // On big-endian targets the low-order sub-element sits at the high
      // index within each original element.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         SDLoc(N), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
6391 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 6392 unsigned TruncVecNumElts = VT.getVectorNumElements(); 6393 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 6394 6395 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 6396 "Invalid number of elements"); 6397 6398 SmallVector<SDValue, 8> Opnds; 6399 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 6400 Opnds.push_back(BuildVect.getOperand(i)); 6401 6402 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 6403 } 6404 } 6405 6406 // See if we can simplify the input to this truncate through knowledge that 6407 // only the low bits are being used. 6408 // For example "trunc (or (shl x, 8), y)" // -> trunc y 6409 // Currently we only perform this optimization on scalars because vectors 6410 // may have different active low bits. 6411 if (!VT.isVector()) { 6412 SDValue Shorter = 6413 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 6414 VT.getSizeInBits())); 6415 if (Shorter.getNode()) 6416 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 6417 } 6418 // fold (truncate (load x)) -> (smaller load x) 6419 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 6420 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 6421 SDValue Reduced = ReduceLoadWidth(N); 6422 if (Reduced.getNode()) 6423 return Reduced; 6424 // Handle the case where the load remains an extending load even 6425 // after truncation. 
6426 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 6427 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6428 if (!LN0->isVolatile() && 6429 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 6430 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 6431 VT, LN0->getChain(), LN0->getBasePtr(), 6432 LN0->getMemoryVT(), 6433 LN0->getMemOperand()); 6434 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 6435 return NewLoad; 6436 } 6437 } 6438 } 6439 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 6440 // where ... are all 'undef'. 6441 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 6442 SmallVector<EVT, 8> VTs; 6443 SDValue V; 6444 unsigned Idx = 0; 6445 unsigned NumDefs = 0; 6446 6447 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 6448 SDValue X = N0.getOperand(i); 6449 if (X.getOpcode() != ISD::UNDEF) { 6450 V = X; 6451 Idx = i; 6452 NumDefs++; 6453 } 6454 // Stop if more than one members are non-undef. 6455 if (NumDefs > 1) 6456 break; 6457 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 6458 VT.getVectorElementType(), 6459 X.getValueType().getVectorNumElements())); 6460 } 6461 6462 if (NumDefs == 0) 6463 return DAG.getUNDEF(VT); 6464 6465 if (NumDefs == 1) { 6466 assert(V.getNode() && "The single defined operand is empty!"); 6467 SmallVector<SDValue, 8> Opnds; 6468 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 6469 if (i != Idx) { 6470 Opnds.push_back(DAG.getUNDEF(VTs[i])); 6471 continue; 6472 } 6473 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 6474 AddToWorklist(NV.getNode()); 6475 Opnds.push_back(NV); 6476 } 6477 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 6478 } 6479 } 6480 6481 // Simplify the operands using demanded-bits information. 
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // No truncate fold applied.
  return SDValue();
}

/// Return the node producing operand \p i of BUILD_PAIR node \p N, looking
/// through a MERGE_VALUES wrapper to the underlying value.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  // Both elements must be loads from the same address space, and the first
  // must be a plain (non-extending) load with a single use.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    // Only form the wide load if LD1's alignment already satisfies the ABI
    // alignment of the wide type, and a wide LOAD is legal (or we are still
    // free to legalize it later).
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// Combine an ISD::BITCAST node: folds constants, collapses cast chains,
/// and rewrites casts of loads/FP sign-ops into cheaper integer forms.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
      TLI.hasBigEndianPartOrdering(VT) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign,
                                 LN0->getAAInfo());
      // Keep chain users of the original load ordered after the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
                                  N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         NewConv, DAG.getConstant(SignBit, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       NewConv, DAG.getConstant(~SignBit, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                              IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        X = DAG.getNode(ISD::SRL, SDLoc(X),
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // Isolate the sign bit taken from x ...
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, VT));
      AddToWorklist(X.getNode());

      // ... and the magnitude bits of the constant, then merge the two.
      SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

/// Combine an ISD::BUILD_PAIR node: the only fold is merging two consecutive
/// loads into a single wide load.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      // Pack NumInputsPerOutput narrow constants into one wide constant.
      // On little-endian targets the last narrow element carries the most
      // significant bits, hence the reversed operand order.
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // A wide element built entirely from undefs stays undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the low chunk first, shifting the remaining bits down each step.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}

/// Combine an ISD::FADD node: constant folding, canonicalization,
/// fneg/fsub rewrites, unsafe-math reassociation, and FADD->FMA fusion.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // NOTE(review): the '== 2' result of isNegatibleForFree appears to mean
  // the negated form is strictly cheaper — confirm against the helper's
  // contract, which is defined elsewhere in this file.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     N0.getOperand(1), N1));

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1))
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N1, DAG.getConstantFP(3.0, VT));
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0)
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0, DAG.getConstantFP(3.0, VT));
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(0), DAG.getConstantFP(4.0, VT));
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)), N1);
      }

      // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
      // Note: Commutes FADD operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)), N0);
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       N1));

      // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
      if (N1->getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N1.getOperand(0), N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N1.getOperand(2).getOperand(0),
                                       N1.getOperand(2).getOperand(1),
                                       N0));
    }
  }

  return SDValue();
}

/// Combine an ISD::FSUB node: constant folding, fneg rewrites,
/// unsafe-math simplifications, and FSUB->FMA fusion.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->getValueAPF().isZero()) {
      // Prefer a free negation of B; otherwise emit an explicit FNEG if legal.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, dl, VT, N1);
    }

    // (fsub x, x) -> 0.0
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);

    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
         TLI.enableAggressiveFMAFusion(VT))) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fsub (fpext (fmul x, y)), z)
      //   -> (fma (fpext x), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)),
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
      }

      // fold (fsub x, (fpext (fmul y, z)))
      //   -> (fma (fneg (fpext y)), (fpext z), x)
      // Note: Commutes FSUB operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                         DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                     VT, N10.getOperand(0))),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)),
                             N0);
      }

      // fold (fsub (fpext (fneg (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FNEG) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }

      // fold (fsub (fneg (fpext (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      // Same as above with the fneg/fpext nesting swapped.
      if (N0.getOpcode() == ISD::FNEG) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FP_EXTEND) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fsub (fma x, y, (fmul u, v)), z)
      //   -> (fma x, y, (fma u, v, (fneg z)))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N1)));

      // fold (fsub x, (fma y, z, (fmul u, v)))
      //   -> (fma (fneg y), z, (fma (fneg u), v, x))
      if (N1.getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL) {
        SDValue N20 = N1.getOperand(2).getOperand(0);
        SDValue N21 = N1.getOperand(2).getOperand(1);
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N20),
                                       N21, N0));
      }
    }
  }

  return SDValue();
}

/// Combine an ISD::FMUL node: constant folding, canonicalization,
/// identity/negation folds, and unsafe-math reassociation.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode())
      return FoldedVOp;
    // Canonicalize vector constant to RHS.
    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
        N1.getOpcode() != ISD::BUILD_VECTOR)
      if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
        if (BV0->isConstant())
          return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // (Unsafe: ignores NaN/Inf inputs and signed zero.)
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
        SDLoc SL(N);
        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
      SDLoc SL(N);
      const SDValue Two = DAG.getConstantFP(2.0, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// Combine an ISD::FMA node.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  // When all three operands are constants, getNode constant-folds the FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z; ignores NaN/Inf and the
    // sign of zero, hence unsafe-math only.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1.0, y, z) -> (fadd y, z); (fma x, 1.0, z) -> (fadd x, z)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }

  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, VT)));

  return SDValue();
}

/// Visit an FDIV node. Constant-folds, and under unsafe-FP-math rewrites
/// division into multiplication: by a constant reciprocal, by a
/// target-provided rsqrt/reciprocal estimate, and by sharing one reciprocal
/// across several divisions with the same divisor.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                           DAG.getConstantFP(Recip, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Divisor is fpext(sqrt(z)): build the estimate in the narrow type and
      // extend the result.
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Divisor is fpround(sqrt(z)): same idea, rounding the estimate and
      // preserving the original round's truncation flag (operand 1).
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        //   x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal.
  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
  // Notice that this is not always beneficial. One reason is different target
  // may have different costs for FDIV and FMUL, so sometimes the cost of two
  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
  if (Options.UnsafeFPMath) {
    // Skip if current node is a reciprocal.
    if (N0CFP && N0CFP->isExactlyValue(1.0))
      return SDValue();

    SmallVector<SDNode *, 4> Users;
    // Find all FDIV users of the same divisor. Note that N itself qualifies
    // and therefore ends up in Users as well.
    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
                              UE = N1.getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = UI.getUse().getUser();
      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
        Users.push_back(User);
    }

    if (TLI.combineRepeatedFPDivisors(Users.size())) {
      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);

      // Dividend / Divisor -> Dividend * Reciprocal
      // The FPOne check skips the Reciprocal node itself if it was CSE'd into
      // an existing user.
      // NOTE(review): ReplaceAllUsesWith is invoked inside the loop over the
      // snapshotted Users list (which includes N); presumably safe because the
      // list was collected up front, but confirm against use-list mutation.
      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
        if ((*I)->getOperand(0) != FPOne) {
          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
                                        (*I)->getOperand(0), Reciprocal);
          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
        }
      }
      return SDValue();
    }
  }

  return SDValue();
}

/// Visit an FREM node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);

  return SDValue();
}

/// Visit an FSQRT node. Under unsafe-FP-math, expand sqrt(X) into
/// X * rsqrt(X) using the target's reciprocal-sqrt estimate, with a select
/// to force the X == 0 case back to 0 (the estimate form would yield NaN).
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (DAG.getTarget().Options.UnsafeFPMath &&
      !TLI.isFsqrtCheap()) {
    // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
      EVT VT = RV.getValueType();
      RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
      AddToWorklist(RV.getNode());

      // Unfortunately, RV is now NaN if the input was exactly 0.
      // Select out this case and force the answer to 0.
      SDValue Zero = DAG.getConstantFP(0.0, VT);
      SDValue ZeroCmp =
          DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
                       N->getOperand(0), Zero, ISD::SETEQ);
      AddToWorklist(ZeroCmp.getNode());
      // NOTE(review): RV was already added to the worklist above; this second
      // add appears redundant (presumably AddToWorklist de-duplicates).
      AddToWorklist(RV.getNode());

      RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
                       SDLoc(N), VT, ZeroCmp, Zero, RV);
      return RV;
    }
  }
  return SDValue();
}

/// Visit an FCOPYSIGN node: constant-fold, turn a constant sign operand into
/// FABS / FNEG(FABS), and look through sign-irrelevant wrappers on either
/// operand.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  // The magnitude operand's own sign is irrelevant, so strip sign-changing
  // wrappers from it.
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // Extending/rounding preserves the sign, so take it from the source.
  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(0));

  return SDValue();
}

/// Visit a SINT_TO_FP node: constant-fold, switch to UINT_TO_FP when that is
/// the legal form and the sign bit is known zero, and turn conversions of
/// setcc results directly into select_cc of FP constants.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // A signed i1 setcc result sign-extends to -1 or 0.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // A zero-extended setcc result is 1 or 0.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Visit a UINT_TO_FP node: constant-fold, switch to SINT_TO_FP when that is
/// the legal form and the sign bit is known zero, and turn conversion of a
/// setcc result into a select_cc of FP constants.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Visit an FP_TO_SINT node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FP_TO_UINT node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FP_ROUND node. Operand 1 is the "truncating" flag: 1 means the
/// round loses no information. Folds round-of-extend, merges nested rounds,
/// and hoists the round through FCOPYSIGN.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value preserving truncation if both round's are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// Visit an FP_ROUND_INREG node; constant-folds by materializing the constant
/// in the in-register type and extending it back.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE(review): this local is literally named "EVT" and shadows the EVT
  // type name for the remainder of the function.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
  }

  return SDValue();
}

/// Visit an FP_EXTEND node: constant-fold, cancel against a truncating
/// FP_ROUND, and convert fpext(load) into an extending load.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original (narrow) load get a truncating round of the
    // extending load's value, and its chain users get the new chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// Visit an FCEIL node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FTRUNC node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FFLOOR node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
/// Visit an FNEG node: constant-fold, push the negation into a freely
/// negatible operand, and rewrite fneg(bitcast x) as an integer sign-bit XOR.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // Constant fold FNEG.
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      // Flip only the sign bit(s) in the integer domain.
      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only when the negated constant is
      // itself legal (as an immediate or as a legal ConstantFP node).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// Visit an FMINNUM node: constant-fold via APFloat minnum, and canonicalize
/// a constant LHS to the RHS.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Visit an FMAXNUM node: constant-fold via APFloat maxnum, and canonicalize
/// a constant LHS to the RHS.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Visit an FABS node: constant-fold, collapse nested sign-manipulating
/// nodes, and rewrite fabs(bitcast x) as an integer sign-bit AND-clear.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fabs c1) -> fabs(c1)
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      // Clear only the sign bit(s) in the integer domain.
      Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// Visit a BRCOND node (operands: chain, condition, destination block).
/// Tries to fold the condition into BR_CC, to replace an AND+SRL bit test
/// with a SETCC, and to turn an XOR condition into an equality SETCC.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look pass the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(SDLoc(N),
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);  // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // xor(xor(x,y), 1) with one use: invert the comparison sense.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
/// Visit a BR_CC node: simplify its embedded comparison with SimplifySetCC
/// and, when the result is still a SETCC, rebuild the BR_CC from it.
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
8192static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 8193 SelectionDAG &DAG, 8194 const TargetLowering &TLI) { 8195 EVT VT; 8196 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 8197 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 8198 return false; 8199 VT = Use->getValueType(0); 8200 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 8201 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 8202 return false; 8203 VT = ST->getValue().getValueType(); 8204 } else 8205 return false; 8206 8207 TargetLowering::AddrMode AM; 8208 if (N->getOpcode() == ISD::ADD) { 8209 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8210 if (Offset) 8211 // [reg +/- imm] 8212 AM.BaseOffs = Offset->getSExtValue(); 8213 else 8214 // [reg +/- reg] 8215 AM.Scale = 1; 8216 } else if (N->getOpcode() == ISD::SUB) { 8217 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8218 if (Offset) 8219 // [reg +/- imm] 8220 AM.BaseOffs = -Offset->getSExtValue(); 8221 else 8222 // [reg +/- reg] 8223 AM.Scale = 1; 8224 } else 8225 return false; 8226 8227 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 8228} 8229 8230/// Try turning a load/store into a pre-indexed load/store when the base 8231/// pointer is an add or subtract and it has other uses besides the load/store. 8232/// After the transformation, the new indexed load/store has effectively folded 8233/// the add/subtract in and all of its other uses are redirected to the 8234/// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify N as an unindexed load or store for which the target has a
  // legal pre-inc/pre-dec form, and grab its base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // NOTE: the swap is undone (twice) further down so that OtherUses
  // bookkeeping and the final rewrite see a consistent BasePtr/Offset.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode *Use : BasePtr.getNode()->uses()) {
      if (Use == Ptr.getNode())
        continue;

      if (Use->isPredecessorOf(N))
        continue;

      // Any non-add/sub use makes the whole rewrite of other uses unsafe;
      // give up on all of them.
      if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
      if (Op1.getNode() == BasePtr.getNode())
        std::swap(Op0, Op1);
      assert(Op0.getNode() == BasePtr.getNode() &&
             "Use of ADD/SUB but not an operand");

      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use);
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ?
1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 SDLoc(OtherUses[i]),
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify N as an unindexed load or store for which the target has a
  // legal post-inc/post-dec form, and grab its base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look through the other uses of the pointer for an ADD/SUB that can be
  // folded in as the post-increment/decrement of this access.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
                          ConstInc->getValueType(0));
  }

  // Pre/post-inc become ADD; pre/post-dec become SUB.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

/// Combine a LOAD node: delete dead loads, forward stored values, improve
/// alignment, re-chain through alias analysis, form indexed loads, and
/// slice the load into narrower loads where profitable.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
            DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                           LD->getValueType(0),
                           Chain, Ptr, LD->getPointerInfo(),
                           LD->getMemoryVT(),
                           LD->isVolatile(), LD->isNonTemporal(),
                           LD->isInvariant(), Align, LD->getAAInfo());
        return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // Honor the -combiner-alias-analysis override when present; otherwise ask
  // the subtarget whether it wants AA-driven chain adjustment.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// A slice always costs one load; it additionally costs a zext when the
    /// loaded type must be widened to the truncate's type and the target
    /// does not consider that zext free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  LoadedSlice(const LoadedSlice &LS)
      : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // Derive the alignment implied by accessing Offset bytes past an
    // Alignment-aligned address.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian =
        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
                              DAG->getConstant(Offset, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
9074static bool areSlicesNextToEachOther(const LoadedSlice &First, 9075 const LoadedSlice &Second) { 9076 assert(First.Origin == Second.Origin && First.Origin && 9077 "Unable to match different memory origins."); 9078 APInt UsedBits = First.getUsedBits(); 9079 assert((UsedBits & Second.getUsedBits()) == 0 && 9080 "Slices are not supposed to overlap."); 9081 UsedBits |= Second.getUsedBits(); 9082 return areUsedBitsDense(UsedBits); 9083} 9084 9085/// \brief Adjust the \p GlobalLSCost according to the target 9086/// paring capabilities and the layout of the slices. 9087/// \pre \p GlobalLSCost should account for at least as many loads as 9088/// there is in the slices in \p LoadedSlices. 9089static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, 9090 LoadedSlice::Cost &GlobalLSCost) { 9091 unsigned NumberOfSlices = LoadedSlices.size(); 9092 // If there is less than 2 elements, no pairing is possible. 9093 if (NumberOfSlices < 2) 9094 return; 9095 9096 // Sort the slices so that elements that are likely to be next to each 9097 // other in memory are next to each other in the list. 9098 std::sort(LoadedSlices.begin(), LoadedSlices.end(), 9099 [](const LoadedSlice &LHS, const LoadedSlice &RHS) { 9100 assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); 9101 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); 9102 }); 9103 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); 9104 // First (resp. Second) is the first (resp. Second) potentially candidate 9105 // to be placed in a paired load. 9106 const LoadedSlice *First = nullptr; 9107 const LoadedSlice *Second = nullptr; 9108 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, 9109 // Set the beginning of the pair. 9110 First = Second) { 9111 9112 Second = &LoadedSlices[CurrSlice]; 9113 9114 // If First is NULL, it means we start a new pair. 9115 // Get to the next slice. 
9116 if (!First) 9117 continue; 9118 9119 EVT LoadedType = First->getLoadedType(); 9120 9121 // If the types of the slices are different, we cannot pair them. 9122 if (LoadedType != Second->getLoadedType()) 9123 continue; 9124 9125 // Check if the target supplies paired loads for this type. 9126 unsigned RequiredAlignment = 0; 9127 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { 9128 // move to the next pair, this type is hopeless. 9129 Second = nullptr; 9130 continue; 9131 } 9132 // Check if we meet the alignment requirement. 9133 if (RequiredAlignment > First->getAlignment()) 9134 continue; 9135 9136 // Check that both loads are next to each other in memory. 9137 if (!areSlicesNextToEachOther(*First, *Second)) 9138 continue; 9139 9140 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); 9141 --GlobalLSCost.Loads; 9142 // Move to the next pair. 9143 Second = nullptr; 9144 } 9145} 9146 9147/// \brief Check the profitability of all involved LoadedSlice. 9148/// Currently, it is considered profitable if there is exactly two 9149/// involved slices (1) which are (2) next to each other in memory, and 9150/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). 9151/// 9152/// Note: The order of the elements in \p LoadedSlices may be modified, but not 9153/// the elements themselves. 9154/// 9155/// FIXME: When the cost model will be mature enough, we can relax 9156/// constraints (1) and (2). 9157static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, 9158 const APInt &UsedBits, bool ForCodeSize) { 9159 unsigned NumberOfSlices = LoadedSlices.size(); 9160 if (StressLoadSlicing) 9161 return NumberOfSlices > 1; 9162 9163 // Check (1). 9164 if (NumberOfSlices != 2) 9165 return false; 9166 9167 // Check (2). 9168 if (!areUsedBitsDense(UsedBits)) 9169 return false; 9170 9171 // Check (3). 
9172 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 9173 // The original code has one big load. 9174 OrigCost.Loads = 1; 9175 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 9176 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 9177 // Accumulate the cost of all the slices. 9178 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 9179 GlobalSlicingCost += SliceCost; 9180 9181 // Account as cost in the original configuration the gain obtained 9182 // with the current slices. 9183 OrigCost.addSliceGain(LS); 9184 } 9185 9186 // If the target supports paired load, adjust the cost accordingly. 9187 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 9188 return OrigCost > GlobalSlicingCost; 9189} 9190 9191/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 9192/// operations, split it in the various pieces being extracted. 9193/// 9194/// This sort of thing is introduced by SROA. 9195/// This slicing takes care not to insert overlapping loads. 9196/// \pre LI is a simple load (i.e., not an atomic or volatile load). 9197bool DAGCombiner::SliceUpLoad(SDNode *N) { 9198 if (Level < AfterLegalizeDAG) 9199 return false; 9200 9201 LoadSDNode *LD = cast<LoadSDNode>(N); 9202 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 9203 !LD->getValueType(0).isInteger()) 9204 return false; 9205 9206 // Keep track of already used bits to detect overlapping values. 9207 // In that case, we will just abort the transformation. 9208 APInt UsedBits(LD->getValueSizeInBits(0), 0); 9209 9210 SmallVector<LoadedSlice, 4> LoadedSlices; 9211 9212 // Check if this load is used as several smaller chunks of bits. 9213 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 9214 // of computation for each trunc. 9215 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 9216 UI != UIEnd; ++UI) { 9217 // Skip the uses of the chain. 
9218 if (UI.getUse().getResNo() != 0) 9219 continue; 9220 9221 SDNode *User = *UI; 9222 unsigned Shift = 0; 9223 9224 // Check if this is a trunc(lshr). 9225 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 9226 isa<ConstantSDNode>(User->getOperand(1))) { 9227 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 9228 User = *User->use_begin(); 9229 } 9230 9231 // At this point, User is a Truncate, iff we encountered, trunc or 9232 // trunc(lshr). 9233 if (User->getOpcode() != ISD::TRUNCATE) 9234 return false; 9235 9236 // The width of the type must be a power of 2 and greater than 8-bits. 9237 // Otherwise the load cannot be represented in LLVM IR. 9238 // Moreover, if we shifted with a non-8-bits multiple, the slice 9239 // will be across several bytes. We do not support that. 9240 unsigned Width = User->getValueSizeInBits(0); 9241 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 9242 return 0; 9243 9244 // Build the slice for this chain of computations. 9245 LoadedSlice LS(User, LD, Shift, &DAG); 9246 APInt CurrentUsedBits = LS.getUsedBits(); 9247 9248 // Check if this slice overlaps with another. 9249 if ((CurrentUsedBits & UsedBits) != 0) 9250 return false; 9251 // Update the bits used globally. 9252 UsedBits |= CurrentUsedBits; 9253 9254 // Check if the new slice would be legal. 9255 if (!LS.isLegal()) 9256 return false; 9257 9258 // Record the slice. 9259 LoadedSlices.push_back(LS); 9260 } 9261 9262 // Abort slicing if it does not seem to be profitable. 9263 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 9264 return false; 9265 9266 ++SlicedLoads; 9267 9268 // Rewrite each chain to use an independent load. 9269 // By construction, each chain can be represented by a unique load. 9270 9271 // Prepare the argument for the new token factor for all the slices. 
9272 SmallVector<SDValue, 8> ArgChains; 9273 for (SmallVectorImpl<LoadedSlice>::const_iterator 9274 LSIt = LoadedSlices.begin(), 9275 LSItEnd = LoadedSlices.end(); 9276 LSIt != LSItEnd; ++LSIt) { 9277 SDValue SliceInst = LSIt->loadSlice(); 9278 CombineTo(LSIt->Inst, SliceInst, true); 9279 if (SliceInst.getNode()->getOpcode() != ISD::LOAD) 9280 SliceInst = SliceInst.getOperand(0); 9281 assert(SliceInst->getOpcode() == ISD::LOAD && 9282 "It takes more than a zext to get to the loaded slice!!"); 9283 ArgChains.push_back(SliceInst.getValue(1)); 9284 } 9285 9286 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, 9287 ArgChains); 9288 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 9289 return true; 9290} 9291 9292/// Check to see if V is (and load (ptr), imm), where the load is having 9293/// specific bytes cleared out. If so, return the byte size being masked out 9294/// and the shift amount. 9295static std::pair<unsigned, unsigned> 9296CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 9297 std::pair<unsigned, unsigned> Result(0, 0); 9298 9299 // Check for the structure we're looking for. 9300 if (V->getOpcode() != ISD::AND || 9301 !isa<ConstantSDNode>(V->getOperand(1)) || 9302 !ISD::isNormalLoad(V->getOperand(0).getNode())) 9303 return Result; 9304 9305 // Check the chain and pointer. 9306 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 9307 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 9308 9309 // The store should be chained directly to the load or be an operand of a 9310 // tokenfactor. 9311 if (LD == Chain.getNode()) 9312 ; // ok. 9313 else if (Chain->getOpcode() != ISD::TokenFactor) 9314 return Result; // Fail. 9315 else { 9316 bool isOk = false; 9317 for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) 9318 if (Chain->getOperand(i).getNode() == LD) { 9319 isOk = true; 9320 break; 9321 } 9322 if (!isOk) return Result; 9323 } 9324 9325 // This only handles simple types. 
9326 if (V.getValueType() != MVT::i16 && 9327 V.getValueType() != MVT::i32 && 9328 V.getValueType() != MVT::i64) 9329 return Result; 9330 9331 // Check the constant mask. Invert it so that the bits being masked out are 9332 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 9333 // follow the sign bit for uniformity. 9334 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 9335 unsigned NotMaskLZ = countLeadingZeros(NotMask); 9336 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 9337 unsigned NotMaskTZ = countTrailingZeros(NotMask); 9338 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 9339 if (NotMaskLZ == 64) return Result; // All zero mask. 9340 9341 // See if we have a continuous run of bits. If so, we have 0*1+0* 9342 if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) 9343 return Result; 9344 9345 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 9346 if (V.getValueType() != MVT::i64 && NotMaskLZ) 9347 NotMaskLZ -= 64-V.getValueSizeInBits(); 9348 9349 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 9350 switch (MaskedBytes) { 9351 case 1: 9352 case 2: 9353 case 4: break; 9354 default: return Result; // All one mask, or 5-byte mask. 9355 } 9356 9357 // Verify that the first bit starts at a multiple of mask so that the access 9358 // is aligned the same as the access width. 9359 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 9360 9361 Result.first = MaskedBytes; 9362 Result.second = NotMaskTZ/8; 9363 return Result; 9364} 9365 9366 9367/// Check to see if IVal is something that provides a value as specified by 9368/// MaskInfo. If so, replace the specified store with a narrower store of 9369/// truncated IVal. 
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo: (number of bytes being masked, byte shift of the mask),
  // as produced by CheckForMaskedLoad.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian the masked bytes sit ByteShift bytes from the start of
  // the value; on big-endian they are mirrored from the other end.
  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // Normalize AND to the "bits touched" form (invert the mask) so that the
    // trailing/leading-zero math below works uniformly for OR/XOR/AND.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only safe when every touched bit fits inside the narrowed window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the AND normalization done above.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
9553SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 9554 StoreSDNode *ST = cast<StoreSDNode>(N); 9555 SDValue Chain = ST->getChain(); 9556 SDValue Value = ST->getValue(); 9557 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 9558 Value.hasOneUse() && 9559 Chain == SDValue(Value.getNode(), 1)) { 9560 LoadSDNode *LD = cast<LoadSDNode>(Value); 9561 EVT VT = LD->getMemoryVT(); 9562 if (!VT.isFloatingPoint() || 9563 VT != ST->getMemoryVT() || 9564 LD->isNonTemporal() || 9565 ST->isNonTemporal() || 9566 LD->getPointerInfo().getAddrSpace() != 0 || 9567 ST->getPointerInfo().getAddrSpace() != 0) 9568 return SDValue(); 9569 9570 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 9571 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 9572 !TLI.isOperationLegal(ISD::STORE, IntVT) || 9573 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 9574 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 9575 return SDValue(); 9576 9577 unsigned LDAlign = LD->getAlignment(); 9578 unsigned STAlign = ST->getAlignment(); 9579 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 9580 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 9581 if (LDAlign < ABIAlign || STAlign < ABIAlign) 9582 return SDValue(); 9583 9584 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 9585 LD->getChain(), LD->getBasePtr(), 9586 LD->getPointerInfo(), 9587 false, false, false, LDAlign); 9588 9589 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 9590 NewLD, ST->getBasePtr(), 9591 ST->getPointerInfo(), 9592 false, false, STAlign); 9593 9594 AddToWorklist(NewLD.getNode()); 9595 AddToWorklist(NewST.getNode()); 9596 WorklistRemover DeadNodes(*this); 9597 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 9598 ++LdStFP2Int; 9599 return NewST; 9600 } 9601 9602 return SDValue(); 9603} 9604 9605/// Helper struct to parse and store a memory address as base + index + offset. 
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (add (i8 load %index)
///                                      (i8 1))))
/// vs
///
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                          (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;              // The base pointer of the address.
  SDValue Index;             // Optional variable index (null if none).
  int64_t Offset;            // Constant byte offset.
  bool IsIndexSignExt;       // True if Index was behind a sign extension.

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  /// Two addresses match if base, index, and sign-extension flag agree;
  /// the constant Offset is deliberately not compared.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // The inner add must carry a constant offset; otherwise give up and
    // treat the whole Ptr as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
9704 unsigned SequenceNum; 9705}; 9706 9707bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 9708 EVT MemVT = St->getMemoryVT(); 9709 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; 9710 bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). 9711 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); 9712 9713 // Don't merge vectors into wider inputs. 9714 if (MemVT.isVector() || !MemVT.isSimple()) 9715 return false; 9716 9717 // Perform an early exit check. Do not bother looking at stored values that 9718 // are not constants or loads. 9719 SDValue StoredVal = St->getValue(); 9720 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 9721 if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && 9722 !IsLoadSrc) 9723 return false; 9724 9725 // Only look at ends of store sequences. 9726 SDValue Chain = SDValue(St, 0); 9727 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 9728 return false; 9729 9730 // This holds the base pointer, index, and the offset in bytes from the base 9731 // pointer. 9732 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 9733 9734 // We must have a base and an offset. 9735 if (!BasePtr.Base.getNode()) 9736 return false; 9737 9738 // Do not handle stores to undef base pointers. 9739 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 9740 return false; 9741 9742 // Save the LoadSDNodes that we find in the chain. 9743 // We need to make sure that these nodes do not interfere with 9744 // any of the store nodes. 9745 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 9746 9747 // Save the StoreSDNodes that we find in the chain. 9748 SmallVector<MemOpLink, 8> StoreNodes; 9749 9750 // Walk up the chain and look for nodes with offsets from the same 9751 // base pointer. Stop when reaching an instruction with a different kind 9752 // or instruction which has a different base pointer. 
9753 unsigned Seq = 0; 9754 StoreSDNode *Index = St; 9755 while (Index) { 9756 // If the chain has more than one use, then we can't reorder the mem ops. 9757 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 9758 break; 9759 9760 // Find the base pointer and offset for this memory node. 9761 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 9762 9763 // Check that the base pointer is the same as the original one. 9764 if (!Ptr.equalBaseIndex(BasePtr)) 9765 break; 9766 9767 // Check that the alignment is the same. 9768 if (Index->getAlignment() != St->getAlignment()) 9769 break; 9770 9771 // The memory operands must not be volatile. 9772 if (Index->isVolatile() || Index->isIndexed()) 9773 break; 9774 9775 // No truncation. 9776 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 9777 if (St->isTruncatingStore()) 9778 break; 9779 9780 // The stored memory type must be the same. 9781 if (Index->getMemoryVT() != MemVT) 9782 break; 9783 9784 // We do not allow unaligned stores because we want to prevent overriding 9785 // stores. 9786 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 9787 break; 9788 9789 // We found a potential memory operand to merge. 9790 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 9791 9792 // Find the next memory operand in the chain. If the next operand in the 9793 // chain is a store then move up and continue the scan with the next 9794 // memory operand. If the next operand is a load save it and use alias 9795 // information to check if it interferes with anything. 9796 SDNode *NextInChain = Index->getChain().getNode(); 9797 while (1) { 9798 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 9799 // We found a store node. Use it for the next iteration. 9800 Index = STn; 9801 break; 9802 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 9803 if (Ldn->isVolatile()) { 9804 Index = nullptr; 9805 break; 9806 } 9807 9808 // Save the load node for later. Continue the scan. 
9809 AliasLoadNodes.push_back(Ldn); 9810 NextInChain = Ldn->getChain().getNode(); 9811 continue; 9812 } else { 9813 Index = nullptr; 9814 break; 9815 } 9816 } 9817 } 9818 9819 // Check if there is anything to merge. 9820 if (StoreNodes.size() < 2) 9821 return false; 9822 9823 // Sort the memory operands according to their distance from the base pointer. 9824 std::sort(StoreNodes.begin(), StoreNodes.end(), 9825 [](MemOpLink LHS, MemOpLink RHS) { 9826 return LHS.OffsetFromBase < RHS.OffsetFromBase || 9827 (LHS.OffsetFromBase == RHS.OffsetFromBase && 9828 LHS.SequenceNum > RHS.SequenceNum); 9829 }); 9830 9831 // Scan the memory operations on the chain and find the first non-consecutive 9832 // store memory address. 9833 unsigned LastConsecutiveStore = 0; 9834 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 9835 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 9836 9837 // Check that the addresses are consecutive starting from the second 9838 // element in the list of stores. 9839 if (i > 0) { 9840 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 9841 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 9842 break; 9843 } 9844 9845 bool Alias = false; 9846 // Check if this store interferes with any of the loads that we found. 9847 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 9848 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 9849 Alias = true; 9850 break; 9851 } 9852 // We found a load that alias with this store. Stop the sequence. 9853 if (Alias) 9854 break; 9855 9856 // Mark this node as useful. 9857 LastConsecutiveStore = i; 9858 } 9859 9860 // The node with the lowest store address. 9861 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 9862 9863 // Store the constants into memory as one consecutive store. 
9864 if (!IsLoadSrc) { 9865 unsigned LastLegalType = 0; 9866 unsigned LastLegalVectorType = 0; 9867 bool NonZero = false; 9868 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 9869 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9870 SDValue StoredVal = St->getValue(); 9871 9872 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 9873 NonZero |= !C->isNullValue(); 9874 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 9875 NonZero |= !C->getConstantFPValue()->isNullValue(); 9876 } else { 9877 // Non-constant. 9878 break; 9879 } 9880 9881 // Find a legal type for the constant store. 9882 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 9883 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9884 if (TLI.isTypeLegal(StoreTy)) 9885 LastLegalType = i+1; 9886 // Or check whether a truncstore is legal. 9887 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 9888 TargetLowering::TypePromoteInteger) { 9889 EVT LegalizedStoredValueTy = 9890 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 9891 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) 9892 LastLegalType = i+1; 9893 } 9894 9895 // Find a legal type for the vector store. 9896 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 9897 if (TLI.isTypeLegal(Ty)) 9898 LastLegalVectorType = i + 1; 9899 } 9900 9901 // We only use vectors if the constant is known to be zero and the 9902 // function is not marked with the noimplicitfloat attribute. 9903 if (NonZero || NoVectors) 9904 LastLegalVectorType = 0; 9905 9906 // Check if we found a legal integer type to store. 9907 if (LastLegalType == 0 && LastLegalVectorType == 0) 9908 return false; 9909 9910 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 9911 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 9912 9913 // Make sure we have something to merge. 
9914 if (NumElem < 2) 9915 return false; 9916 9917 unsigned EarliestNodeUsed = 0; 9918 for (unsigned i=0; i < NumElem; ++i) { 9919 // Find a chain for the new wide-store operand. Notice that some 9920 // of the store nodes that we found may not be selected for inclusion 9921 // in the wide store. The chain we use needs to be the chain of the 9922 // earliest store node which is *used* and replaced by the wide store. 9923 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 9924 EarliestNodeUsed = i; 9925 } 9926 9927 // The earliest Node in the DAG. 9928 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 9929 SDLoc DL(StoreNodes[0].MemNode); 9930 9931 SDValue StoredVal; 9932 if (UseVector) { 9933 // Find a legal type for the vector store. 9934 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 9935 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 9936 StoredVal = DAG.getConstant(0, Ty); 9937 } else { 9938 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 9939 APInt StoreInt(StoreBW, 0); 9940 9941 // Construct a single integer constant which is made of the smaller 9942 // constant inputs. 9943 bool IsLE = TLI.isLittleEndian(); 9944 for (unsigned i = 0; i < NumElem ; ++i) { 9945 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 9946 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 9947 SDValue Val = St->getValue(); 9948 StoreInt<<=ElementSizeBytes*8; 9949 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 9950 StoreInt|=C->getAPIntValue().zext(StoreBW); 9951 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 9952 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 9953 } else { 9954 llvm_unreachable("Invalid constant element type"); 9955 } 9956 } 9957 9958 // Create the new Load and Store operations. 
9959 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9960 StoredVal = DAG.getConstant(StoreInt, StoreTy); 9961 } 9962 9963 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 9964 FirstInChain->getBasePtr(), 9965 FirstInChain->getPointerInfo(), 9966 false, false, 9967 FirstInChain->getAlignment()); 9968 9969 // Replace the first store with the new store 9970 CombineTo(EarliestOp, NewStore); 9971 // Erase all other stores. 9972 for (unsigned i = 0; i < NumElem ; ++i) { 9973 if (StoreNodes[i].MemNode == EarliestOp) 9974 continue; 9975 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9976 // ReplaceAllUsesWith will replace all uses that existed when it was 9977 // called, but graph optimizations may cause new ones to appear. For 9978 // example, the case in pr14333 looks like 9979 // 9980 // St's chain -> St -> another store -> X 9981 // 9982 // And the only difference from St to the other store is the chain. 9983 // When we change it's chain to be St's chain they become identical, 9984 // get CSEed and the net result is that X is now a use of St. 9985 // Since we know that St is redundant, just iterate. 9986 while (!St->use_empty()) 9987 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 9988 deleteAndRecombine(St); 9989 } 9990 9991 return true; 9992 } 9993 9994 // Below we handle the case of multiple consecutive stores that 9995 // come from multiple consecutive loads. We merge them into a single 9996 // wide load and a single wide store. 9997 9998 // Look for load nodes which are used by the stored values. 9999 SmallVector<MemOpLink, 8> LoadNodes; 10000 10001 // Find acceptable loads. Loads need to have the same chain (token factor), 10002 // must not be zext, volatile, indexed, and they must be consecutive. 
10003 BaseIndexOffset LdBasePtr; 10004 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 10005 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10006 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 10007 if (!Ld) break; 10008 10009 // Loads must only have one use. 10010 if (!Ld->hasNUsesOfValue(1, 0)) 10011 break; 10012 10013 // Check that the alignment is the same as the stores. 10014 if (Ld->getAlignment() != St->getAlignment()) 10015 break; 10016 10017 // The memory operands must not be volatile. 10018 if (Ld->isVolatile() || Ld->isIndexed()) 10019 break; 10020 10021 // We do not accept ext loads. 10022 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 10023 break; 10024 10025 // The stored memory type must be the same. 10026 if (Ld->getMemoryVT() != MemVT) 10027 break; 10028 10029 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 10030 // If this is not the first ptr that we check. 10031 if (LdBasePtr.Base.getNode()) { 10032 // The base ptr must be the same. 10033 if (!LdPtr.equalBaseIndex(LdBasePtr)) 10034 break; 10035 } else { 10036 // Check that all other base pointers are the same as this one. 10037 LdBasePtr = LdPtr; 10038 } 10039 10040 // We found a potential memory operand to merge. 10041 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 10042 } 10043 10044 if (LoadNodes.size() < 2) 10045 return false; 10046 10047 // If we have load/store pair instructions and we only have two values, 10048 // don't bother. 10049 unsigned RequiredAlignment; 10050 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 10051 St->getAlignment() >= RequiredAlignment) 10052 return false; 10053 10054 // Scan the memory operations on the chain and find the first non-consecutive 10055 // load memory address. These variables hold the index in the store node 10056 // array. 10057 unsigned LastConsecutiveLoad = 0; 10058 // This variable refers to the size and not index in the array. 
10059 unsigned LastLegalVectorType = 0; 10060 unsigned LastLegalIntegerType = 0; 10061 StartAddress = LoadNodes[0].OffsetFromBase; 10062 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 10063 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 10064 // All loads much share the same chain. 10065 if (LoadNodes[i].MemNode->getChain() != FirstChain) 10066 break; 10067 10068 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 10069 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 10070 break; 10071 LastConsecutiveLoad = i; 10072 10073 // Find a legal type for the vector store. 10074 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 10075 if (TLI.isTypeLegal(StoreTy)) 10076 LastLegalVectorType = i + 1; 10077 10078 // Find a legal type for the integer store. 10079 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 10080 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10081 if (TLI.isTypeLegal(StoreTy)) 10082 LastLegalIntegerType = i + 1; 10083 // Or check whether a truncstore and extload is legal. 10084 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 10085 TargetLowering::TypePromoteInteger) { 10086 EVT LegalizedStoredValueTy = 10087 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 10088 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 10089 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10090 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10091 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) 10092 LastLegalIntegerType = i+1; 10093 } 10094 } 10095 10096 // Only use vector types if the vector type is larger than the integer type. 10097 // If they are the same, use integers. 
10098 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 10099 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 10100 10101 // We add +1 here because the LastXXX variables refer to location while 10102 // the NumElem refers to array/index size. 10103 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 10104 NumElem = std::min(LastLegalType, NumElem); 10105 10106 if (NumElem < 2) 10107 return false; 10108 10109 // The earliest Node in the DAG. 10110 unsigned EarliestNodeUsed = 0; 10111 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 10112 for (unsigned i=1; i<NumElem; ++i) { 10113 // Find a chain for the new wide-store operand. Notice that some 10114 // of the store nodes that we found may not be selected for inclusion 10115 // in the wide store. The chain we use needs to be the chain of the 10116 // earliest store node which is *used* and replaced by the wide store. 10117 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 10118 EarliestNodeUsed = i; 10119 } 10120 10121 // Find if it is better to use vectors or integers to load and store 10122 // to memory. 
10123 EVT JointMemOpVT; 10124 if (UseVectorTy) { 10125 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 10126 } else { 10127 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 10128 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10129 } 10130 10131 SDLoc LoadDL(LoadNodes[0].MemNode); 10132 SDLoc StoreDL(StoreNodes[0].MemNode); 10133 10134 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 10135 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 10136 FirstLoad->getChain(), 10137 FirstLoad->getBasePtr(), 10138 FirstLoad->getPointerInfo(), 10139 false, false, false, 10140 FirstLoad->getAlignment()); 10141 10142 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 10143 FirstInChain->getBasePtr(), 10144 FirstInChain->getPointerInfo(), false, false, 10145 FirstInChain->getAlignment()); 10146 10147 // Replace one of the loads with the new load. 10148 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 10149 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 10150 SDValue(NewLoad.getNode(), 1)); 10151 10152 // Remove the rest of the load chains. 10153 for (unsigned i = 1; i < NumElem ; ++i) { 10154 // Replace all chain users of the old load nodes with the chain of the new 10155 // load node. 10156 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 10157 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 10158 } 10159 10160 // Replace the first store with the new store. 10161 CombineTo(EarliestOp, NewStore); 10162 // Erase all other stores. 10163 for (unsigned i = 0; i < NumElem ; ++i) { 10164 // Remove all Store nodes. 
// (Tail of DAGCombiner::MergeConsecutiveStores: erase the individual stores
// that were folded into the merged wide store; the merged store has already
// replaced the earliest node via CombineTo.)
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Route chain users around the now-redundant store before deleting it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}

/// visitSTORE - Fold and simplify a STORE node.  Tries, in order: forwarding
/// a bitcast through the store, eliminating stores of undef, converting FP
/// constant stores to integer stores, inferring better alignment, FP
/// load/store pair conversion, alias-analysis chain improvement, indexed
/// store formation, demanded-bits narrowing of truncating stores, dead-store
/// elimination, consecutive-store merging, and store-width reduction.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    // Only fold if the bitcast source's ABI alignment does not exceed the
    // store's alignment, and a store of the source type is legal (or we are
    // still before legalization and the store is not volatile).
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not
    // increase the number of stores. For example, on x86-32 an f64 can be
    // stored in one processor operation but an i64 (which is not legal)
    // requires two. So the transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Reinterpret the f32 bits as an i32 constant.
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          AAMDNodes AAInfo = ST->getAAInfo();

          // Low half uses the original alignment; the high half at +4 can
          // only assume MinAlign(original, 4).
          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), AAInfo);
          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, AAInfo);
          // Both halves hang off the original chain; join them with a
          // TokenFactor so neither can be reordered past the other's users.
          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  // The -combiner-alias-analysis flag, when given, overrides the subtarget
  // default for using AA during DAG combining.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Don't add users to work list (the 'false' argument to CombineTo).
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                             APInt::getLowBitsSet(
                               Value.getValueType().getScalarType().getSizeInBits(),
                               ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.
// (Continuation of DAGCombiner::visitSTORE: FP_ROUND/TRUNCATE-of-store
// folding, consecutive-store merging, and store-width reduction.)
  // We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.  The loop also stops
      // if ST itself was deleted by a successful merge.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

/// visitINSERT_VECTOR_ELT - Simplify an INSERT_VECTOR_ELT node.
/// Canonicalizes chains of inserts by constant index, and folds an insert
/// into a single-use BUILD_VECTOR (or UNDEF) operand by rebuilding the
/// BUILD_VECTOR with the new element in place.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element.  An out-of-range constant index leaves Ops unchanged
  // (the insert is effectively dropped).
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
// (Continuation of visitINSERT_VECTOR_ELT: extend or truncate the inserted
// value so it matches the BUILD_VECTOR operand type.)
        DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
        DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

/// ReplaceExtractVectorEltOfLoadWithNarrowedLoad - Replace
/// (extract_vector_elt (load InVecVT), EltNo) with a narrow scalar load from
/// the element's address, RAUW'ing both the extract's value and the original
/// load's chain.  Returns SDValue() if the narrow load would be less aligned
/// than the vector load's ABI requirement or is not legal/custom.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + EltNo * eltsize,
  // with the offset mirrored for big-endian layouts.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    if (TLI.isBigEndian())
      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
    Offset = DAG.getConstant(PtrOff, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: emit the multiply in the DAG; pointer info stays at
    // the vector's base (no known constant offset).
    Offset = DAG.getNode(
        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
    if (TLI.isBigEndian())
      Offset = DAG.getNode(
          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is the same width or narrower: load the element and then
    // truncate or bitcast to the extract's result type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
10558 AddToWorklist(EVE); 10559 ++OpsNarrowed; 10560 return SDValue(EVE, 0); 10561} 10562 10563SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 10564 // (vextract (scalar_to_vector val, 0) -> val 10565 SDValue InVec = N->getOperand(0); 10566 EVT VT = InVec.getValueType(); 10567 EVT NVT = N->getValueType(0); 10568 10569 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 10570 // Check if the result type doesn't match the inserted element type. A 10571 // SCALAR_TO_VECTOR may truncate the inserted element and the 10572 // EXTRACT_VECTOR_ELT may widen the extracted vector. 10573 SDValue InOp = InVec.getOperand(0); 10574 if (InOp.getValueType() != NVT) { 10575 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 10576 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); 10577 } 10578 return InOp; 10579 } 10580 10581 SDValue EltNo = N->getOperand(1); 10582 bool ConstEltNo = isa<ConstantSDNode>(EltNo); 10583 10584 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 10585 // We only perform this optimization before the op legalization phase because 10586 // we may introduce new vector instructions which are not backed by TD 10587 // patterns. For example on AVX, extracting elements from a wide vector 10588 // without using extract_subvector. However, if we can find an underlying 10589 // scalar value, then we can always use that. 10590 if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE 10591 && ConstEltNo) { 10592 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 10593 int NumElem = VT.getVectorNumElements(); 10594 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 10595 // Find the new index to extract from. 10596 int OrigElt = SVOp->getMaskElt(Elt); 10597 10598 // Extracting an undef index is undef. 10599 if (OrigElt == -1) 10600 return DAG.getUNDEF(NVT); 10601 10602 // Select the right vector half to extract from. 
10603 SDValue SVInVec; 10604 if (OrigElt < NumElem) { 10605 SVInVec = InVec->getOperand(0); 10606 } else { 10607 SVInVec = InVec->getOperand(1); 10608 OrigElt -= NumElem; 10609 } 10610 10611 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 10612 SDValue InOp = SVInVec.getOperand(OrigElt); 10613 if (InOp.getValueType() != NVT) { 10614 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 10615 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 10616 } 10617 10618 return InOp; 10619 } 10620 10621 // FIXME: We should handle recursing on other vector shuffles and 10622 // scalar_to_vector here as well. 10623 10624 if (!LegalOperations) { 10625 EVT IndexTy = TLI.getVectorIdxTy(); 10626 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, 10627 SVInVec, DAG.getConstant(OrigElt, IndexTy)); 10628 } 10629 } 10630 10631 bool BCNumEltsChanged = false; 10632 EVT ExtVT = VT.getVectorElementType(); 10633 EVT LVT = ExtVT; 10634 10635 // If the result of load has to be truncated, then it's not necessarily 10636 // profitable. 10637 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 10638 return SDValue(); 10639 10640 if (InVec.getOpcode() == ISD::BITCAST) { 10641 // Don't duplicate a load with other uses. 
10642 if (!InVec.hasOneUse()) 10643 return SDValue(); 10644 10645 EVT BCVT = InVec.getOperand(0).getValueType(); 10646 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 10647 return SDValue(); 10648 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 10649 BCNumEltsChanged = true; 10650 InVec = InVec.getOperand(0); 10651 ExtVT = BCVT.getVectorElementType(); 10652 } 10653 10654 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 10655 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 10656 ISD::isNormalLoad(InVec.getNode()) && 10657 !N->getOperand(1)->hasPredecessor(InVec.getNode())) { 10658 SDValue Index = N->getOperand(1); 10659 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) 10660 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 10661 OrigLoad); 10662 } 10663 10664 // Perform only after legalization to ensure build_vector / vector_shuffle 10665 // optimizations have already been done. 10666 if (!LegalOperations) return SDValue(); 10667 10668 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 10669 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 10670 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 10671 10672 if (ConstEltNo) { 10673 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 10674 10675 LoadSDNode *LN0 = nullptr; 10676 const ShuffleVectorSDNode *SVN = nullptr; 10677 if (ISD::isNormalLoad(InVec.getNode())) { 10678 LN0 = cast<LoadSDNode>(InVec); 10679 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 10680 InVec.getOperand(0).getValueType() == ExtVT && 10681 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 10682 // Don't duplicate a load with other uses. 
      // Don't fold if the scalar_to_vector's input load has other users;
      // the transform would duplicate the load.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses 'Elt > (int)NumElems', but Elt ==
      // NumElems is also out of range for a mask with NumElems entries, so
      // '>=' looks intended here — TODO confirm.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      // Shuffle mask values < NumElems select from the first operand, values
      // >= NumElems select from the second.  A negative (undef) Idx takes the
      // first branch and is caught by the Elt == -1 undef check further down.
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the extract index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    // Replace the vector load + extract with a scalar load of the element.
    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every defined operand of the BUILD_VECTOR is a zero_extend or any_extend
// from one common narrow scalar type, rebuild the vector in the narrow type
// (filling the extra lanes with zero or undef) and bitcast back to the
// original type.  The simpler BUILD_VECTOR often folds further.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Zero-extends require the padding lanes to be zero; if every extend was an
  // any_extend the padding lanes may be undef.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // The narrow value occupies the low sub-lane of each wide lane on
    // little-endian targets and the high sub-lane on big-endian targets.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

// Fold (build_vector ([su]int_to_fp x0), ..., ([su]int_to_fp xN)) into
// ([su]int_to_fp (build_vector x0, ..., xN)) when the vectorized conversion
// is legal or custom for the target.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE serves as the "no conversion opcode seen yet" sentinel.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    // Every defined element must use the same conversion opcode.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Build the integer vector of conversion sources, keeping undef lanes undef.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  // Convert the whole vector at once.
  return DAG.getNode(Opcode, dl, VT, BV);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
10937 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 10938 return SDValue(); 10939 10940 SDValue VecIn1, VecIn2; 10941 bool UsesZeroVector = false; 10942 for (unsigned i = 0; i != NumInScalars; ++i) { 10943 SDValue Op = N->getOperand(i); 10944 // Ignore undef inputs. 10945 if (Op.getOpcode() == ISD::UNDEF) continue; 10946 10947 // See if we can combine this build_vector into a blend with a zero vector. 10948 if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && 10949 cast<ConstantSDNode>(Op.getNode())->isNullValue()) || 10950 (Op.getOpcode() == ISD::ConstantFP && 10951 cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { 10952 UsesZeroVector = true; 10953 continue; 10954 } 10955 10956 // If this input is something other than a EXTRACT_VECTOR_ELT with a 10957 // constant index, bail out. 10958 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10959 !isa<ConstantSDNode>(Op.getOperand(1))) { 10960 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10961 break; 10962 } 10963 10964 // We allow up to two distinct input vectors. 10965 SDValue ExtractedFromVec = Op.getOperand(0); 10966 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 10967 continue; 10968 10969 if (!VecIn1.getNode()) { 10970 VecIn1 = ExtractedFromVec; 10971 } else if (!VecIn2.getNode() && !UsesZeroVector) { 10972 VecIn2 = ExtractedFromVec; 10973 } else { 10974 // Too many inputs. 10975 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10976 break; 10977 } 10978 } 10979 10980 // If everything is good, we can make a shuffle operation. 10981 if (VecIn1.getNode()) { 10982 unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); 10983 SmallVector<int, 8> Mask; 10984 for (unsigned i = 0; i != NumInScalars; ++i) { 10985 unsigned Opcode = N->getOperand(i).getOpcode(); 10986 if (Opcode == ISD::UNDEF) { 10987 Mask.push_back(-1); 10988 continue; 10989 } 10990 10991 // Operands can also be zero. 
10992 if (Opcode != ISD::EXTRACT_VECTOR_ELT) { 10993 assert(UsesZeroVector && 10994 (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && 10995 "Unexpected node found!"); 10996 Mask.push_back(NumInScalars+i); 10997 continue; 10998 } 10999 11000 // If extracting from the first vector, just use the index directly. 11001 SDValue Extract = N->getOperand(i); 11002 SDValue ExtVal = Extract.getOperand(1); 11003 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 11004 if (Extract.getOperand(0) == VecIn1) { 11005 Mask.push_back(ExtIndex); 11006 continue; 11007 } 11008 11009 // Otherwise, use InIdx + InputVecSize 11010 Mask.push_back(InNumElements + ExtIndex); 11011 } 11012 11013 // Avoid introducing illegal shuffles with zero. 11014 if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) 11015 return SDValue(); 11016 11017 // We can't generate a shuffle node with mismatched input and output types. 11018 // Attempt to transform a single input vector to the correct type. 11019 if ((VT != VecIn1.getValueType())) { 11020 // If the input vector type has a different base type to the output 11021 // vector type, bail out. 11022 EVT VTElemType = VT.getVectorElementType(); 11023 if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || 11024 (VecIn2.getNode() && 11025 (VecIn2.getValueType().getVectorElementType() != VTElemType))) 11026 return SDValue(); 11027 11028 // If the input vector is too small, widen it. 11029 // We only support widening of vectors which are half the size of the 11030 // output registers. For example XMM->YMM widening on X86 with AVX. 11031 EVT VecInT = VecIn1.getValueType(); 11032 if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { 11033 // If we only have one small input, widen it by adding undef values. 
11034 if (!VecIn2.getNode()) 11035 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, 11036 DAG.getUNDEF(VecIn1.getValueType())); 11037 else if (VecIn1.getValueType() == VecIn2.getValueType()) { 11038 // If we have two small inputs of the same type, try to concat them. 11039 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); 11040 VecIn2 = SDValue(nullptr, 0); 11041 } else 11042 return SDValue(); 11043 } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { 11044 // If the input vector is too large, try to split it. 11045 // We don't support having two input vectors that are too large.
| 2791 } 2792 } 2793 2794 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2795 // actually legal and isn't going to get expanded, else this is a false 2796 // optimisation. 2797 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2798 Load->getValueType(0), 2799 Load->getMemoryVT()); 2800 2801 // Resize the constant to the same size as the original memory access before 2802 // extension. If it is still the AllOnesValue then this AND is completely 2803 // unneeded. 2804 Constant = 2805 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2806 2807 bool B; 2808 switch (Load->getExtensionType()) { 2809 default: B = false; break; 2810 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2811 case ISD::ZEXTLOAD: 2812 case ISD::NON_EXTLOAD: B = true; break; 2813 } 2814 2815 if (B && Constant.isAllOnesValue()) { 2816 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2817 // preserve semantics once we get rid of the AND. 2818 SDValue NewLoad(Load, 0); 2819 if (Load->getExtensionType() == ISD::EXTLOAD) { 2820 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2821 Load->getValueType(0), SDLoc(Load), 2822 Load->getChain(), Load->getBasePtr(), 2823 Load->getOffset(), Load->getMemoryVT(), 2824 Load->getMemOperand()); 2825 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2826 if (Load->getNumValues() == 3) { 2827 // PRE/POST_INC loads have 3 values. 2828 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2829 NewLoad.getValue(2) }; 2830 CombineTo(Load, To, 3, true); 2831 } else { 2832 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2833 } 2834 } 2835 2836 // Fold the AND away, taking care not to fold to the old load node if we 2837 // replaced it. 2838 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2839 2840 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2841 } 2842 } 2843 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2844 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2845 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2846 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2847 2848 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2849 LL.getValueType().isInteger()) { 2850 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2851 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2852 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2853 LR.getValueType(), LL, RL); 2854 AddToWorklist(ORNode.getNode()); 2855 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2856 } 2857 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2858 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2859 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2860 LR.getValueType(), LL, RL); 2861 AddToWorklist(ANDNode.getNode()); 2862 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 2863 } 2864 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2865 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2866 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2867 LR.getValueType(), LL, RL); 2868 AddToWorklist(ORNode.getNode()); 2869 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2870 } 2871 } 2872 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2873 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2874 Op0 == Op1 && LL.getValueType().isInteger() && 2875 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2876 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2877 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2878 cast<ConstantSDNode>(RR)->isNullValue()))) { 2879 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2880 LL, DAG.getConstant(1, LL.getValueType())); 2881 AddToWorklist(ADDNode.getNode()); 2882 
return DAG.getSetCC(SDLoc(N), VT, ADDNode, 2883 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2884 } 2885 // canonicalize equivalent to ll == rl 2886 if (LL == RR && LR == RL) { 2887 Op1 = ISD::getSetCCSwappedOperands(Op1); 2888 std::swap(RL, RR); 2889 } 2890 if (LL == RL && LR == RR) { 2891 bool isInteger = LL.getValueType().isInteger(); 2892 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2893 if (Result != ISD::SETCC_INVALID && 2894 (!LegalOperations || 2895 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2896 TLI.isOperationLegal(ISD::SETCC, 2897 getSetCCResultType(N0.getSimpleValueType()))))) 2898 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 2899 LL, LR, Result); 2900 } 2901 } 2902 2903 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2904 if (N0.getOpcode() == N1.getOpcode()) { 2905 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2906 if (Tmp.getNode()) return Tmp; 2907 } 2908 2909 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2910 // fold (and (sra)) -> (and (srl)) when possible. 2911 if (!VT.isVector() && 2912 SimplifyDemandedBits(SDValue(N, 0))) 2913 return SDValue(N, 0); 2914 2915 // fold (zext_inreg (extload x)) -> (zextload x) 2916 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2917 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2918 EVT MemVT = LN0->getMemoryVT(); 2919 // If we zero all the possible extended bits, then we can turn this into 2920 // a zextload if we are running before legalize or the operation is legal. 
2921 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2922 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2923 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2924 ((!LegalOperations && !LN0->isVolatile()) || 2925 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2926 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2927 LN0->getChain(), LN0->getBasePtr(), 2928 MemVT, LN0->getMemOperand()); 2929 AddToWorklist(N); 2930 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2931 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2932 } 2933 } 2934 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2935 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2936 N0.hasOneUse()) { 2937 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2938 EVT MemVT = LN0->getMemoryVT(); 2939 // If we zero all the possible extended bits, then we can turn this into 2940 // a zextload if we are running before legalize or the operation is legal. 2941 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2942 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2943 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2944 ((!LegalOperations && !LN0->isVolatile()) || 2945 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2946 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2947 LN0->getChain(), LN0->getBasePtr(), 2948 MemVT, LN0->getMemOperand()); 2949 AddToWorklist(N); 2950 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2951 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2952 } 2953 } 2954 2955 // fold (and (load x), 255) -> (zextload x, i8) 2956 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2957 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2958 if (N1C && (N0.getOpcode() == ISD::LOAD || 2959 (N0.getOpcode() == ISD::ANY_EXTEND && 2960 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2961 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2962 LoadSDNode *LN0 = HasAnyExt 2963 ? cast<LoadSDNode>(N0.getOperand(0)) 2964 : cast<LoadSDNode>(N0); 2965 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2966 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2967 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2968 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2969 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2970 EVT LoadedVT = LN0->getMemoryVT(); 2971 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2972 2973 if (ExtVT == LoadedVT && 2974 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2975 ExtVT))) { 2976 2977 SDValue NewLoad = 2978 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2979 LN0->getChain(), LN0->getBasePtr(), ExtVT, 2980 LN0->getMemOperand()); 2981 AddToWorklist(N); 2982 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2983 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2984 } 2985 2986 // Do not change the width of a volatile load. 2987 // Do not generate loads of non-round integer types since these can 2988 // be expensive (and would be wrong if the type is not byte sized). 
2989 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2990 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2991 ExtVT))) { 2992 EVT PtrType = LN0->getOperand(1).getValueType(); 2993 2994 unsigned Alignment = LN0->getAlignment(); 2995 SDValue NewPtr = LN0->getBasePtr(); 2996 2997 // For big endian targets, we need to add an offset to the pointer 2998 // to load the correct bytes. For little endian systems, we merely 2999 // need to read fewer bytes from the same pointer. 3000 if (TLI.isBigEndian()) { 3001 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3002 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3003 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3004 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 3005 NewPtr, DAG.getConstant(PtrOff, PtrType)); 3006 Alignment = MinAlign(Alignment, PtrOff); 3007 } 3008 3009 AddToWorklist(NewPtr.getNode()); 3010 3011 SDValue Load = 3012 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3013 LN0->getChain(), NewPtr, 3014 LN0->getPointerInfo(), 3015 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3016 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3017 AddToWorklist(N); 3018 CombineTo(LN0, Load, Load.getValue(1)); 3019 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3020 } 3021 } 3022 } 3023 } 3024 3025 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 3026 VT.getSizeInBits() <= 64) { 3027 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3028 APInt ADDC = ADDI->getAPIntValue(); 3029 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 3030 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 3031 // immediate for an add, but it is legal if its top c2 bits are set, 3032 // transform the ADD so the immediate doesn't need to be materialized 3033 // in a register. 
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          // The SRL guarantees the top SRLI bits of the AND result are zero,
          // so setting those bits in the ADD immediate cannot change the
          // final masked value.
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, VT));
              CombineTo(N0.getNode(), NewAdd);
              return SDValue(N, 0); // Return N so it doesn't get rechecked!
            }
          }
        }
      }
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR (or AND-of-OR) node being combined; N0/N1 are its two halves.
/// When DemandHighBits is true the caller needs all bits above the low
/// halfword to be correct (zero); when false only the low 16 bits matter.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Run only after legalization so BSWAP legality below is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // LookPassAnd* record that an outer AND mask was peeled off each half.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize: the (and (shl ...))/0xFF00-masked half goes to N0 and the
  // (and (srl ...))/0xFF-masked half goes to N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After mask-peeling, require exactly one SHL-by-8 and one SRL-by-8 half.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must shift the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), then shift the swapped halfword down into the low 16 bits
  // for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success the source node for byte lane Num is recorded in Parts[Num];
/// a lane that has already been seen makes the match fail.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask (for AND) or the implied byte lane to a Parts slot.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    // AND is outermost: the inner node must be the matching 8-bit shift.
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Reject a second element claiming the same byte lane.
  if (Parts[Num])
    return false;

  // Record the ultimate source 'x' for this lane; the caller checks that all
  // four lanes agree on it.
  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Run only after legalization so the BSWAP legality check is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] collects the source node for byte lane i, filled in by
  // isBSwapHWordElement.
  SDNode *Parts[4] = {};

  // The operand-count checks guard the getOperand(1) accesses below.
  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
3299 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3300 return SDValue(); 3301 3302 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, 3303 SDValue(Parts[0],0)); 3304 3305 // Result of the bswap should be rotated by 16. If it's not legal, then 3306 // do (x << 16) | (x >> 16). 3307 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 3308 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3309 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); 3310 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3311 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); 3312 return DAG.getNode(ISD::OR, SDLoc(N), VT, 3313 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), 3314 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); 3315} 3316 3317SDValue DAGCombiner::visitOR(SDNode *N) { 3318 SDValue N0 = N->getOperand(0); 3319 SDValue N1 = N->getOperand(1); 3320 SDValue LL, LR, RL, RR, CC0, CC1; 3321 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3322 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3323 EVT VT = N1.getValueType(); 3324 3325 // fold vector ops 3326 if (VT.isVector()) { 3327 SDValue FoldedVOp = SimplifyVBinOp(N); 3328 if (FoldedVOp.getNode()) return FoldedVOp; 3329 3330 // fold (or x, 0) -> x, vector edition 3331 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3332 return N1; 3333 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3334 return N0; 3335 3336 // fold (or x, -1) -> -1, vector edition 3337 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3338 // do not return N0, because undef node may exist in N0 3339 return DAG.getConstant( 3340 APInt::getAllOnesValue( 3341 N0.getValueType().getScalarType().getSizeInBits()), 3342 N0.getValueType()); 3343 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3344 // do not return N1, because undef node may exist in N1 3345 return DAG.getConstant( 3346 APInt::getAllOnesValue( 3347 N1.getValueType().getScalarType().getSizeInBits()), 3348 N1.getValueType()); 3349 3350 // fold (or (shuf A, 
V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3351 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3352 // Do this only if the resulting shuffle is legal. 3353 if (isa<ShuffleVectorSDNode>(N0) && 3354 isa<ShuffleVectorSDNode>(N1) && 3355 // Avoid folding a node with illegal type. 3356 TLI.isTypeLegal(VT) && 3357 N0->getOperand(1) == N1->getOperand(1) && 3358 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3359 bool CanFold = true; 3360 unsigned NumElts = VT.getVectorNumElements(); 3361 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3362 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3363 // We construct two shuffle masks: 3364 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3365 // and N1 as the second operand. 3366 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3367 // and N0 as the second operand. 3368 // We do this because OR is commutable and therefore there might be 3369 // two ways to fold this node into a shuffle. 3370 SmallVector<int,4> Mask1; 3371 SmallVector<int,4> Mask2; 3372 3373 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3374 int M0 = SV0->getMaskElt(i); 3375 int M1 = SV1->getMaskElt(i); 3376 3377 // Both shuffle indexes are undef. Propagate Undef. 3378 if (M0 < 0 && M1 < 0) { 3379 Mask1.push_back(M0); 3380 Mask2.push_back(M0); 3381 continue; 3382 } 3383 3384 if (M0 < 0 || M1 < 0 || 3385 (M0 < (int)NumElts && M1 < (int)NumElts) || 3386 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3387 CanFold = false; 3388 break; 3389 } 3390 3391 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3392 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3393 } 3394 3395 if (CanFold) { 3396 // Fold this sequence only if the resulting shuffle is 'legal'. 
3397 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3398 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3399 N1->getOperand(0), &Mask1[0]); 3400 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3401 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3402 N0->getOperand(0), &Mask2[0]); 3403 } 3404 } 3405 } 3406 3407 // fold (or x, undef) -> -1 3408 if (!LegalOperations && 3409 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3410 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3411 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 3412 } 3413 // fold (or c1, c2) -> c1|c2 3414 if (N0C && N1C) 3415 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 3416 // canonicalize constant to RHS 3417 if (N0C && !N1C) 3418 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3419 // fold (or x, 0) -> x 3420 if (N1C && N1C->isNullValue()) 3421 return N0; 3422 // fold (or x, -1) -> -1 3423 if (N1C && N1C->isAllOnesValue()) 3424 return N1; 3425 // fold (or x, c) -> c iff (x & ~c) == 0 3426 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3427 return N1; 3428 3429 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3430 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3431 if (BSwap.getNode()) 3432 return BSwap; 3433 BSwap = MatchBSwapHWordLow(N, N0, N1); 3434 if (BSwap.getNode()) 3435 return BSwap; 3436 3437 // reassociate or 3438 SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); 3439 if (ROR.getNode()) 3440 return ROR; 3441 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3442 // iff (c1 & c2) == 0. 
3443 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3444 isa<ConstantSDNode>(N0.getOperand(1))) { 3445 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3446 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3447 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) 3448 return DAG.getNode( 3449 ISD::AND, SDLoc(N), VT, 3450 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3451 return SDValue(); 3452 } 3453 } 3454 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3455 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3456 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3457 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3458 3459 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 3460 LL.getValueType().isInteger()) { 3461 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3462 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3463 if (cast<ConstantSDNode>(LR)->isNullValue() && 3464 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3465 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3466 LR.getValueType(), LL, RL); 3467 AddToWorklist(ORNode.getNode()); 3468 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 3469 } 3470 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3471 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3472 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 3473 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3474 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3475 LR.getValueType(), LL, RL); 3476 AddToWorklist(ANDNode.getNode()); 3477 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 3478 } 3479 } 3480 // canonicalize equivalent to ll == rl 3481 if (LL == RR && LR == RL) { 3482 Op1 = ISD::getSetCCSwappedOperands(Op1); 3483 std::swap(RL, RR); 3484 } 3485 if (LL == RL && LR == RR) { 3486 bool isInteger = LL.getValueType().isInteger(); 3487 ISD::CondCode Result = 
ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3488 if (Result != ISD::SETCC_INVALID && 3489 (!LegalOperations || 3490 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3491 TLI.isOperationLegal(ISD::SETCC, 3492 getSetCCResultType(N0.getValueType()))))) 3493 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 3494 LL, LR, Result); 3495 } 3496 } 3497 3498 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3499 if (N0.getOpcode() == N1.getOpcode()) { 3500 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3501 if (Tmp.getNode()) return Tmp; 3502 } 3503 3504 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3505 if (N0.getOpcode() == ISD::AND && 3506 N1.getOpcode() == ISD::AND && 3507 N0.getOperand(1).getOpcode() == ISD::Constant && 3508 N1.getOperand(1).getOpcode() == ISD::Constant && 3509 // Don't increase # computations. 3510 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3511 // We can only do this xform if we know that bits from X that are set in C2 3512 // but not in C1 are already zero. Likewise for Y. 3513 const APInt &LHSMask = 3514 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 3515 const APInt &RHSMask = 3516 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 3517 3518 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3519 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3520 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3521 N0.getOperand(0), N1.getOperand(0)); 3522 return DAG.getNode(ISD::AND, SDLoc(N), VT, X, 3523 DAG.getConstant(LHSMask | RHSMask, VT)); 3524 } 3525 } 3526 3527 // See if this is some rotate idiom. 3528 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3529 return SDValue(Rot, 0); 3530 3531 // Simplify the operands using demanded-bits information. 3532 if (!VT.isVector() && 3533 SimplifyDemandedBits(SDValue(N, 0))) 3534 return SDValue(N, 0); 3535 3536 return SDValue(); 3537} 3538 3539/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
3540static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3541 if (Op.getOpcode() == ISD::AND) { 3542 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3543 Mask = Op.getOperand(1); 3544 Op = Op.getOperand(0); 3545 } else { 3546 return false; 3547 } 3548 } 3549 3550 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3551 Shift = Op; 3552 return true; 3553 } 3554 3555 return false; 3556} 3557 3558// Return true if we can prove that, whenever Neg and Pos are both in the 3559// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3560// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3561// 3562// (or (shift1 X, Neg), (shift2 X, Pos)) 3563// 3564// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3565// in direction shift1 by Neg. The range [0, OpSize) means that we only need 3566// to consider shift amounts with defined behavior. 3567static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3568 // If OpSize is a power of 2 then: 3569 // 3570 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3571 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3572 // 3573 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3574 // for the stronger condition: 3575 // 3576 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3577 // 3578 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3579 // we can just replace Neg with Neg' for the rest of the function. 3580 // 3581 // In other cases we check for the even stronger condition: 3582 // 3583 // Neg == OpSize - Pos [B] 3584 // 3585 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3586 // behavior if Pos == 0 (and consequently Neg == OpSize). 
3587 // 3588 // We could actually use [A] whenever OpSize is a power of 2, but the 3589 // only extra cases that it would match are those uninteresting ones 3590 // where Neg and Pos are never in range at the same time. E.g. for 3591 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3592 // as well as (sub 32, Pos), but: 3593 // 3594 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3595 // 3596 // always invokes undefined behavior for 32-bit X. 3597 // 3598 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3599 unsigned MaskLoBits = 0; 3600 if (Neg.getOpcode() == ISD::AND && 3601 isPowerOf2_64(OpSize) && 3602 Neg.getOperand(1).getOpcode() == ISD::Constant && 3603 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3604 Neg = Neg.getOperand(0); 3605 MaskLoBits = Log2_64(OpSize); 3606 } 3607 3608 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3609 if (Neg.getOpcode() != ISD::SUB) 3610 return 0; 3611 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3612 if (!NegC) 3613 return 0; 3614 SDValue NegOp1 = Neg.getOperand(1); 3615 3616 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3617 // Pos'. The truncation is redundant for the purpose of the equality. 3618 if (MaskLoBits && 3619 Pos.getOpcode() == ISD::AND && 3620 Pos.getOperand(1).getOpcode() == ISD::Constant && 3621 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3622 Pos = Pos.getOperand(0); 3623 3624 // The condition we need is now: 3625 // 3626 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3627 // 3628 // If NegOp1 == Pos then we need: 3629 // 3630 // OpSize & Mask == NegC & Mask 3631 // 3632 // (because "x & Mask" is a truncation and distributes through subtraction). 3633 APInt Width; 3634 if (Pos == NegOp1) 3635 Width = NegC->getAPIntValue(); 3636 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
3637 // Then the condition we want to prove becomes: 3638 // 3639 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3640 // 3641 // which, again because "x & Mask" is a truncation, becomes: 3642 // 3643 // NegC & Mask == (OpSize - PosC) & Mask 3644 // OpSize & Mask == (NegC + PosC) & Mask 3645 else if (Pos.getOpcode() == ISD::ADD && 3646 Pos.getOperand(0) == NegOp1 && 3647 Pos.getOperand(1).getOpcode() == ISD::Constant) 3648 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3649 NegC->getAPIntValue()); 3650 else 3651 return false; 3652 3653 // Now we just need to check that OpSize & Mask == Width & Mask. 3654 if (MaskLoBits) 3655 // Opsize & Mask is 0 since Mask is Opsize - 1. 3656 return Width.getLoBits(MaskLoBits) == 0; 3657 return Width == OpSize; 3658} 3659 3660// A subroutine of MatchRotate used once we have found an OR of two opposite 3661// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3662// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3663// former being preferred if supported. InnerPos and InnerNeg are Pos and 3664// Neg with outer conversions stripped away. 3665SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3666 SDValue Neg, SDValue InnerPos, 3667 SDValue InnerNeg, unsigned PosOpcode, 3668 unsigned NegOpcode, SDLoc DL) { 3669 // fold (or (shl x, (*ext y)), 3670 // (srl x, (*ext (sub 32, y)))) -> 3671 // (rotl x, y) or (rotr x, (sub 32, y)) 3672 // 3673 // fold (or (shl x, (*ext (sub 32, y))), 3674 // (srl x, (*ext y))) -> 3675 // (rotr x, y) or (rotl x, (sub 32, y)) 3676 EVT VT = Shifted.getValueType(); 3677 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3678 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3679 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3680 HasPos ? Pos : Neg).getNode(); 3681 } 3682 3683 return nullptr; 3684} 3685 3686// MatchRotate - Handle an 'or' of two operands. 
// If this is one of the many
// idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // The two constant shift amounts must cover the full width exactly.
    if ((LShVal + RShVal) != OpSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // Each side's mask only constrained the bits that side produced; the
      // bits supplied by the other side are left unmasked (set in RHSBits /
      // LHSBits below).
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: shl amount as "Pos" first, then srl amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

/// Visit an ISD::XOR node, applying the XOR-specific combines in order.
/// Returns the replacement value, SDValue(N, 0) if N was updated in place,
/// or a null SDValue if no combine applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode())
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: flip AND<->OR and invert both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // We can't and shouldn't fold opaque constants.
  if (Amt->isOpaque())
    return SDValue();

  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Push a truncate through an AND with a splat-constant mask:
/// (truncate (and x, c)) -> (and (truncate x), (truncate c)).
/// Only fires when both the truncate and the AND have a single use.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);

    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      APInt TruncC = N01C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());

      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
                         DAG.getConstant(TruncC, TruncVT));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  // (Continuation of the preceding visitor, whose start is above this chunk.)
  // fold (rot-like-op x, (trunc (and y, c))) by narrowing the AND through the
  // truncate of the shift amount.
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Combine an ISD::SHL node.  Applies a chain of local algebraic folds and
/// returns the replacement value, or an empty SDValue if nothing applied.
/// The ordering of the folds matters: earlier, cheaper folds short-circuit
/// later ones via the early returns.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // NOTE(review): N1C is refreshed with the splat constant only on this
        // else-path (non-AND operand); on the AND path above, later N1C-guarded
        // folds see the original dyn_cast result — confirm this is intended.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= the bit width leaves nothing behind.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // OpSizeInBits - InnerShiftSize is the number of bits the ext added.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, VT);
        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only handled when both shift amounts match (srl then shl by the
        // same amount, i.e. a mask of the low bits in the narrow type).
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
                                       NewOp0, DAG.getConstant(c2, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask keeps the bits that survive the srl; it is then adjusted in
        // the direction of the residual shift below.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                           DAG.getConstant(Mask, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()), VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // Generic shift-by-constant combines shared with SRA/SRL.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

/// Combine an ISD::SRA (arithmetic shift right) node.  Returns the
/// replacement value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a splat-of-constant shift amount counts as a constant.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // LowBits is the width of the value that survives the shl/sra pair.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An arithmetic shift by >= the bit width saturates to width-1
      // (it just replicates the sign bit).
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // NOTE(review): the residual shift is emitted as SRL — the bits it
        // shifts out are discarded by the truncate below, so logical vs
        // arithmetic makes no difference here.
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Generic shift-by-constant combines shared with SHL/SRL.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// Combine an ISD::SRL (logical shift right) node.  Returns the replacement
/// value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a splat-of-constant shift amount counts as a constant.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode
          *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined logical shift of >= the bit width produces zero.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    if (BitSize <= 64) {
      // ~0ULL >> ShAmt yields a mask of the (BitSize - c) low bits.
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that came from the undef high part of the anyext.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                         DAG.getConstant(Mask, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Generic shift-by-constant combines shared with SHL/SRA.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further.
  // Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine an ISD::CTLZ node: re-emit it when the operand is a constant so
/// the DAG builder constant-folds it to the count-leading-zeros value.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTLZ_ZERO_UNDEF node: constant-fold via getNode.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTTZ node: constant-fold via getNode.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTTZ_ZERO_UNDEF node: constant-fold via getNode.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTPOP node: constant-fold via getNode.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}


/// \brief Generate Min/Max node
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS,
                                   SDValue RHS,
                                   SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // Only handle the canonical select-of-compared-operands shape; anything
  // else is not a min/max.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // "less" compare: selecting the LHS picks the smaller value (FMINNUM);
    // the swapped select picks the larger (FMAXNUM).
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// Combine an ISD::SELECT node.  Applies boolean-algebra folds for i1
/// selects, min/max formation from a SETCC condition, and SELECT->SELECT_CC
/// conversion.  Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor in the condition
    // type, then extend or truncate to the result type.
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      SDValue FMinMax =
          combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                              N1, N2, CC, TLI, DAG);
      if (FMinMax)
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}

/// Split a vector SETCC node into two half-width SETCCs over the split
/// operands, returning the (lo, hi) pair.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the
  // inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  // Re-emit the compare on each half, keeping the original condition code
  // (operand 2).
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
4803 ConstantSDNode *BottomHalf = nullptr; 4804 for (int i = 0; i < NumElems / 2; ++i) { 4805 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4806 continue; 4807 4808 if (BottomHalf == nullptr) 4809 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4810 else if (Cond->getOperand(i).getNode() != BottomHalf) 4811 return SDValue(); 4812 } 4813 4814 // Do the same for the second half of the BuildVector 4815 ConstantSDNode *TopHalf = nullptr; 4816 for (int i = NumElems / 2; i < NumElems; ++i) { 4817 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4818 continue; 4819 4820 if (TopHalf == nullptr) 4821 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4822 else if (Cond->getOperand(i).getNode() != TopHalf) 4823 return SDValue(); 4824 } 4825 4826 assert(TopHalf && BottomHalf && 4827 "One half of the selector was all UNDEFs and the other was all the " 4828 "same value. This should have been addressed before this function."); 4829 return DAG.getNode( 4830 ISD::CONCAT_VECTORS, dl, VT, 4831 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 4832 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 4833} 4834 4835SDValue DAGCombiner::visitMSTORE(SDNode *N) { 4836 4837 if (Level >= AfterLegalizeTypes) 4838 return SDValue(); 4839 4840 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 4841 SDValue Mask = MST->getMask(); 4842 SDValue Data = MST->getValue(); 4843 SDLoc DL(N); 4844 4845 // If the MSTORE data type requires splitting and the mask is provided by a 4846 // SETCC, then split both nodes and its operands before legalization. This 4847 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4848 // and enables future optimizations (e.g. min/max pattern matching on X86). 4849 if (Mask.getOpcode() == ISD::SETCC) { 4850 4851 // Check if any splitting is required. 
4852 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 4853 TargetLowering::TypeSplitVector) 4854 return SDValue(); 4855 4856 SDValue MaskLo, MaskHi, Lo, Hi; 4857 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4858 4859 EVT LoVT, HiVT; 4860 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 4861 4862 SDValue Chain = MST->getChain(); 4863 SDValue Ptr = MST->getBasePtr(); 4864 4865 EVT MemoryVT = MST->getMemoryVT(); 4866 unsigned Alignment = MST->getOriginalAlignment(); 4867 4868 // if Alignment is equal to the vector size, 4869 // take the half of it for the second part 4870 unsigned SecondHalfAlignment = 4871 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 4872 Alignment/2 : Alignment; 4873 4874 EVT LoMemVT, HiMemVT; 4875 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4876 4877 SDValue DataLo, DataHi; 4878 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 4879 4880 MachineMemOperand *MMO = DAG.getMachineFunction(). 4881 getMachineMemOperand(MST->getPointerInfo(), 4882 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 4883 Alignment, MST->getAAInfo(), MST->getRanges()); 4884 4885 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 4886 MST->isTruncatingStore()); 4887 4888 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4889 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4890 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4891 4892 MMO = DAG.getMachineFunction(). 
4893 getMachineMemOperand(MST->getPointerInfo(), 4894 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 4895 SecondHalfAlignment, MST->getAAInfo(), 4896 MST->getRanges()); 4897 4898 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 4899 MST->isTruncatingStore()); 4900 4901 AddToWorklist(Lo.getNode()); 4902 AddToWorklist(Hi.getNode()); 4903 4904 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 4905 } 4906 return SDValue(); 4907} 4908 4909SDValue DAGCombiner::visitMLOAD(SDNode *N) { 4910 4911 if (Level >= AfterLegalizeTypes) 4912 return SDValue(); 4913 4914 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 4915 SDValue Mask = MLD->getMask(); 4916 SDLoc DL(N); 4917 4918 // If the MLOAD result requires splitting and the mask is provided by a 4919 // SETCC, then split both nodes and its operands before legalization. This 4920 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4921 // and enables future optimizations (e.g. min/max pattern matching on X86). 4922 4923 if (Mask.getOpcode() == ISD::SETCC) { 4924 EVT VT = N->getValueType(0); 4925 4926 // Check if any splitting is required. 4927 if (TLI.getTypeAction(*DAG.getContext(), VT) != 4928 TargetLowering::TypeSplitVector) 4929 return SDValue(); 4930 4931 SDValue MaskLo, MaskHi, Lo, Hi; 4932 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4933 4934 SDValue Src0 = MLD->getSrc0(); 4935 SDValue Src0Lo, Src0Hi; 4936 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 4937 4938 EVT LoVT, HiVT; 4939 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 4940 4941 SDValue Chain = MLD->getChain(); 4942 SDValue Ptr = MLD->getBasePtr(); 4943 EVT MemoryVT = MLD->getMemoryVT(); 4944 unsigned Alignment = MLD->getOriginalAlignment(); 4945 4946 // if Alignment is equal to the vector size, 4947 // take the half of it for the second part 4948 unsigned SecondHalfAlignment = 4949 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
4950 Alignment/2 : Alignment; 4951 4952 EVT LoMemVT, HiMemVT; 4953 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4954 4955 MachineMemOperand *MMO = DAG.getMachineFunction(). 4956 getMachineMemOperand(MLD->getPointerInfo(), 4957 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 4958 Alignment, MLD->getAAInfo(), MLD->getRanges()); 4959 4960 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, 4961 ISD::NON_EXTLOAD); 4962 4963 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4964 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4965 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4966 4967 MMO = DAG.getMachineFunction(). 4968 getMachineMemOperand(MLD->getPointerInfo(), 4969 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), 4970 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); 4971 4972 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, 4973 ISD::NON_EXTLOAD); 4974 4975 AddToWorklist(Lo.getNode()); 4976 AddToWorklist(Hi.getNode()); 4977 4978 // Build a factor node to remember that this load is independent of the 4979 // other one. 4980 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 4981 Hi.getValue(1)); 4982 4983 // Legalized the chain result - switch anything that used the old chain to 4984 // use the new one. 4985 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 4986 4987 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 4988 4989 SDValue RetOps[] = { LoadRes, Chain }; 4990 return DAG.getMergeValues(RetOps, DL); 4991 } 4992 return SDValue(); 4993} 4994 4995SDValue DAGCombiner::visitVSELECT(SDNode *N) { 4996 SDValue N0 = N->getOperand(0); 4997 SDValue N1 = N->getOperand(1); 4998 SDValue N2 = N->getOperand(2); 4999 SDLoc DL(N); 5000 5001 // Canonicalize integer abs. 
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match either "X >= 0 ? X : 0-X" or "X > -1 ? X : 0-X"...
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // ...or the mirrored "X <= 0 ? 0-X : X" form.
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // abs(X) == (X + (X >>s w-1)) ^ (X >>s w-1), where the arithmetic
      // shift replicates the sign bit across the whole element.
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SDValue CV = ConvertSelectToConcatVector(N, DAG);
    if (CV.getNode())
      return CV;
  }

  return SDValue();
}

// visitSELECT_CC - Combine a SELECT_CC node: fold away trivial selects and
// simplify the embedded comparison via SimplifySetCC/SimplifySelectCC.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, SDLoc(N), false);
  if (SCC.getNode()) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->getOpcode() == ISD::UNDEF) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with the DAG creation: no setcc node is created in this case.
    return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.
5119 5120 // fold select_cc into other things, such as min/max/abs 5121 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5122} 5123 5124SDValue DAGCombiner::visitSETCC(SDNode *N) { 5125 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5126 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5127 SDLoc(N)); 5128} 5129 5130// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext 5131// dag node into a ConstantSDNode or a build_vector of constants. 5132// This function is called by the DAGCombiner when visiting sext/zext/aext 5133// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5134// Vector extends are not folded if operations are legal; this is to 5135// avoid introducing illegal build_vector dag nodes. 5136static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5137 SelectionDAG &DAG, bool LegalTypes, 5138 bool LegalOperations) { 5139 unsigned Opcode = N->getOpcode(); 5140 SDValue N0 = N->getOperand(0); 5141 EVT VT = N->getValueType(0); 5142 5143 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5144 Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); 5145 5146 // fold (sext c1) -> c1 5147 // fold (zext c1) -> c1 5148 // fold (aext c1) -> c1 5149 if (isa<ConstantSDNode>(N0)) 5150 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5151 5152 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5153 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5154 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5155 EVT SVT = VT.getScalarType(); 5156 if (!(VT.isVector() && 5157 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5158 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5159 return nullptr; 5160 5161 // We can fold this node into a build_vector. 
5162 unsigned VTBits = SVT.getSizeInBits(); 5163 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5164 unsigned ShAmt = VTBits - EVTBits; 5165 SmallVector<SDValue, 8> Elts; 5166 unsigned NumElts = N0->getNumOperands(); 5167 SDLoc DL(N); 5168 5169 for (unsigned i=0; i != NumElts; ++i) { 5170 SDValue Op = N0->getOperand(i); 5171 if (Op->getOpcode() == ISD::UNDEF) { 5172 Elts.push_back(DAG.getUNDEF(SVT)); 5173 continue; 5174 } 5175 5176 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 5177 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 5178 if (Opcode == ISD::SIGN_EXTEND) 5179 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 5180 SVT)); 5181 else 5182 Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), 5183 SVT)); 5184 } 5185 5186 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5187} 5188 5189// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5190// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5191// transformation. Returns true if extension are possible and the above 5192// mentioned transformation is profitable. 5193static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5194 unsigned ExtOpc, 5195 SmallVectorImpl<SDNode *> &ExtendNodes, 5196 const TargetLowering &TLI) { 5197 bool HasCopyToRegUses = false; 5198 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5199 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5200 UE = N0.getNode()->use_end(); 5201 UI != UE; ++UI) { 5202 SDNode *User = *UI; 5203 if (User == N) 5204 continue; 5205 if (UI.getUse().getResNo() != N0.getResNo()) 5206 continue; 5207 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5208 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5209 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5210 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5211 // Sign bits will be lost after a zext. 5212 return false; 5213 bool Add = false; 5214 for (unsigned i = 0; i != 2; ++i) { 5215 SDValue UseOp = User->getOperand(i); 5216 if (UseOp == N0) 5217 continue; 5218 if (!isa<ConstantSDNode>(UseOp)) 5219 return false; 5220 Add = true; 5221 } 5222 if (Add) 5223 ExtendNodes.push_back(User); 5224 continue; 5225 } 5226 // If truncates aren't free and there are users we can't 5227 // extend, it isn't worthwhile. 5228 if (!isTruncFree) 5229 return false; 5230 // Remember if this value is live-out. 5231 if (User->getOpcode() == ISD::CopyToReg) 5232 HasCopyToRegUses = true; 5233 } 5234 5235 if (HasCopyToRegUses) { 5236 bool BothLiveOut = false; 5237 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5238 UI != UE; ++UI) { 5239 SDUse &Use = UI.getUse(); 5240 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5241 BothLiveOut = true; 5242 break; 5243 } 5244 } 5245 if (BothLiveOut) 5246 // Both unextended and extended values are live out. There had better be 5247 // a good reason for the transformation. 5248 return ExtendNodes.size(); 5249 } 5250 return true; 5251} 5252 5253void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5254 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5255 ISD::NodeType ExtType) { 5256 // Extend SetCC uses if necessary. 
  // Rewrite each recorded SETCC so it compares the extended values, replacing
  // the operand that was the old (truncated) load result with ExtLoad.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        // ExtendUsesToFormExtLoad only recorded SETCCs whose other operand is
        // a constant, so extending it here is always possible.
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// visitSIGN_EXTEND - Combine an ISD::SIGN_EXTEND node: fold constants,
// collapse extend chains, merge with truncates/loads/setcc where profitable.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant operand to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, KnownZero, KnownOne);
    return true;
  }

  // Also treat (setne X, 0) / (setne 0, X) on i1 as a "truncate" of X: the
  // i1 result equals X's low bit when all other bits of X are known zero.
  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
  if (COp0 && COp0->isNullValue())
    Op = Op1;
  else if (COp1 && COp1->isNullValue())
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, KnownZero, KnownOne);

  // Require every bit except (possibly) the lowest one to be known zero.
  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT
         = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // TruncatedBits = the bits of Op above N0's width that the extended
    // result must see as zero (capped at the destination width).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Bring the truncate's input to the destination width, then mask off
    // everything above the truncated width.
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Zero-extend the constant operand to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = width added by the inner zext, i.e. the number of
      // known-zero high bits a left shift may safely consume.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

// visitANY_EXTEND - Combine an ISD::ANY_EXTEND node: fold constants,
// collapse extend/truncate chains, and merge with loads where legal.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5903 } 5904 } 5905 5906 if (N0.getOpcode() == ISD::SETCC) { 5907 // For vectors: 5908 // aext(setcc) -> vsetcc 5909 // aext(setcc) -> truncate(vsetcc) 5910 // aext(setcc) -> aext(vsetcc) 5911 // Only do this before legalize for now. 5912 if (VT.isVector() && !LegalOperations) { 5913 EVT N0VT = N0.getOperand(0).getValueType(); 5914 // We know that the # elements of the results is the same as the 5915 // # elements of the compare (and the # elements of the compare result 5916 // for that matter). Check to see that they are the same size. If so, 5917 // we know that the element size of the sext'd result matches the 5918 // element size of the compare operands. 5919 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 5920 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 5921 N0.getOperand(1), 5922 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 5923 // If the desired elements are smaller or larger than the source 5924 // elements we can use a matching integer vector type and then 5925 // truncate/any extend 5926 else { 5927 EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); 5928 SDValue VsetCC = 5929 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 5930 N0.getOperand(1), 5931 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 5932 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); 5933 } 5934 } 5935 5936 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 5937 SDValue SCC = 5938 SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), 5939 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 5940 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 5941 if (SCC.getNode()) 5942 return SCC; 5943 } 5944 5945 return SDValue(); 5946} 5947 5948/// See if the specified operand can be simplified with the knowledge that only 5949/// the bits specified by Mask are used. If so, return the simpler operand, 5950/// otherwise return a null SDValue. 
5951SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 5952 switch (V.getOpcode()) { 5953 default: break; 5954 case ISD::Constant: { 5955 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 5956 assert(CV && "Const value should be ConstSDNode."); 5957 const APInt &CVal = CV->getAPIntValue(); 5958 APInt NewVal = CVal & Mask; 5959 if (NewVal != CVal) 5960 return DAG.getConstant(NewVal, V.getValueType()); 5961 break; 5962 } 5963 case ISD::OR: 5964 case ISD::XOR: 5965 // If the LHS or RHS don't contribute bits to the or, drop them. 5966 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 5967 return V.getOperand(1); 5968 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 5969 return V.getOperand(0); 5970 break; 5971 case ISD::SRL: 5972 // Only look at single-use SRLs. 5973 if (!V.getNode()->hasOneUse()) 5974 break; 5975 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 5976 // See if we can recursively simplify the LHS. 5977 unsigned Amt = RHSC->getZExtValue(); 5978 5979 // Watch out for shift count overflow though. 5980 if (Amt >= Mask.getBitWidth()) break; 5981 APInt NewMask = Mask << Amt; 5982 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 5983 if (SimplifyLHS.getNode()) 5984 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 5985 SimplifyLHS, V.getOperand(1)); 5986 } 5987 } 5988 return SDValue(); 5989} 5990 5991/// If the result of a wider load is shifted to right of N bits and then 5992/// truncated to a narrower type and where N is a multiple of number of bits of 5993/// the narrower type, transform it to a narrower load from address + N / num of 5994/// bits of new type. If the result is to be extended, also fold the extension 5995/// to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    // Treat N itself as the value whose width is being reduced; the narrow
    // type holds the bits that survive the shift.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        // Look through the shift to the (hopefully) loaded value.
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Let the target veto the narrowing (e.g. if the narrow load is slower).
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Compute the new address and the (possibly reduced) alignment at that
  // byte offset.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                        LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(),
                        LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// Try to simplify an ISD::SIGN_EXTEND_INREG node, returning a replacement
/// value or a null SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local named 'EVT' shadows the EVT type; every later mention
  // of 'EVT' inside this function refers to this value (the type being
  // sign-extended from), not the class.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1  (getNode constant-folds this form)
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      // Sign-extend the element by shifting the narrow value up to the top
      // and arithmetic-shifting it back down.
      // NOTE(review): getZExtValue() asserts for constants wider than 64
      // bits -- this assumes vector element constants fit in 64 bits; confirm.
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

/// Try to simplify an ISD::TRUNCATE node, returning a replacement value or a
/// null SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1  (getNode constant-folds this form)
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the vector with SizeRatio-times more (narrower) elements.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy();
      // The low (truncated) part sits at a different sub-element index
      // depending on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         SDLoc(N), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      // Keep every TruncEltOffset-th build_vector operand (the low parts).
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    SDValue Reduced = ReduceLoadWidth(N);
    if (Reduced.getNode())
      return Reduced;
    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        // The truncated type is still wider than the memory type, so the
        // load stays an extending load, just to the narrower result.
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      // Record the narrowed type of each concat operand.
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Return operand i of N, looking through an ISD::MERGE_VALUES wrapper to
/// the node that actually defines the value.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    // The merged (wider) load must not require stricter alignment than the
    // first load already provides.
    unsigned NewAlign = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// Try to simplify an ISD::BITCAST node, returning a replacement value or a
/// null SDValue if no fold applies.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
6533 // First check to see if this is all constant. 6534 if (!LegalTypes && 6535 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 6536 VT.isVector()) { 6537 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); 6538 6539 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 6540 assert(!DestEltVT.isVector() && 6541 "Element type of vector ValueType must not be vector!"); 6542 if (isSimple) 6543 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 6544 } 6545 6546 // If the input is a constant, let getNode fold it. 6547 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 6548 // If we can't allow illegal operations, we need to check that this is just 6549 // a fp -> int or int -> conversion and that the resulting operation will 6550 // be legal. 6551 if (!LegalOperations || 6552 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 6553 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 6554 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 6555 TLI.isOperationLegal(ISD::Constant, VT))) 6556 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 6557 } 6558 6559 // (conv (conv x, t1), t2) -> (conv x, t2) 6560 if (N0.getOpcode() == ISD::BITCAST) 6561 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 6562 N0.getOperand(0)); 6563 6564 // fold (conv (load x)) -> (load (conv*)x) 6565 // If the resultant load doesn't need a higher alignment than the original! 6566 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 6567 // Do not change the width of a volatile load. 6568 !cast<LoadSDNode>(N0)->isVolatile() && 6569 // Do not remove the cast if the types differ in endian layout. 
6570 TLI.hasBigEndianPartOrdering(N0.getValueType()) == 6571 TLI.hasBigEndianPartOrdering(VT) && 6572 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && 6573 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 6574 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6575 unsigned Align = TLI.getDataLayout()-> 6576 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 6577 unsigned OrigAlign = LN0->getAlignment(); 6578 6579 if (Align <= OrigAlign) { 6580 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 6581 LN0->getBasePtr(), LN0->getPointerInfo(), 6582 LN0->isVolatile(), LN0->isNonTemporal(), 6583 LN0->isInvariant(), OrigAlign, 6584 LN0->getAAInfo()); 6585 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 6586 return Load; 6587 } 6588 } 6589 6590 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 6591 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 6592 // This often reduces constant pool loads. 6593 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 6594 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 6595 N0.getNode()->hasOneUse() && VT.isInteger() && 6596 !VT.isVector() && !N0.getValueType().isVector()) { 6597 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 6598 N0.getOperand(0)); 6599 AddToWorklist(NewConv.getNode()); 6600 6601 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6602 if (N0.getOpcode() == ISD::FNEG) 6603 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 6604 NewConv, DAG.getConstant(SignBit, VT)); 6605 assert(N0.getOpcode() == ISD::FABS); 6606 return DAG.getNode(ISD::AND, SDLoc(N), VT, 6607 NewConv, DAG.getConstant(~SignBit, VT)); 6608 } 6609 6610 // fold (bitconvert (fcopysign cst, x)) -> 6611 // (or (and (bitconvert x), sign), (and cst, (not sign))) 6612 // Note that we don't handle (copysign x, cst) because this can always be 6613 // folded to an fneg or fabs. 
6614 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 6615 isa<ConstantFPSDNode>(N0.getOperand(0)) && 6616 VT.isInteger() && !VT.isVector()) { 6617 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 6618 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 6619 if (isTypeLegal(IntXVT)) { 6620 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6621 IntXVT, N0.getOperand(1)); 6622 AddToWorklist(X.getNode()); 6623 6624 // If X has a different width than the result/lhs, sext it or truncate it. 6625 unsigned VTWidth = VT.getSizeInBits(); 6626 if (OrigXWidth < VTWidth) { 6627 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 6628 AddToWorklist(X.getNode()); 6629 } else if (OrigXWidth > VTWidth) { 6630 // To get the sign bit in the right place, we have to shift it right 6631 // before truncating. 6632 X = DAG.getNode(ISD::SRL, SDLoc(X), 6633 X.getValueType(), X, 6634 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 6635 AddToWorklist(X.getNode()); 6636 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6637 AddToWorklist(X.getNode()); 6638 } 6639 6640 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6641 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 6642 X, DAG.getConstant(SignBit, VT)); 6643 AddToWorklist(X.getNode()); 6644 6645 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6646 VT, N0.getOperand(0)); 6647 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 6648 Cst, DAG.getConstant(~SignBit, VT)); 6649 AddToWorklist(Cst.getNode()); 6650 6651 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 6652 } 6653 } 6654 6655 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  // A BUILD_PAIR of two consecutive loads can be replaced by one wider load;
  // CombineConsecutiveLoads returns a null SDValue when that is not possible.
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands.  DstEltVT indicates the destination element value type.
/// The fold proceeds by cases: same-width elements are bitcast one-by-one;
/// FP source/destination elements are first routed through an integer vector
/// of the same width (recursively); then integer elements are either packed
/// (growing) or split (shrinking) with endian-correct ordering.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    // Bitcast each element individually.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first input element is the
        // least-significant chunk, so walk the inputs in reverse.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every contributing input was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      // One undef input expands to NumOutputsPerInput undef outputs.
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      // If the source was a scalar_to_vector whose scalar fits entirely in
      // the first (and so far only) output chunk, the remaining chunks would
      // all be zero padding:
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  // (getNode performs the actual constant folding when both are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // isNegatibleForFree == 2 means the negated form is strictly cheaper.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     N0.getOperand(1), N1));

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        // The constant must be on the fmul's RHS only (CFP01 && !CFP00),
        // otherwise x would itself be a constant.
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1))
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N1, DAG.getConstantFP(3.0, VT));
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0)
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0, DAG.getConstantFP(3.0, VT));
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(0), DAG.getConstantFP(4.0, VT));
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    // Without aggressive fusion the fmul must be single-use, or it would
    // survive alongside the new fma.
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)), N1);
      }

      // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
      // Note: Commutes FADD operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)), N0);
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       N1));

      // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
      if (N1->getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N1.getOperand(0), N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N1.getOperand(2).getOperand(0),
                                       N1.getOperand(2).getOperand(1),
                                       N0));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  // (getNode constant-folds when both operands are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->getValueAPF().isZero()) {
      // Prefer a free negation of B; otherwise emit an explicit FNEG if legal.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, dl, VT, N1);
    }

    // (fsub x, x) -> 0.0
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);

    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    // Both the fneg and the fmul must be single-use (unless fusing
    // aggressively) so no duplicate computation is left behind.
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
         TLI.enableAggressiveFMAFusion(VT))) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fsub (fpext (fmul x, y)), z)
      //   -> (fma (fpext x), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)),
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
      }

      // fold (fsub x, (fpext (fmul y, z)))
      //   -> (fma (fneg (fpext y)), (fpext z), x)
      // Note: Commutes FSUB operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                         DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                     VT, N10.getOperand(0))),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)),
                             N0);
      }

      // fold (fsub (fpext (fneg (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FNEG) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }

      // fold (fsub (fneg (fpext (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      // Same as above with the fneg/fpext nesting reversed.
      if (N0.getOpcode() == ISD::FNEG) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FP_EXTEND) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }
    }

    // More folding opportunities when target permits.
    // The aggressive-fusion folds below rearrange an FMA whose addend is an
    // FMUL into nested FMAs.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fsub (fma x, y, (fmul u, v)), z)
      //   -> (fma x, y (fma u, v, (fneg z)))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N1)));

      // fold (fsub x, (fma y, z, (fmul u, v)))
      //   -> (fma (fneg y), z, (fma (fneg u), v, x))
      if (N1.getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL) {
        SDValue N20 = N1.getOperand(2).getOperand(0);
        SDValue N21 = N1.getOperand(2).getOperand(1);
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N20),
                                       N21, N0));
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode())
      return FoldedVOp;
    // Canonicalize vector constant to RHS.
    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
        N1.getOpcode() != ISD::BUILD_VECTOR)
      if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
        if (BV0->isConstant())
          return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // (Unsafe: ignores that A might be NaN/Inf, where A*0 is NaN.)
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
        SDLoc SL(N);
        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
      SDLoc SL(N);
      const SDValue Two = DAG.getConstantFP(2.0, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  // getNode() performs the actual fold when all three operands are constants.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z; unsafe because the product
    // could have been NaN.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, y, z) -> (fadd y, z); (fma x, 1, z) -> (fadd x, z)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }


  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, VT)));


  return SDValue();
}

SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                           DAG.getConstantFP(Recip, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y))
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal.
  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
  // Notice that this is not always beneficial. One reason is different target
  // may have different costs for FDIV and FMUL, so sometimes the cost of two
  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
  if (Options.UnsafeFPMath) {
    // Skip if current node is a reciprocal.
    if (N0CFP && N0CFP->isExactlyValue(1.0))
      return SDValue();

    SmallVector<SDNode *, 4> Users;
    // Find all FDIV users of the same divisor.
    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
                              UE = N1.getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = UI.getUse().getUser();
      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
        Users.push_back(User);
    }

    // Let the target decide whether sharing one reciprocal among this many
    // divides pays off.
    if (TLI.combineRepeatedFPDivisors(Users.size())) {
      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);

      // Dividend / Divisor -> Dividend * Reciprocal
      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
        if ((*I)->getOperand(0) != FPOne) {
          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
                                        (*I)->getOperand(0), Reciprocal);
          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
        }
      }
      // All users (including N) were rewritten in place via RAUW above, so
      // there is no single replacement value to hand back.
      return SDValue();
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  // (getNode constant-folds the remainder when both operands are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (DAG.getTarget().Options.UnsafeFPMath &&
      !TLI.isFsqrtCheap()) {
    // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
      EVT VT = RV.getValueType();
      RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
      AddToWorklist(RV.getNode());

      // Unfortunately, RV is now NaN if
the input was exactly 0. 7546 // Select out this case and force the answer to 0. 7547 SDValue Zero = DAG.getConstantFP(0.0, VT); 7548 SDValue ZeroCmp = 7549 DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), 7550 N->getOperand(0), Zero, ISD::SETEQ); 7551 AddToWorklist(ZeroCmp.getNode()); 7552 AddToWorklist(RV.getNode()); 7553 7554 RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, 7555 SDLoc(N), VT, ZeroCmp, Zero, RV); 7556 return RV; 7557 } 7558 } 7559 return SDValue(); 7560} 7561 7562SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 7563 SDValue N0 = N->getOperand(0); 7564 SDValue N1 = N->getOperand(1); 7565 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7566 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 7567 EVT VT = N->getValueType(0); 7568 7569 if (N0CFP && N1CFP) // Constant fold 7570 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 7571 7572 if (N1CFP) { 7573 const APFloat& V = N1CFP->getValueAPF(); 7574 // copysign(x, c1) -> fabs(x) iff ispos(c1) 7575 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 7576 if (!V.isNegative()) { 7577 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 7578 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7579 } else { 7580 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 7581 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 7582 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 7583 } 7584 } 7585 7586 // copysign(fabs(x), y) -> copysign(x, y) 7587 // copysign(fneg(x), y) -> copysign(x, y) 7588 // copysign(copysign(x,z), y) -> copysign(x, y) 7589 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || 7590 N0.getOpcode() == ISD::FCOPYSIGN) 7591 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7592 N0.getOperand(0), N1); 7593 7594 // copysign(x, abs(y)) -> abs(x) 7595 if (N1.getOpcode() == ISD::FABS) 7596 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7597 7598 // copysign(x, copysign(y,z)) -> copysign(x, z) 7599 if (N1.getOpcode() == 
ISD::FCOPYSIGN) 7600 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7601 N0, N1.getOperand(1)); 7602 7603 // copysign(x, fp_extend(y)) -> copysign(x, y) 7604 // copysign(x, fp_round(y)) -> copysign(x, y) 7605 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 7606 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7607 N0, N1.getOperand(0)); 7608 7609 return SDValue(); 7610} 7611 7612SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 7613 SDValue N0 = N->getOperand(0); 7614 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 7615 EVT VT = N->getValueType(0); 7616 EVT OpVT = N0.getValueType(); 7617 7618 // fold (sint_to_fp c1) -> c1fp 7619 if (N0C && 7620 // ...but only if the target supports immediate floating-point values 7621 (!LegalOperations || 7622 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 7623 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 7624 7625 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 7626 // but UINT_TO_FP is legal on this target, try to convert. 7627 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 7628 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 7629 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 7630 if (DAG.SignBitIsZero(N0)) 7631 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 7632 } 7633 7634 // The next optimizations are desirable only if SELECT_CC can be lowered. 
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // (a signed i1 setcc result is 0 or -1, hence -1.0 for the true value).
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // (the zext makes the true value +1, so the FP true value is 1.0).
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Fold UINT_TO_FP nodes: constant-fold, convert to SINT_TO_FP when the
/// sign bit is known zero, and turn conversions of setcc results into
/// select_cc of FP constants.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Fold FP_TO_SINT nodes.  Only constant folding is attempted; getNode
/// evaluates the conversion when the operand is a constant.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FP_TO_UINT nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FP_ROUND nodes.  Operand 1 is the "truncating" flag: 1 means the
/// round is known not to change the value.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value-preserving truncation if both rounds are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding the magnitude first lets the copysign be applied in the
  // narrower type.  Only done when the copysign has a single use.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// Fold FP_ROUND_INREG nodes.  Only constant folding is attempted: the
/// constant is rebuilt in the in-register type and extended back out.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the EVT type name; it holds the
  // in-register rounding type carried by operand 1.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
  }

  return SDValue();
}

/// Fold FP_EXTEND nodes: constant-fold, look through value-preserving
/// rounds, and turn fpext(load) into an extending load.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be
  // folded by the fp_round combine instead.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    // Extending to something narrower than the round's input is still a
    // round; otherwise extend the pre-round value directly.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original load get a round of the extended load;
    // the value-preserving flag (1) marks it as exact.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// Fold FCEIL nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FTRUNC nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FFLOOR nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
/// Fold FNEG nodes: constant-fold, use free negations, rewrite
/// fneg(bitcast x) as an integer sign-bit flip, and push the negation into
/// a multiply by a constant.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // Constant fold FNEG.
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));

  // If the operand can be negated at no extra cost, do so directly.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the negated constant is still
      // cheap to materialize on this target.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// Fold FMINNUM nodes: constant-fold via minnum(), and canonicalize a
/// constant LHS to the RHS.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Fold FMAXNUM nodes: constant-fold via maxnum(), and canonicalize a
/// constant LHS to the RHS.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Fold FABS nodes: constant-fold, strip sign-only wrappers, and rewrite
/// fabs(bitcast x) as an integer sign-bit clear.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fabs c1) -> fabs(c1)
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// Fold BRCOND nodes: form BR_CC from a setcc condition, simplify
/// shifted-AND conditions into a setcc, and turn xor conditions into
/// explicit comparisons.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(SDLoc(N),
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // A constant-1 LHS means this xor is the boolean-not of an inner xor,
      // so the branch tests equality instead of inequality.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
/// Fold BR_CC nodes by simplifying the embedded comparison with
/// SimplifySetCC and rebuilding the branch around the simpler setcc.
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
8189static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 8190 SelectionDAG &DAG, 8191 const TargetLowering &TLI) { 8192 EVT VT; 8193 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 8194 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 8195 return false; 8196 VT = Use->getValueType(0); 8197 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 8198 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 8199 return false; 8200 VT = ST->getValue().getValueType(); 8201 } else 8202 return false; 8203 8204 TargetLowering::AddrMode AM; 8205 if (N->getOpcode() == ISD::ADD) { 8206 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8207 if (Offset) 8208 // [reg +/- imm] 8209 AM.BaseOffs = Offset->getSExtValue(); 8210 else 8211 // [reg +/- reg] 8212 AM.Scale = 1; 8213 } else if (N->getOpcode() == ISD::SUB) { 8214 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8215 if (Offset) 8216 // [reg +/- imm] 8217 AM.BaseOffs = -Offset->getSExtValue(); 8218 else 8219 // [reg +/- reg] 8220 AM.Scale = 1; 8221 } else 8222 return false; 8223 8224 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 8225} 8226 8227/// Try turning a load/store into a pre-indexed load/store when the base 8228/// pointer is an add or subtract and it has other uses besides the load/store. 8229/// After the transformation, the new indexed load/store has effectively folded 8230/// the add/subtract in and all of its other uses are redirected to the 8231/// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Pre-indexed forms are only introduced after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // N must be a plain load or store whose memory VT has a legal pre-inc or
  // pre-dec form on this target; grab its base pointer.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses,
  // bail out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack
  // pointer (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode *Use : BasePtr.getNode()->uses()) {
      if (Use == Ptr.getNode())
        continue;

      // A use that feeds into N would create a cycle if rewritten; skip it.
      if (Use->isPredecessorOf(N))
        continue;

      // Any non-ADD/SUB use disqualifies the whole rewrite; give up on
      // folding other uses entirely.
      if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
      if (Op1.getNode() == BasePtr.getNode())
        std::swap(Op0, Op1);
      assert(Op0.getNode() == BasePtr.getNode() &&
             "Use of ADD/SUB but not an operand");

      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use);
    }

  // Undo the earlier canonicalization swap before querying uses of Ptr.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // All checks passed: build the indexed form of the memory operation.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect the value (and, for loads, the chain) results of N to the new
  // indexed node.  An indexed load produces (value, new base, chain), so
  // the chain is result #2; an indexed store produces (new base, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
8392 deleteAndRecombine(N); 8393 8394 if (Swapped) 8395 std::swap(BasePtr, Offset); 8396 8397 // Replace other uses of BasePtr that can be updated to use Ptr 8398 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { 8399 unsigned OffsetIdx = 1; 8400 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) 8401 OffsetIdx = 0; 8402 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == 8403 BasePtr.getNode() && "Expected BasePtr operand"); 8404 8405 // We need to replace ptr0 in the following expression: 8406 // x0 * offset0 + y0 * ptr0 = t0 8407 // knowing that 8408 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) 8409 // 8410 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the 8411 // indexed load/store and the expresion that needs to be re-written. 8412 // 8413 // Therefore, we have: 8414 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 8415 8416 ConstantSDNode *CN = 8417 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); 8418 int X0, X1, Y0, Y1; 8419 APInt Offset0 = CN->getAPIntValue(); 8420 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); 8421 8422 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; 8423 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; 8424 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; 8425 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; 8426 8427 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; 8428 8429 APInt CNV = Offset0; 8430 if (X0 < 0) CNV = -CNV; 8431 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; 8432 else CNV = CNV - Offset1; 8433 8434 // We can now generate the new expression. 8435 SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); 8436 SDValue NewOp2 = Result.getValue(isLoad ? 
                                    1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 SDLoc(OtherUses[i]),
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to
/// the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only formed after the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer's only use is this load/store there is no add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // A load produces (value, new-base, chain); a store produces
        // (new-base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  // Operand 1 is the base pointer, operand 2 is the increment.
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // Keep the pointer update as a free-standing add/sub.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // The -combiner-alias-analysis flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A free zext does not count against the slice.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  LoadedSlice(const LoadedSlice &LS)
      : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // NOTE(review): MinAlign(Alignment, Alignment + Offset) looks intended to
    // derive the alignment of base+Offset from the base alignment — confirm
    // against MinAlign's semantics.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian =
        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
                              DAG->getConstant(Offset, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  // The union of both slices must itself be a dense run of bits.
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    // A paired load counts as one load, so drop one from the global cost.
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // Under -combiner-stress-load-slicing, slice whenever possible.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
9169 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 9170 // The original code has one big load. 9171 OrigCost.Loads = 1; 9172 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 9173 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 9174 // Accumulate the cost of all the slices. 9175 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 9176 GlobalSlicingCost += SliceCost; 9177 9178 // Account as cost in the original configuration the gain obtained 9179 // with the current slices. 9180 OrigCost.addSliceGain(LS); 9181 } 9182 9183 // If the target supports paired load, adjust the cost accordingly. 9184 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 9185 return OrigCost > GlobalSlicingCost; 9186} 9187 9188/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 9189/// operations, split it in the various pieces being extracted. 9190/// 9191/// This sort of thing is introduced by SROA. 9192/// This slicing takes care not to insert overlapping loads. 9193/// \pre LI is a simple load (i.e., not an atomic or volatile load). 9194bool DAGCombiner::SliceUpLoad(SDNode *N) { 9195 if (Level < AfterLegalizeDAG) 9196 return false; 9197 9198 LoadSDNode *LD = cast<LoadSDNode>(N); 9199 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 9200 !LD->getValueType(0).isInteger()) 9201 return false; 9202 9203 // Keep track of already used bits to detect overlapping values. 9204 // In that case, we will just abort the transformation. 9205 APInt UsedBits(LD->getValueSizeInBits(0), 0); 9206 9207 SmallVector<LoadedSlice, 4> LoadedSlices; 9208 9209 // Check if this load is used as several smaller chunks of bits. 9210 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 9211 // of computation for each trunc. 9212 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 9213 UI != UIEnd; ++UI) { 9214 // Skip the uses of the chain. 
9215 if (UI.getUse().getResNo() != 0) 9216 continue; 9217 9218 SDNode *User = *UI; 9219 unsigned Shift = 0; 9220 9221 // Check if this is a trunc(lshr). 9222 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 9223 isa<ConstantSDNode>(User->getOperand(1))) { 9224 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 9225 User = *User->use_begin(); 9226 } 9227 9228 // At this point, User is a Truncate, iff we encountered, trunc or 9229 // trunc(lshr). 9230 if (User->getOpcode() != ISD::TRUNCATE) 9231 return false; 9232 9233 // The width of the type must be a power of 2 and greater than 8-bits. 9234 // Otherwise the load cannot be represented in LLVM IR. 9235 // Moreover, if we shifted with a non-8-bits multiple, the slice 9236 // will be across several bytes. We do not support that. 9237 unsigned Width = User->getValueSizeInBits(0); 9238 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 9239 return 0; 9240 9241 // Build the slice for this chain of computations. 9242 LoadedSlice LS(User, LD, Shift, &DAG); 9243 APInt CurrentUsedBits = LS.getUsedBits(); 9244 9245 // Check if this slice overlaps with another. 9246 if ((CurrentUsedBits & UsedBits) != 0) 9247 return false; 9248 // Update the bits used globally. 9249 UsedBits |= CurrentUsedBits; 9250 9251 // Check if the new slice would be legal. 9252 if (!LS.isLegal()) 9253 return false; 9254 9255 // Record the slice. 9256 LoadedSlices.push_back(LS); 9257 } 9258 9259 // Abort slicing if it does not seem to be profitable. 9260 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 9261 return false; 9262 9263 ++SlicedLoads; 9264 9265 // Rewrite each chain to use an independent load. 9266 // By construction, each chain can be represented by a unique load. 9267 9268 // Prepare the argument for the new token factor for all the slices. 
  SmallVector<SDValue, 8> ArgChains;
  // Rewrite each recorded slice: CombineTo replaces the original (extended)
  // use with the narrow slice, and we collect every slice's chain result so
  // they can be tied together below.
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice may have wrapped the raw load in a single extension node;
    // peel it off to reach the load so we can take its chain (value #1).
    if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Merge all the slice chains into one TokenFactor and use it to replace
  // the chain result of the original load, keeping chain users correct.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// Returns (0, 0) when the pattern does not match; callers test .first.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load Ptr), constant).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Scan the TokenFactor operands for the load.
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}


/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo.  If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo is (number of bytes masked, byte offset of the masked region),
  // as produced by CheckForMaskedLoad.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the narrow bytes sit at the opposite end of the
  // value, so the byte offset must be computed from the top.
  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates.  If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // Only handle a plain load whose sole user is this op and whose chain feeds
  // the store directly; address spaces of the load and store must match.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the immediate so that set bits mark the bits actually
    // being changed by the operation (AND clears where the mask is 0).
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed when every changed bit fits inside the NewBW window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion for AND so the stored immediate is correct.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrowed load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
9550SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 9551 StoreSDNode *ST = cast<StoreSDNode>(N); 9552 SDValue Chain = ST->getChain(); 9553 SDValue Value = ST->getValue(); 9554 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 9555 Value.hasOneUse() && 9556 Chain == SDValue(Value.getNode(), 1)) { 9557 LoadSDNode *LD = cast<LoadSDNode>(Value); 9558 EVT VT = LD->getMemoryVT(); 9559 if (!VT.isFloatingPoint() || 9560 VT != ST->getMemoryVT() || 9561 LD->isNonTemporal() || 9562 ST->isNonTemporal() || 9563 LD->getPointerInfo().getAddrSpace() != 0 || 9564 ST->getPointerInfo().getAddrSpace() != 0) 9565 return SDValue(); 9566 9567 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 9568 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 9569 !TLI.isOperationLegal(ISD::STORE, IntVT) || 9570 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 9571 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 9572 return SDValue(); 9573 9574 unsigned LDAlign = LD->getAlignment(); 9575 unsigned STAlign = ST->getAlignment(); 9576 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 9577 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 9578 if (LDAlign < ABIAlign || STAlign < ABIAlign) 9579 return SDValue(); 9580 9581 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 9582 LD->getChain(), LD->getBasePtr(), 9583 LD->getPointerInfo(), 9584 false, false, false, LDAlign); 9585 9586 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 9587 NewLD, ST->getBasePtr(), 9588 ST->getPointerInfo(), 9589 false, false, STAlign); 9590 9591 AddToWorklist(NewLD.getNode()); 9592 AddToWorklist(NewST.getNode()); 9593 WorklistRemover DeadNodes(*this); 9594 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 9595 ++LdStFP2Int; 9596 return NewST; 9597 } 9598 9599 return SDValue(); 9600} 9601 9602/// Helper struct to parse and store a memory address as base + index + offset. 
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (add (i8 load %index)
///                                     (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;        // Common base operand of the address.
  SDValue Index;       // Optional variable index; null when not matched.
  int64_t Offset;      // Constant byte offset from Base (+ Index).
  bool IsIndexSignExt; // True when Index was wrapped in a sign_extend.

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  // Two addresses are comparable by constant offset only when base, index,
  // and the index's sign-extension status are all identical.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // A non-constant offset means we cannot fold the address; fall back to
    // treating the whole expression as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
9701 unsigned SequenceNum; 9702}; 9703 9704bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 9705 EVT MemVT = St->getMemoryVT(); 9706 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; 9707 bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). 9708 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); 9709 9710 // Don't merge vectors into wider inputs. 9711 if (MemVT.isVector() || !MemVT.isSimple()) 9712 return false; 9713 9714 // Perform an early exit check. Do not bother looking at stored values that 9715 // are not constants or loads. 9716 SDValue StoredVal = St->getValue(); 9717 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 9718 if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && 9719 !IsLoadSrc) 9720 return false; 9721 9722 // Only look at ends of store sequences. 9723 SDValue Chain = SDValue(St, 0); 9724 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 9725 return false; 9726 9727 // This holds the base pointer, index, and the offset in bytes from the base 9728 // pointer. 9729 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 9730 9731 // We must have a base and an offset. 9732 if (!BasePtr.Base.getNode()) 9733 return false; 9734 9735 // Do not handle stores to undef base pointers. 9736 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 9737 return false; 9738 9739 // Save the LoadSDNodes that we find in the chain. 9740 // We need to make sure that these nodes do not interfere with 9741 // any of the store nodes. 9742 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 9743 9744 // Save the StoreSDNodes that we find in the chain. 9745 SmallVector<MemOpLink, 8> StoreNodes; 9746 9747 // Walk up the chain and look for nodes with offsets from the same 9748 // base pointer. Stop when reaching an instruction with a different kind 9749 // or instruction which has a different base pointer. 
9750 unsigned Seq = 0; 9751 StoreSDNode *Index = St; 9752 while (Index) { 9753 // If the chain has more than one use, then we can't reorder the mem ops. 9754 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 9755 break; 9756 9757 // Find the base pointer and offset for this memory node. 9758 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 9759 9760 // Check that the base pointer is the same as the original one. 9761 if (!Ptr.equalBaseIndex(BasePtr)) 9762 break; 9763 9764 // Check that the alignment is the same. 9765 if (Index->getAlignment() != St->getAlignment()) 9766 break; 9767 9768 // The memory operands must not be volatile. 9769 if (Index->isVolatile() || Index->isIndexed()) 9770 break; 9771 9772 // No truncation. 9773 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 9774 if (St->isTruncatingStore()) 9775 break; 9776 9777 // The stored memory type must be the same. 9778 if (Index->getMemoryVT() != MemVT) 9779 break; 9780 9781 // We do not allow unaligned stores because we want to prevent overriding 9782 // stores. 9783 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 9784 break; 9785 9786 // We found a potential memory operand to merge. 9787 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 9788 9789 // Find the next memory operand in the chain. If the next operand in the 9790 // chain is a store then move up and continue the scan with the next 9791 // memory operand. If the next operand is a load save it and use alias 9792 // information to check if it interferes with anything. 9793 SDNode *NextInChain = Index->getChain().getNode(); 9794 while (1) { 9795 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 9796 // We found a store node. Use it for the next iteration. 9797 Index = STn; 9798 break; 9799 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 9800 if (Ldn->isVolatile()) { 9801 Index = nullptr; 9802 break; 9803 } 9804 9805 // Save the load node for later. Continue the scan. 
9806 AliasLoadNodes.push_back(Ldn); 9807 NextInChain = Ldn->getChain().getNode(); 9808 continue; 9809 } else { 9810 Index = nullptr; 9811 break; 9812 } 9813 } 9814 } 9815 9816 // Check if there is anything to merge. 9817 if (StoreNodes.size() < 2) 9818 return false; 9819 9820 // Sort the memory operands according to their distance from the base pointer. 9821 std::sort(StoreNodes.begin(), StoreNodes.end(), 9822 [](MemOpLink LHS, MemOpLink RHS) { 9823 return LHS.OffsetFromBase < RHS.OffsetFromBase || 9824 (LHS.OffsetFromBase == RHS.OffsetFromBase && 9825 LHS.SequenceNum > RHS.SequenceNum); 9826 }); 9827 9828 // Scan the memory operations on the chain and find the first non-consecutive 9829 // store memory address. 9830 unsigned LastConsecutiveStore = 0; 9831 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 9832 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 9833 9834 // Check that the addresses are consecutive starting from the second 9835 // element in the list of stores. 9836 if (i > 0) { 9837 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 9838 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 9839 break; 9840 } 9841 9842 bool Alias = false; 9843 // Check if this store interferes with any of the loads that we found. 9844 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 9845 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 9846 Alias = true; 9847 break; 9848 } 9849 // We found a load that alias with this store. Stop the sequence. 9850 if (Alias) 9851 break; 9852 9853 // Mark this node as useful. 9854 LastConsecutiveStore = i; 9855 } 9856 9857 // The node with the lowest store address. 9858 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 9859 9860 // Store the constants into memory as one consecutive store. 
9861 if (!IsLoadSrc) { 9862 unsigned LastLegalType = 0; 9863 unsigned LastLegalVectorType = 0; 9864 bool NonZero = false; 9865 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 9866 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9867 SDValue StoredVal = St->getValue(); 9868 9869 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 9870 NonZero |= !C->isNullValue(); 9871 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 9872 NonZero |= !C->getConstantFPValue()->isNullValue(); 9873 } else { 9874 // Non-constant. 9875 break; 9876 } 9877 9878 // Find a legal type for the constant store. 9879 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 9880 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9881 if (TLI.isTypeLegal(StoreTy)) 9882 LastLegalType = i+1; 9883 // Or check whether a truncstore is legal. 9884 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 9885 TargetLowering::TypePromoteInteger) { 9886 EVT LegalizedStoredValueTy = 9887 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 9888 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) 9889 LastLegalType = i+1; 9890 } 9891 9892 // Find a legal type for the vector store. 9893 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 9894 if (TLI.isTypeLegal(Ty)) 9895 LastLegalVectorType = i + 1; 9896 } 9897 9898 // We only use vectors if the constant is known to be zero and the 9899 // function is not marked with the noimplicitfloat attribute. 9900 if (NonZero || NoVectors) 9901 LastLegalVectorType = 0; 9902 9903 // Check if we found a legal integer type to store. 9904 if (LastLegalType == 0 && LastLegalVectorType == 0) 9905 return false; 9906 9907 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 9908 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 9909 9910 // Make sure we have something to merge. 
9911 if (NumElem < 2) 9912 return false; 9913 9914 unsigned EarliestNodeUsed = 0; 9915 for (unsigned i=0; i < NumElem; ++i) { 9916 // Find a chain for the new wide-store operand. Notice that some 9917 // of the store nodes that we found may not be selected for inclusion 9918 // in the wide store. The chain we use needs to be the chain of the 9919 // earliest store node which is *used* and replaced by the wide store. 9920 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 9921 EarliestNodeUsed = i; 9922 } 9923 9924 // The earliest Node in the DAG. 9925 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 9926 SDLoc DL(StoreNodes[0].MemNode); 9927 9928 SDValue StoredVal; 9929 if (UseVector) { 9930 // Find a legal type for the vector store. 9931 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 9932 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 9933 StoredVal = DAG.getConstant(0, Ty); 9934 } else { 9935 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 9936 APInt StoreInt(StoreBW, 0); 9937 9938 // Construct a single integer constant which is made of the smaller 9939 // constant inputs. 9940 bool IsLE = TLI.isLittleEndian(); 9941 for (unsigned i = 0; i < NumElem ; ++i) { 9942 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 9943 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 9944 SDValue Val = St->getValue(); 9945 StoreInt<<=ElementSizeBytes*8; 9946 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 9947 StoreInt|=C->getAPIntValue().zext(StoreBW); 9948 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 9949 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 9950 } else { 9951 llvm_unreachable("Invalid constant element type"); 9952 } 9953 } 9954 9955 // Create the new Load and Store operations. 
9956 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9957 StoredVal = DAG.getConstant(StoreInt, StoreTy); 9958 } 9959 9960 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 9961 FirstInChain->getBasePtr(), 9962 FirstInChain->getPointerInfo(), 9963 false, false, 9964 FirstInChain->getAlignment()); 9965 9966 // Replace the first store with the new store 9967 CombineTo(EarliestOp, NewStore); 9968 // Erase all other stores. 9969 for (unsigned i = 0; i < NumElem ; ++i) { 9970 if (StoreNodes[i].MemNode == EarliestOp) 9971 continue; 9972 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9973 // ReplaceAllUsesWith will replace all uses that existed when it was 9974 // called, but graph optimizations may cause new ones to appear. For 9975 // example, the case in pr14333 looks like 9976 // 9977 // St's chain -> St -> another store -> X 9978 // 9979 // And the only difference from St to the other store is the chain. 9980 // When we change it's chain to be St's chain they become identical, 9981 // get CSEed and the net result is that X is now a use of St. 9982 // Since we know that St is redundant, just iterate. 9983 while (!St->use_empty()) 9984 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 9985 deleteAndRecombine(St); 9986 } 9987 9988 return true; 9989 } 9990 9991 // Below we handle the case of multiple consecutive stores that 9992 // come from multiple consecutive loads. We merge them into a single 9993 // wide load and a single wide store. 9994 9995 // Look for load nodes which are used by the stored values. 9996 SmallVector<MemOpLink, 8> LoadNodes; 9997 9998 // Find acceptable loads. Loads need to have the same chain (token factor), 9999 // must not be zext, volatile, indexed, and they must be consecutive. 
10000 BaseIndexOffset LdBasePtr; 10001 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 10002 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10003 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 10004 if (!Ld) break; 10005 10006 // Loads must only have one use. 10007 if (!Ld->hasNUsesOfValue(1, 0)) 10008 break; 10009 10010 // Check that the alignment is the same as the stores. 10011 if (Ld->getAlignment() != St->getAlignment()) 10012 break; 10013 10014 // The memory operands must not be volatile. 10015 if (Ld->isVolatile() || Ld->isIndexed()) 10016 break; 10017 10018 // We do not accept ext loads. 10019 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 10020 break; 10021 10022 // The stored memory type must be the same. 10023 if (Ld->getMemoryVT() != MemVT) 10024 break; 10025 10026 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 10027 // If this is not the first ptr that we check. 10028 if (LdBasePtr.Base.getNode()) { 10029 // The base ptr must be the same. 10030 if (!LdPtr.equalBaseIndex(LdBasePtr)) 10031 break; 10032 } else { 10033 // Check that all other base pointers are the same as this one. 10034 LdBasePtr = LdPtr; 10035 } 10036 10037 // We found a potential memory operand to merge. 10038 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 10039 } 10040 10041 if (LoadNodes.size() < 2) 10042 return false; 10043 10044 // If we have load/store pair instructions and we only have two values, 10045 // don't bother. 10046 unsigned RequiredAlignment; 10047 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 10048 St->getAlignment() >= RequiredAlignment) 10049 return false; 10050 10051 // Scan the memory operations on the chain and find the first non-consecutive 10052 // load memory address. These variables hold the index in the store node 10053 // array. 10054 unsigned LastConsecutiveLoad = 0; 10055 // This variable refers to the size and not index in the array. 
10056 unsigned LastLegalVectorType = 0; 10057 unsigned LastLegalIntegerType = 0; 10058 StartAddress = LoadNodes[0].OffsetFromBase; 10059 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 10060 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 10061 // All loads much share the same chain. 10062 if (LoadNodes[i].MemNode->getChain() != FirstChain) 10063 break; 10064 10065 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 10066 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 10067 break; 10068 LastConsecutiveLoad = i; 10069 10070 // Find a legal type for the vector store. 10071 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 10072 if (TLI.isTypeLegal(StoreTy)) 10073 LastLegalVectorType = i + 1; 10074 10075 // Find a legal type for the integer store. 10076 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 10077 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10078 if (TLI.isTypeLegal(StoreTy)) 10079 LastLegalIntegerType = i + 1; 10080 // Or check whether a truncstore and extload is legal. 10081 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 10082 TargetLowering::TypePromoteInteger) { 10083 EVT LegalizedStoredValueTy = 10084 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 10085 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 10086 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10087 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10088 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) 10089 LastLegalIntegerType = i+1; 10090 } 10091 } 10092 10093 // Only use vector types if the vector type is larger than the integer type. 10094 // If they are the same, use integers. 
10095 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 10096 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 10097 10098 // We add +1 here because the LastXXX variables refer to location while 10099 // the NumElem refers to array/index size. 10100 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 10101 NumElem = std::min(LastLegalType, NumElem); 10102 10103 if (NumElem < 2) 10104 return false; 10105 10106 // The earliest Node in the DAG. 10107 unsigned EarliestNodeUsed = 0; 10108 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 10109 for (unsigned i=1; i<NumElem; ++i) { 10110 // Find a chain for the new wide-store operand. Notice that some 10111 // of the store nodes that we found may not be selected for inclusion 10112 // in the wide store. The chain we use needs to be the chain of the 10113 // earliest store node which is *used* and replaced by the wide store. 10114 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 10115 EarliestNodeUsed = i; 10116 } 10117 10118 // Find if it is better to use vectors or integers to load and store 10119 // to memory. 
10120 EVT JointMemOpVT; 10121 if (UseVectorTy) { 10122 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 10123 } else { 10124 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 10125 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10126 } 10127 10128 SDLoc LoadDL(LoadNodes[0].MemNode); 10129 SDLoc StoreDL(StoreNodes[0].MemNode); 10130 10131 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 10132 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 10133 FirstLoad->getChain(), 10134 FirstLoad->getBasePtr(), 10135 FirstLoad->getPointerInfo(), 10136 false, false, false, 10137 FirstLoad->getAlignment()); 10138 10139 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 10140 FirstInChain->getBasePtr(), 10141 FirstInChain->getPointerInfo(), false, false, 10142 FirstInChain->getAlignment()); 10143 10144 // Replace one of the loads with the new load. 10145 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 10146 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 10147 SDValue(NewLoad.getNode(), 1)); 10148 10149 // Remove the rest of the load chains. 10150 for (unsigned i = 1; i < NumElem ; ++i) { 10151 // Replace all chain users of the old load nodes with the chain of the new 10152 // load node. 10153 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 10154 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 10155 } 10156 10157 // Replace the first store with the new store. 10158 CombineTo(EarliestOp, NewStore); 10159 // Erase all other stores. 10160 for (unsigned i = 0; i < NumElem ; ++i) { 10161 // Remove all Store nodes. 
    // Skip the store that was chosen as the merge target; CombineTo above
    // already replaced it with the new wide store.
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Route this store's chain users directly to its input chain, then
    // delete the now-dead store node.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}

/// Main combine hook for STORE nodes.  Tries, in order: rewriting a store of
/// a bitcast as a store of the pre-cast value, deleting stores of undef,
/// rewriting FP-constant stores as integer stores, improving the inferred
/// alignment, FP<->integer load/store pairing, alias-analysis based chain
/// improvement, pre/post-indexed forms, demanded-bits narrowing of
/// truncating stores, dead-store elimination, folding FP_ROUND/TRUNCATE into
/// a truncating store, merging consecutive stores (before type legalization
/// only), and finally ReduceLoadOpStoreWidth.  Returns the replacement
/// value, SDValue(N, 0) if N was updated in place, or a null SDValue if no
/// change was made.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not increase
    // the number of stores.  For example, on x86-32 an f64 can be stored in one
    // processor operation but an i64 (which is not legal) requires two.  So the
    // transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        // f32 -> i32 store of the raw bit pattern, when i32 stores are usable.
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        // Prefer a single i64 store of the bit pattern when i64 is usable.
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          // Lo/Hi name the memory order for little-endian; swap for BE.
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          AAMDNodes AAInfo = ST->getAAInfo();

          // Both half-stores hang off the original Chain and are joined by a
          // TokenFactor below, so neither is ordered after the other.
          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), AAInfo);
          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          // The high half is only 4-byte offset aligned at best.
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, AAInfo);
          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  // The -combiner-alias-analysis flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
      // Stop if a merge deleted ST itself.
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

/// Combine INSERT_VECTOR_ELT nodes.  Drops inserts of undef, canonicalizes
/// chains of inserts by constant index, and folds an insert into a
/// BUILD_VECTOR (or undef) operand by rebuilding the vector with the new
/// element in place.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    // Treat an undef vector as a BUILD_VECTOR of all-undef elements.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

/// Replace EVE (an EXTRACT_VECTOR_ELT of OriginalLoad, whose vector type is
/// InVecVT) with a narrow scalar load of just the addressed element.  EltNo
/// may be a constant (the byte offset is folded into the pointer info) or
/// variable (the offset is computed with MUL/SUB nodes in the DAG).  Returns
/// a null SDValue if the alignment or load legality checks fail; on success
/// it performs the use replacement itself and returns SDValue(EVE, 0).
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail out if the element load would need more alignment than the original
  // load provides, or if scalar loads of this type aren't available.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: compute the byte offset at compile time.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    if (TLI.isBigEndian())
      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
    Offset = DAG.getConstant(PtrOff, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: offset = EltNo * element store size (flipped for BE).
    Offset = DAG.getNode(
        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
    if (TLI.isBigEndian())
      Offset = DAG.getNode(
          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-size or narrower result: load the element and truncate/bitcast
    // it to the extract's result type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extract's value and the old load's chain in one step.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  // The combiner recognizes SDValue(EVE, 0) as "node updated in place".
  return SDValue(EVE, 0);
}

/// Combine EXTRACT_VECTOR_ELT nodes.  Folds extracts of SCALAR_TO_VECTOR to
/// the scalar, looks through VECTOR_SHUFFLE and BITCAST operands to find the
/// underlying element, and (with a constant or, pre-legalization, variable
/// index) narrows an extract of a single-use load to a scalar load via
/// ReplaceExtractVectorEltOfLoadWithNarrowedLoad.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
      && ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    // Mask values >= NumElem refer to the shuffle's second operand.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy();
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
                         SVInVec, DAG.getConstant(OrigElt, IndexTy));
    }
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    // Look through the bitcast; track the pre-cast element type.
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // The hasPredecessor check prevents a cycle: the index must not depend on
  // the load being replaced.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                           OrigLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Translate the shuffle index into an index within the chosen input.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every defined operand of the BUILD_VECTOR is an ANY_EXTEND or
// ZERO_EXTEND from one common source type, build a wider BUILD_VECTOR of the
// un-extended values (padding with undef or zero as appropriate) and bitcast
// it to the original vector type.  Runs only between type legalization and
// operation legalization; returns a null SDValue when the pattern or the
// resulting type doesn't work out.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Padding elements: undef if only ANY_EXTENDs were seen, zero otherwise
  // (ZERO_EXTEND requires the high elements to be zero).
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the source value occupies the lowest of the ElemRatio
    // sub-elements; on big-endian the highest.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// If every defined operand of this BUILD_VECTOR is a [SU]INT_TO_FP from the
/// same integer type, convert it to a single [SU]INT_TO_FP applied to a
/// BUILD_VECTOR of the integer operands.  Returns a null SDValue when the
/// operands mix opcodes or source types, fewer than two elements are
/// defined, or the vector conversion is not legal/custom for the target.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE serves as the "no opcode seen yet" sentinel.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  // Integer vector type matching the scalar sources, one lane per operand.
  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Gather the pre-conversion integer operands, keeping undef lanes undef.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  // Apply the int->fp conversion once, to the whole new vector.
  return DAG.getNode(Opcode, dl, VT, BV);
}

/// Combine BUILD_VECTOR nodes: folds an all-undef vector to undef, tries the
/// ext- and convert-folding helpers above, then (continuing beyond this
/// excerpt) attempts to turn a BUILD_VECTOR of EXTRACT_VECTOR_ELTs into a
/// shuffle.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
10934 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 10935 return SDValue(); 10936 10937 SDValue VecIn1, VecIn2; 10938 bool UsesZeroVector = false; 10939 for (unsigned i = 0; i != NumInScalars; ++i) { 10940 SDValue Op = N->getOperand(i); 10941 // Ignore undef inputs. 10942 if (Op.getOpcode() == ISD::UNDEF) continue; 10943 10944 // See if we can combine this build_vector into a blend with a zero vector. 10945 if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && 10946 cast<ConstantSDNode>(Op.getNode())->isNullValue()) || 10947 (Op.getOpcode() == ISD::ConstantFP && 10948 cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { 10949 UsesZeroVector = true; 10950 continue; 10951 } 10952 10953 // If this input is something other than a EXTRACT_VECTOR_ELT with a 10954 // constant index, bail out. 10955 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10956 !isa<ConstantSDNode>(Op.getOperand(1))) { 10957 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10958 break; 10959 } 10960 10961 // We allow up to two distinct input vectors. 10962 SDValue ExtractedFromVec = Op.getOperand(0); 10963 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 10964 continue; 10965 10966 if (!VecIn1.getNode()) { 10967 VecIn1 = ExtractedFromVec; 10968 } else if (!VecIn2.getNode() && !UsesZeroVector) { 10969 VecIn2 = ExtractedFromVec; 10970 } else { 10971 // Too many inputs. 10972 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10973 break; 10974 } 10975 } 10976 10977 // If everything is good, we can make a shuffle operation. 10978 if (VecIn1.getNode()) { 10979 unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); 10980 SmallVector<int, 8> Mask; 10981 for (unsigned i = 0; i != NumInScalars; ++i) { 10982 unsigned Opcode = N->getOperand(i).getOpcode(); 10983 if (Opcode == ISD::UNDEF) { 10984 Mask.push_back(-1); 10985 continue; 10986 } 10987 10988 // Operands can also be zero. 
10989 if (Opcode != ISD::EXTRACT_VECTOR_ELT) { 10990 assert(UsesZeroVector && 10991 (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && 10992 "Unexpected node found!"); 10993 Mask.push_back(NumInScalars+i); 10994 continue; 10995 } 10996 10997 // If extracting from the first vector, just use the index directly. 10998 SDValue Extract = N->getOperand(i); 10999 SDValue ExtVal = Extract.getOperand(1); 11000 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 11001 if (Extract.getOperand(0) == VecIn1) { 11002 Mask.push_back(ExtIndex); 11003 continue; 11004 } 11005 11006 // Otherwise, use InIdx + InputVecSize 11007 Mask.push_back(InNumElements + ExtIndex); 11008 } 11009 11010 // Avoid introducing illegal shuffles with zero. 11011 if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) 11012 return SDValue(); 11013 11014 // We can't generate a shuffle node with mismatched input and output types. 11015 // Attempt to transform a single input vector to the correct type. 11016 if ((VT != VecIn1.getValueType())) { 11017 // If the input vector type has a different base type to the output 11018 // vector type, bail out. 11019 EVT VTElemType = VT.getVectorElementType(); 11020 if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || 11021 (VecIn2.getNode() && 11022 (VecIn2.getValueType().getVectorElementType() != VTElemType))) 11023 return SDValue(); 11024 11025 // If the input vector is too small, widen it. 11026 // We only support widening of vectors which are half the size of the 11027 // output registers. For example XMM->YMM widening on X86 with AVX. 11028 EVT VecInT = VecIn1.getValueType(); 11029 if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { 11030 // If we only have one small input, widen it by adding undef values. 
11031 if (!VecIn2.getNode()) 11032 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, 11033 DAG.getUNDEF(VecIn1.getValueType())); 11034 else if (VecIn1.getValueType() == VecIn2.getValueType()) { 11035 // If we have two small inputs of the same type, try to concat them. 11036 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); 11037 VecIn2 = SDValue(nullptr, 0); 11038 } else 11039 return SDValue(); 11040 } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { 11041 // If the input vector is too large, try to split it. 11042 // We don't support having two input vectors that are too large.
|