X86ISelLowering.cpp (193574) | X86ISelLowering.cpp (193630) |
---|---|
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 7677 unchanged lines hidden (view full) --- 7686 int64_t Offset = 0; 7687 if (TLI.isGAPlusOffset(Base, GV, Offset)) 7688 return (GV->getAlignment() >= N && (Offset % N) == 0); 7689 // DAG combine handles the stack object case. 7690 return false; 7691} 7692 7693static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, | 1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 7677 unchanged lines hidden (view full) --- 7686 int64_t Offset = 0; 7687 if (TLI.isGAPlusOffset(Base, GV, Offset)) 7688 return (GV->getAlignment() >= N && (Offset % N) == 0); 7689 // DAG combine handles the stack object case. 7690 return false; 7691} 7692 7693static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, |
7694 MVT EVT, SDNode *&Base, | 7694 MVT EVT, LoadSDNode *&LDBase, 7695 unsigned &LastLoadedElt, |
7695 SelectionDAG &DAG, MachineFrameInfo *MFI, 7696 const TargetLowering &TLI) { | 7696 SelectionDAG &DAG, MachineFrameInfo *MFI, 7697 const TargetLowering &TLI) { |
7697 Base = NULL; | 7698 LDBase = NULL; 7699 LastLoadedElt = -1; |
7698 for (unsigned i = 0; i < NumElems; ++i) { 7699 if (N->getMaskElt(i) < 0) { | 7700 for (unsigned i = 0; i < NumElems; ++i) { 7701 if (N->getMaskElt(i) < 0) { |
7700 if (!Base) | 7702 if (!LDBase) |
7701 return false; 7702 continue; 7703 } 7704 7705 SDValue Elt = DAG.getShuffleScalarElt(N, i); 7706 if (!Elt.getNode() || 7707 (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) 7708 return false; | 7703 return false; 7704 continue; 7705 } 7706 7707 SDValue Elt = DAG.getShuffleScalarElt(N, i); 7708 if (!Elt.getNode() || 7709 (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) 7710 return false; |
7709 if (!Base) { 7710 Base = Elt.getNode(); 7711 if (Base->getOpcode() == ISD::UNDEF) | 7711 if (!LDBase) { 7712 if (Elt.getNode()->getOpcode() == ISD::UNDEF) |
7712 return false; | 7713 return false; |
7714 LDBase = cast<LoadSDNode>(Elt.getNode()); 7715 LastLoadedElt = i; |
|
7713 continue; 7714 } 7715 if (Elt.getOpcode() == ISD::UNDEF) 7716 continue; 7717 7718 LoadSDNode *LD = cast<LoadSDNode>(Elt); | 7716 continue; 7717 } 7718 if (Elt.getOpcode() == ISD::UNDEF) 7719 continue; 7720 7721 LoadSDNode *LD = cast<LoadSDNode>(Elt); |
7719 LoadSDNode *LDBase = cast<LoadSDNode>(Base); | |
7720 if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) 7721 return false; | 7722 if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) 7723 return false; |
7724 LastLoadedElt = i; |
|
7722 } 7723 return true; 7724} 7725 7726/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 7727/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 7728/// if the load addresses are consecutive, non-overlapping, and in the right 7729/// order. In the case of v2i64, it will see if it can rewrite the 7730/// shuffle to be an appropriate build vector so it can take advantage of 7731// performBuildVectorCombine. 7732static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 7733 const TargetLowering &TLI) { 7734 DebugLoc dl = N->getDebugLoc(); 7735 MVT VT = N->getValueType(0); 7736 MVT EVT = VT.getVectorElementType(); 7737 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7738 unsigned NumElems = VT.getVectorNumElements(); 7739 | 7725 } 7726 return true; 7727} 7728 7729/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 7730/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 7731/// if the load addresses are consecutive, non-overlapping, and in the right 7732/// order. In the case of v2i64, it will see if it can rewrite the 7733/// shuffle to be an appropriate build vector so it can take advantage of 7734// performBuildVectorCombine. 7735static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 7736 const TargetLowering &TLI) { 7737 DebugLoc dl = N->getDebugLoc(); 7738 MVT VT = N->getValueType(0); 7739 MVT EVT = VT.getVectorElementType(); 7740 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7741 unsigned NumElems = VT.getVectorNumElements(); 7742 |
7740 // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 7741 // where the upper half is 0, it is advantageous to rewrite it as a build 7742 // vector of (0, val) so it can use movq. 7743 if (VT == MVT::v2i64) { 7744 SDValue In[2]; 7745 In[0] = N->getOperand(0); 7746 In[1] = N->getOperand(1); 7747 int Idx0 = SVN->getMaskElt(0); 7748 int Idx1 = SVN->getMaskElt(1); 7749 // FIXME: can we take advantage of undef index? 7750 if (Idx0 >= 0 && Idx1 >= 0 && 7751 In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && 7752 In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { 7753 ConstantSDNode* InsertVecIdx = 7754 dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2)); 7755 if (InsertVecIdx && 7756 InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) && 7757 isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) { 7758 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 7759 In[Idx0/2].getOperand(1), 7760 In[Idx1/2].getOperand(Idx1 % 2)); 7761 } 7762 } 7763 } | 7743 if (VT.getSizeInBits() != 128) 7744 return SDValue(); |
7764 7765 // Try to combine a vector_shuffle into a 128-bit load. 7766 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); | 7745 7746 // Try to combine a vector_shuffle into a 128-bit load. 7747 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); |
7767 SDNode *Base = NULL; 7768 if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI)) | 7748 LoadSDNode *LD = NULL; 7749 unsigned LastLoadedElt; 7750 if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG, 7751 MFI, TLI)) |
7769 return SDValue(); 7770 | 7752 return SDValue(); 7753 |
7771 LoadSDNode *LD = cast<LoadSDNode>(Base); 7772 if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI)) | 7754 if (LastLoadedElt == NumElems - 1) { 7755 if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI)) 7756 return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), 7757 LD->getSrcValue(), LD->getSrcValueOffset(), 7758 LD->isVolatile()); |
7773 return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), 7774 LD->getSrcValue(), LD->getSrcValueOffset(), | 7759 return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), 7760 LD->getSrcValue(), LD->getSrcValueOffset(), |
7775 LD->isVolatile()); 7776 return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), 7777 LD->getSrcValue(), LD->getSrcValueOffset(), 7778 LD->isVolatile(), LD->getAlignment()); 7779} 7780 7781/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. 7782static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, 7783 TargetLowering::DAGCombinerInfo &DCI, 7784 const X86Subtarget *Subtarget, 7785 const TargetLowering &TLI) { 7786 unsigned NumOps = N->getNumOperands(); 7787 DebugLoc dl = N->getDebugLoc(); 7788 7789 // Ignore single operand BUILD_VECTOR. 7790 if (NumOps == 1) 7791 return SDValue(); 7792 7793 MVT VT = N->getValueType(0); 7794 MVT EVT = VT.getVectorElementType(); 7795 7796 // Before or during type legalization, we want to try and convert a 7797 // build_vector of an i64 load and a zero value into vzext_movl before the 7798 // legalizer can break it up. 7799 // FIXME: does the case below remove the need to do this? 7800 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) { 7801 if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) 7802 return SDValue(); 7803 7804 // This must be an insertion into a zero vector. 7805 SDValue HighElt = N->getOperand(1); 7806 if (!isZeroNode(HighElt)) 7807 return SDValue(); 7808 7809 // Value must be a load. 7810 SDNode *Base = N->getOperand(0).getNode(); 7811 if (!isa<LoadSDNode>(Base)) { 7812 if (Base->getOpcode() != ISD::BIT_CONVERT) 7813 return SDValue(); 7814 Base = Base->getOperand(0).getNode(); 7815 if (!isa<LoadSDNode>(Base)) 7816 return SDValue(); 7817 } 7818 7819 // Transform it into VZEXT_LOAD addr. 7820 LoadSDNode *LD = cast<LoadSDNode>(Base); 7821 7822 // Load must not be an extload. 7823 if (LD->getExtensionType() != ISD::NON_EXTLOAD) 7824 return SDValue(); 7825 7826 // Load type should legal type so we don't have to legalize it. 7827 if (!TLI.isTypeLegal(VT)) 7828 return SDValue(); 7829 7830 SDVTList Tys = DAG.getVTList(VT, MVT::Other); | 7761 LD->isVolatile(), LD->getAlignment()); 7762 } else if (NumElems == 4 && LastLoadedElt == 1) { 7763 SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); |
7831 SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; 7832 SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); | 7764 SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; 7765 SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); |
7833 TargetLowering::TargetLoweringOpt TLO(DAG); 7834 TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); 7835 DCI.CommitTargetLoweringOpt(TLO); 7836 return ResNode; 7837 } 7838 7839 // The type legalizer will have broken apart v2i64 build_vector created during 7840 // widening before the code which handles that case is run. Look for build 7841 // vector (load, load + 4, 0/undef, 0/undef) 7842 if (VT == MVT::v4i32 || VT == MVT::v4f32) { 7843 LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0)); 7844 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1)); 7845 if (!LD0 || !LD1) 7846 return SDValue(); 7847 if (LD0->getExtensionType() != ISD::NON_EXTLOAD || 7848 LD1->getExtensionType() != ISD::NON_EXTLOAD) 7849 return SDValue(); 7850 // Make sure the second elt is a consecutive load. 7851 if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1, 7852 DAG.getMachineFunction().getFrameInfo())) 7853 return SDValue(); 7854 7855 SDValue N2 = N->getOperand(2); 7856 SDValue N3 = N->getOperand(3); 7857 if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF) 7858 return SDValue(); 7859 if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF) 7860 return SDValue(); 7861 7862 SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); 7863 SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() }; 7864 SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); 7865 TargetLowering::TargetLoweringOpt TLO(DAG); 7866 TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1)); 7867 DCI.CommitTargetLoweringOpt(TLO); | |
7868 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); 7869 } 7870 return SDValue(); 7871} 7872 7873/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 7874static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 7875 const X86Subtarget *Subtarget) { --- 585 unchanged lines hidden (view full) --- 8461 TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8462 if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || 8463 TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) 8464 DCI.CommitTargetLoweringOpt(TLO); 8465 } 8466 return SDValue(); 8467} 8468 | 7766 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); 7767 } 7768 return SDValue(); 7769} 7770 7771/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 7772static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 7773 const X86Subtarget *Subtarget) { --- 585 unchanged lines hidden (view full) --- 8359 TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8360 if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || 8361 TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) 8362 DCI.CommitTargetLoweringOpt(TLO); 8363 } 8364 return SDValue(); 8365} 8366 |
8367static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { 8368 SDValue Op = N->getOperand(0); 8369 if (Op.getOpcode() == ISD::BIT_CONVERT) 8370 Op = Op.getOperand(0); 8371 MVT VT = N->getValueType(0), OpVT = Op.getValueType(); 8372 if (Op.getOpcode() == X86ISD::VZEXT_LOAD && 8373 VT.getVectorElementType().getSizeInBits() == 8374 OpVT.getVectorElementType().getSizeInBits()) { 8375 return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); 8376 } 8377 return SDValue(); 8378} 8379 |
|
8469SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, 8470 DAGCombinerInfo &DCI) const { 8471 SelectionDAG &DAG = DCI.DAG; 8472 switch (N->getOpcode()) { 8473 default: break; 8474 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); | 8380SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, 8381 DAGCombinerInfo &DCI) const { 8382 SelectionDAG &DAG = DCI.DAG; 8383 switch (N->getOpcode()) { 8384 default: break; 8385 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); |
8475 case ISD::BUILD_VECTOR: 8476 return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this); | |
8477 case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); 8478 case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); 8479 case ISD::MUL: return PerformMulCombine(N, DAG, DCI); 8480 case ISD::SHL: 8481 case ISD::SRA: 8482 case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); 8483 case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); 8484 case X86ISD::FXOR: 8485 case X86ISD::FOR: return PerformFORCombine(N, DAG); 8486 case X86ISD::FAND: return PerformFANDCombine(N, DAG); 8487 case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); | 8386 case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); 8387 case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); 8388 case ISD::MUL: return PerformMulCombine(N, DAG, DCI); 8389 case ISD::SHL: 8390 case ISD::SRA: 8391 case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); 8392 case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); 8393 case X86ISD::FXOR: 8394 case X86ISD::FOR: return PerformFORCombine(N, DAG); 8395 case X86ISD::FAND: return PerformFANDCombine(N, DAG); 8396 case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); |
8397 case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); |
|
8488 } 8489 8490 return SDValue(); 8491} 8492 8493//===----------------------------------------------------------------------===// 8494// X86 Inline Assembly Support 8495//===----------------------------------------------------------------------===// --- 378 unchanged lines hidden --- | 8398 } 8399 8400 return SDValue(); 8401} 8402 8403//===----------------------------------------------------------------------===// 8404// X86 Inline Assembly Support 8405//===----------------------------------------------------------------------===// --- 378 unchanged lines hidden --- |