Deleted Added
full compact
7694c7694,7695
< MVT EVT, SDNode *&Base,
---
> MVT EVT, LoadSDNode *&LDBase,
> unsigned &LastLoadedElt,
7697c7698,7699
< Base = NULL;
---
> LDBase = NULL;
> LastLoadedElt = -1;
7700c7702
< if (!Base)
---
> if (!LDBase)
7709,7711c7711,7712
< if (!Base) {
< Base = Elt.getNode();
< if (Base->getOpcode() == ISD::UNDEF)
---
> if (!LDBase) {
> if (Elt.getNode()->getOpcode() == ISD::UNDEF)
7712a7714,7715
> LDBase = cast<LoadSDNode>(Elt.getNode());
> LastLoadedElt = i;
7719d7721
< LoadSDNode *LDBase = cast<LoadSDNode>(Base);
7721a7724
> LastLoadedElt = i;
7740,7763c7743,7744
< // For x86-32 machines, if we see an insert and then a shuffle in a v2i64
< // where the upper half is 0, it is advantageous to rewrite it as a build
< // vector of (0, val) so it can use movq.
< if (VT == MVT::v2i64) {
< SDValue In[2];
< In[0] = N->getOperand(0);
< In[1] = N->getOperand(1);
< int Idx0 = SVN->getMaskElt(0);
< int Idx1 = SVN->getMaskElt(1);
< // FIXME: can we take advantage of undef index?
< if (Idx0 >= 0 && Idx1 >= 0 &&
< In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
< In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
< ConstantSDNode* InsertVecIdx =
< dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
< if (InsertVecIdx &&
< InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) &&
< isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
< return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
< In[Idx0/2].getOperand(1),
< In[Idx1/2].getOperand(Idx1 % 2));
< }
< }
< }
---
> if (VT.getSizeInBits() != 128)
> return SDValue();
7767,7768c7748,7751
< SDNode *Base = NULL;
< if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI))
---
> LoadSDNode *LD = NULL;
> unsigned LastLoadedElt;
> if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG,
> MFI, TLI))
7771,7772c7754,7758
< LoadSDNode *LD = cast<LoadSDNode>(Base);
< if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
---
> if (LastLoadedElt == NumElems - 1) {
> if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
> return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
> LD->getSrcValue(), LD->getSrcValueOffset(),
> LD->isVolatile());
7775,7830c7761,7763
< LD->isVolatile());
< return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
< LD->getSrcValue(), LD->getSrcValueOffset(),
< LD->isVolatile(), LD->getAlignment());
< }
<
< /// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
< static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
< TargetLowering::DAGCombinerInfo &DCI,
< const X86Subtarget *Subtarget,
< const TargetLowering &TLI) {
< unsigned NumOps = N->getNumOperands();
< DebugLoc dl = N->getDebugLoc();
<
< // Ignore single operand BUILD_VECTOR.
< if (NumOps == 1)
< return SDValue();
<
< MVT VT = N->getValueType(0);
< MVT EVT = VT.getVectorElementType();
<
< // Before or during type legalization, we want to try and convert a
< // build_vector of an i64 load and a zero value into vzext_movl before the
< // legalizer can break it up.
< // FIXME: does the case below remove the need to do this?
< if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
< if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
< return SDValue();
<
< // This must be an insertion into a zero vector.
< SDValue HighElt = N->getOperand(1);
< if (!isZeroNode(HighElt))
< return SDValue();
<
< // Value must be a load.
< SDNode *Base = N->getOperand(0).getNode();
< if (!isa<LoadSDNode>(Base)) {
< if (Base->getOpcode() != ISD::BIT_CONVERT)
< return SDValue();
< Base = Base->getOperand(0).getNode();
< if (!isa<LoadSDNode>(Base))
< return SDValue();
< }
<
< // Transform it into VZEXT_LOAD addr.
< LoadSDNode *LD = cast<LoadSDNode>(Base);
<
< // Load must not be an extload.
< if (LD->getExtensionType() != ISD::NON_EXTLOAD)
< return SDValue();
<
< // Load type should legal type so we don't have to legalize it.
< if (!TLI.isTypeLegal(VT))
< return SDValue();
<
< SDVTList Tys = DAG.getVTList(VT, MVT::Other);
---
> LD->isVolatile(), LD->getAlignment());
> } else if (NumElems == 4 && LastLoadedElt == 1) {
> SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
7833,7867d7765
< TargetLowering::TargetLoweringOpt TLO(DAG);
< TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
< DCI.CommitTargetLoweringOpt(TLO);
< return ResNode;
< }
<
< // The type legalizer will have broken apart v2i64 build_vector created during
< // widening before the code which handles that case is run. Look for build
< // vector (load, load + 4, 0/undef, 0/undef)
< if (VT == MVT::v4i32 || VT == MVT::v4f32) {
< LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
< LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
< if (!LD0 || !LD1)
< return SDValue();
< if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
< LD1->getExtensionType() != ISD::NON_EXTLOAD)
< return SDValue();
< // Make sure the second elt is a consecutive load.
< if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
< DAG.getMachineFunction().getFrameInfo()))
< return SDValue();
<
< SDValue N2 = N->getOperand(2);
< SDValue N3 = N->getOperand(3);
< if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
< return SDValue();
< if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
< return SDValue();
<
< SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
< SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
< SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
< TargetLowering::TargetLoweringOpt TLO(DAG);
< TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
< DCI.CommitTargetLoweringOpt(TLO);
8468a8367,8379
> static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
> SDValue Op = N->getOperand(0);
> if (Op.getOpcode() == ISD::BIT_CONVERT)
> Op = Op.getOperand(0);
> MVT VT = N->getValueType(0), OpVT = Op.getValueType();
> if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
> VT.getVectorElementType().getSizeInBits() ==
> OpVT.getVectorElementType().getSizeInBits()) {
> return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
> }
> return SDValue();
> }
>
8475,8476d8385
< case ISD::BUILD_VECTOR:
< return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
8487a8397
> case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);