//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
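  // Thin wrapper around CCState that records whether calling-convention
  // analysis is being performed for a call site or for the function prologue
  // (incoming-argument lowering). Parts of the ARM ABI lowering, such as
  // byval argument handling, behave differently in the two contexts.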
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
               ParmContext PC)
        : CCState(CC, isVarArg, MF, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

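// Configure the legalization rules shared by every NEON vector type: loads,
// stores and bitwise operations are promoted to a common representative type
// so they can share selection patterns, while operations NEON cannot perform
// (division, remainder, general selects, etc.) are marked Expand.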
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT,            VT, Expand);
  setOperationAction(ISD::SELECT_CC,         VT, Expand);
  setOperationAction(ISD::VSELECT,           VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

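// Register a 64-bit vector type in a D register and give it the common NEON
// legalization rules above; addQRTypeForNEON below does the same for 128-bit
// vectors held in D-register pairs.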
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
  Itins = TM.getSubtargetImpl()->getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetMachO()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
      !Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
      const ISD::CondCode Cond;
    } LibraryCalls[] = {
      // Double-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 2
      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Double-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 3
      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Single-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 4
      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Single-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 5
      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Floating-point to integer conversions.
      // RTABI chapter 4.1.2, Table 6
      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Conversions between floating types.
      // RTABI chapter 4.1.2, Table 7
      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer to floating-point conversions.
      // RTABI chapter 4.1.2, Table 8
      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Long long helper functions
      // RTABI chapter 4.2, Table 9
      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer division functions
      // RTABI chapter 4.3.1
      { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Memory operations
      // RTABI chapter 4.3.4
      { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
    };

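    // Register each AEABI helper: install its name and calling convention,
    // and, for the comparison helpers, the condition code against which the
    // libcall's integer result is tested.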
    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }
  }

  if (Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->getTargetTriple().isiOS() &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  // The half <-> float conversion functions are always soft-float, but are
  // needed for some targets which use a hard-float calling convention by
  // default.
  if (Subtarget->isAAPCS_ABI()) {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
  } else {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
  }

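  // Start from conservative defaults for every vector type: no extending
  // loads, no truncating stores, and no widening multiplies. The NEON-legal
  // subset of the extending loads is re-enabled further down.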
  for (MVT VT : MVT::vector_valuetypes()) {
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

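  // Register the legal NEON vector types (64-bit D variants and 128-bit Q
  // variants), then mark the operations that NEON cannot perform, or that
  // need custom lowering, on each of them.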
  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP supports any arithmetic operations on it.
    // The same applies to v4f32, but keep in mind that vadd, vsub and vmul
    // are natively supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way to detect when "copysign" appears in the DAG
    // with vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions.
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have
    // a FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);

    // NEON does not have a single-instruction CTPOP for vectors with element
    // types wider than 8 bits.  However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      for (MVT VT : MVT::integer_vector_valuetypes()) {
        setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
        setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
        setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
      }
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  if (Subtarget->isFPOnlySP()) {
    // When targeting a floating-point unit with only single-precision
    // operations, f64 is legal for the few double-precision instructions
    // which are present. However, no double-precision operations other than
    // moves, loads and stores are provided by the hardware.
    setOperationAction(ISD::FADD,       MVT::f64, Expand);
    setOperationAction(ISD::FSUB,       MVT::f64, Expand);
    setOperationAction(ISD::FMUL,       MVT::f64, Expand);
    setOperationAction(ISD::FMA,        MVT::f64, Expand);
    setOperationAction(ISD::FDIV,       MVT::f64, Expand);
    setOperationAction(ISD::FREM,       MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
    setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);
    setOperationAction(ISD::FNEG,       MVT::f64, Expand);
    setOperationAction(ISD::FABS,       MVT::f64, Expand);
    setOperationAction(ISD::FSQRT,      MVT::f64, Expand);
    setOperationAction(ISD::FSIN,       MVT::f64, Expand);
    setOperationAction(ISD::FCOS,       MVT::f64, Expand);
    setOperationAction(ISD::FPOWI,      MVT::f64, Expand);
    setOperationAction(ISD::FPOW,       MVT::f64, Expand);
    setOperationAction(ISD::FLOG,       MVT::f64, Expand);
    setOperationAction(ISD::FLOG2,      MVT::f64, Expand);
    setOperationAction(ISD::FLOG10,     MVT::f64, Expand);
    setOperationAction(ISD::FEXP,       MVT::f64, Expand);
    setOperationAction(ISD::FEXP2,      MVT::f64, Expand);
    setOperationAction(ISD::FCEIL,      MVT::f64, Expand);
    setOperationAction(ISD::FTRUNC,     MVT::f64, Expand);
    setOperationAction(ISD::FRINT,      MVT::f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
    setOperationAction(ISD::FFLOOR,     MVT::f64, Expand);
    setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);
  }

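  // All register classes and per-type actions are in place; derive the
  // remaining per-type register information from them.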
  computeRegisterProperties();

  // ARM does not have floating-point extending loads.
  for (MVT VT : MVT::fp_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
  }

  // ... or truncating stores
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // ARM does not have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

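  // Custom-lower the overflow-checked arithmetic nodes so the overflow result
  // can be taken directly from the CPSR flags.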
  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
  setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // i64 operation support.
  setOperationAction(ISD::MUL,     MVT::i64, Expand);
  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  }

  // FIXME: Also set divmod for SREM on EABI
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);

  if (!Subtarget->isTargetMachO()) {
    // Non-MachO platforms may return values in these registers via the
    // personality function.
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

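  // Windows requires stack probing for large dynamic stack allocations, so
  // DYNAMIC_STACKALLOC gets custom lowering there (a __chkstk probe call);
  // other targets use the default expansion.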
  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion. If we are targeting a single-threaded system,
  // set them all to Expand so we can lower them later into their
  // non-atomic form.
  if (TM.Options.ThreadModel == ThreadModel::Single)
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
  else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
    // to ldrex/strex loops already.
    setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);

    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasV8Ops()) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      setInsertFencesForAtomic(true);
    }
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }

    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    }
  }

  // Combine sin / cos into one node or libcall if possible.
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    if (Subtarget->getTargetTriple().isiOS()) {
      // For iOS, we don't want the normal expansion of a libcall to sincos;
      // we want to issue a libcall to __sincos_stret instead.
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
    }
  }

  // FP-ARMv8 implements a lot of rounding-like FP operations.
  if (Subtarget->hasFPARMv8()) {
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    setOperationAction(ISD::FRINT, MVT::f32, Legal);
    if (!Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
    }
  }
  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

  // On ARM, arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super-classes'
// representatives transitively. We have not implemented this because of the
// difficulty prior to coalescing of modeling operand register classes due to
// the common occurrence of cross-class copies and subregister insertions and
// extractions.
969212904Sdimstd::pair<const TargetRegisterClass*, uint8_t>
970249423SdimARMTargetLowering::findRepresentativeClass(MVT VT) const{
971276479Sdim  const TargetRegisterClass *RRC = nullptr;
972212904Sdim  uint8_t Cost = 1;
973249423Sdim  switch (VT.SimpleTy) {
974212904Sdim  default:
975212904Sdim    return TargetLowering::findRepresentativeClass(VT);
976212904Sdim  // Use DPR as representative register class for all floating point
977212904Sdim  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
978212904Sdim  // the cost is 1 for both f32 and f64.
979212904Sdim  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
980212904Sdim  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
981239462Sdim    RRC = &ARM::DPRRegClass;
982218893Sdim    // When NEON is used for SP, only half of the register file is available
983218893Sdim    // because operations that define both SP and DP results will be constrained
984218893Sdim    // to the VFP2 class (D0-D15). We currently model this constraint prior to
985218893Sdim    // coalescing by double-counting the SP regs. See the FIXME above.
986218893Sdim    if (Subtarget->useNEONForSinglePrecisionFP())
987218893Sdim      Cost = 2;
988212904Sdim    break;
989212904Sdim  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
990212904Sdim  case MVT::v4f32: case MVT::v2f64:
991239462Sdim    RRC = &ARM::DPRRegClass;
992212904Sdim    Cost = 2;
993212904Sdim    break;
994212904Sdim  case MVT::v4i64:
995239462Sdim    RRC = &ARM::DPRRegClass;
996212904Sdim    Cost = 4;
997212904Sdim    break;
998212904Sdim  case MVT::v8i64:
999239462Sdim    RRC = &ARM::DPRRegClass;
1000212904Sdim    Cost = 8;
1001212904Sdim    break;
1002212904Sdim  }
1003212904Sdim  return std::make_pair(RRC, Cost);
1004212904Sdim}
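// Illustrative example for the costs above: MVT::v2f64 occupies one Q
// register, i.e. two consecutive D registers, so its representative class is
// DPR with a cost of 2; likewise v4i64 and v8i64 span four and eight D
// registers and cost 4 and 8 respectively.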
1005212904Sdim
1006193323Sedconst char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1007193323Sed  switch (Opcode) {
1008276479Sdim  default: return nullptr;
1009193323Sed  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
1010218893Sdim  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
1011193323Sed  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
1012193323Sed  case ARMISD::CALL:          return "ARMISD::CALL";
1013193323Sed  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
1014193323Sed  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
1015193323Sed  case ARMISD::tCALL:         return "ARMISD::tCALL";
1016193323Sed  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
1017193323Sed  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
1018198090Srdivacky  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
1019193323Sed  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
1020261991Sdim  case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
1021193323Sed  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
1022193323Sed  case ARMISD::CMP:           return "ARMISD::CMP";
1023239462Sdim  case ARMISD::CMN:           return "ARMISD::CMN";
1024195340Sed  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
1025193323Sed  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
1026193323Sed  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
1027210299Sed  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
1028193323Sed  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
1029234353Sdim
1030193323Sed  case ARMISD::CMOV:          return "ARMISD::CMOV";
1031193323Sed
1032202878Srdivacky  case ARMISD::RBIT:          return "ARMISD::RBIT";
1033202878Srdivacky
1034193323Sed  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
1035193323Sed  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
1036193323Sed  case ARMISD::SITOF:         return "ARMISD::SITOF";
1037193323Sed  case ARMISD::UITOF:         return "ARMISD::UITOF";
1038193323Sed
1039193323Sed  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
1040193323Sed  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
1041193323Sed  case ARMISD::RRX:           return "ARMISD::RRX";
1042193323Sed
1043226633Sdim  case ARMISD::ADDC:          return "ARMISD::ADDC";
1044226633Sdim  case ARMISD::ADDE:          return "ARMISD::ADDE";
1045226633Sdim  case ARMISD::SUBC:          return "ARMISD::SUBC";
1046226633Sdim  case ARMISD::SUBE:          return "ARMISD::SUBE";
1047226633Sdim
1048218893Sdim  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
1049218893Sdim  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
1050193323Sed
1051198892Srdivacky  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1052198892Srdivacky  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
1053198892Srdivacky
1054210299Sed  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
1055218893Sdim
1056193323Sed  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1057194710Sed
1058198090Srdivacky  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
1059198090Srdivacky
1060218893Sdim  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1061200581Srdivacky
1062218893Sdim  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
1063218893Sdim
1064276479Sdim  case ARMISD::WIN__CHKSTK:   return "ARMISD::WIN__CHKSTK";
1065276479Sdim
1066194710Sed  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
1067218893Sdim  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
1068194710Sed  case ARMISD::VCGE:          return "ARMISD::VCGE";
1069218893Sdim  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
1070218893Sdim  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
1071194710Sed  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
1072194710Sed  case ARMISD::VCGT:          return "ARMISD::VCGT";
1073218893Sdim  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
1074218893Sdim  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
1075194710Sed  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
1076194710Sed  case ARMISD::VTST:          return "ARMISD::VTST";
1077194710Sed
1078194710Sed  case ARMISD::VSHL:          return "ARMISD::VSHL";
1079194710Sed  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
1080194710Sed  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
1081194710Sed  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
1082194710Sed  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
1083194710Sed  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
1084194710Sed  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
1085194710Sed  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
1086194710Sed  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
1087194710Sed  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
1088194710Sed  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
1089194710Sed  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
1090194710Sed  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
1091194710Sed  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
1092194710Sed  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
1093194710Sed  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
1094194710Sed  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
1095210299Sed  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
1096210299Sed  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
1097234353Sdim  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
1098198090Srdivacky  case ARMISD::VDUP:          return "ARMISD::VDUP";
1099198090Srdivacky  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
1100198090Srdivacky  case ARMISD::VEXT:          return "ARMISD::VEXT";
1101198090Srdivacky  case ARMISD::VREV64:        return "ARMISD::VREV64";
1102198090Srdivacky  case ARMISD::VREV32:        return "ARMISD::VREV32";
1103198090Srdivacky  case ARMISD::VREV16:        return "ARMISD::VREV16";
1104198090Srdivacky  case ARMISD::VZIP:          return "ARMISD::VZIP";
1105198090Srdivacky  case ARMISD::VUZP:          return "ARMISD::VUZP";
1106198090Srdivacky  case ARMISD::VTRN:          return "ARMISD::VTRN";
1107221345Sdim  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
1108221345Sdim  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
1109212904Sdim  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
1110212904Sdim  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
1111243830Sdim  case ARMISD::UMLAL:         return "ARMISD::UMLAL";
1112243830Sdim  case ARMISD::SMLAL:         return "ARMISD::SMLAL";
1113210299Sed  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
1114204642Srdivacky  case ARMISD::FMAX:          return "ARMISD::FMAX";
1115204642Srdivacky  case ARMISD::FMIN:          return "ARMISD::FMIN";
1116261991Sdim  case ARMISD::VMAXNM:        return "ARMISD::VMAXNM";
1117261991Sdim  case ARMISD::VMINNM:        return "ARMISD::VMINNM";
1118212904Sdim  case ARMISD::BFI:           return "ARMISD::BFI";
1119218893Sdim  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
1120218893Sdim  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
1121221345Sdim  case ARMISD::VBSL:          return "ARMISD::VBSL";
1122218893Sdim  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
1123218893Sdim  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
1124218893Sdim  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
1125218893Sdim  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
1126218893Sdim  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
1127218893Sdim  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
1128218893Sdim  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
1129218893Sdim  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
1130218893Sdim  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
1131218893Sdim  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
1132218893Sdim  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
1133218893Sdim  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
1134218893Sdim  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
1135218893Sdim  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
1136218893Sdim  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
1137218893Sdim  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
1138218893Sdim  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
1139218893Sdim  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
1140218893Sdim  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
1141218893Sdim  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
1142193323Sed  }
1143193323Sed}
1144193323Sed
1145261991SdimEVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
1146226633Sdim  if (!VT.isVector()) return getPointerTy();
1147226633Sdim  return VT.changeVectorElementTypeToInteger();
1148226633Sdim}
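// Illustrative example: a setcc over v4f32 operands yields a v4i32 result
// (an element-wise mask), while a scalar compare yields a value of pointer
// width, i.e. i32 on ARM.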
1149226633Sdim
1150208599Srdivacky/// getRegClassFor - Return the register class that should be used for the
1151208599Srdivacky/// specified value type.
1152249423Sdimconst TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1153208599Srdivacky  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1154208599Srdivacky  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1155208599Srdivacky  // load / store 4 to 8 consecutive D registers.
1156208599Srdivacky  if (Subtarget->hasNEON()) {
1157208599Srdivacky    if (VT == MVT::v4i64)
1158239462Sdim      return &ARM::QQPRRegClass;
1159239462Sdim    if (VT == MVT::v8i64)
1160239462Sdim      return &ARM::QQQQPRRegClass;
1161208599Srdivacky  }
1162208599Srdivacky  return TargetLowering::getRegClassFor(VT);
1163208599Srdivacky}
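// Illustrative note: a v4i64 value is 256 bits, i.e. four consecutive D
// registers, which is exactly what QQPR models; because the type remains
// illegal, such values only appear inside the REG_SEQUENCE uses described
// above.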
1164208599Srdivacky
1165212904Sdim// Create a fast isel object.
1166212904SdimFastISel *
1167239462SdimARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1168239462Sdim                                  const TargetLibraryInfo *libInfo) const {
1169239462Sdim  return ARM::createFastISel(funcInfo, libInfo);
1170212904Sdim}
1171212904Sdim
1172212904Sdim/// getMaximalGlobalOffset - Returns the maximal possible offset which can
1173212904Sdim/// be used for loads / stores from the global.
1174212904Sdimunsigned ARMTargetLowering::getMaximalGlobalOffset() const {
1175212904Sdim  return (Subtarget->isThumb1Only() ? 127 : 4095);
1176212904Sdim}
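// Illustrative note (the encodings here are assumptions, not taken from this
// file): 4095 corresponds to the 12-bit immediate offset of ARM-mode LDR/STR,
// while 127 is a conservative bound for the much smaller Thumb1 immediate
// offsets.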
1177212904Sdim
1178208599SrdivackySched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1179210299Sed  unsigned NumVals = N->getNumValues();
1180210299Sed  if (!NumVals)
1181210299Sed    return Sched::RegPressure;
1182210299Sed
1183210299Sed  for (unsigned i = 0; i != NumVals; ++i) {
1184208599Srdivacky    EVT VT = N->getValueType(i);
1185218893Sdim    if (VT == MVT::Glue || VT == MVT::Other)
1186218893Sdim      continue;
1187208599Srdivacky    if (VT.isFloatingPoint() || VT.isVector())
1188234353Sdim      return Sched::ILP;
1189208599Srdivacky  }
1190210299Sed
1191210299Sed  if (!N->isMachineOpcode())
1192210299Sed    return Sched::RegPressure;
1193210299Sed
1194210299Sed  // Loads are scheduled for latency even if the instruction itinerary
1195210299Sed  // is not available.
1196280031Sdim  const TargetInstrInfo *TII =
1197280031Sdim      getTargetMachine().getSubtargetImpl()->getInstrInfo();
1198224145Sdim  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1199218893Sdim
1200224145Sdim  if (MCID.getNumDefs() == 0)
1201218893Sdim    return Sched::RegPressure;
1202218893Sdim  if (!Itins->isEmpty() &&
1203224145Sdim      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1204234353Sdim    return Sched::ILP;
1205210299Sed
1206208599Srdivacky  return Sched::RegPressure;
1207208599Srdivacky}
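// Illustrative example: a machine node producing an f64 or vector result is
// scheduled for ILP by the checks above, while a plain i32 ALU node with a
// short operand latency falls through to register-pressure scheduling.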
1208208599Srdivacky
1209193323Sed//===----------------------------------------------------------------------===//
1210193323Sed// Lowering Code
1211193323Sed//===----------------------------------------------------------------------===//
1212193323Sed
1213193323Sed/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1214193323Sedstatic ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1215193323Sed  switch (CC) {
1216198090Srdivacky  default: llvm_unreachable("Unknown condition code!");
1217193323Sed  case ISD::SETNE:  return ARMCC::NE;
1218193323Sed  case ISD::SETEQ:  return ARMCC::EQ;
1219193323Sed  case ISD::SETGT:  return ARMCC::GT;
1220193323Sed  case ISD::SETGE:  return ARMCC::GE;
1221193323Sed  case ISD::SETLT:  return ARMCC::LT;
1222193323Sed  case ISD::SETLE:  return ARMCC::LE;
1223193323Sed  case ISD::SETUGT: return ARMCC::HI;
1224193323Sed  case ISD::SETUGE: return ARMCC::HS;
1225193323Sed  case ISD::SETULT: return ARMCC::LO;
1226193323Sed  case ISD::SETULE: return ARMCC::LS;
1227193323Sed  }
1228193323Sed}
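// Illustrative example: for IR such as
//   %c = icmp ult i32 %a, %b
// the DAG carries ISD::SETULT, which the table above maps to ARMCC::LO, so a
// compare of the operands can be followed by LO-predicated users.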
1229193323Sed
1230198090Srdivacky/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1231198090Srdivackystatic void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1232193323Sed                        ARMCC::CondCodes &CondCode2) {
1233193323Sed  CondCode2 = ARMCC::AL;
1234193323Sed  switch (CC) {
1235198090Srdivacky  default: llvm_unreachable("Unknown FP condition!");
1236193323Sed  case ISD::SETEQ:
1237193323Sed  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1238193323Sed  case ISD::SETGT:
1239193323Sed  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1240193323Sed  case ISD::SETGE:
1241193323Sed  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1242193323Sed  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1243198090Srdivacky  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1244193323Sed  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1245193323Sed  case ISD::SETO:   CondCode = ARMCC::VC; break;
1246193323Sed  case ISD::SETUO:  CondCode = ARMCC::VS; break;
1247193323Sed  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1248193323Sed  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1249193323Sed  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1250193323Sed  case ISD::SETLT:
1251193323Sed  case ISD::SETULT: CondCode = ARMCC::LT; break;
1252193323Sed  case ISD::SETLE:
1253193323Sed  case ISD::SETULE: CondCode = ARMCC::LE; break;
1254193323Sed  case ISD::SETNE:
1255193323Sed  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1256193323Sed  }
1257193323Sed}
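// Illustrative example: an ordered not-equal compare (ISD::SETONE) has no
// single ARM condition, so the table above produces two codes (MI and GT);
// callers that see CondCode2 != AL emit a second predicated check for the
// same compare.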
1258193323Sed
1259193323Sed//===----------------------------------------------------------------------===//
1260193323Sed//                      Calling Convention Implementation
1261193323Sed//===----------------------------------------------------------------------===//
1262193323Sed
1263193323Sed#include "ARMGenCallingConv.inc"
1264193323Sed
1265276479Sdim/// getEffectiveCallingConv - Get the effective calling convention, taking into
1266276479Sdim/// account presence of floating point hardware and calling convention
1267276479Sdim/// limitations, such as support for variadic functions.
1268276479SdimCallingConv::ID
1269276479SdimARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1270276479Sdim                                           bool isVarArg) const {
1271194612Sed  switch (CC) {
1272194612Sed  default:
1273198090Srdivacky    llvm_unreachable("Unsupported calling convention");
1274276479Sdim  case CallingConv::ARM_AAPCS:
1275276479Sdim  case CallingConv::ARM_APCS:
1276276479Sdim  case CallingConv::GHC:
1277276479Sdim    return CC;
1278276479Sdim  case CallingConv::ARM_AAPCS_VFP:
1279276479Sdim    return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1280276479Sdim  case CallingConv::C:
1281218893Sdim    if (!Subtarget->isAAPCS_ABI())
1282276479Sdim      return CallingConv::ARM_APCS;
1283276479Sdim    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1284234353Sdim             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1285234353Sdim             !isVarArg)
1286276479Sdim      return CallingConv::ARM_AAPCS_VFP;
1287276479Sdim    else
1288276479Sdim      return CallingConv::ARM_AAPCS;
1289276479Sdim  case CallingConv::Fast:
1290276479Sdim    if (!Subtarget->isAAPCS_ABI()) {
1291276479Sdim      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1292276479Sdim        return CallingConv::Fast;
1293276479Sdim      return CallingConv::ARM_APCS;
1294276479Sdim    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1295276479Sdim      return CallingConv::ARM_AAPCS_VFP;
1296276479Sdim    else
1297276479Sdim      return CallingConv::ARM_AAPCS;
1298218893Sdim  }
1299276479Sdim}
1300276479Sdim
1301276479Sdim/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1302276479Sdim/// CallingConvention.
1303276479SdimCCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1304276479Sdim                                                 bool Return,
1305276479Sdim                                                 bool isVarArg) const {
1306276479Sdim  switch (getEffectiveCallingConv(CC, isVarArg)) {
1307276479Sdim  default:
1308276479Sdim    llvm_unreachable("Unsupported calling convention");
1309276479Sdim  case CallingConv::ARM_APCS:
1310276479Sdim    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1311194612Sed  case CallingConv::ARM_AAPCS:
1312218893Sdim    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1313276479Sdim  case CallingConv::ARM_AAPCS_VFP:
1314276479Sdim    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1315276479Sdim  case CallingConv::Fast:
1316276479Sdim    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1317239462Sdim  case CallingConv::GHC:
1318239462Sdim    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1319194612Sed  }
1320194612Sed}
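// Illustrative example: a non-variadic CallingConv::C call on a non-Thumb1
// AAPCS target with VFP2 and a hard-float ABI resolves, via
// getEffectiveCallingConv, to ARM_AAPCS_VFP, so arguments are assigned by
// CC_ARM_AAPCS_VFP and results by RetCC_ARM_AAPCS_VFP.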
1321194612Sed
1322198090Srdivacky/// LowerCallResult - Lower the result values of a call into the
1323198090Srdivacky/// appropriate copies out of appropriate physical registers.
1324198090SrdivackySDValue
1325198090SrdivackyARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1326198090Srdivacky                                   CallingConv::ID CallConv, bool isVarArg,
1327198090Srdivacky                                   const SmallVectorImpl<ISD::InputArg> &Ins,
1328261991Sdim                                   SDLoc dl, SelectionDAG &DAG,
1329251662Sdim                                   SmallVectorImpl<SDValue> &InVals,
1330251662Sdim                                   bool isThisReturn, SDValue ThisVal) const {
1331193323Sed
1332193323Sed  // Assign locations to each value returned by this call.
1333193323Sed  SmallVector<CCValAssign, 16> RVLocs;
1334280031Sdim  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1335280031Sdim                    *DAG.getContext(), Call);
1336198090Srdivacky  CCInfo.AnalyzeCallResult(Ins,
1337198090Srdivacky                           CCAssignFnForNode(CallConv, /* Return*/ true,
1338198090Srdivacky                                             isVarArg));
1339193323Sed
1340193323Sed  // Copy all of the result registers out of their specified physreg.
1341193323Sed  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1342193323Sed    CCValAssign VA = RVLocs[i];
1343193323Sed
1344251662Sdim    // Pass the 'this' value directly from the argument to the return value,
1345251662Sdim    // to avoid register unit interference.
1346251662Sdim    if (i == 0 && isThisReturn) {
1347251662Sdim      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1348251662Sdim             "unexpected return calling convention register assignment");
1349251662Sdim      InVals.push_back(ThisVal);
1350251662Sdim      continue;
1351251662Sdim    }
1352251662Sdim
1353193323Sed    SDValue Val;
1354193323Sed    if (VA.needsCustom()) {
1355194710Sed      // Handle f64 or half of a v2f64.
1356193323Sed      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1357193323Sed                                      InFlag);
1358193323Sed      Chain = Lo.getValue(1);
1359193323Sed      InFlag = Lo.getValue(2);
1360193323Sed      VA = RVLocs[++i]; // skip ahead to next loc
1361193323Sed      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1362193323Sed                                      InFlag);
1363193323Sed      Chain = Hi.getValue(1);
1364193323Sed      InFlag = Hi.getValue(2);
1365276479Sdim      if (!Subtarget->isLittle())
1366276479Sdim        std::swap (Lo, Hi);
1367199481Srdivacky      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1368194710Sed
1369194710Sed      if (VA.getLocVT() == MVT::v2f64) {
1370194710Sed        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1371194710Sed        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1372194710Sed                          DAG.getConstant(0, MVT::i32));
1373194710Sed
1374194710Sed        VA = RVLocs[++i]; // skip ahead to next loc
1375194710Sed        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1376194710Sed        Chain = Lo.getValue(1);
1377194710Sed        InFlag = Lo.getValue(2);
1378194710Sed        VA = RVLocs[++i]; // skip ahead to next loc
1379194710Sed        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1380194710Sed        Chain = Hi.getValue(1);
1381194710Sed        InFlag = Hi.getValue(2);
1382276479Sdim        if (!Subtarget->isLittle())
1383276479Sdim          std::swap (Lo, Hi);
1384199481Srdivacky        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1385194710Sed        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1386194710Sed                          DAG.getConstant(1, MVT::i32));
1387194710Sed      }
1388193323Sed    } else {
1389193323Sed      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1390193323Sed                               InFlag);
1391193323Sed      Chain = Val.getValue(1);
1392193323Sed      InFlag = Val.getValue(2);
1393193323Sed    }
1394193323Sed
1395193323Sed    switch (VA.getLocInfo()) {
1396198090Srdivacky    default: llvm_unreachable("Unknown loc info!");
1397193323Sed    case CCValAssign::Full: break;
1398193323Sed    case CCValAssign::BCvt:
1399218893Sdim      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1400193323Sed      break;
1401193323Sed    }
1402193323Sed
1403198090Srdivacky    InVals.push_back(Val);
1404193323Sed  }
1405193323Sed
1406198090Srdivacky  return Chain;
1407193323Sed}
1408193323Sed
1409193323Sed/// LowerMemOpCallTo - Store the argument to the stack.
1410193323SedSDValue
1411198090SrdivackyARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1412198090Srdivacky                                    SDValue StackPtr, SDValue Arg,
1413261991Sdim                                    SDLoc dl, SelectionDAG &DAG,
1414198090Srdivacky                                    const CCValAssign &VA,
1415207618Srdivacky                                    ISD::ArgFlagsTy Flags) const {
1416193323Sed  unsigned LocMemOffset = VA.getLocMemOffset();
1417193323Sed  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1418193323Sed  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1419193323Sed  return DAG.getStore(Chain, dl, Arg, PtrOff,
1420218893Sdim                      MachinePointerInfo::getStack(LocMemOffset),
1421203954Srdivacky                      false, false, 0);
1422193323Sed}
1423193323Sed
1424261991Sdimvoid ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
1425194710Sed                                         SDValue Chain, SDValue &Arg,
1426194710Sed                                         RegsToPassVector &RegsToPass,
1427194710Sed                                         CCValAssign &VA, CCValAssign &NextVA,
1428194710Sed                                         SDValue &StackPtr,
1429261991Sdim                                         SmallVectorImpl<SDValue> &MemOpChains,
1430207618Srdivacky                                         ISD::ArgFlagsTy Flags) const {
1431194710Sed
1432199481Srdivacky  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1433194710Sed                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1434276479Sdim  unsigned id = Subtarget->isLittle() ? 0 : 1;
1435276479Sdim  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1436194710Sed
1437194710Sed  if (NextVA.isRegLoc())
1438276479Sdim    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1439194710Sed  else {
1440194710Sed    assert(NextVA.isMemLoc());
1441276479Sdim    if (!StackPtr.getNode())
1442194710Sed      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1443194710Sed
1444276479Sdim    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1445198090Srdivacky                                           dl, DAG, NextVA,
1446198090Srdivacky                                           Flags));
1447194710Sed  }
1448194710Sed}
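// Illustrative example: when an f64 is passed in GPRs, the VMOVRRD above
// splits it into two i32 halves; for the first such argument that typically
// means r0/r1, with the half order swapped via "id" on big-endian targets.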
1449194710Sed
1450198090Srdivacky/// LowerCall - Lower a call into a callseq_start <-
1451193323Sed/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1452193323Sed/// nodes.
1453198090SrdivackySDValue
1454239462SdimARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1455207618Srdivacky                             SmallVectorImpl<SDValue> &InVals) const {
1456239462Sdim  SelectionDAG &DAG                     = CLI.DAG;
1457261991Sdim  SDLoc &dl                          = CLI.DL;
1458261991Sdim  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1459261991Sdim  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
1460261991Sdim  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
1461239462Sdim  SDValue Chain                         = CLI.Chain;
1462239462Sdim  SDValue Callee                        = CLI.Callee;
1463239462Sdim  bool &isTailCall                      = CLI.IsTailCall;
1464239462Sdim  CallingConv::ID CallConv              = CLI.CallConv;
1465239462Sdim  bool doesNotRet                       = CLI.DoesNotReturn;
1466239462Sdim  bool isVarArg                         = CLI.IsVarArg;
1467239462Sdim
1468210299Sed  MachineFunction &MF = DAG.getMachineFunction();
1469251662Sdim  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1470251662Sdim  bool isThisReturn   = false;
1471251662Sdim  bool isSibCall      = false;
1472276479Sdim
1473226633Sdim  // Disable tail calls if they're not supported.
1474276479Sdim  if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
1475210299Sed    isTailCall = false;
1476276479Sdim
1477210299Sed  if (isTailCall) {
1478210299Sed    // Check if it's really possible to do a tail call.
1479210299Sed    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1480251662Sdim                    isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1481210299Sed                                                   Outs, OutVals, Ins, DAG);
1482276479Sdim    if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1483276479Sdim      report_fatal_error("failed to perform tail call elimination on a call "
1484276479Sdim                         "site marked musttail");
1485210299Sed    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1486210299Sed    // detected sibcalls.
1487210299Sed    if (isTailCall) {
1488210299Sed      ++NumTailCalls;
1489251662Sdim      isSibCall = true;
1490210299Sed    }
1491210299Sed  }
1492193323Sed
1493193323Sed  // Analyze operands of the call, assigning locations to each operand.
1494193323Sed  SmallVector<CCValAssign, 16> ArgLocs;
1495280031Sdim  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1496280031Sdim                    *DAG.getContext(), Call);
1497198090Srdivacky  CCInfo.AnalyzeCallOperands(Outs,
1498198090Srdivacky                             CCAssignFnForNode(CallConv, /* Return*/ false,
1499198090Srdivacky                                               isVarArg));
1500193323Sed
1501193323Sed  // Get a count of how many bytes are to be pushed on the stack.
1502193323Sed  unsigned NumBytes = CCInfo.getNextStackOffset();
1503193323Sed
1504210299Sed  // For tail calls, memory operands are available in our caller's stack.
1505251662Sdim  if (isSibCall)
1506210299Sed    NumBytes = 0;
1507210299Sed
1508193323Sed  // Adjust the stack pointer for the new arguments...
1509193323Sed  // These operations are automatically eliminated by the prolog/epilog pass
1510251662Sdim  if (!isSibCall)
1511261991Sdim    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
1512261991Sdim                                 dl);
1513193323Sed
1514204642Srdivacky  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1515193323Sed
1516194710Sed  RegsToPassVector RegsToPass;
1517193323Sed  SmallVector<SDValue, 8> MemOpChains;
1518193323Sed
1519193323Sed  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1520193323Sed  // of tail call optimization, arguments are handled later.
1521193323Sed  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1522193323Sed       i != e;
1523193323Sed       ++i, ++realArgIdx) {
1524193323Sed    CCValAssign &VA = ArgLocs[i];
1525210299Sed    SDValue Arg = OutVals[realArgIdx];
1526198090Srdivacky    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1527221345Sdim    bool isByVal = Flags.isByVal();
1528193323Sed
1529193323Sed    // Promote the value if needed.
1530193323Sed    switch (VA.getLocInfo()) {
1531198090Srdivacky    default: llvm_unreachable("Unknown loc info!");
1532193323Sed    case CCValAssign::Full: break;
1533193323Sed    case CCValAssign::SExt:
1534193323Sed      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1535193323Sed      break;
1536193323Sed    case CCValAssign::ZExt:
1537193323Sed      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1538193323Sed      break;
1539193323Sed    case CCValAssign::AExt:
1540193323Sed      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1541193323Sed      break;
1542193323Sed    case CCValAssign::BCvt:
1543218893Sdim      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1544193323Sed      break;
1545193323Sed    }
1546193323Sed
1547198090Srdivacky    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1548193323Sed    if (VA.needsCustom()) {
1549194710Sed      if (VA.getLocVT() == MVT::v2f64) {
1550194710Sed        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1551194710Sed                                  DAG.getConstant(0, MVT::i32));
1552194710Sed        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1553194710Sed                                  DAG.getConstant(1, MVT::i32));
1554193323Sed
1555198090Srdivacky        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1556194710Sed                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1557194710Sed
1558194710Sed        VA = ArgLocs[++i]; // skip ahead to next loc
1559194710Sed        if (VA.isRegLoc()) {
1560198090Srdivacky          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1561194710Sed                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1562194710Sed        } else {
1563194710Sed          assert(VA.isMemLoc());
1564194710Sed
1565198090Srdivacky          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1566198090Srdivacky                                                 dl, DAG, VA, Flags));
1567194710Sed        }
1568194710Sed      } else {
1569198090Srdivacky        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1570194710Sed                         StackPtr, MemOpChains, Flags);
1571193323Sed      }
1572193323Sed    } else if (VA.isRegLoc()) {
1573251662Sdim      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
1574251662Sdim        assert(VA.getLocVT() == MVT::i32 &&
1575251662Sdim               "unexpected calling convention register assignment");
1576251662Sdim        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1577251662Sdim               "unexpected use of 'returned'");
1578251662Sdim        isThisReturn = true;
1579251662Sdim      }
1580193323Sed      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1581221345Sdim    } else if (isByVal) {
1582221345Sdim      assert(VA.isMemLoc());
1583221345Sdim      unsigned offset = 0;
1584221345Sdim
1585221345Sdim      // True if this byval aggregate will be split between registers
1586221345Sdim      // and memory.
1587251662Sdim      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1588277320Sdim      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1589251662Sdim
1590251662Sdim      if (CurByValIdx < ByValArgsCount) {
1591251662Sdim
1592251662Sdim        unsigned RegBegin, RegEnd;
1593251662Sdim        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1594251662Sdim
1595221345Sdim        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1596221345Sdim        unsigned int i, j;
1597251662Sdim        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1598221345Sdim          SDValue Const = DAG.getConstant(4*i, MVT::i32);
1599221345Sdim          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1600221345Sdim          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1601221345Sdim                                     MachinePointerInfo(),
1602261991Sdim                                     false, false, false,
1603261991Sdim                                     DAG.InferPtrAlignment(AddArg));
1604221345Sdim          MemOpChains.push_back(Load.getValue(1));
1605221345Sdim          RegsToPass.push_back(std::make_pair(j, Load));
1606221345Sdim        }
1607251662Sdim
1608251662Sdim        // If the parameter size exceeds the register area, the "offset" value
1609251662Sdim        // helps us calculate the stack slot for the remaining part properly.
1610251662Sdim        offset = RegEnd - RegBegin;
1611251662Sdim
1612251662Sdim        CCInfo.nextInRegsParam();
1613221345Sdim      }
1614221345Sdim
1615251662Sdim      if (Flags.getByValSize() > 4*offset) {
1616239462Sdim        unsigned LocMemOffset = VA.getLocMemOffset();
1617239462Sdim        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
1618239462Sdim        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
1619239462Sdim                                  StkPtrOff);
1620239462Sdim        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
1621239462Sdim        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
1622239462Sdim        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
1623239462Sdim                                           MVT::i32);
1624239462Sdim        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
1625221345Sdim
1626239462Sdim        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1627239462Sdim        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1628239462Sdim        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1629276479Sdim                                          Ops));
1630239462Sdim      }
1631251662Sdim    } else if (!isSibCall) {
1632193323Sed      assert(VA.isMemLoc());
1633193323Sed
1634198090Srdivacky      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1635198090Srdivacky                                             dl, DAG, VA, Flags));
1636193323Sed    }
1637193323Sed  }
1638193323Sed
1639193323Sed  if (!MemOpChains.empty())
1640276479Sdim    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1641193323Sed
1642193323Sed  // Build a sequence of copy-to-reg nodes chained together with token chain
1643193323Sed  // and flag operands which copy the outgoing args into the appropriate regs.
1644193323Sed  SDValue InFlag;
1645210299Sed  // Tail call byval lowering might overwrite argument registers, so in case
1646210299Sed  // of tail call optimization the copies to registers are lowered later.
1647210299Sed  if (!isTailCall)
1648210299Sed    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1649210299Sed      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1650210299Sed                               RegsToPass[i].second, InFlag);
1651210299Sed      InFlag = Chain.getValue(1);
1652210299Sed    }
1653210299Sed
1654210299Sed  // For tail calls lower the arguments to the 'real' stack slot.
1655210299Sed  if (isTailCall) {
1656210299Sed    // Force all the incoming stack arguments to be loaded from the stack
1657210299Sed    // before any new outgoing arguments are stored to the stack, because the
1658210299Sed    // outgoing stack slots may alias the incoming argument stack slots, and
1659210299Sed    // the alias isn't otherwise explicit. This is slightly more conservative
1660210299Sed    // than necessary, because it means that each store effectively depends
1661210299Sed    // on every argument instead of just those arguments it would clobber.
1662210299Sed
1663221345Sdim    // Do not glue the preceding CopyToReg nodes to the nodes that follow.
1664210299Sed    InFlag = SDValue();
1665210299Sed    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1666210299Sed      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1667210299Sed                               RegsToPass[i].second, InFlag);
1668210299Sed      InFlag = Chain.getValue(1);
1669210299Sed    }
1670251662Sdim    InFlag = SDValue();
1671193323Sed  }
1672193323Sed
1673193323Sed  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1674193323Sed  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1675193323Sed  // node so that legalize doesn't hack it.
1676193323Sed  bool isDirect = false;
1677193323Sed  bool isARMFunc = false;
1678193323Sed  bool isLocalARMFunc = false;
1679199481Srdivacky  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1680207618Srdivacky
1681207618Srdivacky  if (EnableARMLongCalls) {
1682276479Sdim    assert((Subtarget->isTargetWindows() ||
1683276479Sdim            getTargetMachine().getRelocationModel() == Reloc::Static) &&
1684276479Sdim           "long-calls with non-static relocation model!");
1685207618Srdivacky    // Handle a global address or an external symbol. If it's not one of
1686207618Srdivacky    // those, the target's already in a register, so we don't need to do
1687207618Srdivacky    // anything extra.
1688207618Srdivacky    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1689207618Srdivacky      const GlobalValue *GV = G->getGlobal();
1690207618Srdivacky      // Create a constant pool entry for the callee address
1691218893Sdim      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1692226633Sdim      ARMConstantPoolValue *CPV =
1693226633Sdim        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1694226633Sdim
1695207618Srdivacky      // Get the address of the callee into a register
1696207618Srdivacky      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1697207618Srdivacky      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1698207618Srdivacky      Callee = DAG.getLoad(getPointerTy(), dl,
1699207618Srdivacky                           DAG.getEntryNode(), CPAddr,
1700218893Sdim                           MachinePointerInfo::getConstantPool(),
1701234353Sdim                           false, false, false, 0);
1702207618Srdivacky    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1703207618Srdivacky      const char *Sym = S->getSymbol();
1704207618Srdivacky
1705207618Srdivacky      // Create a constant pool entry for the callee address
1706218893Sdim      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1707226633Sdim      ARMConstantPoolValue *CPV =
1708226633Sdim        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1709226633Sdim                                      ARMPCLabelIndex, 0);
1710207618Srdivacky      // Get the address of the callee into a register
1711207618Srdivacky      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1712207618Srdivacky      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1713207618Srdivacky      Callee = DAG.getLoad(getPointerTy(), dl,
1714207618Srdivacky                           DAG.getEntryNode(), CPAddr,
1715218893Sdim                           MachinePointerInfo::getConstantPool(),
1716234353Sdim                           false, false, false, 0);
1717207618Srdivacky    }
1718207618Srdivacky  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1719207618Srdivacky    const GlobalValue *GV = G->getGlobal();
1720193323Sed    isDirect = true;
1721198090Srdivacky    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1722276479Sdim    bool isStub = (isExt && Subtarget->isTargetMachO()) &&
1723193323Sed                   getTargetMachine().getRelocationModel() != Reloc::Static;
1724280031Sdim    isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1725193323Sed    // ARM call to a local ARM function is predicable.
1726210299Sed    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1727193323Sed    // tBX takes a register source operand.
1728276479Sdim    if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1729276479Sdim      assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
1730276479Sdim      Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
1731280031Sdim                           DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
1732280031Sdim                                                      0, ARMII::MO_NONLAZY));
1733280031Sdim      Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
1734280031Sdim                           MachinePointerInfo::getGOT(), false, false, true, 0);
1735276479Sdim    } else if (Subtarget->isTargetCOFF()) {
1736276479Sdim      assert(Subtarget->isTargetWindows() &&
1737276479Sdim             "Windows is the only supported COFF target");
1738276479Sdim      unsigned TargetFlags = GV->hasDLLImportStorageClass()
1739276479Sdim                                 ? ARMII::MO_DLLIMPORT
1740276479Sdim                                 : ARMII::MO_NO_FLAG;
1741276479Sdim      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
1742276479Sdim                                          TargetFlags);
1743276479Sdim      if (GV->hasDLLImportStorageClass())
1744276479Sdim        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
1745276479Sdim                             DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
1746276479Sdim                                         Callee), MachinePointerInfo::getGOT(),
1747276479Sdim                             false, false, false, 0);
1748218893Sdim    } else {
1749218893Sdim      // On ELF targets for PIC code, direct calls should go through the PLT
1750218893Sdim      unsigned OpFlags = 0;
1751218893Sdim      if (Subtarget->isTargetELF() &&
1752249423Sdim          getTargetMachine().getRelocationModel() == Reloc::PIC_)
1753218893Sdim        OpFlags = ARMII::MO_PLT;
1754218893Sdim      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1755218893Sdim    }
1756193323Sed  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1757193323Sed    isDirect = true;
1758276479Sdim    bool isStub = Subtarget->isTargetMachO() &&
1759193323Sed                  getTargetMachine().getRelocationModel() != Reloc::Static;
1760280031Sdim    isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1761193323Sed    // tBX takes a register source operand.
1762193323Sed    const char *Sym = S->getSymbol();
1763198090Srdivacky    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1764218893Sdim      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1765226633Sdim      ARMConstantPoolValue *CPV =
1766226633Sdim        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1767226633Sdim                                      ARMPCLabelIndex, 4);
1768193323Sed      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1769193323Sed      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1770193323Sed      Callee = DAG.getLoad(getPointerTy(), dl,
1771198892Srdivacky                           DAG.getEntryNode(), CPAddr,
1772218893Sdim                           MachinePointerInfo::getConstantPool(),
1773234353Sdim                           false, false, false, 0);
1774199481Srdivacky      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1775193323Sed      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1776193323Sed                           getPointerTy(), Callee, PICLabel);
1777218893Sdim    } else {
1778218893Sdim      unsigned OpFlags = 0;
1779218893Sdim      // On ELF targets for PIC code, direct calls should go through the PLT
1780218893Sdim      if (Subtarget->isTargetELF() &&
1781218893Sdim                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1782218893Sdim        OpFlags = ARMII::MO_PLT;
1783218893Sdim      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1784218893Sdim    }
1785193323Sed  }
1786193323Sed
1787193323Sed  // FIXME: handle tail calls differently.
1788193323Sed  unsigned CallOpc;
1789276479Sdim  bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
1790276479Sdim      AttributeSet::FunctionIndex, Attribute::MinSize);
1791193323Sed  if (Subtarget->isThumb()) {
1792198090Srdivacky    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1793193323Sed      CallOpc = ARMISD::CALL_NOLINK;
1794193323Sed    else
1795193323Sed      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1796193323Sed  } else {
1797243830Sdim    if (!isDirect && !Subtarget->hasV5TOps())
1798234353Sdim      CallOpc = ARMISD::CALL_NOLINK;
1799243830Sdim    else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
1800243830Sdim               // Emit regular call when code size is the priority
1801243830Sdim               !HasMinSizeAttr)
1802234353Sdim      // "mov lr, pc; b _foo" to avoid confusing the RSP
1803234353Sdim      CallOpc = ARMISD::CALL_NOLINK;
1804234353Sdim    else
1805234353Sdim      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
1806193323Sed  }
1807193323Sed
1808193323Sed  std::vector<SDValue> Ops;
1809193323Sed  Ops.push_back(Chain);
1810193323Sed  Ops.push_back(Callee);
1811193323Sed
1812193323Sed  // Add argument registers to the end of the list so that they are known live
1813193323Sed  // into the call.
1814193323Sed  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1815193323Sed    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1816193323Sed                                  RegsToPass[i].second.getValueType()));
1817193323Sed
1818234353Sdim  // Add a register mask operand representing the call-preserved registers.
1819261991Sdim  if (!isTailCall) {
1820261991Sdim    const uint32_t *Mask;
1821280031Sdim    const TargetRegisterInfo *TRI =
1822280031Sdim        getTargetMachine().getSubtargetImpl()->getRegisterInfo();
1823261991Sdim    const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
1824261991Sdim    if (isThisReturn) {
1825261991Sdim      // For 'this' returns, use the R0-preserving mask if applicable
1826261991Sdim      Mask = ARI->getThisReturnPreservedMask(CallConv);
1827261991Sdim      if (!Mask) {
1828261991Sdim        // Set isThisReturn to false if the calling convention is not one that
1829261991Sdim        // allows 'returned' to be modeled in this way, so LowerCallResult does
1830261991Sdim        // not try to pass 'this' straight through
1831261991Sdim        isThisReturn = false;
1832261991Sdim        Mask = ARI->getCallPreservedMask(CallConv);
1833261991Sdim      }
1834261991Sdim    } else
1835261991Sdim      Mask = ARI->getCallPreservedMask(CallConv);
1836251662Sdim
1837261991Sdim    assert(Mask && "Missing call preserved mask for calling convention");
1838261991Sdim    Ops.push_back(DAG.getRegisterMask(Mask));
1839261991Sdim  }
1840234353Sdim
1841193323Sed  if (InFlag.getNode())
1842193323Sed    Ops.push_back(InFlag);
1843210299Sed
1844218893Sdim  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1845210299Sed  if (isTailCall)
1846276479Sdim    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
1847210299Sed
1848193323Sed  // Returns a chain and a flag for retval copy to use.
1849276479Sdim  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
1850193323Sed  InFlag = Chain.getValue(1);
1851193323Sed
1852193323Sed  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1853261991Sdim                             DAG.getIntPtrConstant(0, true), InFlag, dl);
1854198090Srdivacky  if (!Ins.empty())
1855193323Sed    InFlag = Chain.getValue(1);
1856193323Sed
1857193323Sed  // Handle result values, copying them out of physregs into vregs that we
1858193323Sed  // return.
1859251662Sdim  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
1860251662Sdim                         InVals, isThisReturn,
1861251662Sdim                         isThisReturn ? OutVals[0] : SDValue());
1862193323Sed}
1863193323Sed
1864221345Sdim/// HandleByVal - Every parameter *after* a byval parameter is passed
1865221345Sdim/// on the stack.  Remember the next parameter register to allocate,
1866221345Sdim/// and then confiscate the rest of the parameter registers to ensure
1867221345Sdim/// this.
1868221345Sdimvoid
1869243830SdimARMTargetLowering::HandleByVal(
1870243830Sdim    CCState *State, unsigned &size, unsigned Align) const {
1871221345Sdim  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1872221345Sdim  assert((State->getCallOrPrologue() == Prologue ||
1873221345Sdim          State->getCallOrPrologue() == Call) &&
1874221345Sdim         "unhandled ParmContext");
1875251662Sdim
1876251662Sdim  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
1877243830Sdim    if (Subtarget->isAAPCS_ABI() && Align > 4) {
1878243830Sdim      unsigned AlignInRegs = Align / 4;
1879243830Sdim      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
1880243830Sdim      for (unsigned i = 0; i < Waste; ++i)
1881243830Sdim        reg = State->AllocateReg(GPRArgRegs, 4);
1882221345Sdim    }
1883243830Sdim    if (reg != 0) {
1884251662Sdim      unsigned excess = 4 * (ARM::R4 - reg);
1885251662Sdim
1886251662Sdim      // Special case when NSAA != SP and parameter size greater than size of
1887251662Sdim      // all remained GPR regs. In that case we can't split parameter, we must
1888251662Sdim      // send it to stack. We also must set NCRN to R4, so waste all
1889251662Sdim      // remained registers.
1890276479Sdim      const unsigned NSAAOffset = State->getNextStackOffset();
1891251662Sdim      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
1892251662Sdim        while (State->AllocateReg(GPRArgRegs, 4))
1893251662Sdim          ;
1894251662Sdim        return;
1895251662Sdim      }
1896251662Sdim
1897251662Sdim      // The first register for the byval parameter is the first register that
1898251662Sdim      // wasn't allocated before this call, i.e. "reg".
1899251662Sdim      // If the parameter is small enough to fit in the range [reg, r4), the
1900251662Sdim      // end (one past the last) register is reg + param-size-in-regs;
1901251662Sdim      // otherwise the parameter is split between registers and the stack,
1902251662Sdim      // and the end register is r4.
1903251662Sdim      unsigned ByValRegBegin = reg;
1904261991Sdim      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
1905251662Sdim      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
1906251662Sdim      // Note: the first register was already allocated at the beginning of the
1907251662Sdim      // function, so allocate only the remaining registers we need.
1908251662Sdim      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
1909251662Sdim        State->AllocateReg(GPRArgRegs, 4);
1910276479Sdim      // A byval parameter that is split between registers and memory needs its
1911276479Sdim      // size truncated here.
1912276479Sdim      // In the case where the entire structure fits in registers, we set the
1913276479Sdim      // size in memory to zero.
1914276479Sdim      if (size < excess)
1915276479Sdim        size = 0;
1916276479Sdim      else
1917276479Sdim        size -= excess;
1918243830Sdim    }
1919221345Sdim  }
1920221345Sdim}
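// Worked example (illustrative, assuming nothing has been placed on the
// stack yet, so NSAA == SP): a 12-byte byval argument arriving here with r3
// as the next free GPR gets ByValRegBegin = r3 and ByValRegEnd = r4, so 4
// bytes travel in r3 and "size" is trimmed to the 8 bytes that spill to the
// stack.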
1921221345Sdim
1922210299Sed/// MatchingStackOffset - Return true if the given stack call argument is
1923210299Sed/// already available in the same (relative) position in the caller's
1924210299Sed/// incoming argument stack.
1925210299Sedstatic
1926210299Sedbool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1927210299Sed                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1928234353Sdim                         const TargetInstrInfo *TII) {
1929210299Sed  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1930210299Sed  int FI = INT_MAX;
1931210299Sed  if (Arg.getOpcode() == ISD::CopyFromReg) {
1932210299Sed    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1933218893Sdim    if (!TargetRegisterInfo::isVirtualRegister(VR))
1934210299Sed      return false;
1935210299Sed    MachineInstr *Def = MRI->getVRegDef(VR);
1936210299Sed    if (!Def)
1937210299Sed      return false;
1938210299Sed    if (!Flags.isByVal()) {
1939210299Sed      if (!TII->isLoadFromStackSlot(Def, FI))
1940210299Sed        return false;
1941210299Sed    } else {
1942210299Sed      return false;
1943210299Sed    }
1944210299Sed  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1945210299Sed    if (Flags.isByVal())
1946210299Sed      // ByVal argument is passed in as a pointer but it's now being
1947210299Sed      // dereferenced. e.g.
1948210299Sed      // define @foo(%struct.X* %A) {
1949210299Sed      //   tail call @bar(%struct.X* byval %A)
1950210299Sed      // }
1951210299Sed      return false;
1952210299Sed    SDValue Ptr = Ld->getBasePtr();
1953210299Sed    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1954210299Sed    if (!FINode)
1955210299Sed      return false;
1956210299Sed    FI = FINode->getIndex();
1957210299Sed  } else
1958210299Sed    return false;
1959210299Sed
1960210299Sed  assert(FI != INT_MAX);
1961210299Sed  if (!MFI->isFixedObjectIndex(FI))
1962210299Sed    return false;
1963210299Sed  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1964210299Sed}
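// Illustrative example: if an outgoing argument is simply a load of the
// caller's own incoming argument from a fixed frame index with the same
// offset and size, the check above succeeds and the sibcall can reuse that
// stack slot without copying.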
1965210299Sed
1966210299Sed/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1967210299Sed/// for tail call optimization. Targets which want to do tail call
1968210299Sed/// optimization should implement this function.
1969210299Sedbool
1970210299SedARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1971210299Sed                                                     CallingConv::ID CalleeCC,
1972210299Sed                                                     bool isVarArg,
1973210299Sed                                                     bool isCalleeStructRet,
1974210299Sed                                                     bool isCallerStructRet,
1975210299Sed                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1976210299Sed                                    const SmallVectorImpl<SDValue> &OutVals,
1977210299Sed                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1978210299Sed                                                     SelectionDAG& DAG) const {
1979210299Sed  const Function *CallerF = DAG.getMachineFunction().getFunction();
1980210299Sed  CallingConv::ID CallerCC = CallerF->getCallingConv();
1981210299Sed  bool CCMatch = CallerCC == CalleeCC;
1982210299Sed
1983210299Sed  // Look for obvious safe cases to perform tail call optimization that do not
1984210299Sed  // require ABI changes. This is what gcc calls sibcall.
1985210299Sed
1986210299Sed  // Do not sibcall optimize vararg calls unless the call site is not passing
1987210299Sed  // any arguments.
1988210299Sed  if (isVarArg && !Outs.empty())
1989210299Sed    return false;
1990210299Sed
1991261991Sdim  // Exception-handling functions need a special set of instructions to indicate
1992261991Sdim  // a return to the hardware. Tail-calling another function would probably
1993261991Sdim  // break this.
1994261991Sdim  if (CallerF->hasFnAttribute("interrupt"))
1995261991Sdim    return false;
1996261991Sdim
1997210299Sed  // Also avoid sibcall optimization if either caller or callee uses struct
1998210299Sed  // return semantics.
1999210299Sed  if (isCalleeStructRet || isCallerStructRet)
2000210299Sed    return false;
2001210299Sed
2002210299Sed  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
2003224145Sdim  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
2004224145Sdim  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
2005224145Sdim  // support in the assembler and linker to be used. This would need to be
2006224145Sdim  // fixed to fully support tail calls in Thumb1.
2007224145Sdim  //
2008210299Sed  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
2009210299Sed  // LR.  This means if we need to reload LR, it takes an extra instruction,
2010210299Sed  // which outweighs the value of the tail call; but here we don't know yet
2011210299Sed  // whether LR is going to be used.  Probably the right approach is to
2012218893Sdim  // generate the tail call here and turn it back into CALL/RET in
2013210299Sed  // emitEpilogue if LR is used.
2014210299Sed
2015210299Sed  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
2016210299Sed  // but we need to make sure there are enough registers; the only valid
2017210299Sed  // registers are the 4 used for parameters.  We don't currently do this
2018210299Sed  // case.
2019218893Sdim  if (Subtarget->isThumb1Only())
2020218893Sdim    return false;
2021210299Sed
2022280031Sdim  // Externally-defined functions with weak linkage should not be
2023280031Sdim  // tail-called on ARM when the OS does not support dynamic
2024280031Sdim  // pre-emption of symbols, as the AAELF spec requires normal calls
2025280031Sdim  // to undefined weak functions to be replaced with a NOP or jump to the
2026280031Sdim  // next instruction. The behaviour of branch instructions in this
2027280031Sdim  // situation (as used for tail calls) is implementation-defined, so we
2028280031Sdim  // cannot rely on the linker replacing the tail call with a return.
2029280031Sdim  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2030280031Sdim    const GlobalValue *GV = G->getGlobal();
2031280031Sdim    const Triple TT(getTargetMachine().getTargetTriple());
2032280031Sdim    if (GV->hasExternalWeakLinkage() &&
2033280031Sdim        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2034280031Sdim      return false;
2035280031Sdim  }
2036280031Sdim
2037210299Sed  // If the calling conventions do not match, then we'd better make sure the
2038210299Sed  // results are returned in the same way as what the caller expects.
2039210299Sed  if (!CCMatch) {
2040210299Sed    SmallVector<CCValAssign, 16> RVLocs1;
2041280031Sdim    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
2042280031Sdim                       *DAG.getContext(), Call);
2043210299Sed    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
2044210299Sed
2045210299Sed    SmallVector<CCValAssign, 16> RVLocs2;
2046280031Sdim    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
2047280031Sdim                       *DAG.getContext(), Call);
2048210299Sed    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
2049210299Sed
2050210299Sed    if (RVLocs1.size() != RVLocs2.size())
2051210299Sed      return false;
2052210299Sed    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
2053210299Sed      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
2054210299Sed        return false;
2055210299Sed      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
2056210299Sed        return false;
2057210299Sed      if (RVLocs1[i].isRegLoc()) {
2058210299Sed        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
2059210299Sed          return false;
2060210299Sed      } else {
2061210299Sed        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
2062210299Sed          return false;
2063210299Sed      }
2064210299Sed    }
2065210299Sed  }
2066210299Sed
2067243830Sdim  // If the caller's vararg or byval argument has been split between registers
2068243830Sdim  // and the stack, do not perform a tail call, since part of the argument is
2069243830Sdim  // in the caller's local frame.
2070243830Sdim  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
2071243830Sdim                                      getInfo<ARMFunctionInfo>();
2072251662Sdim  if (AFI_Caller->getArgRegsSaveSize())
2073243830Sdim    return false;
2074243830Sdim
2075210299Sed  // If the callee takes no arguments then go on to check the results of the
2076210299Sed  // call.
2077210299Sed  if (!Outs.empty()) {
2078210299Sed    // Check if stack adjustment is needed. For now, do not do this if any
2079210299Sed    // argument is passed on the stack.
2080210299Sed    SmallVector<CCValAssign, 16> ArgLocs;
2081280031Sdim    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2082280031Sdim                      *DAG.getContext(), Call);
2083210299Sed    CCInfo.AnalyzeCallOperands(Outs,
2084210299Sed                               CCAssignFnForNode(CalleeCC, false, isVarArg));
2085210299Sed    if (CCInfo.getNextStackOffset()) {
2086210299Sed      MachineFunction &MF = DAG.getMachineFunction();
2087210299Sed
2088210299Sed      // Check if the arguments are already laid out in the right way as
2089210299Sed      // the caller's fixed stack objects.
2090210299Sed      MachineFrameInfo *MFI = MF.getFrameInfo();
2091210299Sed      const MachineRegisterInfo *MRI = &MF.getRegInfo();
2092280031Sdim      const TargetInstrInfo *TII =
2093280031Sdim          getTargetMachine().getSubtargetImpl()->getInstrInfo();
2094210299Sed      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2095210299Sed           i != e;
2096210299Sed           ++i, ++realArgIdx) {
2097210299Sed        CCValAssign &VA = ArgLocs[i];
2098210299Sed        EVT RegVT = VA.getLocVT();
2099210299Sed        SDValue Arg = OutVals[realArgIdx];
2100210299Sed        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2101210299Sed        if (VA.getLocInfo() == CCValAssign::Indirect)
2102210299Sed          return false;
2103210299Sed        if (VA.needsCustom()) {
2104210299Sed          // f64 and vector types are split into multiple registers or
2105210299Sed          // register/stack-slot combinations.  The types will not match
2106210299Sed          // the registers; give up on memory f64 refs until we figure
2107210299Sed          // out what to do about this.
2108210299Sed          if (!VA.isRegLoc())
2109210299Sed            return false;
2110210299Sed          if (!ArgLocs[++i].isRegLoc())
2111218893Sdim            return false;
2112210299Sed          if (RegVT == MVT::v2f64) {
2113210299Sed            if (!ArgLocs[++i].isRegLoc())
2114210299Sed              return false;
2115210299Sed            if (!ArgLocs[++i].isRegLoc())
2116210299Sed              return false;
2117210299Sed          }
2118210299Sed        } else if (!VA.isRegLoc()) {
2119210299Sed          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2120210299Sed                                   MFI, MRI, TII))
2121210299Sed            return false;
2122210299Sed        }
2123210299Sed      }
2124210299Sed    }
2125210299Sed  }
2126210299Sed
2127210299Sed  return true;
2128210299Sed}
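
// A minimal sketch of a call that passes all of the checks above (the IR is
// hypothetical, not taken from this file):
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
// Matching conventions, no varargs or sret, no split byval in the caller and
// a register-only argument, so this can lower to a plain "b callee".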
2129210299Sed
2130249423Sdimbool
2131249423SdimARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2132249423Sdim                                  MachineFunction &MF, bool isVarArg,
2133249423Sdim                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2134249423Sdim                                  LLVMContext &Context) const {
2135249423Sdim  SmallVector<CCValAssign, 16> RVLocs;
2136280031Sdim  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2137249423Sdim  return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
2138249423Sdim                                                    isVarArg));
2139249423Sdim}
2140249423Sdim
2141261991Sdimstatic SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2142261991Sdim                                    SDLoc DL, SelectionDAG &DAG) {
2143261991Sdim  const MachineFunction &MF = DAG.getMachineFunction();
2144261991Sdim  const Function *F = MF.getFunction();
2145261991Sdim
2146261991Sdim  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2147261991Sdim
2148261991Sdim  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2149261991Sdim  // version of the "preferred return address". These offsets affect the return
2150261991Sdim  // instruction if this is a return from PL1 without hypervisor extensions.
2151261991Sdim  //    IRQ/FIQ: +4     "subs pc, lr, #4"
2152261991Sdim  //    SWI:     0      "subs pc, lr, #0"
2153261991Sdim  //    ABORT:   +4     "subs pc, lr, #4"
2154261991Sdim  //    UNDEF:   +4/+2  "subs pc, lr, #0"
2155261991Sdim  // UNDEF varies depending on whether the exception came from ARM or Thumb
2156261991Sdim  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2157261991Sdim
2158261991Sdim  int64_t LROffset;
2159261991Sdim  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2160261991Sdim      IntKind == "ABORT")
2161261991Sdim    LROffset = 4;
2162261991Sdim  else if (IntKind == "SWI" || IntKind == "UNDEF")
2163261991Sdim    LROffset = 0;
2164261991Sdim  else
2165261991Sdim    report_fatal_error("Unsupported interrupt attribute. If present, value "
2166261991Sdim                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2167261991Sdim
2168261991Sdim  RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
2169261991Sdim
2170276479Sdim  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2171261991Sdim}
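
// For example (illustrative): a handler marked "interrupt"="IRQ" gets
// LROffset == 4, so the eventual return is "subs pc, lr, #4".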
2172261991Sdim
2173198090SrdivackySDValue
2174198090SrdivackyARMTargetLowering::LowerReturn(SDValue Chain,
2175198090Srdivacky                               CallingConv::ID CallConv, bool isVarArg,
2176198090Srdivacky                               const SmallVectorImpl<ISD::OutputArg> &Outs,
2177210299Sed                               const SmallVectorImpl<SDValue> &OutVals,
2178261991Sdim                               SDLoc dl, SelectionDAG &DAG) const {
2179193323Sed
2180193323Sed  // CCValAssign - represent the assignment of the return value to a location.
2181193323Sed  SmallVector<CCValAssign, 16> RVLocs;
2182193323Sed
2183193323Sed  // CCState - Info about the registers and stack slots.
2184280031Sdim  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2185280031Sdim                    *DAG.getContext(), Call);
2186193323Sed
2187198090Srdivacky  // Analyze outgoing return values.
2188198090Srdivacky  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
2189198090Srdivacky                                               isVarArg));
2190193323Sed
2191193323Sed  SDValue Flag;
2192249423Sdim  SmallVector<SDValue, 4> RetOps;
2193249423Sdim  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2194276479Sdim  bool isLittleEndian = Subtarget->isLittle();
2195193323Sed
2196280031Sdim  MachineFunction &MF = DAG.getMachineFunction();
2197280031Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2198280031Sdim  AFI->setReturnRegsCount(RVLocs.size());
2199280031Sdim
2200193323Sed  // Copy the result values into the output registers.
2201193323Sed  for (unsigned i = 0, realRVLocIdx = 0;
2202193323Sed       i != RVLocs.size();
2203193323Sed       ++i, ++realRVLocIdx) {
2204193323Sed    CCValAssign &VA = RVLocs[i];
2205193323Sed    assert(VA.isRegLoc() && "Can only return in registers!");
2206193323Sed
2207210299Sed    SDValue Arg = OutVals[realRVLocIdx];
2208193323Sed
2209193323Sed    switch (VA.getLocInfo()) {
2210198090Srdivacky    default: llvm_unreachable("Unknown loc info!");
2211193323Sed    case CCValAssign::Full: break;
2212193323Sed    case CCValAssign::BCvt:
2213218893Sdim      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2214193323Sed      break;
2215193323Sed    }
2216193323Sed
2217193323Sed    if (VA.needsCustom()) {
2218194710Sed      if (VA.getLocVT() == MVT::v2f64) {
2219194710Sed        // Extract the first half and return it in two registers.
2220194710Sed        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2221194710Sed                                   DAG.getConstant(0, MVT::i32));
2222199481Srdivacky        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2223194710Sed                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
2224194710Sed
2225276479Sdim        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2226276479Sdim                                 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2227276479Sdim                                 Flag);
2228194710Sed        Flag = Chain.getValue(1);
2229249423Sdim        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2230194710Sed        VA = RVLocs[++i]; // skip ahead to next loc
2231194710Sed        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2232276479Sdim                                 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2233276479Sdim                                 Flag);
2234194710Sed        Flag = Chain.getValue(1);
2235249423Sdim        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2236194710Sed        VA = RVLocs[++i]; // skip ahead to next loc
2237194710Sed
2238194710Sed        // Extract the 2nd half and fall through to handle it as an f64 value.
2239194710Sed        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2240194710Sed                          DAG.getConstant(1, MVT::i32));
2241194710Sed      }
2242194710Sed      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
2243194710Sed      // available.
2244199481Srdivacky      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2245276479Sdim                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2246276479Sdim      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2247276479Sdim                               fmrrd.getValue(isLittleEndian ? 0 : 1),
2248276479Sdim                               Flag);
2249193323Sed      Flag = Chain.getValue(1);
2250249423Sdim      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2251193323Sed      VA = RVLocs[++i]; // skip ahead to next loc
2252276479Sdim      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2253276479Sdim                               fmrrd.getValue(isLittleEndian ? 1 : 0),
2254193323Sed                               Flag);
2255193323Sed    } else
2256193323Sed      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2257193323Sed
2258193323Sed    // Glue the emitted copies together so that nothing else can be
2259193323Sed    // scheduled between them.
2260193323Sed    Flag = Chain.getValue(1);
2261249423Sdim    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2262193323Sed  }
2263193323Sed
2264249423Sdim  // Update chain and glue.
2265249423Sdim  RetOps[0] = Chain;
2266193323Sed  if (Flag.getNode())
2267249423Sdim    RetOps.push_back(Flag);
2268193323Sed
2269261991Sdim  // CPUs which aren't M-class use a special sequence to return from
2270261991Sdim  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2271261991Sdim  // though we use "subs pc, lr, #N").
2272261991Sdim  //
2273261991Sdim  // M-class CPUs actually use a normal return sequence with a special
2274261991Sdim  // (hardware-provided) value in LR, so the normal code path works.
2275261991Sdim  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2276261991Sdim      !Subtarget->isMClass()) {
2277261991Sdim    if (Subtarget->isThumb1Only())
2278261991Sdim      report_fatal_error("interrupt attribute is not supported in Thumb1");
2279261991Sdim    return LowerInterruptReturn(RetOps, dl, DAG);
2280261991Sdim  }
2281261991Sdim
2282276479Sdim  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2283193323Sed}
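
// Illustrative: returning an f64 takes the VA.needsCustom() path above; the
// value is split with VMOVRRD so that r0/r1 (swapped on big-endian) carry the
// two halves named by consecutive RVLocs.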
2284193323Sed
2285234353Sdimbool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2286218893Sdim  if (N->getNumValues() != 1)
2287218893Sdim    return false;
2288218893Sdim  if (!N->hasNUsesOfValue(1, 0))
2289218893Sdim    return false;
2290218893Sdim
2291234353Sdim  SDValue TCChain = Chain;
2292234353Sdim  SDNode *Copy = *N->use_begin();
2293234353Sdim  if (Copy->getOpcode() == ISD::CopyToReg) {
2294234353Sdim    // If the copy has a glue operand, we conservatively assume it isn't safe to
2295234353Sdim    // perform a tail call.
2296234353Sdim    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2297234353Sdim      return false;
2298234353Sdim    TCChain = Copy->getOperand(0);
2299234353Sdim  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2300234353Sdim    SDNode *VMov = Copy;
2301218893Sdim    // f64 returned in a pair of GPRs.
2302234353Sdim    SmallPtrSet<SDNode*, 2> Copies;
2303234353Sdim    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2304218893Sdim         UI != UE; ++UI) {
2305218893Sdim      if (UI->getOpcode() != ISD::CopyToReg)
2306218893Sdim        return false;
2307234353Sdim      Copies.insert(*UI);
2308218893Sdim    }
2309234353Sdim    if (Copies.size() > 2)
2310234353Sdim      return false;
2311234353Sdim
2312234353Sdim    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2313234353Sdim         UI != UE; ++UI) {
2314234353Sdim      SDValue UseChain = UI->getOperand(0);
2315234353Sdim      if (Copies.count(UseChain.getNode()))
2316234353Sdim        // Second CopyToReg
2317234353Sdim        Copy = *UI;
2318280031Sdim      else {
2319280031Sdim        // We are at the top of this chain.
2320280031Sdim        // If the copy has a glue operand, we conservatively assume it
2321280031Sdim        // isn't safe to perform a tail call.
2322280031Sdim        if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2323280031Sdim          return false;
2324234353Sdim        // First CopyToReg
2325234353Sdim        TCChain = UseChain;
2326280031Sdim      }
2327234353Sdim    }
2328234353Sdim  } else if (Copy->getOpcode() == ISD::BITCAST) {
2329218893Sdim    // f32 returned in a single GPR.
2330234353Sdim    if (!Copy->hasOneUse())
2331218893Sdim      return false;
2332234353Sdim    Copy = *Copy->use_begin();
2333234353Sdim    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2334218893Sdim      return false;
2335280031Sdim    // If the copy has a glue operand, we conservatively assume it isn't safe to
2336280031Sdim    // perform a tail call.
2337280031Sdim    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2338280031Sdim      return false;
2339261991Sdim    TCChain = Copy->getOperand(0);
2340218893Sdim  } else {
2341218893Sdim    return false;
2342218893Sdim  }
2343218893Sdim
2344218893Sdim  bool HasRet = false;
2345234353Sdim  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2346234353Sdim       UI != UE; ++UI) {
2347261991Sdim    if (UI->getOpcode() != ARMISD::RET_FLAG &&
2348261991Sdim        UI->getOpcode() != ARMISD::INTRET_FLAG)
2349234353Sdim      return false;
2350234353Sdim    HasRet = true;
2351218893Sdim  }
2352218893Sdim
2353234353Sdim  if (!HasRet)
2354234353Sdim    return false;
2355234353Sdim
2356234353Sdim  Chain = TCChain;
2357234353Sdim  return true;
2358218893Sdim}
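
// Illustrative: for a double return the expected shape is
//   VMOVRRD -> two CopyToReg (r0, r1) -> RET_FLAG
// and the walk above hands back the chain feeding the first CopyToReg.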
2359218893Sdim
2360221345Sdimbool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2361276479Sdim  if (!Subtarget->supportsTailCall())
2362221345Sdim    return false;
2363221345Sdim
2364276479Sdim  if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
2365221345Sdim    return false;
2366221345Sdim
2367221345Sdim  return !Subtarget->isThumb1Only();
2368221345Sdim}
2369221345Sdim
2370193323Sed// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2371198090Srdivacky// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2372193323Sed// one of the above-mentioned nodes. It has to be wrapped because otherwise
2373193323Sed// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2374193323Sed// be used to form an addressing mode. These wrapped nodes will be selected
2375193323Sed// into MOVi.
2376193323Sedstatic SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2377198090Srdivacky  EVT PtrVT = Op.getValueType();
2378193323Sed  // FIXME there is no actual debug info here
2379261991Sdim  SDLoc dl(Op);
2380193323Sed  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2381193323Sed  SDValue Res;
2382193323Sed  if (CP->isMachineConstantPoolEntry())
2383193323Sed    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2384193323Sed                                    CP->getAlignment());
2385193323Sed  else
2386193323Sed    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2387193323Sed                                    CP->getAlignment());
2388193323Sed  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2389193323Sed}
2390193323Sed
2391212904Sdimunsigned ARMTargetLowering::getJumpTableEncoding() const {
2392212904Sdim  return MachineJumpTableInfo::EK_Inline;
2393212904Sdim}
2394212904Sdim
2395207618SrdivackySDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2396207618Srdivacky                                             SelectionDAG &DAG) const {
2397199481Srdivacky  MachineFunction &MF = DAG.getMachineFunction();
2398199481Srdivacky  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2399199481Srdivacky  unsigned ARMPCLabelIndex = 0;
2400261991Sdim  SDLoc DL(Op);
2401198892Srdivacky  EVT PtrVT = getPointerTy();
2402207618Srdivacky  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2403198892Srdivacky  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2404198892Srdivacky  SDValue CPAddr;
2405198892Srdivacky  if (RelocM == Reloc::Static) {
2406198892Srdivacky    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2407198892Srdivacky  } else {
2408198892Srdivacky    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2409218893Sdim    ARMPCLabelIndex = AFI->createPICLabelUId();
2410226633Sdim    ARMConstantPoolValue *CPV =
2411226633Sdim      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2412226633Sdim                                      ARMCP::CPBlockAddress, PCAdj);
2413198892Srdivacky    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2414198892Srdivacky  }
2415198892Srdivacky  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2416198892Srdivacky  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
2417218893Sdim                               MachinePointerInfo::getConstantPool(),
2418234353Sdim                               false, false, false, 0);
2419198892Srdivacky  if (RelocM == Reloc::Static)
2420198892Srdivacky    return Result;
2421199481Srdivacky  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2422198892Srdivacky  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2423198892Srdivacky}
2424198892Srdivacky
2425193323Sed// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2426193323SedSDValue
2427193323SedARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2428207618Srdivacky                                                 SelectionDAG &DAG) const {
2429261991Sdim  SDLoc dl(GA);
2430198090Srdivacky  EVT PtrVT = getPointerTy();
2431193323Sed  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2432199481Srdivacky  MachineFunction &MF = DAG.getMachineFunction();
2433199481Srdivacky  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2434218893Sdim  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2435193323Sed  ARMConstantPoolValue *CPV =
2436226633Sdim    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2437226633Sdim                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2438193323Sed  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2439193323Sed  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2440198892Srdivacky  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
2441218893Sdim                         MachinePointerInfo::getConstantPool(),
2442234353Sdim                         false, false, false, 0);
2443193323Sed  SDValue Chain = Argument.getValue(1);
2444193323Sed
2445199481Srdivacky  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2446193323Sed  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2447193323Sed
2448193323Sed  // call __tls_get_addr.
2449193323Sed  ArgListTy Args;
2450193323Sed  ArgListEntry Entry;
2451193323Sed  Entry.Node = Argument;
2452226633Sdim  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2453193323Sed  Args.push_back(Entry);
2454276479Sdim
2455193323Sed  // FIXME: is there useful debug info available here?
2456276479Sdim  TargetLowering::CallLoweringInfo CLI(DAG);
2457276479Sdim  CLI.setDebugLoc(dl).setChain(Chain)
2458276479Sdim    .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2459276479Sdim               DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
2460276479Sdim               0);
2461276479Sdim
2462239462Sdim  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2463193323Sed  return CallResult.first;
2464193323Sed}
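
// Rough shape of the sequence built above (assembly is illustrative, not
// verbatim output):
//   ldr r0, .LCPI        @ constant-pool entry with a TLSGD relocation
//   add r0, pc           @ ARMISD::PIC_ADD
//   bl  __tls_get_addr   @ returns the variable's address in r0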
2465193323Sed
2466193323Sed// Lower ISD::GlobalTLSAddress using the "initial exec" or
2467193323Sed// "local exec" model.
2468193323SedSDValue
2469193323SedARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2470239462Sdim                                        SelectionDAG &DAG,
2471239462Sdim                                        TLSModel::Model model) const {
2472207618Srdivacky  const GlobalValue *GV = GA->getGlobal();
2473261991Sdim  SDLoc dl(GA);
2474193323Sed  SDValue Offset;
2475193323Sed  SDValue Chain = DAG.getEntryNode();
2476198090Srdivacky  EVT PtrVT = getPointerTy();
2477193323Sed  // Get the Thread Pointer
2478193323Sed  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2479193323Sed
2480239462Sdim  if (model == TLSModel::InitialExec) {
2481199481Srdivacky    MachineFunction &MF = DAG.getMachineFunction();
2482199481Srdivacky    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2483218893Sdim    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2484199481Srdivacky    // Initial exec model.
2485193323Sed    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2486193323Sed    ARMConstantPoolValue *CPV =
2487226633Sdim      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2488226633Sdim                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2489226633Sdim                                      true);
2490193323Sed    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2491193323Sed    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2492198892Srdivacky    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2493218893Sdim                         MachinePointerInfo::getConstantPool(),
2494234353Sdim                         false, false, false, 0);
2495193323Sed    Chain = Offset.getValue(1);
2496193323Sed
2497199481Srdivacky    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2498193323Sed    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2499193323Sed
2500198892Srdivacky    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2501218893Sdim                         MachinePointerInfo::getConstantPool(),
2502234353Sdim                         false, false, false, 0);
2503193323Sed  } else {
2504193323Sed    // local exec model
2505239462Sdim    assert(model == TLSModel::LocalExec);
2506226633Sdim    ARMConstantPoolValue *CPV =
2507226633Sdim      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2508193323Sed    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2509193323Sed    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2510198892Srdivacky    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2511218893Sdim                         MachinePointerInfo::getConstantPool(),
2512234353Sdim                         false, false, false, 0);
2513193323Sed  }
2514193323Sed
2515193323Sed  // The address of the thread local variable is the add of the thread
2516193323Sed  // pointer with the offset of the variable.
2517193323Sed  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2518193323Sed}
2519193323Sed
2520193323SedSDValue
2521207618SrdivackyARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2522193323Sed  // TODO: implement the "local dynamic" model
2523193323Sed  assert(Subtarget->isTargetELF() &&
2524193323Sed         "TLS not implemented for non-ELF targets");
2525193323Sed  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2526239462Sdim
2527239462Sdim  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2528239462Sdim
2529239462Sdim  switch (model) {
2530239462Sdim    case TLSModel::GeneralDynamic:
2531239462Sdim    case TLSModel::LocalDynamic:
2532239462Sdim      return LowerToTLSGeneralDynamicModel(GA, DAG);
2533239462Sdim    case TLSModel::InitialExec:
2534239462Sdim    case TLSModel::LocalExec:
2535239462Sdim      return LowerToTLSExecModels(GA, DAG, model);
2536239462Sdim  }
2537239462Sdim  llvm_unreachable("bogus TLS model");
2538193323Sed}
2539193323Sed
2540193323SedSDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2541207618Srdivacky                                                 SelectionDAG &DAG) const {
2542198090Srdivacky  EVT PtrVT = getPointerTy();
2543261991Sdim  SDLoc dl(Op);
2544207618Srdivacky  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2545249423Sdim  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2546193323Sed    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2547193323Sed    ARMConstantPoolValue *CPV =
2548226633Sdim      ARMConstantPoolConstant::Create(GV,
2549226633Sdim                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2550193323Sed    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2551193323Sed    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2552193323Sed    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
2553198090Srdivacky                                 CPAddr,
2554218893Sdim                                 MachinePointerInfo::getConstantPool(),
2555234353Sdim                                 false, false, false, 0);
2556193323Sed    SDValue Chain = Result.getValue(1);
2557193323Sed    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2558193323Sed    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
2559193323Sed    if (!UseGOTOFF)
2560198090Srdivacky      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2561234353Sdim                           MachinePointerInfo::getGOT(),
2562234353Sdim                           false, false, false, 0);
2563193323Sed    return Result;
2564218893Sdim  }
2565218893Sdim
2566218893Sdim  // If we have T2 ops, we can materialize the address directly via movt/movw
2567218893Sdim  // pair. This is always cheaper.
2568276479Sdim  if (Subtarget->useMovt(DAG.getMachineFunction())) {
2569218893Sdim    ++NumMovwMovt;
2570218893Sdim    // FIXME: Once remat is capable of dealing with instructions with register
2571218893Sdim    // operands, expand this into two nodes.
2572218893Sdim    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2573218893Sdim                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2574193323Sed  } else {
2575218893Sdim    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2576218893Sdim    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2577218893Sdim    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2578218893Sdim                       MachinePointerInfo::getConstantPool(),
2579234353Sdim                       false, false, false, 0);
2580193323Sed  }
2581193323Sed}
2582193323Sed
2583193323SedSDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
2584207618Srdivacky                                                    SelectionDAG &DAG) const {
2585198090Srdivacky  EVT PtrVT = getPointerTy();
2586261991Sdim  SDLoc dl(Op);
2587207618Srdivacky  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2588193323Sed  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2589218893Sdim
2590276479Sdim  if (Subtarget->useMovt(DAG.getMachineFunction()))
2591218893Sdim    ++NumMovwMovt;
2592218893Sdim
2593276479Sdim  // FIXME: Once remat is capable of dealing with instructions with register
2594276479Sdim  // operands, expand this into multiple nodes
2595276479Sdim  unsigned Wrapper =
2596276479Sdim      RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
2597218893Sdim
2598276479Sdim  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
2599276479Sdim  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
2600193323Sed
2601276479Sdim  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2602276479Sdim    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
2603276479Sdim                         MachinePointerInfo::getGOT(), false, false, false, 0);
2604276479Sdim  return Result;
2605276479Sdim}
2606193323Sed
2607276479SdimSDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
2608276479Sdim                                                     SelectionDAG &DAG) const {
2609276479Sdim  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
2610276479Sdim  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
2611276479Sdim         "Windows on ARM expects to use movw/movt");
2612198090Srdivacky
2613276479Sdim  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2614276479Sdim  const ARMII::TOF TargetFlags =
2615276479Sdim    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
2616276479Sdim  EVT PtrVT = getPointerTy();
2617276479Sdim  SDValue Result;
2618276479Sdim  SDLoc DL(Op);
2619193323Sed
2620276479Sdim  ++NumMovwMovt;
2621276479Sdim
2622276479Sdim  // FIXME: Once remat is capable of dealing with instructions with register
2623276479Sdim  // operands, expand this into two nodes.
2624276479Sdim  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
2625276479Sdim                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
2626276479Sdim                                                  TargetFlags));
2627276479Sdim  if (GV->hasDLLImportStorageClass())
2628276479Sdim    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2629276479Sdim                         MachinePointerInfo::getGOT(), false, false, false, 0);
2630193323Sed  return Result;
2631193323Sed}
2632193323Sed
2633193323SedSDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
2634207618Srdivacky                                                    SelectionDAG &DAG) const {
2635193323Sed  assert(Subtarget->isTargetELF() &&
2636193323Sed         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
2637199481Srdivacky  MachineFunction &MF = DAG.getMachineFunction();
2638199481Srdivacky  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2639218893Sdim  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2640198090Srdivacky  EVT PtrVT = getPointerTy();
2641261991Sdim  SDLoc dl(Op);
2642193323Sed  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2643226633Sdim  ARMConstantPoolValue *CPV =
2644226633Sdim    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
2645226633Sdim                                  ARMPCLabelIndex, PCAdj);
2646193323Sed  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2647193323Sed  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2648198090Srdivacky  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2649218893Sdim                               MachinePointerInfo::getConstantPool(),
2650234353Sdim                               false, false, false, 0);
2651199481Srdivacky  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2652193323Sed  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2653193323Sed}
2654193323Sed
2655193323SedSDValue
2656208599SrdivackyARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
2657261991Sdim  SDLoc dl(Op);
2658210299Sed  SDValue Val = DAG.getConstant(0, MVT::i32);
2659226633Sdim  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
2660226633Sdim                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
2661208599Srdivacky                     Op.getOperand(1), Val);
2662208599Srdivacky}
2663208599Srdivacky
2664208599SrdivackySDValue
2665208599SrdivackyARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
2666261991Sdim  SDLoc dl(Op);
2667208599Srdivacky  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
2668208599Srdivacky                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
2669208599Srdivacky}
2670208599Srdivacky
2671208599SrdivackySDValue
2672203954SrdivackyARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2673210299Sed                                          const ARMSubtarget *Subtarget) const {
2674193323Sed  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2675261991Sdim  SDLoc dl(Op);
2676193323Sed  switch (IntNo) {
2677193323Sed  default: return SDValue();    // Don't custom lower most intrinsics.
2678276479Sdim  case Intrinsic::arm_rbit: {
2679276479Sdim    assert(Op.getOperand(1).getValueType() == MVT::i32 &&
2680276479Sdim           "RBIT intrinsic must have i32 type!");
2681276479Sdim    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
2682276479Sdim  }
2683198090Srdivacky  case Intrinsic::arm_thread_pointer: {
2684198090Srdivacky    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2685198090Srdivacky    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2686198090Srdivacky  }
2687198090Srdivacky  case Intrinsic::eh_sjlj_lsda: {
2688198090Srdivacky    MachineFunction &MF = DAG.getMachineFunction();
2689199481Srdivacky    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2690218893Sdim    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2691198090Srdivacky    EVT PtrVT = getPointerTy();
2692198090Srdivacky    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2693198090Srdivacky    SDValue CPAddr;
2694198090Srdivacky    unsigned PCAdj = (RelocM != Reloc::PIC_)
2695198090Srdivacky      ? 0 : (Subtarget->isThumb() ? 4 : 8);
2696198090Srdivacky    ARMConstantPoolValue *CPV =
2697226633Sdim      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2698226633Sdim                                      ARMCP::CPLSDA, PCAdj);
2699198090Srdivacky    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2700198090Srdivacky    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2701198090Srdivacky    SDValue Result =
2702198892Srdivacky      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2703218893Sdim                  MachinePointerInfo::getConstantPool(),
2704234353Sdim                  false, false, false, 0);
2705198090Srdivacky
2706198090Srdivacky    if (RelocM == Reloc::PIC_) {
2707199481Srdivacky      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2708198090Srdivacky      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2709198090Srdivacky    }
2710198090Srdivacky    return Result;
2711198090Srdivacky  }
2712221345Sdim  case Intrinsic::arm_neon_vmulls:
2713221345Sdim  case Intrinsic::arm_neon_vmullu: {
2714221345Sdim    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2715221345Sdim      ? ARMISD::VMULLs : ARMISD::VMULLu;
2716261991Sdim    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
2717221345Sdim                       Op.getOperand(1), Op.getOperand(2));
2718193323Sed  }
2719221345Sdim  }
2720193323Sed}
2721193323Sed
2722226633Sdimstatic SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2723226633Sdim                                 const ARMSubtarget *Subtarget) {
2724226633Sdim  // FIXME: handle "fence singlethread" more efficiently.
2725261991Sdim  SDLoc dl(Op);
2726226633Sdim  if (!Subtarget->hasDataBarrier()) {
2727226633Sdim    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2728226633Sdim    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2729226633Sdim    // here.
2730226633Sdim    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2731261991Sdim           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
2732226633Sdim    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2733226633Sdim                       DAG.getConstant(0, MVT::i32));
2734226633Sdim  }
2735226633Sdim
2736261991Sdim  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
2737261991Sdim  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
2738280031Sdim  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
2739261991Sdim  if (Subtarget->isMClass()) {
2740261991Sdim    // Only a full system barrier exists in the M-class architectures.
2741261991Sdim    Domain = ARM_MB::SY;
2742261991Sdim  } else if (Subtarget->isSwift() && Ord == Release) {
2743261991Sdim    // Swift happens to implement ISHST barriers in a way that's compatible with
2744261991Sdim    // Release semantics but weaker than ISH so we'd be fools not to use
2745261991Sdim    // it. Beware: other processors probably don't!
2746261991Sdim    Domain = ARM_MB::ISHST;
2747261991Sdim  }
2748261991Sdim
2749261991Sdim  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
2750261991Sdim                     DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
2751261991Sdim                     DAG.getConstant(Domain, MVT::i32));
2752226633Sdim}
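
// For example (illustrative): a plain "fence seq_cst" becomes "dmb ish" here,
// "dmb sy" on M-class cores, and a "fence release" on Swift can use the
// weaker "dmb ishst".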
2753226633Sdim
2754218893Sdimstatic SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2755218893Sdim                             const ARMSubtarget *Subtarget) {
2756218893Sdim  // ARM prior to v5TE and Thumb1 do not have preload instructions.
2757218893Sdim  if (!(Subtarget->isThumb2() ||
2758218893Sdim        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2759218893Sdim    // Just preserve the chain.
2760218893Sdim    return Op.getOperand(0);
2761218893Sdim
2762261991Sdim  SDLoc dl(Op);
2763218893Sdim  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2764218893Sdim  if (!isRead &&
2765218893Sdim      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2766218893Sdim    // ARMv7 with MP extension has PLDW.
2767218893Sdim    return Op.getOperand(0);
2768218893Sdim
2769224145Sdim  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2770224145Sdim  if (Subtarget->isThumb()) {
2771218893Sdim    // Invert the bits.
2772218893Sdim    isRead = ~isRead & 1;
2773224145Sdim    isData = ~isData & 1;
2774224145Sdim  }
2775218893Sdim
2776218893Sdim  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2777218893Sdim                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2778218893Sdim                     DAG.getConstant(isData, MVT::i32));
2779218893Sdim}
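
// E.g. (illustrative): a read prefetch of data selects PLD; a write prefetch
// needs v7 plus the MP extension to select PLDW, otherwise only the chain
// survives.  In Thumb mode the isRead/isData bits are encoded inverted, as
// above.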
2780218893Sdim
2781207618Srdivackystatic SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2782207618Srdivacky  MachineFunction &MF = DAG.getMachineFunction();
2783207618Srdivacky  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2784207618Srdivacky
2785193323Sed  // vastart just stores the address of the VarArgsFrameIndex slot into the
2786193323Sed  // memory location argument.
2787261991Sdim  SDLoc dl(Op);
2788198090Srdivacky  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2789207618Srdivacky  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2790193323Sed  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2791218893Sdim  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2792218893Sdim                      MachinePointerInfo(SV), false, false, 0);
2793193323Sed}
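
// Rough sketch: for "void f(int n, ...)", va_start(ap, n) lowers to a single
// store of the VarArgsFrameIndex address into ap's slot (names illustrative).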
2794193323Sed
2795193323SedSDValue
2796194710SedARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2797194710Sed                                        SDValue &Root, SelectionDAG &DAG,
2798261991Sdim                                        SDLoc dl) const {
2799194710Sed  MachineFunction &MF = DAG.getMachineFunction();
2800194710Sed  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2801194710Sed
2802234353Sdim  const TargetRegisterClass *RC;
2803198090Srdivacky  if (AFI->isThumb1OnlyFunction())
2804239462Sdim    RC = &ARM::tGPRRegClass;
2805194710Sed  else
2806239462Sdim    RC = &ARM::GPRRegClass;
2807194710Sed
2808194710Sed  // Transform the arguments stored in physical registers into virtual ones.
2809219077Sdim  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2810194710Sed  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2811194710Sed
2812194710Sed  SDValue ArgValue2;
2813194710Sed  if (NextVA.isMemLoc()) {
2814194710Sed    MachineFrameInfo *MFI = MF.getFrameInfo();
2815210299Sed    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2816194710Sed
2817194710Sed    // Create load node to retrieve arguments from the stack.
2818194710Sed    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2819198892Srdivacky    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2820218893Sdim                            MachinePointerInfo::getFixedStack(FI),
2821234353Sdim                            false, false, false, 0);
2822194710Sed  } else {
2823219077Sdim    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2824194710Sed    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2825194710Sed  }
2826276479Sdim  if (!Subtarget->isLittle())
2827276479Sdim    std::swap (ArgValue, ArgValue2);
2828199481Srdivacky  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2829194710Sed}
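
// Illustrative: an f64 formal split across r0/r1 is rebuilt here as
// VMOVDRR(r0, r1), i.e. "vmov d0, r0, r1"; big-endian targets swap the two
// halves first.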
2830194710Sed
2831221345Sdimvoid
2832221345SdimARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
2833251662Sdim                                  unsigned InRegsParamRecordIdx,
2834261991Sdim                                  unsigned ArgSize,
2835251662Sdim                                  unsigned &ArgRegsSize,
2836251662Sdim                                  unsigned &ArgRegsSaveSize)
2837221345Sdim  const {
2838221345Sdim  unsigned NumGPRs;
2839251662Sdim  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2840251662Sdim    unsigned RBegin, REnd;
2841251662Sdim    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2842251662Sdim    NumGPRs = REnd - RBegin;
2843251662Sdim  } else {
2844221345Sdim    unsigned int firstUnalloced;
2845221345Sdim    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
2846221345Sdim                                                sizeof(GPRArgRegs) /
2847221345Sdim                                                sizeof(GPRArgRegs[0]));
2848221345Sdim    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
2849221345Sdim  }
2850221345Sdim
2851280031Sdim  unsigned Align = MF.getTarget()
2852280031Sdim                       .getSubtargetImpl()
2853280031Sdim                       ->getFrameLowering()
2854280031Sdim                       ->getStackAlignment();
2855251662Sdim  ArgRegsSize = NumGPRs * 4;
2856261991Sdim
2857261991Sdim  // If the parameter is split between the stack and GPRs...
2858276479Sdim  if (NumGPRs && Align > 4 &&
2859261991Sdim      (ArgRegsSize < ArgSize ||
2860261991Sdim        InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
2861276479Sdim    // Add padding for the part of the param recovered from GPRs.  For example,
2862276479Sdim    // if Align == 8, its last byte must be at address K*8 - 1.
2863261991Sdim    // We need to do this, since the remaining (stack) part of the parameter
2864261991Sdim    // has stack alignment, and we need to "attach" the "GPRs head" to it
2865261991Sdim    // without gaps:
2866261991Sdim    // Stack:
2867261991Sdim    // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
2868261991Sdim    // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
2869261991Sdim    //
2870261991Sdim    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2871261991Sdim    unsigned Padding =
2872276479Sdim        OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
2873261991Sdim    ArgRegsSaveSize = ArgRegsSize + Padding;
2874261991Sdim  } else
2875261991Sdim    // We don't need to extend regs save size for byval parameters if they
2876261991Sdim    // are passed via GPRs only.
2877261991Sdim    ArgRegsSaveSize = ArgRegsSize;
2878221345Sdim}
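
// Worked example (illustrative): with Align == 8, only r3 left and nothing
// saved yet, a byval larger than 4 bytes gives ArgRegsSize == 4 and
// OffsetToAlignment(4, 8) == 4, so ArgRegsSaveSize == 8: four padding bytes
// keep the GPR head attached to the 8-byte-aligned stack tail.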
2879221345Sdim
2880221345Sdim// The remaining GPRs hold either the beginning of variable-argument
2881249423Sdim// data, or the beginning of an aggregate passed by value (usually
2882221345Sdim// byval).  Either way, we allocate stack slots adjacent to the data
2883221345Sdim// provided by our caller, and store the unallocated registers there.
2884221345Sdim// If this is a variadic function, the va_list pointer will begin with
2885221345Sdim// these values; otherwise, this reassembles a (byval) structure that
2886221345Sdim// was split between registers and memory.
2887251662Sdim// Return: The frame index registers were stored into.
2888251662Sdimint
2889251662SdimARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
2890261991Sdim                                  SDLoc dl, SDValue &Chain,
2891251662Sdim                                  const Value *OrigArg,
2892251662Sdim                                  unsigned InRegsParamRecordIdx,
2893251662Sdim                                  unsigned OffsetFromOrigArg,
2894251662Sdim                                  unsigned ArgOffset,
2895261991Sdim                                  unsigned ArgSize,
2896276479Sdim                                  bool ForceMutable,
2897276479Sdim                                  unsigned ByValStoreOffset,
2898276479Sdim                                  unsigned TotalArgRegsSaveSize) const {
2899251662Sdim
2900251662Sdim  // Currently, two use cases are possible:
2901276479Sdim  // Case #1. Non-var-args function, and we meet the first byval parameter.
2902251662Sdim  //          Set up the first unallocated register as the first byval
2903251662Sdim  //          register and consume all remaining registers
2904251662Sdim  //          (these two actions are performed by the HandleByVal method).
2905251662Sdim  //          Then, here, we initialize the stack frame with
2906251662Sdim  //          "store-reg" instructions.
2907251662Sdim  // Case #2. Var-args function that doesn't contain byval parameters.
2908251662Sdim  //          The same: consume all remaining unallocated registers and
2909251662Sdim  //          initialize the stack frame.
2910251662Sdim
2911221345Sdim  MachineFunction &MF = DAG.getMachineFunction();
2912221345Sdim  MachineFrameInfo *MFI = MF.getFrameInfo();
2913221345Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2914251662Sdim  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
2915251662Sdim  unsigned RBegin, REnd;
2916251662Sdim  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2917251662Sdim    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2918251662Sdim    firstRegToSaveIndex = RBegin - ARM::R0;
2919251662Sdim    lastRegToSaveIndex = REnd - ARM::R0;
2920251662Sdim  } else {
2921221345Sdim    firstRegToSaveIndex = CCInfo.getFirstUnallocated
2922261991Sdim      (GPRArgRegs, array_lengthof(GPRArgRegs));
2923251662Sdim    lastRegToSaveIndex = 4;
2924221345Sdim  }
2925221345Sdim
2926251662Sdim  unsigned ArgRegsSize, ArgRegsSaveSize;
2927261991Sdim  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
2928261991Sdim                 ArgRegsSize, ArgRegsSaveSize);
2929221345Sdim
2930251662Sdim  // Store any by-val regs to their spots on the stack so that they may be
2931251662Sdim  // loaded by dereferencing the result of the formal parameter pointer or va_next.
2932251662Sdim  // Note: once stack area for byval/varargs registers
2933251662Sdim  // was initialized, it can't be initialized again.
2934251662Sdim  if (ArgRegsSaveSize) {
2935261991Sdim    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
2936261991Sdim
2937261991Sdim    if (Padding) {
2938261991Sdim      assert(AFI->getStoredByValParamsPadding() == 0 &&
2939261991Sdim             "The only parameter may be padded.");
2940261991Sdim      AFI->setStoredByValParamsPadding(Padding);
2941261991Sdim    }
2942261991Sdim
2943276479Sdim    int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
2944276479Sdim                                            Padding +
2945276479Sdim                                              ByValStoreOffset -
2946276479Sdim                                              (int64_t)TotalArgRegsSaveSize,
2947276479Sdim                                            false);
2948251662Sdim    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
2949276479Sdim    if (Padding) {
2950276479Sdim       MFI->CreateFixedObject(Padding,
2951276479Sdim                              ArgOffset + ByValStoreOffset -
2952276479Sdim                                (int64_t)ArgRegsSaveSize,
2953276479Sdim                              false);
2954276479Sdim    }
2955251662Sdim
2956221345Sdim    SmallVector<SDValue, 4> MemOps;
2957251662Sdim    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
2958251662Sdim         ++firstRegToSaveIndex, ++i) {
2959234353Sdim      const TargetRegisterClass *RC;
2960221345Sdim      if (AFI->isThumb1OnlyFunction())
2961239462Sdim        RC = &ARM::tGPRRegClass;
2962221345Sdim      else
2963239462Sdim        RC = &ARM::GPRRegClass;
2964221345Sdim
2965221345Sdim      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2966221345Sdim      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2967221345Sdim      SDValue Store =
2968221345Sdim        DAG.getStore(Val.getValue(1), dl, Val, FIN,
2969243830Sdim                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
2970221345Sdim                     false, false, 0);
2971221345Sdim      MemOps.push_back(Store);
2972221345Sdim      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2973221345Sdim                        DAG.getConstant(4, getPointerTy()));
2974221345Sdim    }
2975251662Sdim
2976251662Sdim    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
2977251662Sdim
2978221345Sdim    if (!MemOps.empty())
2979276479Sdim      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2980251662Sdim    return FrameIndex;
2981276479Sdim  } else {
2982276479Sdim    if (ArgSize == 0) {
2983276479Sdim      // We cannot allocate a zero-byte object for the first variadic argument,
2984276479Sdim      // so just make up a size.
2985276479Sdim      ArgSize = 4;
2986276479Sdim    }
2987221345Sdim    // This will point to the next argument passed via the stack.
2988261991Sdim    return MFI->CreateFixedObject(
2989276479Sdim      ArgSize, ArgOffset, !ForceMutable);
2990276479Sdim  }
2991221345Sdim}
2992221345Sdim
2993251662Sdim// Set up the stack frame that the va_list pointer will start from.
2994251662Sdimvoid
2995251662SdimARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2996261991Sdim                                        SDLoc dl, SDValue &Chain,
2997251662Sdim                                        unsigned ArgOffset,
2998276479Sdim                                        unsigned TotalArgRegsSaveSize,
2999251662Sdim                                        bool ForceMutable) const {
3000251662Sdim  MachineFunction &MF = DAG.getMachineFunction();
3001251662Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3002251662Sdim
3003251662Sdim  // Try to store any remaining integer argument regs
3004251662Sdim  // to their spots on the stack so that they may be loaded by dereferencing
3005251662Sdim  // the result of va_next.
3006251662Sdim  // If there are no regs to be stored, just point the address after the last
3007251662Sdim  // argument passed via the stack.
3008251662Sdim  int FrameIndex =
3009276479Sdim    StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3010276479Sdim                   CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
3011276479Sdim                   0, TotalArgRegsSaveSize);
3012251662Sdim
3013251662Sdim  AFI->setVarArgsFrameIndex(FrameIndex);
3014251662Sdim}
3015251662Sdim
3016194710SedSDValue
3017198090SrdivackyARMTargetLowering::LowerFormalArguments(SDValue Chain,
3018198090Srdivacky                                        CallingConv::ID CallConv, bool isVarArg,
3019198090Srdivacky                                        const SmallVectorImpl<ISD::InputArg>
3020198090Srdivacky                                          &Ins,
3021261991Sdim                                        SDLoc dl, SelectionDAG &DAG,
3022207618Srdivacky                                        SmallVectorImpl<SDValue> &InVals)
3023207618Srdivacky                                          const {
3024193323Sed  MachineFunction &MF = DAG.getMachineFunction();
3025193323Sed  MachineFrameInfo *MFI = MF.getFrameInfo();
3026193323Sed
3027193323Sed  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3028193323Sed
3029193323Sed  // Assign locations to all of the incoming arguments.
3030193323Sed  SmallVector<CCValAssign, 16> ArgLocs;
3031280031Sdim  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3032280031Sdim                    *DAG.getContext(), Prologue);
3033198090Srdivacky  CCInfo.AnalyzeFormalArguments(Ins,
3034198090Srdivacky                                CCAssignFnForNode(CallConv, /* Return*/ false,
3035198090Srdivacky                                                  isVarArg));
3036249423Sdim
3037193323Sed  SmallVector<SDValue, 16> ArgValues;
3038221345Sdim  int lastInsIndex = -1;
3039221345Sdim  SDValue ArgValue;
3040243830Sdim  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3041243830Sdim  unsigned CurArgIdx = 0;
3042251662Sdim
3043251662Sdim  // Initially ArgRegsSaveSize is zero.
3044251662Sdim  // Then we increase this value each time we meet a byval parameter.
3045251662Sdim  // We also increase this value in the case of a varargs function.
3046251662Sdim  AFI->setArgRegsSaveSize(0);
3047251662Sdim
3048276479Sdim  unsigned ByValStoreOffset = 0;
3049276479Sdim  unsigned TotalArgRegsSaveSize = 0;
3050276479Sdim  unsigned ArgRegsSaveSizeMaxAlign = 4;
3051276479Sdim
3052276479Sdim  // Calculate the amount of stack space that we need to allocate to store
3053276479Sdim  // byval and variadic arguments that are passed in registers.
3054276479Sdim  // We need to know this before we allocate the first byval or variadic
3055276479Sdim  // argument, as they will be allocated a stack slot below the CFA (Canonical
3056276479Sdim  // Frame Address, the stack pointer at entry to the function).
3057193323Sed  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3058193323Sed    CCValAssign &VA = ArgLocs[i];
3059276479Sdim    if (VA.isMemLoc()) {
3060276479Sdim      int index = VA.getValNo();
3061276479Sdim      if (index != lastInsIndex) {
3062276479Sdim        ISD::ArgFlagsTy Flags = Ins[index].Flags;
3063276479Sdim        if (Flags.isByVal()) {
3064276479Sdim          unsigned ExtraArgRegsSize;
3065276479Sdim          unsigned ExtraArgRegsSaveSize;
3066277320Sdim          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
3067276479Sdim                         Flags.getByValSize(),
3068276479Sdim                         ExtraArgRegsSize, ExtraArgRegsSaveSize);
3069276479Sdim
3070276479Sdim          TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
3071276479Sdim          if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
3072276479Sdim              ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
3073276479Sdim          CCInfo.nextInRegsParam();
3074276479Sdim        }
3075276479Sdim        lastInsIndex = index;
3076276479Sdim      }
3077276479Sdim    }
3078276479Sdim  }
3079276479Sdim  CCInfo.rewindByValRegsInfo();
3080276479Sdim  lastInsIndex = -1;
3081280031Sdim  if (isVarArg && MFI->hasVAStart()) {
3082276479Sdim    unsigned ExtraArgRegsSize;
3083276479Sdim    unsigned ExtraArgRegsSaveSize;
3084276479Sdim    computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
3085276479Sdim                   ExtraArgRegsSize, ExtraArgRegsSaveSize);
3086276479Sdim    TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
3087276479Sdim  }
3088276479Sdim  // If the arg regs save area contains N-byte aligned values, the
3089276479Sdim  // bottom of it must be at least N-byte aligned.
3090276479Sdim  TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
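  // At most the four GPR argument registers r0-r3 can ever be spilled here,
  // so the aligned size is clamped to 16 bytes.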
3091276479Sdim  TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
3092276479Sdim
3093276479Sdim  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3094276479Sdim    CCValAssign &VA = ArgLocs[i];
3095243830Sdim    std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
3096243830Sdim    CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
3097193323Sed    // Arguments stored in registers.
3098193323Sed    if (VA.isRegLoc()) {
3099198090Srdivacky      EVT RegVT = VA.getLocVT();
3100193323Sed
3101194710Sed      if (VA.needsCustom()) {
3102194710Sed        // f64 and vector types are split up into multiple registers or
3103194710Sed        // combinations of registers and stack slots.
3104194710Sed        if (VA.getLocVT() == MVT::v2f64) {
3105194710Sed          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3106198090Srdivacky                                                   Chain, DAG, dl);
3107194710Sed          VA = ArgLocs[++i]; // skip ahead to next loc
3108207618Srdivacky          SDValue ArgValue2;
3109207618Srdivacky          if (VA.isMemLoc()) {
3110210299Sed            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
3111207618Srdivacky            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
3112207618Srdivacky            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3113218893Sdim                                    MachinePointerInfo::getFixedStack(FI),
3114234353Sdim                                    false, false, false, 0);
3115207618Srdivacky          } else {
3116207618Srdivacky            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3117207618Srdivacky                                             Chain, DAG, dl);
3118207618Srdivacky          }
3119194710Sed          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3120194710Sed          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3121194710Sed                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
3122194710Sed          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3123194710Sed                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
3124194710Sed        } else
3125198090Srdivacky          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3126193323Sed
3127194710Sed      } else {
3128234353Sdim        const TargetRegisterClass *RC;
3129198090Srdivacky
3130198090Srdivacky        if (RegVT == MVT::f32)
3131239462Sdim          RC = &ARM::SPRRegClass;
3132198090Srdivacky        else if (RegVT == MVT::f64)
3133239462Sdim          RC = &ARM::DPRRegClass;
3134198090Srdivacky        else if (RegVT == MVT::v2f64)
3135239462Sdim          RC = &ARM::QPRRegClass;
3136198090Srdivacky        else if (RegVT == MVT::i32)
3137280031Sdim          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3138280031Sdim                                           : &ARM::GPRRegClass;
3139194710Sed        else
3140198090Srdivacky          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3141193323Sed
3142194710Sed        // Transform the arguments in physical registers into virtual ones.
3143219077Sdim        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3144198090Srdivacky        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3145193323Sed      }
3146193323Sed
3147193323Sed      // If this is an 8 or 16-bit value, it is really passed promoted
3148193323Sed      // to 32 bits.  Insert an assert[sz]ext to capture this, then
3149193323Sed      // truncate to the right size.
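      // For example, an i8 argument arrives in the low byte of a 32-bit
      // register; AssertSext/AssertZext records that the remaining bits are
      // already sign- or zero-extended before truncating back to i8.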
3150193323Sed      switch (VA.getLocInfo()) {
3151198090Srdivacky      default: llvm_unreachable("Unknown loc info!");
3152193323Sed      case CCValAssign::Full: break;
3153193323Sed      case CCValAssign::BCvt:
3154218893Sdim        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3155193323Sed        break;
3156193323Sed      case CCValAssign::SExt:
3157193323Sed        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3158193323Sed                               DAG.getValueType(VA.getValVT()));
3159193323Sed        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3160193323Sed        break;
3161193323Sed      case CCValAssign::ZExt:
3162193323Sed        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3163193323Sed                               DAG.getValueType(VA.getValVT()));
3164193323Sed        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3165193323Sed        break;
3166193323Sed      }
3167193323Sed
3168198090Srdivacky      InVals.push_back(ArgValue);
3169193323Sed
3170193323Sed    } else { // VA.isRegLoc()
3171193323Sed
3172193323Sed      // sanity check
3173193323Sed      assert(VA.isMemLoc());
3174193323Sed      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3175193323Sed
3176221345Sdim      int index = ArgLocs[i].getValNo();
3177193323Sed
3178221345Sdim      // Some Ins[] entries become multiple ArgLoc[] entries.
3179221345Sdim      // Process them only once.
3180221345Sdim      if (index != lastInsIndex)
3181221345Sdim        {
3182221345Sdim          ISD::ArgFlagsTy Flags = Ins[index].Flags;
3183223017Sdim          // FIXME: For now, all byval parameter objects are marked mutable.
3184221345Sdim          // This can be changed with more analysis.
3185221345Sdim          // In case of tail call optimization, mark all arguments mutable,
3186221345Sdim          // since they could be overwritten by the lowering of arguments in
3187221345Sdim          // case of a tail call.
3188221345Sdim          if (Flags.isByVal()) {
3189277320Sdim            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3190276479Sdim
3191276479Sdim            ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
3192251662Sdim            int FrameIndex = StoreByValRegs(
3193251662Sdim                CCInfo, DAG, dl, Chain, CurOrigArg,
3194251662Sdim                CurByValIndex,
3195251662Sdim                Ins[VA.getValNo()].PartOffset,
3196251662Sdim                VA.getLocMemOffset(),
3197261991Sdim                Flags.getByValSize(),
3198276479Sdim                true /*force mutable frames*/,
3199276479Sdim                ByValStoreOffset,
3200276479Sdim                TotalArgRegsSaveSize);
3201276479Sdim            ByValStoreOffset += Flags.getByValSize();
3202276479Sdim            ByValStoreOffset = std::min(ByValStoreOffset, 16U);
3203251662Sdim            InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
3204251662Sdim            CCInfo.nextInRegsParam();
3205221345Sdim          } else {
3206276479Sdim            unsigned FIOffset = VA.getLocMemOffset();
3207221345Sdim            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3208261991Sdim                                            FIOffset, true);
3209221345Sdim
3210221345Sdim            // Create load nodes to retrieve arguments from the stack.
3211221345Sdim            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
3212221345Sdim            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3213221345Sdim                                         MachinePointerInfo::getFixedStack(FI),
3214234353Sdim                                         false, false, false, 0));
3215221345Sdim          }
3216221345Sdim          lastInsIndex = index;
3217221345Sdim        }
3218193323Sed    }
3219193323Sed  }
3220193323Sed
3221193323Sed  // varargs
3222280031Sdim  if (isVarArg && MFI->hasVAStart())
3223251662Sdim    VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3224276479Sdim                         CCInfo.getNextStackOffset(),
3225276479Sdim                         TotalArgRegsSaveSize);
3226193323Sed
3227276479Sdim  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3228276479Sdim
3229198090Srdivacky  return Chain;
3230193323Sed}
3231193323Sed
3232193323Sed/// isFloatingPointZero - Return true if this is +0.0.
3233193323Sedstatic bool isFloatingPointZero(SDValue Op) {
3234193323Sed  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3235193323Sed    return CFP->getValueAPF().isPosZero();
3236193323Sed  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3237193323Sed    // Maybe this has already been legalized into the constant pool?
3238193323Sed    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3239193323Sed      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3240193323Sed      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3241207618Srdivacky        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3242193323Sed          return CFP->getValueAPF().isPosZero();
3243193323Sed    }
3244280031Sdim  } else if (Op->getOpcode() == ISD::BITCAST &&
3245280031Sdim             Op->getValueType(0) == MVT::f64) {
3246280031Sdim    // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3247280031Sdim    // created by LowerConstantFP().
3248280031Sdim    SDValue BitcastOp = Op->getOperand(0);
3249280031Sdim    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
3250280031Sdim      SDValue MoveOp = BitcastOp->getOperand(0);
3251280031Sdim      if (MoveOp->getOpcode() == ISD::TargetConstant &&
3252280031Sdim          cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
3253280031Sdim        return true;
3254280031Sdim      }
3255280031Sdim    }
3256193323Sed  }
3257193323Sed  return false;
3258193323Sed}
3259193323Sed
3260193323Sed/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
3261193323Sed/// the given operands.
3262199481SrdivackySDValue
3263199481SrdivackyARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3264210299Sed                             SDValue &ARMcc, SelectionDAG &DAG,
3265261991Sdim                             SDLoc dl) const {
3266193323Sed  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3267193323Sed    unsigned C = RHSC->getZExtValue();
3268199481Srdivacky    if (!isLegalICmpImmediate(C)) {
3269193323Sed      // Constant does not fit, try adjusting it by one?
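      // For example, 0x101 cannot be encoded as an ARM modified immediate,
      // but 0x100 can, so (x < 0x101) may be rewritten as (x <= 0x100) below.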
3270193323Sed      switch (CC) {
3271193323Sed      default: break;
3272193323Sed      case ISD::SETLT:
3273193323Sed      case ISD::SETGE:
3274212904Sdim        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3275193323Sed          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3276193323Sed          RHS = DAG.getConstant(C-1, MVT::i32);
3277193323Sed        }
3278193323Sed        break;
3279193323Sed      case ISD::SETULT:
3280193323Sed      case ISD::SETUGE:
3281212904Sdim        if (C != 0 && isLegalICmpImmediate(C-1)) {
3282193323Sed          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3283193323Sed          RHS = DAG.getConstant(C-1, MVT::i32);
3284193323Sed        }
3285193323Sed        break;
3286193323Sed      case ISD::SETLE:
3287193323Sed      case ISD::SETGT:
3288212904Sdim        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3289193323Sed          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3290193323Sed          RHS = DAG.getConstant(C+1, MVT::i32);
3291193323Sed        }
3292193323Sed        break;
3293193323Sed      case ISD::SETULE:
3294193323Sed      case ISD::SETUGT:
3295212904Sdim        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3296193323Sed          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3297193323Sed          RHS = DAG.getConstant(C+1, MVT::i32);
3298193323Sed        }
3299193323Sed        break;
3300193323Sed      }
3301193323Sed    }
3302193323Sed  }
3303193323Sed
3304193323Sed  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3305193323Sed  ARMISD::NodeType CompareType;
3306193323Sed  switch (CondCode) {
3307193323Sed  default:
3308193323Sed    CompareType = ARMISD::CMP;
3309193323Sed    break;
3310193323Sed  case ARMCC::EQ:
3311193323Sed  case ARMCC::NE:
3312195340Sed    // Uses only the Z flag.
3313195340Sed    CompareType = ARMISD::CMPZ;
3314193323Sed    break;
3315193323Sed  }
3316210299Sed  ARMcc = DAG.getConstant(CondCode, MVT::i32);
3317218893Sdim  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3318193323Sed}
3319193323Sed
3320193323Sed/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3321210299SedSDValue
3322210299SedARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
3323261991Sdim                             SDLoc dl) const {
3324280031Sdim  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3325193323Sed  SDValue Cmp;
3326193323Sed  if (!isFloatingPointZero(RHS))
3327218893Sdim    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
3328193323Sed  else
3329218893Sdim    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
3330218893Sdim  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3331193323Sed}
3332193323Sed
3333221345Sdim/// duplicateCmp - Glue values can have only one use, so this function
3334221345Sdim/// duplicates a comparison node.
3335221345SdimSDValue
3336221345SdimARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3337221345Sdim  unsigned Opc = Cmp.getOpcode();
3338261991Sdim  SDLoc DL(Cmp);
3339221345Sdim  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3340221345Sdim    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3341221345Sdim
3342221345Sdim  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3343221345Sdim  Cmp = Cmp.getOperand(0);
3344221345Sdim  Opc = Cmp.getOpcode();
3345221345Sdim  if (Opc == ARMISD::CMPFP)
3346221345Sdim    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3347221345Sdim  else {
3348221345Sdim    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3349221345Sdim    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
3350221345Sdim  }
3351221345Sdim  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3352221345Sdim}
3353221345Sdim
3354276479Sdimstd::pair<SDValue, SDValue>
3355276479SdimARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3356276479Sdim                                 SDValue &ARMcc) const {
3357276479Sdim  assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");
3358276479Sdim
3359276479Sdim  SDValue Value, OverflowCmp;
3360276479Sdim  SDValue LHS = Op.getOperand(0);
3361276479Sdim  SDValue RHS = Op.getOperand(1);
3362276479Sdim
3363276479Sdim
3364276479Sdim  // FIXME: We are currently always generating CMPs because we don't support
3365276479Sdim  // generating CMN through the backend. This is not as good as the natural
3366276479Sdim  // CMP case because it causes a register dependency and cannot be folded
3367276479Sdim  // later.
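  // For SADDO, for instance, Value = LHS + RHS is computed and then
  // CMP Value, LHS re-derives the flags: its V flag is set exactly when the
  // addition overflowed, so ARMCC::VC selects the no-overflow case.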
3368276479Sdim
3369276479Sdim  switch (Op.getOpcode()) {
3370276479Sdim  default:
3371276479Sdim    llvm_unreachable("Unknown overflow instruction!");
3372276479Sdim  case ISD::SADDO:
3373276479Sdim    ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
3374276479Sdim    Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
3375276479Sdim    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
3376276479Sdim    break;
3377276479Sdim  case ISD::UADDO:
3378276479Sdim    ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
3379276479Sdim    Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
3380276479Sdim    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
3381276479Sdim    break;
3382276479Sdim  case ISD::SSUBO:
3383276479Sdim    ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
3384276479Sdim    Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
3385276479Sdim    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
3386276479Sdim    break;
3387276479Sdim  case ISD::USUBO:
3388276479Sdim    ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
3389276479Sdim    Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
3390276479Sdim    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
3391276479Sdim    break;
3392276479Sdim  } // switch (...)
3393276479Sdim
3394276479Sdim  return std::make_pair(Value, OverflowCmp);
3395276479Sdim}
3396276479Sdim
3397276479Sdim
3398276479SdimSDValue
3399276479SdimARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3400276479Sdim  // Let legalize expand this if it isn't a legal type yet.
3401276479Sdim  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3402276479Sdim    return SDValue();
3403276479Sdim
3404276479Sdim  SDValue Value, OverflowCmp;
3405276479Sdim  SDValue ARMcc;
3406276479Sdim  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3407276479Sdim  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3408276479Sdim  // We use 0 and 1 as false and true values.
3409276479Sdim  SDValue TVal = DAG.getConstant(1, MVT::i32);
3410276479Sdim  SDValue FVal = DAG.getConstant(0, MVT::i32);
3411276479Sdim  EVT VT = Op.getValueType();
3412276479Sdim
3413276479Sdim  SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
3414276479Sdim                                 ARMcc, CCR, OverflowCmp);
3415276479Sdim
3416276479Sdim  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3417276479Sdim  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
3418276479Sdim}
3419276479Sdim
3420276479Sdim
3421212904SdimSDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3422212904Sdim  SDValue Cond = Op.getOperand(0);
3423212904Sdim  SDValue SelectTrue = Op.getOperand(1);
3424212904Sdim  SDValue SelectFalse = Op.getOperand(2);
3425261991Sdim  SDLoc dl(Op);
3426276479Sdim  unsigned Opc = Cond.getOpcode();
3427212904Sdim
3428276479Sdim  if (Cond.getResNo() == 1 &&
3429276479Sdim      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3430276479Sdim       Opc == ISD::USUBO)) {
3431276479Sdim    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3432276479Sdim      return SDValue();
3433276479Sdim
3434276479Sdim    SDValue Value, OverflowCmp;
3435276479Sdim    SDValue ARMcc;
3436276479Sdim    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3437276479Sdim    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3438276479Sdim    EVT VT = Op.getValueType();
3439276479Sdim
3440280031Sdim    return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
3441280031Sdim                   OverflowCmp, DAG);
3442276479Sdim  }
3443276479Sdim
3444212904Sdim  // Convert:
3445212904Sdim  //
3446212904Sdim  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3447212904Sdim  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3448212904Sdim  //
3449212904Sdim  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3450212904Sdim    const ConstantSDNode *CMOVTrue =
3451212904Sdim      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3452212904Sdim    const ConstantSDNode *CMOVFalse =
3453212904Sdim      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3454212904Sdim
3455212904Sdim    if (CMOVTrue && CMOVFalse) {
3456212904Sdim      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3457212904Sdim      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3458212904Sdim
3459212904Sdim      SDValue True;
3460212904Sdim      SDValue False;
3461212904Sdim      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3462212904Sdim        True = SelectTrue;
3463212904Sdim        False = SelectFalse;
3464212904Sdim      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3465212904Sdim        True = SelectFalse;
3466212904Sdim        False = SelectTrue;
3467212904Sdim      }
3468212904Sdim
3469212904Sdim      if (True.getNode() && False.getNode()) {
3470223017Sdim        EVT VT = Op.getValueType();
3471212904Sdim        SDValue ARMcc = Cond.getOperand(2);
3472212904Sdim        SDValue CCR = Cond.getOperand(3);
3473221345Sdim        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3474223017Sdim        assert(True.getValueType() == VT);
3475280031Sdim        return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
3476212904Sdim      }
3477212904Sdim    }
3478212904Sdim  }
3479212904Sdim
3480234353Sdim  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3481234353Sdim  // undefined bits before doing a full-word comparison with zero.
3482234353Sdim  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
3483234353Sdim                     DAG.getConstant(1, Cond.getValueType()));
3484234353Sdim
3485212904Sdim  return DAG.getSelectCC(dl, Cond,
3486212904Sdim                         DAG.getConstant(0, Cond.getValueType()),
3487212904Sdim                         SelectTrue, SelectFalse, ISD::SETNE);
3488212904Sdim}
3489212904Sdim
3490261991Sdimstatic ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
3491261991Sdim  if (CC == ISD::SETNE)
3492261991Sdim    return ISD::SETEQ;
3493276479Sdim  return ISD::getSetCCInverse(CC, true);
3494261991Sdim}
3495261991Sdim
3496261991Sdimstatic void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
3497261991Sdim                                 bool &swpCmpOps, bool &swpVselOps) {
3498261991Sdim  // Start by selecting the GE condition code for opcodes that return true for
3499261991Sdim  // 'equality'
3500261991Sdim  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
3501261991Sdim      CC == ISD::SETULE)
3502261991Sdim    CondCode = ARMCC::GE;
3503261991Sdim
3504261991Sdim  // and GT for opcodes that return false for 'equality'.
3505261991Sdim  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
3506261991Sdim           CC == ISD::SETULT)
3507261991Sdim    CondCode = ARMCC::GT;
3508261991Sdim
3509261991Sdim  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
3510261991Sdim  // to swap the compare operands.
3511261991Sdim  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
3512261991Sdim      CC == ISD::SETULT)
3513261991Sdim    swpCmpOps = true;
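  // For example, (a olt b) is handled as a GT compare with the operands
  // swapped: compare b against a, then select on GT.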
3514261991Sdim
3515261991Sdim  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
3516261991Sdim  // If we have an unordered opcode, we need to swap the operands to the VSEL
3517261991Sdim  // instruction (effectively negating the condition).
3518261991Sdim  //
3519261991Sdim  // This also has the effect of swapping which one of 'less' or 'greater'
3520261991Sdim  // returns true, so we also swap the compare operands. It also switches
3521261991Sdim  // whether we return true for 'equality', so we compensate by picking the
3522261991Sdim  // opposite condition code to our original choice.
3523261991Sdim  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
3524261991Sdim      CC == ISD::SETUGT) {
3525261991Sdim    swpCmpOps = !swpCmpOps;
3526261991Sdim    swpVselOps = !swpVselOps;
3527261991Sdim    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
3528261991Sdim  }
3529261991Sdim
3530261991Sdim  // 'ordered' is 'anything but unordered', so use the VS condition code and
3531261991Sdim  // swap the VSEL operands.
3532261991Sdim  if (CC == ISD::SETO) {
3533261991Sdim    CondCode = ARMCC::VS;
3534261991Sdim    swpVselOps = true;
3535261991Sdim  }
3536261991Sdim
3537261991Sdim  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
3538261991Sdim  // code and swap the VSEL operands.
3539261991Sdim  if (CC == ISD::SETUNE) {
3540261991Sdim    CondCode = ARMCC::EQ;
3541261991Sdim    swpVselOps = true;
3542261991Sdim  }
3543261991Sdim}
3544261991Sdim
3545280031SdimSDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
3546280031Sdim                                   SDValue TrueVal, SDValue ARMcc, SDValue CCR,
3547280031Sdim                                   SDValue Cmp, SelectionDAG &DAG) const {
3548280031Sdim  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
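    // Without double-precision registers, emulate an f64 CMOV by splitting
    // both operands into i32 halves, CMOVing each half separately, and
    // rebuilding the f64; the compare is duplicated since glue values may
    // have only one use.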
3549280031Sdim    FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3550280031Sdim                           DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
3551280031Sdim    TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
3552280031Sdim                          DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
3553280031Sdim
3554280031Sdim    SDValue TrueLow = TrueVal.getValue(0);
3555280031Sdim    SDValue TrueHigh = TrueVal.getValue(1);
3556280031Sdim    SDValue FalseLow = FalseVal.getValue(0);
3557280031Sdim    SDValue FalseHigh = FalseVal.getValue(1);
3558280031Sdim
3559280031Sdim    SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
3560280031Sdim                              ARMcc, CCR, Cmp);
3561280031Sdim    SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
3562280031Sdim                               ARMcc, CCR, duplicateCmp(Cmp, DAG));
3563280031Sdim
3564280031Sdim    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
3565280031Sdim  } else {
3566280031Sdim    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
3567280031Sdim                       Cmp);
3568280031Sdim  }
3569280031Sdim}
3570280031Sdim
3571207618SrdivackySDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
3572198090Srdivacky  EVT VT = Op.getValueType();
3573193323Sed  SDValue LHS = Op.getOperand(0);
3574193323Sed  SDValue RHS = Op.getOperand(1);
3575193323Sed  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3576193323Sed  SDValue TrueVal = Op.getOperand(2);
3577193323Sed  SDValue FalseVal = Op.getOperand(3);
3578261991Sdim  SDLoc dl(Op);
3579193323Sed
3580280031Sdim  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
3581280031Sdim    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
3582280031Sdim                                                    dl);
3583280031Sdim
3584280031Sdim    // If softenSetCCOperands only returned one value, we should compare it to
3585280031Sdim    // zero.
3586280031Sdim    if (!RHS.getNode()) {
3587280031Sdim      RHS = DAG.getConstant(0, LHS.getValueType());
3588280031Sdim      CC = ISD::SETNE;
3589280031Sdim    }
3590280031Sdim  }
3591280031Sdim
3592193323Sed  if (LHS.getValueType() == MVT::i32) {
3593261991Sdim    // Try to generate VSEL on ARMv8.
3594261991Sdim    // The VSEL instruction can't use all the usual ARM condition
3595261991Sdim    // codes: it only has two bits to select the condition code, so it's
3596261991Sdim    // constrained to use only GE, GT, VS and EQ.
3597261991Sdim    //
3598261991Sdim    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
3599261991Sdim    // swap the operands of the previous compare instruction (effectively
3600261991Sdim    // inverting the compare condition, swapping 'less' and 'greater') and
3601261991Sdim    // sometimes need to swap the operands to the VSEL (which inverts the
3602261991Sdim    // condition in the sense of firing whenever the previous condition didn't)
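    // For example, an i32 LT select maps to a condition VSEL can't encode;
    // inverting to GE and swapping the true/false values is equivalent.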
3603261991Sdim    if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3604261991Sdim                                      TrueVal.getValueType() == MVT::f64)) {
3605261991Sdim      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3606261991Sdim      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
3607261991Sdim          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
3608261991Sdim        CC = getInverseCCForVSEL(CC);
3609261991Sdim        std::swap(TrueVal, FalseVal);
3610261991Sdim      }
3611261991Sdim    }
3612261991Sdim
3613210299Sed    SDValue ARMcc;
3614193323Sed    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3615210299Sed    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3616280031Sdim    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
3617193323Sed  }
3618193323Sed
3619193323Sed  ARMCC::CondCodes CondCode, CondCode2;
3620198090Srdivacky  FPCCToARMCC(CC, CondCode, CondCode2);
3621193323Sed
3622261991Sdim  // Try to generate VSEL on ARMv8.
3623261991Sdim  if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3624261991Sdim                                    TrueVal.getValueType() == MVT::f64)) {
3625261991Sdim    // We can select VMAXNM/VMINNM from a compare followed by a select with the
3626261991Sdim    // same operands, as follows:
3627261991Sdim    //   c = fcmp [ogt, olt, ugt, ult] a, b
3628261991Sdim    //   select c, a, b
3629261991Sdim    // We only do this in unsafe-fp-math, because signed zeros and NaNs are
3630261991Sdim    // handled differently than the original code sequence.
3631280031Sdim    if (getTargetMachine().Options.UnsafeFPMath) {
3632280031Sdim      if (LHS == TrueVal && RHS == FalseVal) {
3633280031Sdim        if (CC == ISD::SETOGT || CC == ISD::SETUGT)
3634280031Sdim          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
3635280031Sdim        if (CC == ISD::SETOLT || CC == ISD::SETULT)
3636280031Sdim          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
3637280031Sdim      } else if (LHS == FalseVal && RHS == TrueVal) {
3638280031Sdim        if (CC == ISD::SETOLT || CC == ISD::SETULT)
3639280031Sdim          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
3640280031Sdim        if (CC == ISD::SETOGT || CC == ISD::SETUGT)
3641280031Sdim          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
3642280031Sdim      }
3643261991Sdim    }
3644261991Sdim
3645261991Sdim    bool swpCmpOps = false;
3646261991Sdim    bool swpVselOps = false;
3647261991Sdim    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
3648261991Sdim
3649261991Sdim    if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
3650261991Sdim        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
3651261991Sdim      if (swpCmpOps)
3652261991Sdim        std::swap(LHS, RHS);
3653261991Sdim      if (swpVselOps)
3654261991Sdim        std::swap(TrueVal, FalseVal);
3655261991Sdim    }
3656261991Sdim  }
3657261991Sdim
3658210299Sed  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
3659210299Sed  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3660193323Sed  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3661280031Sdim  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
3662193323Sed  if (CondCode2 != ARMCC::AL) {
3663210299Sed    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
3664193323Sed    // FIXME: Needs another CMP because flag can have but one use.
3665193323Sed    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
3666280031Sdim    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
3667193323Sed  }
3668193323Sed  return Result;
3669193323Sed}
3670193323Sed
3671210299Sed/// canChangeToInt - Given the fp compare operand, return true if it is
3672210299Sed/// suitable for morphing into an integer compare sequence.
3673210299Sedstatic bool canChangeToInt(SDValue Op, bool &SeenZero,
3674210299Sed                           const ARMSubtarget *Subtarget) {
3675210299Sed  SDNode *N = Op.getNode();
3676210299Sed  if (!N->hasOneUse())
3677210299Sed    // Otherwise it requires moving the value from fp to integer registers.
3678210299Sed    return false;
3679210299Sed  if (!N->getNumValues())
3680210299Sed    return false;
3681210299Sed  EVT VT = Op.getValueType();
3682210299Sed  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
3683210299Sed    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
3684210299Sed    // vmrs are very slow, e.g. cortex-a8.
3685210299Sed    return false;
3686210299Sed
3687210299Sed  if (isFloatingPointZero(Op)) {
3688210299Sed    SeenZero = true;
3689210299Sed    return true;
3690210299Sed  }
3691210299Sed  return ISD::isNormalLoad(N);
3692210299Sed}
3693210299Sed
3694210299Sedstatic SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
3695210299Sed  if (isFloatingPointZero(Op))
3696210299Sed    return DAG.getConstant(0, MVT::i32);
3697210299Sed
3698210299Sed  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
3699261991Sdim    return DAG.getLoad(MVT::i32, SDLoc(Op),
3700218893Sdim                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
3701210299Sed                       Ld->isVolatile(), Ld->isNonTemporal(),
3702234353Sdim                       Ld->isInvariant(), Ld->getAlignment());
3703210299Sed
3704210299Sed  llvm_unreachable("Unknown VFP cmp argument!");
3705210299Sed}
3706210299Sed
3707210299Sedstatic void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
3708210299Sed                           SDValue &RetVal1, SDValue &RetVal2) {
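  // Split an f64 operand into two i32 values: constants for +0.0, or a pair
  // of 4-byte loads at the base pointer and base pointer + 4.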
3709210299Sed  if (isFloatingPointZero(Op)) {
3710210299Sed    RetVal1 = DAG.getConstant(0, MVT::i32);
3711210299Sed    RetVal2 = DAG.getConstant(0, MVT::i32);
3712210299Sed    return;
3713210299Sed  }
3714210299Sed
3715210299Sed  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
3716210299Sed    SDValue Ptr = Ld->getBasePtr();
3717261991Sdim    RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
3718210299Sed                          Ld->getChain(), Ptr,
3719218893Sdim                          Ld->getPointerInfo(),
3720210299Sed                          Ld->isVolatile(), Ld->isNonTemporal(),
3721234353Sdim                          Ld->isInvariant(), Ld->getAlignment());
3722210299Sed
3723210299Sed    EVT PtrType = Ptr.getValueType();
3724210299Sed    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
3725261991Sdim    SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
3726210299Sed                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
3727261991Sdim    RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
3728210299Sed                          Ld->getChain(), NewPtr,
3729218893Sdim                          Ld->getPointerInfo().getWithOffset(4),
3730210299Sed                          Ld->isVolatile(), Ld->isNonTemporal(),
3731234353Sdim                          Ld->isInvariant(), NewAlign);
3732210299Sed    return;
3733210299Sed  }
3734210299Sed
3735210299Sed  llvm_unreachable("Unknown VFP cmp argument!");
3736210299Sed}
3737210299Sed
3738210299Sed/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
3739210299Sed/// f32 and even f64 comparisons to integer ones.
3740210299SedSDValue
3741210299SedARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
3742210299Sed  SDValue Chain = Op.getOperand(0);
3743210299Sed  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3744210299Sed  SDValue LHS = Op.getOperand(2);
3745210299Sed  SDValue RHS = Op.getOperand(3);
3746210299Sed  SDValue Dest = Op.getOperand(4);
3747261991Sdim  SDLoc dl(Op);
3748210299Sed
3749234353Sdim  bool LHSSeenZero = false;
3750234353Sdim  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
3751234353Sdim  bool RHSSeenZero = false;
3752234353Sdim  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
3753234353Sdim  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
3754221345Sdim    // If unsafe fp math optimization is enabled and there are no other uses of
3755221345Sdim    // the CMP operands, and the condition code is EQ or NE, we can optimize it
3756210299Sed    // to an integer comparison.
3757210299Sed    if (CC == ISD::SETOEQ)
3758210299Sed      CC = ISD::SETEQ;
3759210299Sed    else if (CC == ISD::SETUNE)
3760210299Sed      CC = ISD::SETNE;
3761210299Sed
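    // Masking with 0x7fffffff clears the IEEE sign bit, so +0.0 and -0.0
    // compare equal under the integer EQ/NE test below.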
3762234353Sdim    SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
3763210299Sed    SDValue ARMcc;
3764210299Sed    if (LHS.getValueType() == MVT::f32) {
3765234353Sdim      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3766234353Sdim                        bitcastf32Toi32(LHS, DAG), Mask);
3767234353Sdim      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3768234353Sdim                        bitcastf32Toi32(RHS, DAG), Mask);
3769210299Sed      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3770210299Sed      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3771210299Sed      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3772210299Sed                         Chain, Dest, ARMcc, CCR, Cmp);
3773210299Sed    }
3774210299Sed
3775210299Sed    SDValue LHS1, LHS2;
3776210299Sed    SDValue RHS1, RHS2;
3777210299Sed    expandf64Toi32(LHS, DAG, LHS1, LHS2);
3778210299Sed    expandf64Toi32(RHS, DAG, RHS1, RHS2);
3779234353Sdim    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
3780234353Sdim    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
3781210299Sed    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3782210299Sed    ARMcc = DAG.getConstant(CondCode, MVT::i32);
3783218893Sdim    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3784210299Sed    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
3785276479Sdim    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
3786210299Sed  }
3787210299Sed
3788210299Sed  return SDValue();
3789210299Sed}
3790210299Sed
3791207618SrdivackySDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3792210299Sed  SDValue Chain = Op.getOperand(0);
3793193323Sed  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3794210299Sed  SDValue LHS = Op.getOperand(2);
3795210299Sed  SDValue RHS = Op.getOperand(3);
3796210299Sed  SDValue Dest = Op.getOperand(4);
3797261991Sdim  SDLoc dl(Op);
3798193323Sed
3799280031Sdim  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
3800280031Sdim    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
3801280031Sdim                                                    dl);
3802280031Sdim
3803280031Sdim    // If softenSetCCOperands only returned one value, we should compare it to
3804280031Sdim    // zero.
3805280031Sdim    if (!RHS.getNode()) {
3806280031Sdim      RHS = DAG.getConstant(0, LHS.getValueType());
3807280031Sdim      CC = ISD::SETNE;
3808280031Sdim    }
3809280031Sdim  }
3810280031Sdim
3811193323Sed  if (LHS.getValueType() == MVT::i32) {
3812210299Sed    SDValue ARMcc;
3813210299Sed    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3814193323Sed    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3815193323Sed    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3816210299Sed                       Chain, Dest, ARMcc, CCR, Cmp);
3817193323Sed  }
3818193323Sed
3819193323Sed  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3820210299Sed
3821234353Sdim  if (getTargetMachine().Options.UnsafeFPMath &&
3822210299Sed      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
3823210299Sed       CC == ISD::SETNE || CC == ISD::SETUNE)) {
3824210299Sed    SDValue Result = OptimizeVFPBrcond(Op, DAG);
3825210299Sed    if (Result.getNode())
3826210299Sed      return Result;
3827210299Sed  }
3828210299Sed
3829193323Sed  ARMCC::CondCodes CondCode, CondCode2;
3830198090Srdivacky  FPCCToARMCC(CC, CondCode, CondCode2);
3831193323Sed
3832210299Sed  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
3833193323Sed  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3834193323Sed  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3835218893Sdim  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3836210299Sed  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
3837276479Sdim  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3838193323Sed  if (CondCode2 != ARMCC::AL) {
3839210299Sed    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
3840210299Sed    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
3841276479Sdim    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3842193323Sed  }
3843193323Sed  return Res;
3844193323Sed}
3845193323Sed
3846207618SrdivackySDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
3847193323Sed  SDValue Chain = Op.getOperand(0);
3848193323Sed  SDValue Table = Op.getOperand(1);
3849193323Sed  SDValue Index = Op.getOperand(2);
3850261991Sdim  SDLoc dl(Op);
3851193323Sed
3852198090Srdivacky  EVT PTy = getPointerTy();
3853193323Sed  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
3854193323Sed  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3855198090Srdivacky  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
3856193323Sed  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
3857193323Sed  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
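  // Each jump-table entry is 4 bytes wide, so scale the index by 4 to form a
  // byte offset into the table.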
3858193323Sed  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
3859193323Sed  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
3860198090Srdivacky  if (Subtarget->isThumb2()) {
3861198090Srdivacky    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
3862198090Srdivacky    // which does another jump to the destination. This also makes it easier
3863198090Srdivacky    // to translate it to TBB / TBH later.
3864198090Srdivacky    // FIXME: This might not work if the function is extremely large.
3865198090Srdivacky    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
3866198090Srdivacky                       Addr, Op.getOperand(2), JTI, UId);
3867198090Srdivacky  }
3868198090Srdivacky  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
3869198892Srdivacky    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
3870218893Sdim                       MachinePointerInfo::getJumpTable(),
3871234353Sdim                       false, false, false, 0);
3872198090Srdivacky    Chain = Addr.getValue(1);
3873193323Sed    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
3874198090Srdivacky    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3875198090Srdivacky  } else {
3876198892Srdivacky    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
3877234353Sdim                       MachinePointerInfo::getJumpTable(),
3878234353Sdim                       false, false, false, 0);
3879198090Srdivacky    Chain = Addr.getValue(1);
3880198090Srdivacky    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3881198090Srdivacky  }
3882193323Sed}
3883193323Sed
3884234353Sdimstatic SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3885234353Sdim  EVT VT = Op.getValueType();
3886261991Sdim  SDLoc dl(Op);
3887234353Sdim
3888234353Sdim  if (Op.getValueType().getVectorElementType() == MVT::i32) {
3889234353Sdim    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
3890234353Sdim      return Op;
3891234353Sdim    return DAG.UnrollVectorOp(Op.getNode());
3892234353Sdim  }
3893234353Sdim
3894234353Sdim  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
3895234353Sdim         "Invalid type for custom lowering!");
3896234353Sdim  if (VT != MVT::v4i16)
3897234353Sdim    return DAG.UnrollVectorOp(Op.getNode());
3898234353Sdim
3899234353Sdim  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
3900234353Sdim  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
3901234353Sdim}
3902234353Sdim
3903280031SdimSDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
3904234353Sdim  EVT VT = Op.getValueType();
3905234353Sdim  if (VT.isVector())
3906234353Sdim    return LowerVectorFP_TO_INT(Op, DAG);
3907234353Sdim
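  // With only single-precision hardware available, f64 conversions are
  // lowered to runtime library calls chosen from the RTLIB tables.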
3908280031Sdim  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
3909280031Sdim    RTLIB::Libcall LC;
3910280031Sdim    if (Op.getOpcode() == ISD::FP_TO_SINT)
3911280031Sdim      LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
3912280031Sdim                              Op.getValueType());
3913280031Sdim    else
3914280031Sdim      LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
3915280031Sdim                              Op.getValueType());
3916280031Sdim    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
3917280031Sdim                       /*isSigned*/ false, SDLoc(Op)).first;
3918280031Sdim  }
3919280031Sdim
3920261991Sdim  SDLoc dl(Op);
3921205218Srdivacky  unsigned Opc;
3922205218Srdivacky
3923205218Srdivacky  switch (Op.getOpcode()) {
3924234353Sdim  default: llvm_unreachable("Invalid opcode!");
3925205218Srdivacky  case ISD::FP_TO_SINT:
3926205218Srdivacky    Opc = ARMISD::FTOSI;
3927205218Srdivacky    break;
3928205218Srdivacky  case ISD::FP_TO_UINT:
3929205218Srdivacky    Opc = ARMISD::FTOUI;
3930205218Srdivacky    break;
3931205218Srdivacky  }
3932193323Sed  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
3933218893Sdim  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3934193323Sed}
3935193323Sed
3936221345Sdimstatic SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3937221345Sdim  EVT VT = Op.getValueType();
3938261991Sdim  SDLoc dl(Op);
3939221345Sdim
3940234353Sdim  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
3941234353Sdim    if (VT.getVectorElementType() == MVT::f32)
3942234353Sdim      return Op;
3943234353Sdim    return DAG.UnrollVectorOp(Op.getNode());
3944234353Sdim  }
3945234353Sdim
3946226633Sdim  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
3947226633Sdim         "Invalid type for custom lowering!");
3948221345Sdim  if (VT != MVT::v4f32)
3949221345Sdim    return DAG.UnrollVectorOp(Op.getNode());
3950221345Sdim
3951221345Sdim  unsigned CastOpc;
3952221345Sdim  unsigned Opc;
3953221345Sdim  switch (Op.getOpcode()) {
3954234353Sdim  default: llvm_unreachable("Invalid opcode!");
3955221345Sdim  case ISD::SINT_TO_FP:
3956221345Sdim    CastOpc = ISD::SIGN_EXTEND;
3957221345Sdim    Opc = ISD::SINT_TO_FP;
3958221345Sdim    break;
3959221345Sdim  case ISD::UINT_TO_FP:
3960221345Sdim    CastOpc = ISD::ZERO_EXTEND;
3961221345Sdim    Opc = ISD::UINT_TO_FP;
3962221345Sdim    break;
3963221345Sdim  }
3964221345Sdim
3965221345Sdim  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
3966221345Sdim  return DAG.getNode(Opc, dl, VT, Op);
3967221345Sdim}
3968221345Sdim
3969280031SdimSDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
3970198090Srdivacky  EVT VT = Op.getValueType();
3971221345Sdim  if (VT.isVector())
3972221345Sdim    return LowerVectorINT_TO_FP(Op, DAG);
3973221345Sdim
3974280031Sdim  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
3975280031Sdim    RTLIB::Libcall LC;
3976280031Sdim    if (Op.getOpcode() == ISD::SINT_TO_FP)
3977280031Sdim      LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
3978280031Sdim                              Op.getValueType());
3979280031Sdim    else
3980280031Sdim      LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
3981280031Sdim                              Op.getValueType());
3982280031Sdim    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
3983280031Sdim                       /*isSigned*/ false, SDLoc(Op)).first;
3984280031Sdim  }
3985280031Sdim
3986261991Sdim  SDLoc dl(Op);
3987205218Srdivacky  unsigned Opc;
3988193323Sed
3989205218Srdivacky  switch (Op.getOpcode()) {
3990234353Sdim  default: llvm_unreachable("Invalid opcode!");
3991205218Srdivacky  case ISD::SINT_TO_FP:
3992205218Srdivacky    Opc = ARMISD::SITOF;
3993205218Srdivacky    break;
3994205218Srdivacky  case ISD::UINT_TO_FP:
3995205218Srdivacky    Opc = ARMISD::UITOF;
3996205218Srdivacky    break;
3997205218Srdivacky  }
3998205218Srdivacky
3999218893Sdim  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
4000193323Sed  return DAG.getNode(Opc, dl, VT, Op);
4001193323Sed}
4002193323Sed
4003210299SedSDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4004193323Sed  // Implement fcopysign with bit ops: clear Tmp0's sign bit, then OR in Tmp1's.
4005193323Sed  SDValue Tmp0 = Op.getOperand(0);
4006193323Sed  SDValue Tmp1 = Op.getOperand(1);
4007261991Sdim  SDLoc dl(Op);
4008198090Srdivacky  EVT VT = Op.getValueType();
4009198090Srdivacky  EVT SrcVT = Tmp1.getValueType();
4010219077Sdim  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4011219077Sdim    Tmp0.getOpcode() == ARMISD::VMOVDRR;
4012219077Sdim  bool UseNEON = !InGPR && Subtarget->hasNEON();
4013218893Sdim
4014219077Sdim  if (UseNEON) {
4015219077Sdim    // Use VBSL to copy the sign bit.
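    // The NEON modified-immediate encoding (cmode 0x6, value 0x80) yields
    // 0x80000000 in each 32-bit lane, i.e. a sign-bit-only mask.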
4016219077Sdim    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4017219077Sdim    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4018219077Sdim                               DAG.getTargetConstant(EncodedVal, MVT::i32));
4019219077Sdim    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4020219077Sdim    if (VT == MVT::f64)
4021219077Sdim      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4022219077Sdim                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4023219077Sdim                         DAG.getConstant(32, MVT::i32));
4024219077Sdim    else /*if (VT == MVT::f32)*/
4025219077Sdim      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4026219077Sdim    if (SrcVT == MVT::f32) {
4027219077Sdim      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4028219077Sdim      if (VT == MVT::f64)
4029219077Sdim        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4030219077Sdim                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4031219077Sdim                           DAG.getConstant(32, MVT::i32));
4032221345Sdim    } else if (VT == MVT::f32)
4033221345Sdim      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4034221345Sdim                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4035221345Sdim                         DAG.getConstant(32, MVT::i32));
4036219077Sdim    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4037219077Sdim    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4038219077Sdim
4039219077Sdim    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4040219077Sdim                                            MVT::i32);
4041219077Sdim    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4042219077Sdim    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4043219077Sdim                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4044221345Sdim
4045219077Sdim    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4046219077Sdim                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4047219077Sdim                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4048221345Sdim    if (VT == MVT::f32) {
4049219077Sdim      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4050219077Sdim      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4051219077Sdim                        DAG.getConstant(0, MVT::i32));
4052219077Sdim    } else {
4053219077Sdim      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4054219077Sdim    }
4055219077Sdim
4056219077Sdim    return Res;
4057219077Sdim  }
4058219077Sdim
4059218893Sdim  // Bitcast operand 1 to i32.
4060218893Sdim  if (SrcVT == MVT::f64)
4061218893Sdim    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4062276479Sdim                       Tmp1).getValue(1);
4063218893Sdim  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4064218893Sdim
4065219077Sdim  // Or in the signbit with integer operations.
4066219077Sdim  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
4067219077Sdim  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
4068219077Sdim  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4069219077Sdim  if (VT == MVT::f32) {
4070219077Sdim    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4071219077Sdim                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4072219077Sdim    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4073219077Sdim                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4074218893Sdim  }
4075218893Sdim
4076219077Sdim  // f64: Or the high part with signbit and then combine two parts.
4077219077Sdim  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4078276479Sdim                     Tmp0);
4079219077Sdim  SDValue Lo = Tmp0.getValue(0);
4080219077Sdim  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4081219077Sdim  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4082219077Sdim  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4083193323Sed}
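
// A worked example of the integer f32 path above (a sketch with assumed
// inputs): copysign(1.0f, -2.0f).  bits(1.0f) = 0x3f800000 and
// bits(-2.0f) = 0xc0000000, so the result is
//   (0x3f800000 & 0x7fffffff) | (0xc0000000 & 0x80000000) = 0xbf800000,
// the bit pattern of -1.0f.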
4084193323Sed
4085208599SrdivackySDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4086208599Srdivacky  MachineFunction &MF = DAG.getMachineFunction();
4087208599Srdivacky  MachineFrameInfo *MFI = MF.getFrameInfo();
4088208599Srdivacky  MFI->setReturnAddressIsTaken(true);
4089208599Srdivacky
4090276479Sdim  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4091276479Sdim    return SDValue();
4092276479Sdim
4093208599Srdivacky  EVT VT = Op.getValueType();
4094261991Sdim  SDLoc dl(Op);
4095208599Srdivacky  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4096208599Srdivacky  if (Depth) {
4097208599Srdivacky    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4098208599Srdivacky    SDValue Offset = DAG.getConstant(4, MVT::i32);
4099208599Srdivacky    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4100208599Srdivacky                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4101234353Sdim                       MachinePointerInfo(), false, false, false, 0);
4102208599Srdivacky  }
4103208599Srdivacky
4104208599Srdivacky  // Return LR, which contains the return address. Mark it an implicit live-in.
4105219077Sdim  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4106208599Srdivacky  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4107208599Srdivacky}
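
// A sketch of the frame layout the Depth > 0 case above relies on: with a
// frame pointer and LR pushed as a pair, the saved LR sits four bytes above
// the saved FP, so the return address of an outer frame is loaded from that
// frame's frame pointer plus 4.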
4108208599Srdivacky
4109207618SrdivackySDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4110276479Sdim  const ARMBaseRegisterInfo &ARI =
4111276479Sdim    *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4112276479Sdim  MachineFunction &MF = DAG.getMachineFunction();
4113276479Sdim  MachineFrameInfo *MFI = MF.getFrameInfo();
4114193323Sed  MFI->setFrameAddressIsTaken(true);
4115208599Srdivacky
4116198090Srdivacky  EVT VT = Op.getValueType();
4117261991Sdim  SDLoc dl(Op);  // FIXME probably not meaningful
4118193323Sed  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4119276479Sdim  unsigned FrameReg = ARI.getFrameRegister(MF);
4120193323Sed  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4121193323Sed  while (Depth--)
4122218893Sdim    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4123218893Sdim                            MachinePointerInfo(),
4124234353Sdim                            false, false, false, 0);
4125193323Sed  return FrameAddr;
4126193323Sed}
4127193323Sed
4128276479Sdim// FIXME? Maybe this could be a TableGen attribute on some registers and
4129276479Sdim// this table could be generated automatically from RegInfo.
4130276479Sdimunsigned ARMTargetLowering::getRegisterByName(const char* RegName,
4131276479Sdim                                              EVT VT) const {
4132276479Sdim  unsigned Reg = StringSwitch<unsigned>(RegName)
4133276479Sdim                       .Case("sp", ARM::SP)
4134276479Sdim                       .Default(0);
4135276479Sdim  if (Reg)
4136276479Sdim    return Reg;
4137276479Sdim  report_fatal_error("Invalid register name for named register global variable");
4138276479Sdim}
4139276479Sdim
4140218893Sdim/// ExpandBITCAST - If the target supports VFP, this function is called to
4141207618Srdivacky/// expand a bit convert where either the source or destination type is i64 to
4142207618Srdivacky/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
4143207618Srdivacky/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4144207618Srdivacky/// vectors), since the legalizer won't know what to do with that.
4145218893Sdimstatic SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4146207618Srdivacky  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4147261991Sdim  SDLoc dl(N);
4148193323Sed  SDValue Op = N->getOperand(0);
4149207618Srdivacky
4150207618Srdivacky  // This function is only supposed to be called for i64 types, either as the
4151207618Srdivacky  // source or destination of the bit convert.
4152207618Srdivacky  EVT SrcVT = Op.getValueType();
4153207618Srdivacky  EVT DstVT = N->getValueType(0);
4154207618Srdivacky  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4155218893Sdim         "ExpandBITCAST called for non-i64 type");
4156207618Srdivacky
4157207618Srdivacky  // Turn i64->f64 into VMOVDRR.
4158207618Srdivacky  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4159193323Sed    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4160193323Sed                             DAG.getConstant(0, MVT::i32));
4161193323Sed    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4162193323Sed                             DAG.getConstant(1, MVT::i32));
4163218893Sdim    return DAG.getNode(ISD::BITCAST, dl, DstVT,
4164210299Sed                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4165193323Sed  }
4166193323Sed
4167199481Srdivacky  // Turn f64->i64 into VMOVRRD.
4168207618Srdivacky  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4169276479Sdim    SDValue Cvt;
4170276479Sdim    if (TLI.isBigEndian() && SrcVT.isVector() &&
4171276479Sdim        SrcVT.getVectorNumElements() > 1)
4172276479Sdim      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4173276479Sdim                        DAG.getVTList(MVT::i32, MVT::i32),
4174276479Sdim                        DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4175276479Sdim    else
4176276479Sdim      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4177276479Sdim                        DAG.getVTList(MVT::i32, MVT::i32), Op);
4178207618Srdivacky    // Merge the pieces into a single i64 value.
4179207618Srdivacky    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4180207618Srdivacky  }
4181193323Sed
4182207618Srdivacky  return SDValue();
4183193323Sed}
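
// For example (a sketch): bitcasting the i64 constant 0x3ff0000000000000
// (the bit pattern of double 1.0) to f64 extracts Lo = 0x00000000 and
// Hi = 0x3ff00000 and rebuilds the double with VMOVDRR Lo, Hi.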
4184193323Sed
4185194710Sed/// getZeroVector - Returns a vector of specified type with all zero elements.
4186210299Sed/// Zero vectors are used to represent vector negation and in those cases
4187210299Sed/// will be implemented with the NEON VNEG instruction.  However, VNEG does
4188210299Sed/// not support i64 elements, so sometimes the zero vectors will need to be
4189210299Sed/// explicitly constructed.  Regardless, use a canonical VMOV to create the
4190210299Sed/// zero vector.
4191261991Sdimstatic SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
4192194710Sed  assert(VT.isVector() && "Expected a vector type");
4193210299Sed  // The canonical modified immediate encoding of a zero vector is...0!
4194210299Sed  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
4195210299Sed  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4196210299Sed  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4197218893Sdim  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4198194710Sed}
4199194710Sed
4200198892Srdivacky/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4201198892Srdivacky/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4202207618SrdivackySDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4203207618Srdivacky                                                SelectionDAG &DAG) const {
4204198892Srdivacky  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4205198892Srdivacky  EVT VT = Op.getValueType();
4206198892Srdivacky  unsigned VTBits = VT.getSizeInBits();
4207261991Sdim  SDLoc dl(Op);
4208198892Srdivacky  SDValue ShOpLo = Op.getOperand(0);
4209198892Srdivacky  SDValue ShOpHi = Op.getOperand(1);
4210198892Srdivacky  SDValue ShAmt  = Op.getOperand(2);
4211210299Sed  SDValue ARMcc;
4212198892Srdivacky  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4213198892Srdivacky
4214198892Srdivacky  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4215198892Srdivacky
4216198892Srdivacky  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4217198892Srdivacky                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
4218198892Srdivacky  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4219198892Srdivacky  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4220198892Srdivacky                                   DAG.getConstant(VTBits, MVT::i32));
4221198892Srdivacky  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4222198892Srdivacky  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4223198892Srdivacky  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4224198892Srdivacky
4225198892Srdivacky  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4226198892Srdivacky  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
4227210299Sed                          ARMcc, DAG, dl);
4228198892Srdivacky  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4229210299Sed  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
4230198892Srdivacky                           CCR, Cmp);
4231198892Srdivacky
4232198892Srdivacky  SDValue Ops[2] = { Lo, Hi };
4233276479Sdim  return DAG.getMergeValues(Ops, dl);
4234198892Srdivacky}
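
// Sketch of what the node sequence above computes for a 64-bit SRL (SRA is
// analogous), with "amt" standing for the runtime shift amount:
//   amt <  32:  Lo = (Lo >> amt) | (Hi << (32 - amt))
//   amt >= 32:  Lo = Hi >> (amt - 32)
// The CMOV on (amt - 32) >= 0 selects between the two Lo expressions, and
// Hi is always computed as Hi >> amt.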
4235198892Srdivacky
4236198892Srdivacky/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4237198892Srdivacky/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4238207618SrdivackySDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4239207618Srdivacky                                               SelectionDAG &DAG) const {
4240198892Srdivacky  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4241198892Srdivacky  EVT VT = Op.getValueType();
4242198892Srdivacky  unsigned VTBits = VT.getSizeInBits();
4243261991Sdim  SDLoc dl(Op);
4244198892Srdivacky  SDValue ShOpLo = Op.getOperand(0);
4245198892Srdivacky  SDValue ShOpHi = Op.getOperand(1);
4246198892Srdivacky  SDValue ShAmt  = Op.getOperand(2);
4247210299Sed  SDValue ARMcc;
4248198892Srdivacky
4249198892Srdivacky  assert(Op.getOpcode() == ISD::SHL_PARTS);
4250198892Srdivacky  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4251198892Srdivacky                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
4252198892Srdivacky  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4253198892Srdivacky  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4254198892Srdivacky                                   DAG.getConstant(VTBits, MVT::i32));
4255198892Srdivacky  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4256198892Srdivacky  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4257198892Srdivacky
4258198892Srdivacky  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4259198892Srdivacky  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4260198892Srdivacky  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
4261210299Sed                          ARMcc, DAG, dl);
4262198892Srdivacky  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4263210299Sed  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
4264198892Srdivacky                           CCR, Cmp);
4265198892Srdivacky
4266198892Srdivacky  SDValue Ops[2] = { Lo, Hi };
4267276479Sdim  return DAG.getMergeValues(Ops, dl);
4268198892Srdivacky}
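
// The SHL_PARTS analogue of the sketch above, again with "amt" standing
// for the runtime shift amount:
//   amt <  32:  Hi = (Hi << amt) | (Lo >> (32 - amt))
//   amt >= 32:  Hi = Lo << (amt - 32)
// Lo is always computed as Lo << amt.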
4269198892Srdivacky
4270218893SdimSDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4271212904Sdim                                            SelectionDAG &DAG) const {
4272212904Sdim  // The rounding mode is in bits 23:22 of the FPSCR.
4273212904Sdim  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4274212904Sdim  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3
4275212904Sdim  // so that the shift and the AND get folded into a bitfield extract.
4276261991Sdim  SDLoc dl(Op);
4277212904Sdim  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4278212904Sdim                              DAG.getConstant(Intrinsic::arm_get_fpscr,
4279212904Sdim                                              MVT::i32));
4280218893Sdim  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4281212904Sdim                                  DAG.getConstant(1U << 22, MVT::i32));
4282212904Sdim  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4283212904Sdim                              DAG.getConstant(22, MVT::i32));
4284218893Sdim  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4285212904Sdim                     DAG.getConstant(3, MVT::i32));
4286212904Sdim}
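
// For example (a sketch with an assumed FPSCR value): if FPSCR[23:22] is
// 0b11 (round-towards-zero), then ((FPSCR + (1 << 22)) >> 22) & 3 ==
// (3 + 1) & 3 == 0, the FLT_ROUNDS encoding for round-towards-zero; the
// carry out of the 2-bit field lands in bit 24 and is masked away.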
4287212904Sdim
4288202878Srdivackystatic SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4289202878Srdivacky                         const ARMSubtarget *ST) {
4290202878Srdivacky  EVT VT = N->getValueType(0);
4291261991Sdim  SDLoc dl(N);
4292202878Srdivacky
4293202878Srdivacky  if (!ST->hasV6T2Ops())
4294202878Srdivacky    return SDValue();
4295202878Srdivacky
4296202878Srdivacky  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
4297202878Srdivacky  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
4298202878Srdivacky}
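
// For example, cttz(0x00000008) becomes clz(rbit(0x00000008)) ==
// clz(0x10000000) == 3: reversing the bits turns trailing zeros into
// leading zeros, which CLZ counts directly.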
4299202878Srdivacky
4300249423Sdim/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
4301249423Sdim/// for each 16-bit element from the operand, repeated.  The basic idea is to
4302249423Sdim/// leverage vcnt to get the 8-bit counts, gather and add the results.
4303249423Sdim///
4304249423Sdim/// Trace for v4i16:
4305249423Sdim/// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
4306249423Sdim/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
4307249423Sdim/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
4308249423Sdim/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
4309249423Sdim///            [b0 b1 b2 b3 b4 b5 b6 b7]
4310249423Sdim///           +[b1 b0 b3 b2 b5 b4 b7 b6]
4311249423Sdim/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
4312249423Sdim/// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
4313249423Sdimstatic SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
4314249423Sdim  EVT VT = N->getValueType(0);
4315261991Sdim  SDLoc DL(N);
4316249423Sdim
4317249423Sdim  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4318249423Sdim  SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
4319249423Sdim  SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
4320249423Sdim  SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
4321249423Sdim  SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
4322249423Sdim  return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
4323249423Sdim}
4324249423Sdim
4325249423Sdim/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
4326249423Sdim/// bit-count for each 16-bit element from the operand.  We need slightly
4327249423Sdim/// different sequencing for v4i16 and v8i16 to stay within NEON's available
4328249423Sdim/// 64/128-bit registers.
4329249423Sdim///
4330249423Sdim/// Trace for v4i16:
4331249423Sdim/// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
4332249423Sdim/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
4333249423Sdim/// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
4334249423Sdim/// v4i16:Extracted = [k0    k1    k2    k3    ]
4335249423Sdimstatic SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
4336249423Sdim  EVT VT = N->getValueType(0);
4337261991Sdim  SDLoc DL(N);
4338249423Sdim
4339249423Sdim  SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
4340249423Sdim  if (VT.is64BitVector()) {
4341249423Sdim    SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
4342249423Sdim    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
4343249423Sdim                       DAG.getIntPtrConstant(0));
4344249423Sdim  } else {
4345249423Sdim    SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
4346249423Sdim                                    BitCounts, DAG.getIntPtrConstant(0));
4347249423Sdim    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
4348249423Sdim  }
4349249423Sdim}
4350249423Sdim
4351249423Sdim/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
4352249423Sdim/// bit-count for each 32-bit element from the operand.  The idea here is
4353249423Sdim/// to split the vector into 16-bit elements, leverage the 16-bit count
4354249423Sdim/// routine, and then combine the results.
4355249423Sdim///
4356249423Sdim/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
4357249423Sdim/// input    = [v0    v1    ] (vi: 32-bit elements)
4358249423Sdim/// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
4359249423Sdim/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
4360249423Sdim/// vrev: N0 = [k1 k0 k3 k2 ]
4361249423Sdim///            [k0 k1 k2 k3 ]
4362249423Sdim///       N1 =+[k1 k0 k3 k2 ]
4363249423Sdim///            [k0 k2 k1 k3 ]
4364249423Sdim///       N2 =+[k1 k3 k0 k2 ]
4365249423Sdim///            [k0    k2    k1    k3    ]
4366249423Sdim/// Extended =+[k1    k3    k0    k2    ]
4367249423Sdim///            [k0    k2    ]
4368249423Sdim/// Extracted=+[k1    k3    ]
4369249423Sdim///
4370249423Sdimstatic SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
4371249423Sdim  EVT VT = N->getValueType(0);
4372261991Sdim  SDLoc DL(N);
4373249423Sdim
4374249423Sdim  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
4375249423Sdim
4376249423Sdim  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
4377249423Sdim  SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
4378249423Sdim  SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
4379249423Sdim  SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
4380249423Sdim  SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
4381249423Sdim
4382249423Sdim  if (VT.is64BitVector()) {
4383249423Sdim    SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
4384249423Sdim    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
4385249423Sdim                       DAG.getIntPtrConstant(0));
4386249423Sdim  } else {
4387249423Sdim    SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
4388249423Sdim                                    DAG.getIntPtrConstant(0));
4389249423Sdim    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
4390249423Sdim  }
4391249423Sdim}
4392249423Sdim
4393249423Sdimstatic SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
4394249423Sdim                          const ARMSubtarget *ST) {
4395249423Sdim  EVT VT = N->getValueType(0);
4396249423Sdim
4397249423Sdim  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
4398249423Sdim  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
4399249423Sdim          VT == MVT::v4i16 || VT == MVT::v8i16) &&
4400249423Sdim         "Unexpected type for custom ctpop lowering");
4401249423Sdim
4402249423Sdim  if (VT.getVectorElementType() == MVT::i32)
4403249423Sdim    return lowerCTPOP32BitElements(N, DAG);
4404249423Sdim  else
4405249423Sdim    return lowerCTPOP16BitElements(N, DAG);
4406249423Sdim}
4407249423Sdim
4408194710Sedstatic SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
4409194710Sed                          const ARMSubtarget *ST) {
4410198090Srdivacky  EVT VT = N->getValueType(0);
4411261991Sdim  SDLoc dl(N);
4412194710Sed
4413218893Sdim  if (!VT.isVector())
4414218893Sdim    return SDValue();
4415218893Sdim
4416194710Sed  // Lower vector shifts on NEON to use VSHL.
4417218893Sdim  assert(ST->hasNEON() && "unexpected vector shift");
4418194710Sed
4419218893Sdim  // Left shifts translate directly to the vshiftu intrinsic.
4420218893Sdim  if (N->getOpcode() == ISD::SHL)
4421218893Sdim    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4422218893Sdim                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
4423218893Sdim                       N->getOperand(0), N->getOperand(1));
4424194710Sed
4425218893Sdim  assert((N->getOpcode() == ISD::SRA ||
4426218893Sdim          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
4427194710Sed
4428218893Sdim  // NEON uses the same intrinsics for both left and right shifts.  For
4429218893Sdim  // right shifts, the shift amounts are negative, so negate the vector of
4430218893Sdim  // shift amounts.
4431218893Sdim  EVT ShiftVT = N->getOperand(1).getValueType();
4432218893Sdim  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
4433218893Sdim                                     getZeroVector(ShiftVT, DAG, dl),
4434218893Sdim                                     N->getOperand(1));
4435218893Sdim  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
4436218893Sdim                             Intrinsic::arm_neon_vshifts :
4437218893Sdim                             Intrinsic::arm_neon_vshiftu);
4438218893Sdim  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
4439218893Sdim                     DAG.getConstant(vshiftInt, MVT::i32),
4440218893Sdim                     N->getOperand(0), NegatedCount);
4441218893Sdim}
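
// For example, an SRA of v4i32 by a splatted constant 3 becomes the
// arm_neon_vshifts intrinsic with a shift-amount vector of -3 in every
// lane; in its register form, NEON's VSHL treats negative per-lane
// amounts as right shifts.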
4442194710Sed
4443218893Sdimstatic SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
4444218893Sdim                                const ARMSubtarget *ST) {
4445218893Sdim  EVT VT = N->getValueType(0);
4446261991Sdim  SDLoc dl(N);
4447218893Sdim
4448198090Srdivacky  // We can get here for a node like i32 = ISD::SHL i32, i64
4449198090Srdivacky  if (VT != MVT::i64)
4450198090Srdivacky    return SDValue();
4451198090Srdivacky
4452198090Srdivacky  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
4453193323Sed         "Unknown shift to lower!");
4454193323Sed
4455193323Sed  // We only lower SRA, SRL of 1 here; all others use generic lowering.
4456193323Sed  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
4457193323Sed      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
4458193323Sed    return SDValue();
4459193323Sed
4460193323Sed  // If we are in thumb mode, we don't have RRX.
4461198090Srdivacky  if (ST->isThumb1Only()) return SDValue();
4462193323Sed
4463193323Sed  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
4464193323Sed  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
4465208599Srdivacky                           DAG.getConstant(0, MVT::i32));
4466193323Sed  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
4467208599Srdivacky                           DAG.getConstant(1, MVT::i32));
4468193323Sed
4469193323Sed  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
4470193323Sed  // captures the result into a carry flag.
4471193323Sed  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
4472276479Sdim  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
4473193323Sed
4474193323Sed  // The low part is an ARMISD::RRX operand, which shifts the carry in.
4475193323Sed  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
4476193323Sed
4477193323Sed  // Merge the pieces into a single i64 value.
4478193323Sed  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
4479193323Sed}
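
// For example, an i64 SRL-by-1 of 0x0000000300000001 (Hi = 3, Lo = 1):
// SRL_FLAG yields Hi' = 1 and captures Hi's old bit 0 in the carry flag,
// and RRX yields Lo' = (1 >> 1) | (carry << 31) = 0x80000000, giving the
// 64-bit result 0x0000000180000000.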
4480193323Sed
4481194710Sedstatic SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
4482194710Sed  SDValue TmpOp0, TmpOp1;
4483194710Sed  bool Invert = false;
4484194710Sed  bool Swap = false;
4485194710Sed  unsigned Opc = 0;
4486194710Sed
4487194710Sed  SDValue Op0 = Op.getOperand(0);
4488194710Sed  SDValue Op1 = Op.getOperand(1);
4489194710Sed  SDValue CC = Op.getOperand(2);
4490280031Sdim  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
4491198090Srdivacky  EVT VT = Op.getValueType();
4492194710Sed  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4493261991Sdim  SDLoc dl(Op);
4494194710Sed
4495280031Sdim  if (Op1.getValueType().isFloatingPoint()) {
4496194710Sed    switch (SetCCOpcode) {
4497234353Sdim    default: llvm_unreachable("Illegal FP comparison");
4498194710Sed    case ISD::SETUNE:
4499194710Sed    case ISD::SETNE:  Invert = true; // Fallthrough
4500194710Sed    case ISD::SETOEQ:
4501194710Sed    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
4502194710Sed    case ISD::SETOLT:
4503194710Sed    case ISD::SETLT: Swap = true; // Fallthrough
4504194710Sed    case ISD::SETOGT:
4505194710Sed    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
4506194710Sed    case ISD::SETOLE:
4507194710Sed    case ISD::SETLE:  Swap = true; // Fallthrough
4508194710Sed    case ISD::SETOGE:
4509194710Sed    case ISD::SETGE: Opc = ARMISD::VCGE; break;
4510194710Sed    case ISD::SETUGE: Swap = true; // Fallthrough
4511194710Sed    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
4512194710Sed    case ISD::SETUGT: Swap = true; // Fallthrough
4513194710Sed    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
4514194710Sed    case ISD::SETUEQ: Invert = true; // Fallthrough
4515194710Sed    case ISD::SETONE:
4516194710Sed      // Expand this to (OLT | OGT).
4517194710Sed      TmpOp0 = Op0;
4518194710Sed      TmpOp1 = Op1;
4519194710Sed      Opc = ISD::OR;
4520280031Sdim      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
4521280031Sdim      Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
4522194710Sed      break;
4523194710Sed    case ISD::SETUO: Invert = true; // Fallthrough
4524194710Sed    case ISD::SETO:
4525194710Sed      // Expand this to (OLT | OGE).
4526194710Sed      TmpOp0 = Op0;
4527194710Sed      TmpOp1 = Op1;
4528194710Sed      Opc = ISD::OR;
4529280031Sdim      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
4530280031Sdim      Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
4531194710Sed      break;
4532194710Sed    }
4533194710Sed  } else {
4534194710Sed    // Integer comparisons.
4535194710Sed    switch (SetCCOpcode) {
4536234353Sdim    default: llvm_unreachable("Illegal integer comparison");
4537194710Sed    case ISD::SETNE:  Invert = true;
4538194710Sed    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
4539194710Sed    case ISD::SETLT:  Swap = true;
4540194710Sed    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
4541194710Sed    case ISD::SETLE:  Swap = true;
4542194710Sed    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
4543194710Sed    case ISD::SETULT: Swap = true;
4544194710Sed    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
4545194710Sed    case ISD::SETULE: Swap = true;
4546194710Sed    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
4547194710Sed    }
4548194710Sed
4549198090Srdivacky    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
4550194710Sed    if (Opc == ARMISD::VCEQ) {
4551194710Sed
4552194710Sed      SDValue AndOp;
4553194710Sed      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
4554194710Sed        AndOp = Op0;
4555194710Sed      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
4556194710Sed        AndOp = Op1;
4557194710Sed
4558194710Sed      // Ignore bitconvert.
4559218893Sdim      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
4560194710Sed        AndOp = AndOp.getOperand(0);
4561194710Sed
4562194710Sed      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
4563194710Sed        Opc = ARMISD::VTST;
4564280031Sdim        Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
4565280031Sdim        Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
4566194710Sed        Invert = !Invert;
4567194710Sed      }
4568194710Sed    }
4569194710Sed  }
4570194710Sed
4571194710Sed  if (Swap)
4572194710Sed    std::swap(Op0, Op1);
4573194710Sed
4574218893Sdim  // If one of the operands is a constant vector zero, attempt to fold the
4575218893Sdim  // comparison to a specialized compare-against-zero form.
4576218893Sdim  SDValue SingleOp;
4577218893Sdim  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
4578218893Sdim    SingleOp = Op0;
4579218893Sdim  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
4580218893Sdim    if (Opc == ARMISD::VCGE)
4581218893Sdim      Opc = ARMISD::VCLEZ;
4582218893Sdim    else if (Opc == ARMISD::VCGT)
4583218893Sdim      Opc = ARMISD::VCLTZ;
4584218893Sdim    SingleOp = Op1;
4585218893Sdim  }
4586194710Sed
4587218893Sdim  SDValue Result;
4588218893Sdim  if (SingleOp.getNode()) {
4589218893Sdim    switch (Opc) {
4590218893Sdim    case ARMISD::VCEQ:
4591280031Sdim      Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
4592218893Sdim    case ARMISD::VCGE:
4593280031Sdim      Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
4594218893Sdim    case ARMISD::VCLEZ:
4595280031Sdim      Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
4596218893Sdim    case ARMISD::VCGT:
4597280031Sdim      Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
4598218893Sdim    case ARMISD::VCLTZ:
4599280031Sdim      Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
4600218893Sdim    default:
4601280031Sdim      Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
4602218893Sdim    }
4603218893Sdim  } else {
4604280031Sdim    Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
4605218893Sdim  }
4606218893Sdim
4607280031Sdim  Result = DAG.getSExtOrTrunc(Result, dl, VT);
4608280031Sdim
4609194710Sed  if (Invert)
4610194710Sed    Result = DAG.getNOT(dl, Result, VT);
4611194710Sed
4612194710Sed  return Result;
4613194710Sed}
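
// For example (a sketch): "setone a, b" on v4f32 expands above to
// (VCGT b, a) | (VCGT a, b), and "icmp ne (and x, m), 0" folds to a single
// VTST x, m, whose lanes are all ones exactly when (x & m) != 0.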
4614194710Sed
4615210299Sed/// isNEONModifiedImm - Check if the specified splat value corresponds to a
4616210299Sed/// valid vector constant for a NEON instruction with a "modified immediate"
4617210299Sed/// operand (e.g., VMOV).  If so, return the encoded value.
4618210299Sedstatic SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
4619210299Sed                                 unsigned SplatBitSize, SelectionDAG &DAG,
4620218893Sdim                                 EVT &VT, bool is128Bits, NEONModImmType type) {
4621210299Sed  unsigned OpCmode, Imm;
4622210299Sed
4623210299Sed  // SplatBitSize is set to the smallest size that splats the vector, so a
4624210299Sed  // zero vector will always have SplatBitSize == 8.  However, NEON modified
4625210299Sed  // immediate instructions other than VMOV do not support the 8-bit encoding
4626210299Sed  // of a zero vector, and the default encoding of zero is supposed to be the
4627210299Sed  // 32-bit version.
4628210299Sed  if (SplatBits == 0)
4629210299Sed    SplatBitSize = 32;
4630210299Sed
4631194710Sed  switch (SplatBitSize) {
4632194710Sed  case 8:
4633218893Sdim    if (type != VMOVModImm)
4634210299Sed      return SDValue();
4635210299Sed    // Any 1-byte value is OK.  Op=0, Cmode=1110.
4636194710Sed    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
4637210299Sed    OpCmode = 0xe;
4638210299Sed    Imm = SplatBits;
4639210299Sed    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
4640210299Sed    break;
4641194710Sed
4642194710Sed  case 16:
4643194710Sed    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
4644210299Sed    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
4645210299Sed    if ((SplatBits & ~0xff) == 0) {
4646210299Sed      // Value = 0x00nn: Op=x, Cmode=100x.
4647210299Sed      OpCmode = 0x8;
4648210299Sed      Imm = SplatBits;
4649210299Sed      break;
4650210299Sed    }
4651210299Sed    if ((SplatBits & ~0xff00) == 0) {
4652210299Sed      // Value = 0xnn00: Op=x, Cmode=101x.
4653210299Sed      OpCmode = 0xa;
4654210299Sed      Imm = SplatBits >> 8;
4655210299Sed      break;
4656210299Sed    }
4657210299Sed    return SDValue();
4658194710Sed
4659194710Sed  case 32:
4660194710Sed    // NEON's 32-bit VMOV supports splat values where:
4661194710Sed    // * only one byte is nonzero, or
4662194710Sed    // * the least significant byte is 0xff and the second byte is nonzero, or
4663194710Sed    // * the least significant 2 bytes are 0xff and the third is nonzero.
4664210299Sed    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
4665210299Sed    if ((SplatBits & ~0xff) == 0) {
4666210299Sed      // Value = 0x000000nn: Op=x, Cmode=000x.
4667210299Sed      OpCmode = 0;
4668210299Sed      Imm = SplatBits;
4669210299Sed      break;
4670210299Sed    }
4671210299Sed    if ((SplatBits & ~0xff00) == 0) {
4672210299Sed      // Value = 0x0000nn00: Op=x, Cmode=001x.
4673210299Sed      OpCmode = 0x2;
4674210299Sed      Imm = SplatBits >> 8;
4675210299Sed      break;
4676210299Sed    }
4677210299Sed    if ((SplatBits & ~0xff0000) == 0) {
4678210299Sed      // Value = 0x00nn0000: Op=x, Cmode=010x.
4679210299Sed      OpCmode = 0x4;
4680210299Sed      Imm = SplatBits >> 16;
4681210299Sed      break;
4682210299Sed    }
4683210299Sed    if ((SplatBits & ~0xff000000) == 0) {
4684210299Sed      // Value = 0xnn000000: Op=x, Cmode=011x.
4685210299Sed      OpCmode = 0x6;
4686210299Sed      Imm = SplatBits >> 24;
4687210299Sed      break;
4688210299Sed    }
4689194710Sed
4690218893Sdim    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
4691218893Sdim    if (type == OtherModImm) return SDValue();
4692218893Sdim
4693194710Sed    if ((SplatBits & ~0xffff) == 0 &&
4694210299Sed        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
4695210299Sed      // Value = 0x0000nnff: Op=x, Cmode=1100.
4696210299Sed      OpCmode = 0xc;
4697210299Sed      Imm = SplatBits >> 8;
4698210299Sed      break;
4699210299Sed    }
4700194710Sed
4701194710Sed    if ((SplatBits & ~0xffffff) == 0 &&
4702210299Sed        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
4703210299Sed      // Value = 0x00nnffff: Op=x, Cmode=1101.
4704210299Sed      OpCmode = 0xd;
4705210299Sed      Imm = SplatBits >> 16;
4706210299Sed      break;
4707210299Sed    }
4708194710Sed
4709194710Sed    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
4710194710Sed    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
4711194710Sed    // VMOV.I32.  A (very) minor optimization would be to replicate the value
4712194710Sed    // and fall through here to test for a valid 64-bit splat.  But, then the
4713194710Sed    // caller would also need to check and handle the change in size.
4714210299Sed    return SDValue();
4715194710Sed
4716194710Sed  case 64: {
4717218893Sdim    if (type != VMOVModImm)
4718210299Sed      return SDValue();
4719194710Sed    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
4720194710Sed    uint64_t BitMask = 0xff;
4721194710Sed    uint64_t Val = 0;
4722210299Sed    unsigned ImmMask = 1;
4723210299Sed    Imm = 0;
4724194710Sed    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
4725210299Sed      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
4726194710Sed        Val |= BitMask;
4727210299Sed        Imm |= ImmMask;
4728210299Sed      } else if ((SplatBits & BitMask) != 0) {
4729194710Sed        return SDValue();
4730210299Sed      }
4731194710Sed      BitMask <<= 8;
4732210299Sed      ImmMask <<= 1;
4733194710Sed    }
4734276479Sdim
4735276479Sdim    if (DAG.getTargetLoweringInfo().isBigEndian())
4736276479Sdim      // swap higher and lower 32 bit word
4737276479Sdim      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
4738276479Sdim
4739210299Sed    // Op=1, Cmode=1110.
4740210299Sed    OpCmode = 0x1e;
4741210299Sed    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
4742210299Sed    break;
4743194710Sed  }
4744194710Sed
4745194710Sed  default:
4746210299Sed    llvm_unreachable("unexpected size for isNEONModifiedImm");
4747194710Sed  }
4748194710Sed
4749210299Sed  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
4750210299Sed  return DAG.getTargetConstant(EncodedVal, MVT::i32);
4751194710Sed}
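
// For example (a sketch): a v4i32 splat of 0x00005600 matches the
// "Value = 0x0000nn00: Op=x, Cmode=001x" case above, giving OpCmode = 0x2
// and Imm = 0x56, i.e. the modified immediate for a VMOV.I32 of 0x5600
// into every lane.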
4752194710Sed
4753234353SdimSDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
4754234353Sdim                                           const ARMSubtarget *ST) const {
4755261991Sdim  if (!ST->hasVFP3())
4756234353Sdim    return SDValue();
4757234353Sdim
4758261991Sdim  bool IsDouble = Op.getValueType() == MVT::f64;
4759234353Sdim  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
4760234353Sdim
4761280031Sdim  // Use the default (constant pool) lowering for double constants when we have
4762280031Sdim  // an SP-only FPU
4763280031Sdim  if (IsDouble && Subtarget->isFPOnlySP())
4764280031Sdim    return SDValue();
4765280031Sdim
4766234353Sdim  // Try splatting with a VMOV.f32...
4767234353Sdim  APFloat FPVal = CFP->getValueAPF();
4768261991Sdim  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
4769261991Sdim
4770234353Sdim  if (ImmVal != -1) {
4771261991Sdim    if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
4772261991Sdim      // We have code in place to select a valid ConstantFP already, no need to
4773261991Sdim      // do any mangling.
4774261991Sdim      return Op;
4775261991Sdim    }
4776261991Sdim
4777261991Sdim    // It's a float and we are trying to use NEON operations where
4778261991Sdim    // possible. Lower it to a splat followed by an extract.
4779261991Sdim    SDLoc DL(Op);
4780234353Sdim    SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
4781234353Sdim    SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
4782234353Sdim                                      NewVal);
4783234353Sdim    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
4784234353Sdim                       DAG.getConstant(0, MVT::i32));
4785234353Sdim  }
4786234353Sdim
4787261991Sdim  // The rest of our options are NEON only; make sure that's allowed before
4788261991Sdim  // proceeding.
4789261991Sdim  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
4790261991Sdim    return SDValue();
4791261991Sdim
4792234353Sdim  EVT VMovVT;
4793261991Sdim  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
4794261991Sdim
4795261991Sdim  // It wouldn't really be worth bothering for doubles except for one very
4796261991Sdim  // important value, which does happen to match: 0.0. So make sure we don't do
4797261991Sdim  // anything stupid.
4798261991Sdim  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
4799261991Sdim    return SDValue();
4800261991Sdim
4801261991Sdim  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
4802261991Sdim  SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
4803261991Sdim                                     false, VMOVModImm);
4804234353Sdim  if (NewVal != SDValue()) {
4805261991Sdim    SDLoc DL(Op);
4806234353Sdim    SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
4807234353Sdim                                      NewVal);
4808261991Sdim    if (IsDouble)
4809261991Sdim      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4810261991Sdim
4811261991Sdim    // It's a float: cast and extract a vector element.
4812234353Sdim    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
4813234353Sdim                                       VecConstant);
4814234353Sdim    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
4815234353Sdim                       DAG.getConstant(0, MVT::i32));
4816234353Sdim  }
4817234353Sdim
4818234353Sdim  // Finally, try a VMVN.i32
4819261991Sdim  NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
4820261991Sdim                             false, VMVNModImm);
4821234353Sdim  if (NewVal != SDValue()) {
4822261991Sdim    SDLoc DL(Op);
4823234353Sdim    SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
4824261991Sdim
4825261991Sdim    if (IsDouble)
4826261991Sdim      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4827261991Sdim
4828261991Sdim    // It's a float: cast and extract a vector element.
4829234353Sdim    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
4830234353Sdim                                       VecConstant);
4831234353Sdim    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
4832234353Sdim                       DAG.getConstant(0, MVT::i32));
4833234353Sdim  }
4834234353Sdim
4835234353Sdim  return SDValue();
4836234353Sdim}
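
// For example, 1.0f and -0.5f are valid VFP immediates (values of the
// form +/-(16..31)/16 * 2^n with n in [-3, 4]), so ARM_AM::getFP32Imm
// succeeds and no constant-pool load is needed; a value like 0.1f is not
// representable, so the function tries the VMOV.i32 and VMVN.i32
// encodings instead.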
4837234353Sdim
4838243830Sdim// Check if a VEXT instruction can handle the shuffle mask when the
4839243830Sdim// vector sources of the shuffle are the same.
4840243830Sdimstatic bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
4841243830Sdim  unsigned NumElts = VT.getVectorNumElements();
4842234353Sdim
4843243830Sdim  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
4844243830Sdim  if (M[0] < 0)
4845243830Sdim    return false;
4846243830Sdim
4847243830Sdim  Imm = M[0];
4848243830Sdim
4849243830Sdim  // If this is a VEXT shuffle, the immediate value is the index of the first
4850243830Sdim  // element.  The other shuffle indices must be the successive elements after
4851243830Sdim  // the first one.
4852243830Sdim  unsigned ExpectedElt = Imm;
4853243830Sdim  for (unsigned i = 1; i < NumElts; ++i) {
4854243830Sdim    // Increment the expected index.  If it wraps around, just follow it
4855243830Sdim    // back to index zero and keep going.
4856243830Sdim    ++ExpectedElt;
4857243830Sdim    if (ExpectedElt == NumElts)
4858243830Sdim      ExpectedElt = 0;
4859243830Sdim
4860243830Sdim    if (M[i] < 0) continue; // ignore UNDEF indices
4861243830Sdim    if (ExpectedElt != static_cast<unsigned>(M[i]))
4862243830Sdim      return false;
4863243830Sdim  }
4864243830Sdim
4865243830Sdim  return true;
4866243830Sdim}
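
// For example, the single-source v4i32 mask <2, 3, 0, 1> passes the check
// above with Imm = 2: each index is the successor of the previous one,
// wrapping from 3 back to 0, i.e. a rotation that VEXT can perform with
// both source operands set to the same register.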
4867243830Sdim
4869234353Sdimstatic bool isVEXTMask(ArrayRef<int> M, EVT VT,
4870198090Srdivacky                       bool &ReverseVEXT, unsigned &Imm) {
4871198090Srdivacky  unsigned NumElts = VT.getVectorNumElements();
4872198090Srdivacky  ReverseVEXT = false;
4873212904Sdim
4874212904Sdim  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
4875212904Sdim  if (M[0] < 0)
4876212904Sdim    return false;
4877212904Sdim
4878198090Srdivacky  Imm = M[0];
4879198090Srdivacky
4880198090Srdivacky  // If this is a VEXT shuffle, the immediate value is the index of the first
4881198090Srdivacky  // element.  The other shuffle indices must be the successive elements after
4882198090Srdivacky  // the first one.
4883198090Srdivacky  unsigned ExpectedElt = Imm;
4884198090Srdivacky  for (unsigned i = 1; i < NumElts; ++i) {
4885198090Srdivacky    // Increment the expected index.  If it wraps around, it may still be
4886198090Srdivacky    // a VEXT but the source vectors must be swapped.
4887198090Srdivacky    ExpectedElt += 1;
4888198090Srdivacky    if (ExpectedElt == NumElts * 2) {
4889198090Srdivacky      ExpectedElt = 0;
4890198090Srdivacky      ReverseVEXT = true;
4891198090Srdivacky    }
4892198090Srdivacky
4893212904Sdim    if (M[i] < 0) continue; // ignore UNDEF indices
4894198090Srdivacky    if (ExpectedElt != static_cast<unsigned>(M[i]))
4895198090Srdivacky      return false;
4896198090Srdivacky  }
4897198090Srdivacky
4898198090Srdivacky  // Adjust the index value if the source operands will be swapped.
4899198090Srdivacky  if (ReverseVEXT)
4900198090Srdivacky    Imm -= NumElts;
4901198090Srdivacky
4902198090Srdivacky  return true;
4903198090Srdivacky}
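
// For example, with v4i32 sources V1 (elements 0-3) and V2 (elements 4-7),
// the mask <6, 7, 0, 1> wraps past element 7, so ReverseVEXT is set and
// Imm is adjusted to 6 - 4 = 2: the shuffle is a VEXT of the swapped
// sources starting at element 2.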
4904198090Srdivacky
4905198090Srdivacky/// isVREVMask - Check if a vector shuffle corresponds to a VREV
4906198090Srdivacky/// instruction with the specified blocksize.  (The order of the elements
4907198090Srdivacky/// within each block of the vector is reversed.)
4908234353Sdimstatic bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
4909198090Srdivacky  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
4910198090Srdivacky         "Only possible block sizes for VREV are: 16, 32, 64");
4911198090Srdivacky
4912198396Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4913198396Srdivacky  if (EltSz == 64)
4914198396Srdivacky    return false;
4915198396Srdivacky
4916198090Srdivacky  unsigned NumElts = VT.getVectorNumElements();
4917198090Srdivacky  unsigned BlockElts = M[0] + 1;
4918212904Sdim  // If the first shuffle index is UNDEF, be optimistic.
4919212904Sdim  if (M[0] < 0)
4920212904Sdim    BlockElts = BlockSize / EltSz;
4921198090Srdivacky
4922198090Srdivacky  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
4923198090Srdivacky    return false;
4924198090Srdivacky
4925198090Srdivacky  for (unsigned i = 0; i < NumElts; ++i) {
4926212904Sdim    if (M[i] < 0) continue; // ignore UNDEF indices
4927212904Sdim    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
4928198090Srdivacky      return false;
4929198090Srdivacky  }
4930198090Srdivacky
4931198090Srdivacky  return true;
4932198090Srdivacky}
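
// For example, the v8i8 mask <3, 2, 1, 0, 7, 6, 5, 4> is a VREV mask with
// BlockSize == 32: EltSz is 8, BlockElts is M[0] + 1 == 4, and each 32-bit
// block has its four bytes reversed.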
4933198090Srdivacky
4934234353Sdimstatic bool isVTBLMask(ArrayRef<int> M, EVT VT) {
4935221345Sdim  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
4936221345Sdim  // range, then 0 is placed into the resulting vector. So pretty much any mask
4937221345Sdim  // of 8 elements can work here.
4938221345Sdim  return VT == MVT::v8i8 && M.size() == 8;
4939221345Sdim}
4940221345Sdim
4941234353Sdimstatic bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4942198396Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4943198396Srdivacky  if (EltSz == 64)
4944198396Srdivacky    return false;
4945198396Srdivacky
4946198090Srdivacky  unsigned NumElts = VT.getVectorNumElements();
4947198090Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
4948198090Srdivacky  for (unsigned i = 0; i < NumElts; i += 2) {
4949212904Sdim    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
4950212904Sdim        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
4951198090Srdivacky      return false;
4952198090Srdivacky  }
4953198090Srdivacky  return true;
4954198090Srdivacky}
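
// For example, for v4i32 a VTRN produces two results; the mask
// <0, 4, 2, 6> selects the first (WhichResult == 0) and <1, 5, 3, 7> the
// second (WhichResult == 1).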
4955198090Srdivacky
4956200581Srdivacky/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
4957200581Srdivacky/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4958200581Srdivacky/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
4959234353Sdimstatic bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4960200581Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4961200581Srdivacky  if (EltSz == 64)
4962200581Srdivacky    return false;
4963200581Srdivacky
4964200581Srdivacky  unsigned NumElts = VT.getVectorNumElements();
4965200581Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
4966200581Srdivacky  for (unsigned i = 0; i < NumElts; i += 2) {
4967212904Sdim    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
4968212904Sdim        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
4969200581Srdivacky      return false;
4970200581Srdivacky  }
4971200581Srdivacky  return true;
4972200581Srdivacky}
4973200581Srdivacky
4974234353Sdimstatic bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4975198396Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4976198396Srdivacky  if (EltSz == 64)
4977198396Srdivacky    return false;
4978198396Srdivacky
4979198090Srdivacky  unsigned NumElts = VT.getVectorNumElements();
4980198090Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
4981198090Srdivacky  for (unsigned i = 0; i != NumElts; ++i) {
4982212904Sdim    if (M[i] < 0) continue; // ignore UNDEF indices
4983198090Srdivacky    if ((unsigned) M[i] != 2 * i + WhichResult)
4984198090Srdivacky      return false;
4985198090Srdivacky  }
4986198090Srdivacky
4987198090Srdivacky  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4988198396Srdivacky  if (VT.is64BitVector() && EltSz == 32)
4989198090Srdivacky    return false;
4990198090Srdivacky
4991198090Srdivacky  return true;
4992198090Srdivacky}
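
// For example, for v4i32 the mask <0, 2, 4, 6> is the first VUZP result
// (the even-numbered elements) and <1, 3, 5, 7> is the second
// (WhichResult == 1, the odd-numbered elements).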
4993198090Srdivacky
4994200581Srdivacky/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
4995200581Srdivacky/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4996200581Srdivacky/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
4997234353Sdimstatic bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4998200581Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4999200581Srdivacky  if (EltSz == 64)
5000200581Srdivacky    return false;
5001200581Srdivacky
5002200581Srdivacky  unsigned Half = VT.getVectorNumElements() / 2;
5003200581Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
5004200581Srdivacky  for (unsigned j = 0; j != 2; ++j) {
5005200581Srdivacky    unsigned Idx = WhichResult;
5006200581Srdivacky    for (unsigned i = 0; i != Half; ++i) {
5007212904Sdim      int MIdx = M[i + j * Half];
5008212904Sdim      if (MIdx >= 0 && (unsigned) MIdx != Idx)
5009200581Srdivacky        return false;
5010200581Srdivacky      Idx += 2;
5011200581Srdivacky    }
5012200581Srdivacky  }
5013200581Srdivacky
5014200581Srdivacky  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5015200581Srdivacky  if (VT.is64BitVector() && EltSz == 32)
5016200581Srdivacky    return false;
5017200581Srdivacky
5018200581Srdivacky  return true;
5019200581Srdivacky}
5020200581Srdivacky
5021234353Sdimstatic bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5022198396Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5023198396Srdivacky  if (EltSz == 64)
5024198396Srdivacky    return false;
5025198396Srdivacky
5026198090Srdivacky  unsigned NumElts = VT.getVectorNumElements();
5027198090Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
5028198090Srdivacky  unsigned Idx = WhichResult * NumElts / 2;
5029198090Srdivacky  for (unsigned i = 0; i != NumElts; i += 2) {
5030212904Sdim    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
5031212904Sdim        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
5032198090Srdivacky      return false;
5033198090Srdivacky    Idx += 1;
5034198090Srdivacky  }
5035198090Srdivacky
5036198090Srdivacky  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5037198396Srdivacky  if (VT.is64BitVector() && EltSz == 32)
5038198090Srdivacky    return false;
5039198090Srdivacky
5040198090Srdivacky  return true;
5041198090Srdivacky}
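
// For example, for v8i8 the mask <0, 8, 1, 9, 2, 10, 3, 11> is the first
// VZIP result (WhichResult == 0): it interleaves the low halves of the
// two sources.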
5042198090Srdivacky
5043200581Srdivacky/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5044200581Srdivacky/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5045200581Srdivacky/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5046234353Sdimstatic bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5047200581Srdivacky  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
5048200581Srdivacky  if (EltSz == 64)
5049200581Srdivacky    return false;
5050200581Srdivacky
5051200581Srdivacky  unsigned NumElts = VT.getVectorNumElements();
5052200581Srdivacky  WhichResult = (M[0] == 0 ? 0 : 1);
5053200581Srdivacky  unsigned Idx = WhichResult * NumElts / 2;
5054200581Srdivacky  for (unsigned i = 0; i != NumElts; i += 2) {
5055212904Sdim    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
5056212904Sdim        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
5057200581Srdivacky      return false;
5058200581Srdivacky    Idx += 1;
5059200581Srdivacky  }
5060200581Srdivacky
5061200581Srdivacky  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5062200581Srdivacky  if (VT.is64BitVector() && EltSz == 32)
5063200581Srdivacky    return false;
5064200581Srdivacky
5065200581Srdivacky  return true;
5066200581Srdivacky}
5067200581Srdivacky
5068249423Sdim/// \return true if this is a reverse operation on a vector.
5069249423Sdimstatic bool isReverseMask(ArrayRef<int> M, EVT VT) {
5070249423Sdim  unsigned NumElts = VT.getVectorNumElements();
5071249423Sdim  // Make sure the mask has the right size.
5072249423Sdim  if (NumElts != M.size())
5073249423Sdim      return false;
5074249423Sdim
5075249423Sdim  // Look for <15, ..., 3, -1, 1, 0>.
5076249423Sdim  for (unsigned i = 0; i != NumElts; ++i)
5077249423Sdim    if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
5078249423Sdim      return false;
5079249423Sdim
5080249423Sdim  return true;
5081249423Sdim}
5082249423Sdim
5083212904Sdim// If N is an integer constant that can be moved into a register in one
5084212904Sdim// instruction, return an SDValue of such a constant (will become a MOV
5085212904Sdim// instruction).  Otherwise return null.
5086212904Sdimstatic SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
5087261991Sdim                                     const ARMSubtarget *ST, SDLoc dl) {
5088212904Sdim  uint64_t Val;
5089212904Sdim  if (!isa<ConstantSDNode>(N))
5090212904Sdim    return SDValue();
5091212904Sdim  Val = cast<ConstantSDNode>(N)->getZExtValue();
5092212904Sdim
5093212904Sdim  if (ST->isThumb1Only()) {
5094212904Sdim    if (Val <= 255 || ~Val <= 255)
5095212904Sdim      return DAG.getConstant(Val, MVT::i32);
5096212904Sdim  } else {
5097212904Sdim    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
5098212904Sdim      return DAG.getConstant(Val, MVT::i32);
5099212904Sdim  }
5100212904Sdim  return SDValue();
5101212904Sdim}
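
// For example, 255 fits both modes in one instruction, while 0x00ff0000
// is a single ARM MOV (an 8-bit value rotated by an even amount, which
// ARM_AM::getSOImmVal accepts) but not a one-instruction Thumb1 constant,
// so the Thumb1 query above returns the null SDValue for it.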
5102212904Sdim
5103194710Sed// If this is a case we can't handle, return null and let the default
5104194710Sed// expansion code take care of it.
5105218893SdimSDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
5106218893Sdim                                             const ARMSubtarget *ST) const {
5107198090Srdivacky  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
5108261991Sdim  SDLoc dl(Op);
5109198090Srdivacky  EVT VT = Op.getValueType();
5110194710Sed
5111194710Sed  APInt SplatBits, SplatUndef;
5112194710Sed  unsigned SplatBitSize;
5113194710Sed  bool HasAnyUndefs;
5114194710Sed  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
5115198090Srdivacky    if (SplatBitSize <= 64) {
5116210299Sed      // Check if an immediate VMOV works.
5117210299Sed      EVT VmovVT;
5118210299Sed      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
5119210299Sed                                      SplatUndef.getZExtValue(), SplatBitSize,
5120218893Sdim                                      DAG, VmovVT, VT.is128BitVector(),
5121218893Sdim                                      VMOVModImm);
5122210299Sed      if (Val.getNode()) {
5123210299Sed        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
5124218893Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5125210299Sed      }
5126210299Sed
5127210299Sed      // Try an immediate VMVN.
5128226633Sdim      uint64_t NegatedImm = (~SplatBits).getZExtValue();
5129210299Sed      Val = isNEONModifiedImm(NegatedImm,
5130210299Sed                                      SplatUndef.getZExtValue(), SplatBitSize,
5131218893Sdim                                      DAG, VmovVT, VT.is128BitVector(),
5132218893Sdim                                      VMVNModImm);
5133210299Sed      if (Val.getNode()) {
5134210299Sed        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
5135218893Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5136210299Sed      }
5137234353Sdim
5138234353Sdim      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
5139234353Sdim      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
5140234353Sdim        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
5141234353Sdim        if (ImmVal != -1) {
5142234353Sdim          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
5143234353Sdim          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
5144234353Sdim        }
5145234353Sdim      }
5146198090Srdivacky    }
5147194710Sed  }
5148194710Sed
5149208599Srdivacky  // Scan through the operands to see if only one value is used.
5150243830Sdim  //
5151243830Sdim  // As an optimisation, even if more than one value is used, it may be more
5152243830Sdim  // profitable to splat with one value and then change some lanes.
5153243830Sdim  //
5154243830Sdim  // Heuristically we decide to do this if the vector has a "dominant" value,
5155243830Sdim  // defined as splatted to more than half of the lanes.
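  // Illustrative sketch: in a v4i32 BUILD_VECTOR <a, a, a, b>, 'a' fills 3 of
  // the 4 lanes and is therefore dominant, so the code below splats 'a' with
  // a VDUP and patches lane 3 with a single INSERT_VECTOR_ELT instead of
  // inserting all four lanes one by one.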
5156208599Srdivacky  unsigned NumElts = VT.getVectorNumElements();
5157208599Srdivacky  bool isOnlyLowElement = true;
5158208599Srdivacky  bool usesOnlyOneValue = true;
5159243830Sdim  bool hasDominantValue = false;
5160208599Srdivacky  bool isConstant = true;
5161243830Sdim
5162243830Sdim  // Map of the number of times a particular SDValue appears in the
5163243830Sdim  // element list.
5164243830Sdim  DenseMap<SDValue, unsigned> ValueCounts;
5165208599Srdivacky  SDValue Value;
5166208599Srdivacky  for (unsigned i = 0; i < NumElts; ++i) {
5167208599Srdivacky    SDValue V = Op.getOperand(i);
5168208599Srdivacky    if (V.getOpcode() == ISD::UNDEF)
5169208599Srdivacky      continue;
5170208599Srdivacky    if (i > 0)
5171208599Srdivacky      isOnlyLowElement = false;
5172208599Srdivacky    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
5173208599Srdivacky      isConstant = false;
5174208599Srdivacky
5175243830Sdim    ValueCounts.insert(std::make_pair(V, 0));
5176243830Sdim    unsigned &Count = ValueCounts[V];
5177249423Sdim
5178243830Sdim    // Is this value dominant? (takes up more than half of the lanes)
5179243830Sdim    if (++Count > (NumElts / 2)) {
5180243830Sdim      hasDominantValue = true;
5181208599Srdivacky      Value = V;
5182243830Sdim    }
5183198090Srdivacky  }
5184243830Sdim  if (ValueCounts.size() != 1)
5185243830Sdim    usesOnlyOneValue = false;
5186243830Sdim  if (!Value.getNode() && ValueCounts.size() > 0)
5187243830Sdim    Value = ValueCounts.begin()->first;
5188198090Srdivacky
5189243830Sdim  if (ValueCounts.size() == 0)
5190208599Srdivacky    return DAG.getUNDEF(VT);
5191208599Srdivacky
5192261991Sdim  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
5193261991Sdim  // Keep going if we hit this case.
5194261991Sdim  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
5195208599Srdivacky    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
5196208599Srdivacky
5197212904Sdim  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5198212904Sdim
5199218893Sdim  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
5200218893Sdim  // i32 and try again.
5201243830Sdim  if (hasDominantValue && EltSize <= 32) {
5202243830Sdim    if (!isConstant) {
5203243830Sdim      SDValue N;
5204243830Sdim
5205243830Sdim      // If we are VDUPing a value that comes directly from a vector, that will
5206243830Sdim      // cause an unnecessary move to and from a GPR, where instead we could
5207249423Sdim      // just use VDUPLANE. We can only do this if the lane being extracted
5208249423Sdim      // is at a constant index, as the VDUP from lane instructions only have
5209249423Sdim      // constant-index forms.
5210249423Sdim      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5211249423Sdim          isa<ConstantSDNode>(Value->getOperand(1))) {
5212243830Sdim        // We need to create a new undef vector to use for the VDUPLANE if the
5213243830Sdim        // size of the vector from which we get the value is different from the
5214243830Sdim        // size of the vector that we need to create. We will insert the element
5215243830Sdim        // such that the register coalescer will remove unnecessary copies.
5216243830Sdim        if (VT != Value->getOperand(0).getValueType()) {
5217243830Sdim          ConstantSDNode *constIndex =
5218243830Sdim              dyn_cast<ConstantSDNode>(Value->getOperand(1));
5219243830Sdim          assert(constIndex && "The index is not a constant!");
5220243830Sdim          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
5221243830Sdim                             VT.getVectorNumElements();
5222243830Sdim          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5223243830Sdim                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
5224243830Sdim                        Value, DAG.getConstant(index, MVT::i32)),
5225243830Sdim                           DAG.getConstant(index, MVT::i32));
5226249423Sdim        } else
5227243830Sdim          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5228243830Sdim                        Value->getOperand(0), Value->getOperand(1));
5229249423Sdim      } else
5230243830Sdim        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
5231243830Sdim
5232243830Sdim      if (!usesOnlyOneValue) {
5233243830Sdim        // The dominant value was splatted as 'N', but we now have to insert
5234243830Sdim        // all differing elements.
5235243830Sdim        for (unsigned I = 0; I < NumElts; ++I) {
5236243830Sdim          if (Op.getOperand(I) == Value)
5237243830Sdim            continue;
5238243830Sdim          SmallVector<SDValue, 3> Ops;
5239243830Sdim          Ops.push_back(N);
5240243830Sdim          Ops.push_back(Op.getOperand(I));
5241243830Sdim          Ops.push_back(DAG.getConstant(I, MVT::i32));
5242276479Sdim          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
5243243830Sdim        }
5244243830Sdim      }
5245243830Sdim      return N;
5246243830Sdim    }
5247218893Sdim    if (VT.getVectorElementType().isFloatingPoint()) {
5248218893Sdim      SmallVector<SDValue, 8> Ops;
5249218893Sdim      for (unsigned i = 0; i < NumElts; ++i)
5250218893Sdim        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
5251218893Sdim                                  Op.getOperand(i)));
5252218893Sdim      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
5253276479Sdim      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
5254218893Sdim      Val = LowerBUILD_VECTOR(Val, DAG, ST);
5255212904Sdim      if (Val.getNode())
5256218893Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5257212904Sdim    }
5258243830Sdim    if (usesOnlyOneValue) {
5259243830Sdim      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
5260243830Sdim      if (isConstant && Val.getNode())
5261249423Sdim        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
5262243830Sdim    }
5263212904Sdim  }
5264212904Sdim
5265212904Sdim  // If all elements are constants and the case above didn't get hit, fall back
5266212904Sdim  // to the default expansion, which will generate a load from the constant
5267212904Sdim  // pool.
5268208599Srdivacky  if (isConstant)
5269208599Srdivacky    return SDValue();
5270208599Srdivacky
5271218893Sdim  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
5272218893Sdim  if (NumElts >= 4) {
5273218893Sdim    SDValue shuffle = ReconstructShuffle(Op, DAG);
5274218893Sdim    if (shuffle != SDValue())
5275218893Sdim      return shuffle;
5276212904Sdim  }
5277208599Srdivacky
5278208599Srdivacky  // Vectors with 32- or 64-bit elements can be built by directly assigning
5279210299Sed  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
5280210299Sed  // will be legalized.
5281208599Srdivacky  if (EltSize >= 32) {
5282208599Srdivacky    // Do the expansion with floating-point types, since that is what the VFP
5283208599Srdivacky    // registers are defined to use, and since i64 is not legal.
5284208599Srdivacky    EVT EltVT = EVT::getFloatingPointVT(EltSize);
5285208599Srdivacky    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
5286210299Sed    SmallVector<SDValue, 8> Ops;
5287210299Sed    for (unsigned i = 0; i < NumElts; ++i)
5288218893Sdim      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
5289276479Sdim    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
5290218893Sdim    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5291208599Srdivacky  }
5292208599Srdivacky
5293261991Sdim  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
5294261991Sdim  // know the default expansion would otherwise fall back on something even
5295261991Sdim  // worse: for a vector with one or two non-undef values, the default is
5296261991Sdim  // scalar_to_vector for the elements followed by a shuffle (provided the
5297261991Sdim  // shuffle is valid for the target); for everything else, it is
5298261991Sdim  // materialization element by element on the stack followed by a load.
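  // For instance (a sketch): a v4i32 BUILD_VECTOR <a, undef, b, undef> with
  // distinct non-constant a and b becomes two INSERT_VECTOR_ELTs into an
  // UNDEF vector, which the target can select directly.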
5299261991Sdim  if (!isConstant && !usesOnlyOneValue) {
5300261991Sdim    SDValue Vec = DAG.getUNDEF(VT);
5301261991Sdim    for (unsigned i = 0 ; i < NumElts; ++i) {
5302261991Sdim      SDValue V = Op.getOperand(i);
5303261991Sdim      if (V.getOpcode() == ISD::UNDEF)
5304261991Sdim        continue;
5305261991Sdim      SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
5306261991Sdim      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
5307261991Sdim    }
5308261991Sdim    return Vec;
5309261991Sdim  }
5310261991Sdim
5311194710Sed  return SDValue();
5312194710Sed}
5313194710Sed
5314218893Sdim// Gather data to see if the operation can be modelled as a
5315218893Sdim// shuffle in combination with VEXTs.
5316218893SdimSDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
5317218893Sdim                                              SelectionDAG &DAG) const {
5318261991Sdim  SDLoc dl(Op);
5319218893Sdim  EVT VT = Op.getValueType();
5320218893Sdim  unsigned NumElts = VT.getVectorNumElements();
5321218893Sdim
5322218893Sdim  SmallVector<SDValue, 2> SourceVecs;
5323218893Sdim  SmallVector<unsigned, 2> MinElts;
5324218893Sdim  SmallVector<unsigned, 2> MaxElts;
5325218893Sdim
5326218893Sdim  for (unsigned i = 0; i < NumElts; ++i) {
5327218893Sdim    SDValue V = Op.getOperand(i);
5328218893Sdim    if (V.getOpcode() == ISD::UNDEF)
5329218893Sdim      continue;
5330218893Sdim    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
5331218893Sdim      // A shuffle can only come from building a vector from various
5332218893Sdim      // elements of other vectors.
5333218893Sdim      return SDValue();
5334226633Sdim    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
5335226633Sdim               VT.getVectorElementType()) {
5336226633Sdim      // This code doesn't know how to handle shuffles where the vector
5337226633Sdim      // element types do not match (this happens because type legalization
5338226633Sdim      // promotes the return type of EXTRACT_VECTOR_ELT).
5339226633Sdim      // FIXME: It might be appropriate to extend this code to handle
5340226633Sdim      // mismatched types.
5341226633Sdim      return SDValue();
5342218893Sdim    }
5343218893Sdim
5344218893Sdim    // Record this extraction against the appropriate vector if possible...
5345218893Sdim    SDValue SourceVec = V.getOperand(0);
5346239462Sdim    // If the element number isn't a constant, we can't effectively
5347239462Sdim    // analyze what's going on.
5348239462Sdim    if (!isa<ConstantSDNode>(V.getOperand(1)))
5349239462Sdim      return SDValue();
5350218893Sdim    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5351218893Sdim    bool FoundSource = false;
5352218893Sdim    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
5353218893Sdim      if (SourceVecs[j] == SourceVec) {
5354218893Sdim        if (MinElts[j] > EltNo)
5355218893Sdim          MinElts[j] = EltNo;
5356218893Sdim        if (MaxElts[j] < EltNo)
5357218893Sdim          MaxElts[j] = EltNo;
5358218893Sdim        FoundSource = true;
5359218893Sdim        break;
5360218893Sdim      }
5361218893Sdim    }
5362218893Sdim
5363218893Sdim    // Or record a new source if not...
5364218893Sdim    if (!FoundSource) {
5365218893Sdim      SourceVecs.push_back(SourceVec);
5366218893Sdim      MinElts.push_back(EltNo);
5367218893Sdim      MaxElts.push_back(EltNo);
5368218893Sdim    }
5369218893Sdim  }
5370218893Sdim
5371218893Sdim  // Currently we only do something sane when at most two source vectors
5372218893Sdim  // are involved.
5373218893Sdim  if (SourceVecs.size() > 2)
5374218893Sdim    return SDValue();
5375218893Sdim
5376218893Sdim  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
5377218893Sdim  int VEXTOffsets[2] = {0, 0};
5378218893Sdim
5379218893Sdim  // This loop extracts the usage patterns of the source vectors
5380218893Sdim  // and prepares appropriate SDValues for a shuffle if possible.
5381218893Sdim  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
5382218893Sdim    if (SourceVecs[i].getValueType() == VT) {
5383218893Sdim      // No VEXT necessary
5384218893Sdim      ShuffleSrcs[i] = SourceVecs[i];
5385218893Sdim      VEXTOffsets[i] = 0;
5386218893Sdim      continue;
5387218893Sdim    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
5388218893Sdim      // It probably isn't worth padding out a smaller vector just to
5389218893Sdim      // break it down again in a shuffle.
5390218893Sdim      return SDValue();
5391218893Sdim    }
5392218893Sdim
5393218893Sdim    // Since only 64-bit and 128-bit vectors are legal on ARM and
5394218893Sdim    // we've eliminated the other cases...
5395218893Sdim    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
5396218893Sdim           "unexpected vector sizes in ReconstructShuffle");
5397218893Sdim
5398218893Sdim    if (MaxElts[i] - MinElts[i] >= NumElts) {
5399218893Sdim      // Span too large for a VEXT to cope with.
5400218893Sdim      return SDValue();
5401218893Sdim    }
5402218893Sdim
5403218893Sdim    if (MinElts[i] >= NumElts) {
5404218893Sdim      // The extraction can just take the second half
5405218893Sdim      VEXTOffsets[i] = NumElts;
5406218893Sdim      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5407218893Sdim                                   SourceVecs[i],
5408218893Sdim                                   DAG.getIntPtrConstant(NumElts));
5409218893Sdim    } else if (MaxElts[i] < NumElts) {
5410218893Sdim      // The extraction can just take the first half
5411218893Sdim      VEXTOffsets[i] = 0;
5412218893Sdim      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5413218893Sdim                                   SourceVecs[i],
5414218893Sdim                                   DAG.getIntPtrConstant(0));
5415218893Sdim    } else {
5416218893Sdim      // An actual VEXT is needed
5417218893Sdim      VEXTOffsets[i] = MinElts[i];
5418218893Sdim      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5419218893Sdim                                     SourceVecs[i],
5420218893Sdim                                     DAG.getIntPtrConstant(0));
5421218893Sdim      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
5422218893Sdim                                     SourceVecs[i],
5423218893Sdim                                     DAG.getIntPtrConstant(NumElts));
5424218893Sdim      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
5425218893Sdim                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
5426218893Sdim    }
5427218893Sdim  }
5428218893Sdim
5429218893Sdim  SmallVector<int, 8> Mask;
5430218893Sdim
5431218893Sdim  for (unsigned i = 0; i < NumElts; ++i) {
5432218893Sdim    SDValue Entry = Op.getOperand(i);
5433218893Sdim    if (Entry.getOpcode() == ISD::UNDEF) {
5434218893Sdim      Mask.push_back(-1);
5435218893Sdim      continue;
5436218893Sdim    }
5437218893Sdim
5438218893Sdim    SDValue ExtractVec = Entry.getOperand(0);
5439218893Sdim    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
5440218893Sdim                                          .getOperand(1))->getSExtValue();
5441218893Sdim    if (ExtractVec == SourceVecs[0]) {
5442218893Sdim      Mask.push_back(ExtractElt - VEXTOffsets[0]);
5443218893Sdim    } else {
5444218893Sdim      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
5445218893Sdim    }
5446218893Sdim  }
5447218893Sdim
5448218893Sdim  // Final check before we try to produce nonsense...
5449218893Sdim  if (isShuffleMaskLegal(Mask, VT))
5450218893Sdim    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
5451218893Sdim                                &Mask[0]);
5452218893Sdim
5453218893Sdim  return SDValue();
5454218893Sdim}
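// Illustrative sketch of the above: a v4i16 BUILD_VECTOR whose operands
// extract lanes 2..5 of a single v8i16 source records MinElts = 2 and
// MaxElts = 5, so the source is split into two halves that are rejoined
// with VEXT #2; the resulting mask <0, 1, 2, 3> is then trivially legal.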
5455218893Sdim
5456198090Srdivacky/// isShuffleMaskLegal - Targets can use this to indicate that they only
5457198090Srdivacky/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
5458198090Srdivacky/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
5459198090Srdivacky/// are assumed to be legal.
5460198090Srdivackybool
5461198090SrdivackyARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
5462198090Srdivacky                                      EVT VT) const {
5463198090Srdivacky  if (VT.getVectorNumElements() == 4 &&
5464198090Srdivacky      (VT.is128BitVector() || VT.is64BitVector())) {
5465198090Srdivacky    unsigned PFIndexes[4];
5466198090Srdivacky    for (unsigned i = 0; i != 4; ++i) {
5467198090Srdivacky      if (M[i] < 0)
5468198090Srdivacky        PFIndexes[i] = 8;
5469198090Srdivacky      else
5470198090Srdivacky        PFIndexes[i] = M[i];
5471198090Srdivacky    }
5472198090Srdivacky
5473198090Srdivacky    // Compute the index in the perfect shuffle table.
5474198090Srdivacky    unsigned PFTableIndex =
5475198090Srdivacky      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5476198090Srdivacky    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5477198090Srdivacky    unsigned Cost = (PFEntry >> 30);
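    // Worked example (a sketch): the mask <1, 1, 3, 3> packs to
    // ((1*9+1)*9+3)*9+3 = 840; each mask entry is one base-9 digit, with 8
    // reserved for undef lanes, and the top two bits of each table entry
    // hold the cost of the best known expansion.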
5478198090Srdivacky
5479198090Srdivacky    if (Cost <= 4)
5480198090Srdivacky      return true;
5481198090Srdivacky  }
5482198090Srdivacky
5483198090Srdivacky  bool ReverseVEXT;
5484198090Srdivacky  unsigned Imm, WhichResult;
5485198090Srdivacky
5486210299Sed  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5487210299Sed  return (EltSize >= 32 ||
5488210299Sed          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
5489198090Srdivacky          isVREVMask(M, VT, 64) ||
5490198090Srdivacky          isVREVMask(M, VT, 32) ||
5491198090Srdivacky          isVREVMask(M, VT, 16) ||
5492198090Srdivacky          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
5493221345Sdim          isVTBLMask(M, VT) ||
5494198090Srdivacky          isVTRNMask(M, VT, WhichResult) ||
5495198090Srdivacky          isVUZPMask(M, VT, WhichResult) ||
5496200581Srdivacky          isVZIPMask(M, VT, WhichResult) ||
5497200581Srdivacky          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
5498200581Srdivacky          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
5499249423Sdim          isVZIP_v_undef_Mask(M, VT, WhichResult) ||
5500249423Sdim          ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
5501194710Sed}
5502194710Sed
5503198090Srdivacky/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5504198090Srdivacky/// the specified operations to build the shuffle.
5505198090Srdivackystatic SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5506198090Srdivacky                                      SDValue RHS, SelectionDAG &DAG,
5507261991Sdim                                      SDLoc dl) {
5508198090Srdivacky  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5509198090Srdivacky  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5510198090Srdivacky  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
5511198090Srdivacky
5512198090Srdivacky  enum {
5513198090Srdivacky    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5514198090Srdivacky    OP_VREV,
5515198090Srdivacky    OP_VDUP0,
5516198090Srdivacky    OP_VDUP1,
5517198090Srdivacky    OP_VDUP2,
5518198090Srdivacky    OP_VDUP3,
5519198090Srdivacky    OP_VEXT1,
5520198090Srdivacky    OP_VEXT2,
5521198090Srdivacky    OP_VEXT3,
5522198090Srdivacky    OP_VUZPL, // VUZP, left result
5523198090Srdivacky    OP_VUZPR, // VUZP, right result
5524198090Srdivacky    OP_VZIPL, // VZIP, left result
5525198090Srdivacky    OP_VZIPR, // VZIP, right result
5526198090Srdivacky    OP_VTRNL, // VTRN, left result
5527198090Srdivacky    OP_VTRNR  // VTRN, right result
5528198090Srdivacky  };
5529198090Srdivacky
5530198090Srdivacky  if (OpNum == OP_COPY) {
5531198090Srdivacky    if (LHSID == (1*9+2)*9+3) return LHS;
5532198090Srdivacky    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5533198090Srdivacky    return RHS;
5534198090Srdivacky  }
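  // Editorial note: LHSID/RHSID use the same base-9 packing as the table
  // index, so (1*9+2)*9+3 encodes the mask <0,1,2,3> (LHS passed through
  // unchanged) and ((4*9+5)*9+6)*9+7 encodes <4,5,6,7> (RHS passed through
  // unchanged).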
5535198090Srdivacky
5536198090Srdivacky  SDValue OpLHS, OpRHS;
5537198090Srdivacky  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5538198090Srdivacky  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5539198090Srdivacky  EVT VT = OpLHS.getValueType();
5540198090Srdivacky
5541198090Srdivacky  switch (OpNum) {
5542198090Srdivacky  default: llvm_unreachable("Unknown shuffle opcode!");
5543198090Srdivacky  case OP_VREV:
5544223017Sdim    // VREV divides the vector in half and swaps within the half.
5545223017Sdim    if (VT.getVectorElementType() == MVT::i32 ||
5546223017Sdim        VT.getVectorElementType() == MVT::f32)
5547223017Sdim      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
5548223017Sdim    // vrev <4 x i16> -> VREV32
5549223017Sdim    if (VT.getVectorElementType() == MVT::i16)
5550223017Sdim      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
5551223017Sdim    // vrev <4 x i8> -> VREV16
5552223017Sdim    assert(VT.getVectorElementType() == MVT::i8);
5553223017Sdim    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
5554198090Srdivacky  case OP_VDUP0:
5555198090Srdivacky  case OP_VDUP1:
5556198090Srdivacky  case OP_VDUP2:
5557198090Srdivacky  case OP_VDUP3:
5558198090Srdivacky    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
5559198090Srdivacky                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
5560198090Srdivacky  case OP_VEXT1:
5561198090Srdivacky  case OP_VEXT2:
5562198090Srdivacky  case OP_VEXT3:
5563198090Srdivacky    return DAG.getNode(ARMISD::VEXT, dl, VT,
5564198090Srdivacky                       OpLHS, OpRHS,
5565198090Srdivacky                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
5566198090Srdivacky  case OP_VUZPL:
5567198090Srdivacky  case OP_VUZPR:
5568198090Srdivacky    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
5569198090Srdivacky                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
5570198090Srdivacky  case OP_VZIPL:
5571198090Srdivacky  case OP_VZIPR:
5572198090Srdivacky    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
5573198090Srdivacky                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
5574198090Srdivacky  case OP_VTRNL:
5575198090Srdivacky  case OP_VTRNR:
5576198090Srdivacky    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
5577198090Srdivacky                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
5578198090Srdivacky  }
5579194710Sed}
5580194710Sed
5581221345Sdimstatic SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
5582234353Sdim                                       ArrayRef<int> ShuffleMask,
5583221345Sdim                                       SelectionDAG &DAG) {
5584221345Sdim  // Check to see if we can use the VTBL instruction.
5585221345Sdim  SDValue V1 = Op.getOperand(0);
5586221345Sdim  SDValue V2 = Op.getOperand(1);
5587261991Sdim  SDLoc DL(Op);
5588221345Sdim
5589221345Sdim  SmallVector<SDValue, 8> VTBLMask;
5590234353Sdim  for (ArrayRef<int>::iterator
5591221345Sdim         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
5592221345Sdim    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
5593221345Sdim
5594221345Sdim  if (V2.getNode()->getOpcode() == ISD::UNDEF)
5595221345Sdim    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
5596276479Sdim                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
5597221345Sdim
5598221345Sdim  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
5599276479Sdim                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
5600221345Sdim}
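// Illustrative sketch: a single-input v8i8 permutation such as
// <7,3,6,2,5,1,4,0> matches none of the fixed NEON shuffle patterns, so it
// is lowered to a VTBL1 whose table operand is the mask itself materialized
// as a v8i8 BUILD_VECTOR; with a second live input the same mask data would
// drive a VTBL2 instead.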
5601221345Sdim
5602249423Sdimstatic SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
5603249423Sdim                                                      SelectionDAG &DAG) {
5604261991Sdim  SDLoc DL(Op);
5605249423Sdim  SDValue OpLHS = Op.getOperand(0);
5606249423Sdim  EVT VT = OpLHS.getValueType();
5607249423Sdim
5608249423Sdim  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
5609249423Sdim         "Expect a v8i16/v16i8 type");
5610249423Sdim  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
5611249423Sdim  // For a v16i8 type: after the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
5612249423Sdim  // extract the first 8 bytes into the top double word and the last 8 bytes
5613249423Sdim  // into the bottom double word. The v8i16 case is similar.
5614249423Sdim  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
5615249423Sdim  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
5616249423Sdim                     DAG.getConstant(ExtractNum, MVT::i32));
5617249423Sdim}
5618249423Sdim
5619198090Srdivackystatic SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
5620198090Srdivacky  SDValue V1 = Op.getOperand(0);
5621198090Srdivacky  SDValue V2 = Op.getOperand(1);
5622261991Sdim  SDLoc dl(Op);
5623198090Srdivacky  EVT VT = Op.getValueType();
5624198090Srdivacky  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5625198090Srdivacky
5626198090Srdivacky  // Convert shuffles that are directly supported on NEON to target-specific
5627198090Srdivacky  // DAG nodes, instead of keeping them as shuffles and matching them again
5628198090Srdivacky  // during code selection.  This is more efficient and avoids the possibility
5629198090Srdivacky  // of inconsistencies between legalization and selection.
5630198090Srdivacky  // FIXME: floating-point vectors should be canonicalized to integer vectors
5631198090Srdivacky  // of the same size so that they get CSEd properly.
5632234353Sdim  ArrayRef<int> ShuffleMask = SVN->getMask();
5633198090Srdivacky
5634210299Sed  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5635210299Sed  if (EltSize <= 32) {
5636210299Sed    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
5637210299Sed      int Lane = SVN->getSplatIndex();
5638210299Sed      // If this is undef splat, generate it via "just" vdup, if possible.
5639210299Sed      // If this is an undef splat, generate it via "just" vdup, if possible.
5640198892Srdivacky
5641234353Sdim      // Test if V1 is a SCALAR_TO_VECTOR.
5642210299Sed      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
5643210299Sed        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
5644210299Sed      }
5645234353Sdim      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
5646234353Sdim      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
5647234353Sdim      // reaches it).
5648234353Sdim      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
5649234353Sdim          !isa<ConstantSDNode>(V1.getOperand(0))) {
5650234353Sdim        bool IsScalarToVector = true;
5651234353Sdim        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
5652234353Sdim          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
5653234353Sdim            IsScalarToVector = false;
5654234353Sdim            break;
5655234353Sdim          }
5656234353Sdim        if (IsScalarToVector)
5657234353Sdim          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
5658234353Sdim      }
5659210299Sed      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
5660210299Sed                         DAG.getConstant(Lane, MVT::i32));
5661198090Srdivacky    }
5662198090Srdivacky
5663210299Sed    bool ReverseVEXT;
5664210299Sed    unsigned Imm;
5665210299Sed    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
5666210299Sed      if (ReverseVEXT)
5667210299Sed        std::swap(V1, V2);
5668210299Sed      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
5669210299Sed                         DAG.getConstant(Imm, MVT::i32));
5670210299Sed    }
5671198090Srdivacky
5672210299Sed    if (isVREVMask(ShuffleMask, VT, 64))
5673210299Sed      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
5674210299Sed    if (isVREVMask(ShuffleMask, VT, 32))
5675210299Sed      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
5676210299Sed    if (isVREVMask(ShuffleMask, VT, 16))
5677210299Sed      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
5678198090Srdivacky
5679243830Sdim    if (V2->getOpcode() == ISD::UNDEF &&
5680243830Sdim        isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
5681243830Sdim      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
5682243830Sdim                         DAG.getConstant(Imm, MVT::i32));
5683243830Sdim    }
5684243830Sdim
5685210299Sed    // Check for Neon shuffles that modify both input vectors in place.
5686210299Sed    // If both results are used, i.e., if there are two shuffles with the same
5687210299Sed    // source operands and with masks corresponding to both results of one of
5688210299Sed    // these operations, DAG memoization will ensure that a single node is
5689210299Sed    // used for both shuffles.
5690210299Sed    unsigned WhichResult;
5691210299Sed    if (isVTRNMask(ShuffleMask, VT, WhichResult))
5692210299Sed      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
5693210299Sed                         V1, V2).getValue(WhichResult);
5694210299Sed    if (isVUZPMask(ShuffleMask, VT, WhichResult))
5695210299Sed      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
5696210299Sed                         V1, V2).getValue(WhichResult);
5697210299Sed    if (isVZIPMask(ShuffleMask, VT, WhichResult))
5698210299Sed      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
5699210299Sed                         V1, V2).getValue(WhichResult);
5700198090Srdivacky
5701210299Sed    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
5702210299Sed      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
5703210299Sed                         V1, V1).getValue(WhichResult);
5704210299Sed    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5705210299Sed      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
5706210299Sed                         V1, V1).getValue(WhichResult);
5707210299Sed    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5708210299Sed      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
5709210299Sed                         V1, V1).getValue(WhichResult);
5710210299Sed  }
5711200581Srdivacky
5712198090Srdivacky  // If the shuffle is not directly supported and it has 4 elements, use
5713198090Srdivacky  // the PerfectShuffle-generated table to synthesize it from other shuffles.
5714208599Srdivacky  unsigned NumElts = VT.getVectorNumElements();
5715208599Srdivacky  if (NumElts == 4) {
5716198090Srdivacky    unsigned PFIndexes[4];
5717198090Srdivacky    for (unsigned i = 0; i != 4; ++i) {
5718198090Srdivacky      if (ShuffleMask[i] < 0)
5719198090Srdivacky        PFIndexes[i] = 8;
5720198090Srdivacky      else
5721198090Srdivacky        PFIndexes[i] = ShuffleMask[i];
5722198090Srdivacky    }
5723198090Srdivacky
5724198090Srdivacky    // Compute the index in the perfect shuffle table.
5725198090Srdivacky    unsigned PFTableIndex =
5726198090Srdivacky      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5727198090Srdivacky    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5728198090Srdivacky    unsigned Cost = (PFEntry >> 30);
5729198090Srdivacky
5730198090Srdivacky    if (Cost <= 4)
5731198090Srdivacky      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5732198090Srdivacky  }
5733198090Srdivacky
5734210299Sed  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
5735208599Srdivacky  if (EltSize >= 32) {
5736208599Srdivacky    // Do the expansion with floating-point types, since that is what the VFP
5737208599Srdivacky    // registers are defined to use, and since i64 is not legal.
5738208599Srdivacky    EVT EltVT = EVT::getFloatingPointVT(EltSize);
5739208599Srdivacky    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
5740218893Sdim    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
5741218893Sdim    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
5742210299Sed    SmallVector<SDValue, 8> Ops;
5743208599Srdivacky    for (unsigned i = 0; i < NumElts; ++i) {
5744208599Srdivacky      if (ShuffleMask[i] < 0)
5745210299Sed        Ops.push_back(DAG.getUNDEF(EltVT));
5746210299Sed      else
5747210299Sed        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
5748210299Sed                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
5749210299Sed                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
5750210299Sed                                                  MVT::i32)));
5751208599Srdivacky    }
5752276479Sdim    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
5753218893Sdim    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5754208599Srdivacky  }
5755208599Srdivacky
5756249423Sdim  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
5757249423Sdim    return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
5758249423Sdim
5759221345Sdim  if (VT == MVT::v8i8) {
5760221345Sdim    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
5761221345Sdim    if (NewOp.getNode())
5762221345Sdim      return NewOp;
5763221345Sdim  }
5764221345Sdim
5765198090Srdivacky  return SDValue();
5766198090Srdivacky}
5767198090Srdivacky
5768234353Sdimstatic SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
5769234353Sdim  // INSERT_VECTOR_ELT is legal only for immediate indexes.
5770234353Sdim  SDValue Lane = Op.getOperand(2);
5771234353Sdim  if (!isa<ConstantSDNode>(Lane))
5772234353Sdim    return SDValue();
5773234353Sdim
5774234353Sdim  return Op;
5775234353Sdim}
5776234353Sdim
5777194710Sedstatic SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
5778218893Sdim  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
5779218893Sdim  SDValue Lane = Op.getOperand(1);
5780218893Sdim  if (!isa<ConstantSDNode>(Lane))
5781218893Sdim    return SDValue();
5782218893Sdim
5783194710Sed  SDValue Vec = Op.getOperand(0);
5784218893Sdim  if (Op.getValueType() == MVT::i32 &&
5785218893Sdim      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
5786261991Sdim    SDLoc dl(Op);
5787218893Sdim    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
5788218893Sdim  }
5789218893Sdim
5790218893Sdim  return Op;
5791194710Sed}
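// Illustrative sketch: extracting lane 3 of a v8i16 as an i32 becomes a
// VGETLANEu (a zero-extending move out of the lane); a non-constant lane
// index yields a null SDValue so the default expansion takes over.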
5792194710Sed
5793198090Srdivackystatic SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
5794198090Srdivacky  // The only time a CONCAT_VECTORS operation can have legal types is when
5795198090Srdivacky  // two 64-bit vectors are concatenated to a 128-bit vector.
5796198090Srdivacky  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
5797198090Srdivacky         "unexpected CONCAT_VECTORS");
5798261991Sdim  SDLoc dl(Op);
5799198090Srdivacky  SDValue Val = DAG.getUNDEF(MVT::v2f64);
5800198090Srdivacky  SDValue Op0 = Op.getOperand(0);
5801198090Srdivacky  SDValue Op1 = Op.getOperand(1);
5802198090Srdivacky  if (Op0.getOpcode() != ISD::UNDEF)
5803198090Srdivacky    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
5804218893Sdim                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
5805198090Srdivacky                      DAG.getIntPtrConstant(0));
5806198090Srdivacky  if (Op1.getOpcode() != ISD::UNDEF)
5807198090Srdivacky    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
5808218893Sdim                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
5809198090Srdivacky                      DAG.getIntPtrConstant(1));
5810218893Sdim  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
5811194710Sed}
5812194710Sed
5813218893Sdim/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
5814218893Sdim/// element has been zero/sign-extended, depending on the isSigned parameter,
5815218893Sdim/// from an integer type half its size.
5816218893Sdimstatic bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
5817218893Sdim                                   bool isSigned) {
5818218893Sdim  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
5819218893Sdim  EVT VT = N->getValueType(0);
5820218893Sdim  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
5821218893Sdim    SDNode *BVN = N->getOperand(0).getNode();
5822218893Sdim    if (BVN->getValueType(0) != MVT::v4i32 ||
5823218893Sdim        BVN->getOpcode() != ISD::BUILD_VECTOR)
5824218893Sdim      return false;
5825218893Sdim    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
5826218893Sdim    unsigned HiElt = 1 - LoElt;
5827218893Sdim    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
5828218893Sdim    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
5829218893Sdim    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
5830218893Sdim    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
5831218893Sdim    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
5832218893Sdim      return false;
5833218893Sdim    if (isSigned) {
5834218893Sdim      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
5835218893Sdim          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
5836218893Sdim        return true;
5837218893Sdim    } else {
5838218893Sdim      if (Hi0->isNullValue() && Hi1->isNullValue())
5839218893Sdim        return true;
5840218893Sdim    }
5841218893Sdim    return false;
5842218893Sdim  }
5843218893Sdim
5844218893Sdim  if (N->getOpcode() != ISD::BUILD_VECTOR)
5845218893Sdim    return false;
5846218893Sdim
5847218893Sdim  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
5848218893Sdim    SDNode *Elt = N->getOperand(i).getNode();
5849218893Sdim    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
5850218893Sdim      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
5851218893Sdim      unsigned HalfSize = EltSize / 2;
5852218893Sdim      if (isSigned) {
5853234353Sdim        if (!isIntN(HalfSize, C->getSExtValue()))
5854218893Sdim          return false;
5855218893Sdim      } else {
5856234353Sdim        if (!isUIntN(HalfSize, C->getZExtValue()))
5857218893Sdim          return false;
5858218893Sdim      }
5859218893Sdim      continue;
5860218893Sdim    }
5861218893Sdim    return false;
5862218893Sdim  }
5863218893Sdim
5864218893Sdim  return true;
5865218893Sdim}
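// Illustrative sketch: a v4i16 BUILD_VECTOR of constants <10, -3, 127, -128>
// passes the signed check, since every element fits in i8 (half of i16),
// while <0, 255, 128, 1> passes the unsigned check instead.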
5866218893Sdim
5867218893Sdim/// isSignExtended - Check if a node is a vector value that is sign-extended
5868218893Sdim/// or a constant BUILD_VECTOR with sign-extended elements.
5869218893Sdimstatic bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
5870218893Sdim  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
5871218893Sdim    return true;
5872218893Sdim  if (isExtendedBUILD_VECTOR(N, DAG, true))
5873218893Sdim    return true;
5874218893Sdim  return false;
5875218893Sdim}
5876218893Sdim
5877218893Sdim/// isZeroExtended - Check if a node is a vector value that is zero-extended
5878218893Sdim/// or a constant BUILD_VECTOR with zero-extended elements.
5879218893Sdimstatic bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
5880218893Sdim  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
5881218893Sdim    return true;
5882218893Sdim  if (isExtendedBUILD_VECTOR(N, DAG, false))
5883218893Sdim    return true;
5884218893Sdim  return false;
5885218893Sdim}
5886218893Sdim
5887251662Sdimstatic EVT getExtensionTo64Bits(const EVT &OrigVT) {
5888251662Sdim  if (OrigVT.getSizeInBits() >= 64)
5889251662Sdim    return OrigVT;
5890251662Sdim
5891251662Sdim  assert(OrigVT.isSimple() && "Expecting a simple value type");
5892251662Sdim
5893251662Sdim  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
5894251662Sdim  switch (OrigSimpleTy) {
5895251662Sdim  default: llvm_unreachable("Unexpected Vector Type");
5896251662Sdim  case MVT::v2i8:
5897251662Sdim  case MVT::v2i16:
5898251662Sdim    return MVT::v2i32;
5899251662Sdim  case MVT::v4i8:
5900251662Sdim    return MVT::v4i16;
5901251662Sdim  }
5902251662Sdim}
5903251662Sdim
5904249423Sdim/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
5905249423Sdim/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
5906249423Sdim/// We insert the required extension here to get the vector to fill a D register.
5907249423Sdimstatic SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
5908249423Sdim                                            const EVT &OrigTy,
5909249423Sdim                                            const EVT &ExtTy,
5910249423Sdim                                            unsigned ExtOpcode) {
5911249423Sdim  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
5912249423Sdim  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
5913249423Sdim// We expect the ExtTy to be 128 bits total. If the OrigTy is less than
5914249423Sdim// 64 bits, we need to insert a new extension so that it will be 64 bits.
5915249423Sdim  if (OrigTy.getSizeInBits() >= 64)
5916249423Sdim    return N;
5917249423Sdim
5918249423Sdim  // Must extend size to at least 64 bits to be used as an operand for VMULL.
5919251662Sdim  EVT NewVT = getExtensionTo64Bits(OrigTy);
5920251662Sdim
5921261991Sdim  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
5922249423Sdim}
5923249423Sdim
5924249423Sdim/// SkipLoadExtensionForVMULL - return a load of the original vector size that
5925249423Sdim/// does not do any sign/zero extension. If the original vector is less
5926249423Sdim/// than 64 bits, an appropriate extension will be added after the load to
5927249423Sdim/// reach a total size of 64 bits. We have to add the extension separately
5928249423Sdim/// because ARM does not have a sign/zero extending load for vectors.
5929249423Sdimstatic SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
5930251662Sdim  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
5931251662Sdim
5932251662Sdim  // The load already has the right type.
5933251662Sdim  if (ExtendedTy == LD->getMemoryVT())
5934261991Sdim    return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
5935249423Sdim                LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
5936249423Sdim                LD->isNonTemporal(), LD->isInvariant(),
5937249423Sdim                LD->getAlignment());
5938251662Sdim
5939251662Sdim  // We need to create a zextload/sextload. We cannot just create a load
5940251662Sdim  // followed by a zext/sext node because LowerMUL is also run during normal
5941251662Sdim  // operation legalization where we can't create illegal types.
5942261991Sdim  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
5943251662Sdim                        LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
5944280031Sdim                        LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
5945251662Sdim                        LD->isNonTemporal(), LD->getAlignment());
5946249423Sdim}
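// Illustrative sketch: for a sextload whose memory type is v4i8,
// getExtensionTo64Bits gives v4i16, so the code above emits a sextload of
// v4i16 from the same v4i8 memory; the 64-bit result can then feed VMULL
// directly.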
5947249423Sdim
5948249423Sdim/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
5949249423Sdim/// extending load, or BUILD_VECTOR with extended elements, return the
5950249423Sdim/// unextended value. The unextended vector should be 64 bits so that it can
5951249423Sdim/// be used as an operand to a VMULL instruction. If the original vector size
5952249423Sdim/// before extension is less than 64 bits, we add an extension to resize
5953249423Sdim/// the vector to 64 bits.
5954249423Sdimstatic SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
5955212904Sdim  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
5956249423Sdim    return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
5957249423Sdim                                        N->getOperand(0)->getValueType(0),
5958249423Sdim                                        N->getValueType(0),
5959249423Sdim                                        N->getOpcode());
5960249423Sdim
5961218893Sdim  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
5962249423Sdim    return SkipLoadExtensionForVMULL(LD, DAG);
5963249423Sdim
5964218893Sdim  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
5965218893Sdim  // have been legalized as a BITCAST from v4i32.
5966218893Sdim  if (N->getOpcode() == ISD::BITCAST) {
5967218893Sdim    SDNode *BVN = N->getOperand(0).getNode();
5968218893Sdim    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
5969218893Sdim           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
5970218893Sdim    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
5971261991Sdim    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
5972218893Sdim                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
5973218893Sdim  }
5974218893Sdim  // Construct a new BUILD_VECTOR with elements truncated to half the size.
5975218893Sdim  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
5976218893Sdim  EVT VT = N->getValueType(0);
5977218893Sdim  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
5978218893Sdim  unsigned NumElts = VT.getVectorNumElements();
5979218893Sdim  MVT TruncVT = MVT::getIntegerVT(EltSize);
5980218893Sdim  SmallVector<SDValue, 8> Ops;
5981218893Sdim  for (unsigned i = 0; i != NumElts; ++i) {
5982218893Sdim    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
5983218893Sdim    const APInt &CInt = C->getAPIntValue();
5984239462Sdim    // Element types smaller than 32 bits are not legal, so use i32 elements.
5985239462Sdim    // The values are implicitly truncated so sext vs. zext doesn't matter.
5986239462Sdim    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
5987218893Sdim  }
5988261991Sdim  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
5989276479Sdim                     MVT::getVectorVT(TruncVT, NumElts), Ops);
5990212904Sdim}
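// Illustrative sketch: for N = (zero_extend v4i16:x to v4i32) this returns x
// itself; for a constant v4i32 BUILD_VECTOR with elements that fit in i16 it
// rebuilds a v4i16 BUILD_VECTOR whose i32 operands are implicitly truncated.
// Either way the result is a 64-bit vector usable as a VMULL operand.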
5991212904Sdim
5992221345Sdimstatic bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
5993221345Sdim  unsigned Opcode = N->getOpcode();
5994221345Sdim  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
5995221345Sdim    SDNode *N0 = N->getOperand(0).getNode();
5996221345Sdim    SDNode *N1 = N->getOperand(1).getNode();
5997221345Sdim    return N0->hasOneUse() && N1->hasOneUse() &&
5998221345Sdim      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
5999221345Sdim  }
6000221345Sdim  return false;
6001221345Sdim}
6002221345Sdim
6003221345Sdimstatic bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
6004221345Sdim  unsigned Opcode = N->getOpcode();
6005221345Sdim  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
6006221345Sdim    SDNode *N0 = N->getOperand(0).getNode();
6007221345Sdim    SDNode *N1 = N->getOperand(1).getNode();
6008221345Sdim    return N0->hasOneUse() && N1->hasOneUse() &&
6009221345Sdim      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
6010221345Sdim  }
6011221345Sdim  return false;
6012221345Sdim}
6013221345Sdim
6014212904Sdimstatic SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
6015212904Sdim  // Multiplications are only custom-lowered for 128-bit vectors so that
6016212904Sdim  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
6017212904Sdim  EVT VT = Op.getValueType();
6018249423Sdim  assert(VT.is128BitVector() && VT.isInteger() &&
6019249423Sdim         "unexpected type for custom-lowering ISD::MUL");
6020212904Sdim  SDNode *N0 = Op.getOperand(0).getNode();
6021212904Sdim  SDNode *N1 = Op.getOperand(1).getNode();
6022212904Sdim  unsigned NewOpc = 0;
6023221345Sdim  bool isMLA = false;
6024221345Sdim  bool isN0SExt = isSignExtended(N0, DAG);
6025221345Sdim  bool isN1SExt = isSignExtended(N1, DAG);
6026221345Sdim  if (isN0SExt && isN1SExt)
6027212904Sdim    NewOpc = ARMISD::VMULLs;
6028221345Sdim  else {
6029221345Sdim    bool isN0ZExt = isZeroExtended(N0, DAG);
6030221345Sdim    bool isN1ZExt = isZeroExtended(N1, DAG);
6031221345Sdim    if (isN0ZExt && isN1ZExt)
6032221345Sdim      NewOpc = ARMISD::VMULLu;
6033221345Sdim    else if (isN1SExt || isN1ZExt) {
6034221345Sdim      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
6035221345Sdim      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
6036221345Sdim      if (isN1SExt && isAddSubSExt(N0, DAG)) {
6037221345Sdim        NewOpc = ARMISD::VMULLs;
6038221345Sdim        isMLA = true;
6039221345Sdim      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
6040221345Sdim        NewOpc = ARMISD::VMULLu;
6041221345Sdim        isMLA = true;
6042221345Sdim      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
6043221345Sdim        std::swap(N0, N1);
6044221345Sdim        NewOpc = ARMISD::VMULLu;
6045221345Sdim        isMLA = true;
6046221345Sdim      }
6047221345Sdim    }
6048212904Sdim
6049221345Sdim    if (!NewOpc) {
6050221345Sdim      if (VT == MVT::v2i64)
6051221345Sdim        // Fall through to expand this.  It is not legal.
6052221345Sdim        return SDValue();
6053221345Sdim      else
6054221345Sdim        // Other vector multiplications are legal.
6055221345Sdim        return Op;
6056221345Sdim    }
6057221345Sdim  }
6058221345Sdim
6059212904Sdim  // Legalize to a VMULL instruction.
6060261991Sdim  SDLoc DL(Op);
6061221345Sdim  SDValue Op0;
6062249423Sdim  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
6063221345Sdim  if (!isMLA) {
6064249423Sdim    Op0 = SkipExtensionForVMULL(N0, DAG);
6065221345Sdim    assert(Op0.getValueType().is64BitVector() &&
6066221345Sdim           Op1.getValueType().is64BitVector() &&
6067221345Sdim           "unexpected types for extended operands to VMULL");
6068221345Sdim    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
6069221345Sdim  }
6070212904Sdim
6071221345Sdim  // Optimizing (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
6072221345Sdim  // isel lowering to take advantage of no-stall back-to-back vmul + vmla.
6073221345Sdim  //   vmull q0, d4, d6
6074221345Sdim  //   vmlal q0, d5, d6
6075221345Sdim  // is faster than
6076221345Sdim  //   vaddl q0, d4, d5
6077221345Sdim  //   vmovl q1, d6
6078221345Sdim  //   vmul  q0, q0, q1
6079249423Sdim  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
6080249423Sdim  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
6081221345Sdim  EVT Op1VT = Op1.getValueType();
6082221345Sdim  return DAG.getNode(N0->getOpcode(), DL, VT,
6083221345Sdim                     DAG.getNode(NewOpc, DL, VT,
6084221345Sdim                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
6085221345Sdim                     DAG.getNode(NewOpc, DL, VT,
6086221345Sdim                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
6087212904Sdim}
6088212904Sdim
6089221345Sdimstatic SDValue
6090261991SdimLowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
6091218893Sdim  // Convert to float
6092218893Sdim  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
6093218893Sdim  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
6094218893Sdim  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
6095218893Sdim  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
6096218893Sdim  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
6097218893Sdim  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
6098218893Sdim  // Get reciprocal estimate.
6099218893Sdim  // float4 recip = vrecpeq_f32(yf);
6100221345Sdim  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6101218893Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
6102218893Sdim  // Because char has a smaller range than uchar, we can actually get away
6103218893Sdim  // without any Newton steps.  This requires that we use a weird bias
6104218893Sdim  // of 0xb000, however (again, this has been exhaustively tested).
6105218893Sdim  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
6106218893Sdim  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
6107218893Sdim  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
6108218893Sdim  Y = DAG.getConstant(0xb000, MVT::i32);
6109218893Sdim  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
6110218893Sdim  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
6111218893Sdim  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
6112218893Sdim  // Convert back to short.
6113218893Sdim  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
6114218893Sdim  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
6115218893Sdim  return X;
6116218893Sdim}
6117218893Sdim
6118221345Sdimstatic SDValue
6119261991SdimLowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
6120218893Sdim  SDValue N2;
6121218893Sdim  // Convert to float.
6122218893Sdim  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
6123218893Sdim  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
6124218893Sdim  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
6125218893Sdim  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
6126218893Sdim  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
6127218893Sdim  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
6128221345Sdim
6129218893Sdim  // Use reciprocal estimate and one refinement step.
6130218893Sdim  // float4 recip = vrecpeq_f32(yf);
6131218893Sdim  // recip *= vrecpsq_f32(yf, recip);
6132221345Sdim  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6133218893Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
6134221345Sdim  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6135218893Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
6136218893Sdim                   N1, N2);
6137218893Sdim  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6138218893Sdim  // Because short has a smaller range than ushort, we can actually get away
6139218893Sdim  // with only a single Newton step.  This requires that we use a weird bias
6140218893Sdim  // of 0x89, however (again, this has been exhaustively tested).
6141223017Sdim  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
6142218893Sdim  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
6143218893Sdim  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
6144223017Sdim  N1 = DAG.getConstant(0x89, MVT::i32);
6145218893Sdim  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
6146218893Sdim  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
6147218893Sdim  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
6148218893Sdim  // Convert back to integer and return.
6149218893Sdim  // return vmovn_s32(vcvt_s32_f32(result));
6150218893Sdim  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
6151218893Sdim  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
6152218893Sdim  return N0;
6153218893Sdim}
6154218893Sdim
6155218893Sdimstatic SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
6156218893Sdim  EVT VT = Op.getValueType();
6157218893Sdim  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
6158218893Sdim         "unexpected type for custom-lowering ISD::SDIV");
6159218893Sdim
6160261991Sdim  SDLoc dl(Op);
6161218893Sdim  SDValue N0 = Op.getOperand(0);
6162218893Sdim  SDValue N1 = Op.getOperand(1);
6163218893Sdim  SDValue N2, N3;
6164221345Sdim
6165218893Sdim  if (VT == MVT::v8i8) {
6166218893Sdim    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
6167218893Sdim    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
6168221345Sdim
6169218893Sdim    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6170218893Sdim                     DAG.getIntPtrConstant(4));
6171218893Sdim    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6172221345Sdim                     DAG.getIntPtrConstant(4));
6173218893Sdim    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6174218893Sdim                     DAG.getIntPtrConstant(0));
6175218893Sdim    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6176218893Sdim                     DAG.getIntPtrConstant(0));
6177218893Sdim
6178218893Sdim    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
6179218893Sdim    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
6180218893Sdim
6181218893Sdim    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
6182218893Sdim    N0 = LowerCONCAT_VECTORS(N0, DAG);
6183221345Sdim
6184218893Sdim    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
6185218893Sdim    return N0;
6186218893Sdim  }
6187218893Sdim  return LowerSDIV_v4i16(N0, N1, dl, DAG);
6188218893Sdim}
6189218893Sdim
6190218893Sdimstatic SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
6191218893Sdim  EVT VT = Op.getValueType();
6192218893Sdim  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
6193218893Sdim         "unexpected type for custom-lowering ISD::UDIV");
6194218893Sdim
6195261991Sdim  SDLoc dl(Op);
6196218893Sdim  SDValue N0 = Op.getOperand(0);
6197218893Sdim  SDValue N1 = Op.getOperand(1);
6198218893Sdim  SDValue N2, N3;
6199221345Sdim
6200218893Sdim  if (VT == MVT::v8i8) {
6201218893Sdim    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
6202218893Sdim    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
6203221345Sdim
6204218893Sdim    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6205218893Sdim                     DAG.getIntPtrConstant(4));
6206218893Sdim    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6207221345Sdim                     DAG.getIntPtrConstant(4));
6208218893Sdim    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
6209218893Sdim                     DAG.getIntPtrConstant(0));
6210218893Sdim    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
6211218893Sdim                     DAG.getIntPtrConstant(0));
6212221345Sdim
6213218893Sdim    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
6214218893Sdim    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
6215221345Sdim
6216218893Sdim    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
6217218893Sdim    N0 = LowerCONCAT_VECTORS(N0, DAG);
6218221345Sdim
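    // Narrow the eight 16-bit quotients back to v8i8. vqmovn.su performs a
    // saturating signed-to-unsigned narrow; quotients of in-range u8
    // operands fit in [0, 255], so this behaves as a clamped truncate here.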
6219221345Sdim    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
6220218893Sdim                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
6221218893Sdim                     N0);
6222218893Sdim    return N0;
6223218893Sdim  }
6224221345Sdim
6225218893Sdim  // v4i16 udiv ... Convert to float.
6226218893Sdim  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
6227218893Sdim  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
6228218893Sdim  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
6229218893Sdim  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
6230218893Sdim  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
6231223017Sdim  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
6232218893Sdim
6233218893Sdim  // Use reciprocal estimate and two refinement steps.
6234218893Sdim  // float4 recip = vrecpeq_f32(yf);
6235218893Sdim  // recip *= vrecpsq_f32(yf, recip);
6236218893Sdim  // recip *= vrecpsq_f32(yf, recip);
6237221345Sdim  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6238223017Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
6239221345Sdim  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6240218893Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
6241223017Sdim                   BN1, N2);
6242218893Sdim  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6243221345Sdim  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
6244218893Sdim                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
6245223017Sdim                   BN1, N2);
6246218893Sdim  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
6247218893Sdim  // Simply multiplying by the reciprocal estimate can leave us a few ulps
6248218893Sdim  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
6249218893Sdim  // and that it will never cause us to return an answer too large).
6250223017Sdim  // float4 result = as_float4(as_int4(xf*recip) + 2);
6251218893Sdim  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
6252218893Sdim  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
6253218893Sdim  N1 = DAG.getConstant(2, MVT::i32);
6254218893Sdim  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
6255218893Sdim  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
6256218893Sdim  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
6257218893Sdim  // Convert back to integer and return.
6258218893Sdim  // return vmovn_u32(vcvt_s32_f32(result));
6259218893Sdim  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
6260218893Sdim  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
6261218893Sdim  return N0;
6262218893Sdim}
6263218893Sdim
6264226633Sdimstatic SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
6265226633Sdim  EVT VT = Op.getNode()->getValueType(0);
6266226633Sdim  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
6267226633Sdim
6268226633Sdim  unsigned Opc;
6269226633Sdim  bool ExtraOp = false;
6270226633Sdim  switch (Op.getOpcode()) {
6271234353Sdim  default: llvm_unreachable("Invalid code");
6272226633Sdim  case ISD::ADDC: Opc = ARMISD::ADDC; break;
6273226633Sdim  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
6274226633Sdim  case ISD::SUBC: Opc = ARMISD::SUBC; break;
6275226633Sdim  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
6276226633Sdim  }
6277226633Sdim
6278226633Sdim  if (!ExtraOp)
6279261991Sdim    return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
6280226633Sdim                       Op.getOperand(1));
6281261991Sdim  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
6282226633Sdim                     Op.getOperand(1), Op.getOperand(2));
6283226633Sdim}
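// Each ISD carry node above maps one-to-one onto its ARMISD counterpart.
// The second MVT::i32 result in VTs carries the outgoing carry/borrow, and
// the ADDE/SUBE forms additionally consume the incoming carry as operand 2.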
6284226633Sdim
6285261991SdimSDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
6286261991Sdim  assert(Subtarget->isTargetDarwin());
6287261991Sdim
6288261991Sdim  // For iOS, we want to call an alternative entry point: __sincos_stret,
6289261991Sdim  // whose return values are passed via sret.
6290261991Sdim  SDLoc dl(Op);
6291261991Sdim  SDValue Arg = Op.getOperand(0);
6292261991Sdim  EVT ArgVT = Arg.getValueType();
6293261991Sdim  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
6294261991Sdim
6295261991Sdim  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6296261991Sdim  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6297261991Sdim
6298261991Sdim  // Pair of floats / doubles used to pass the result.
6299280031Sdim  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
6300261991Sdim
6301261991Sdim  // Create stack object for sret.
6302261991Sdim  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
6303261991Sdim  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
6304261991Sdim  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
6305261991Sdim  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
6306261991Sdim
6307261991Sdim  ArgListTy Args;
6308261991Sdim  ArgListEntry Entry;
6309261991Sdim
6310261991Sdim  Entry.Node = SRet;
6311261991Sdim  Entry.Ty = RetTy->getPointerTo();
6312261991Sdim  Entry.isSExt = false;
6313261991Sdim  Entry.isZExt = false;
6314261991Sdim  Entry.isSRet = true;
6315261991Sdim  Args.push_back(Entry);
6316261991Sdim
6317261991Sdim  Entry.Node = Arg;
6318261991Sdim  Entry.Ty = ArgTy;
6319261991Sdim  Entry.isSExt = false;
6320261991Sdim  Entry.isZExt = false;
6321261991Sdim  Args.push_back(Entry);
6322261991Sdim
6323261991Sdim  const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret"
6324261991Sdim                                                : "__sincosf_stret";
6325261991Sdim  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
6326261991Sdim
6327276479Sdim  TargetLowering::CallLoweringInfo CLI(DAG);
6328276479Sdim  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
6329276479Sdim    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
6330276479Sdim               std::move(Args), 0)
6331276479Sdim    .setDiscardResult();
6332276479Sdim
6333261991Sdim  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
6334261991Sdim
6335261991Sdim  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
6336261991Sdim                                MachinePointerInfo(), false, false, false, 0);
6337261991Sdim
6338261991Sdim  // Address of cos field.
6339261991Sdim  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
6340261991Sdim                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
6341261991Sdim  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
6342261991Sdim                                MachinePointerInfo(), false, false, false, 0);
6343261991Sdim
6344261991Sdim  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
6345261991Sdim  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
6346261991Sdim                     LoadSin.getValue(0), LoadCos.getValue(0));
6347261991Sdim}
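// In effect the lowering above performs (a sketch of the __sincos_stret
// ABI as used here; T is float or double and the layout matches RetTy):
//   struct { T sinval, cosval; } tmp;     // the stack temporary (SRet)
//   __sincos_stret(&tmp, arg);            // __sincosf_stret for float
//   // ...then tmp.sinval and tmp.cosval are loaded and merged.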
6348261991Sdim
6349226633Sdimstatic SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
6350226633Sdim  // Monotonic load/store is legal for all targets
6351226633Sdim  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
6352226633Sdim    return Op;
6353226633Sdim
6354276479Sdim  // Acquire/release loads and stores are not legal for targets without a
6355226633Sdim  // dmb or equivalent available.
6356226633Sdim  return SDValue();
6357226633Sdim}
6358226633Sdim
6359261991Sdimstatic void ReplaceREADCYCLECOUNTER(SDNode *N,
6360261991Sdim                                    SmallVectorImpl<SDValue> &Results,
6361261991Sdim                                    SelectionDAG &DAG,
6362261991Sdim                                    const ARMSubtarget *Subtarget) {
6363261991Sdim  SDLoc DL(N);
6364261991Sdim  SDValue Cycles32, OutChain;
6365261991Sdim
6366261991Sdim  if (Subtarget->hasPerfMon()) {
6367261991Sdim    // Under the Performance Monitors extension, the cycle count is:
6368261991Sdim    //    mrc p15, #0, <Rt>, c9, c13, #0
6369261991Sdim    SDValue Ops[] = { N->getOperand(0), // Chain
6370261991Sdim                      DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
6371261991Sdim                      DAG.getConstant(15, MVT::i32),
6372261991Sdim                      DAG.getConstant(0, MVT::i32),
6373261991Sdim                      DAG.getConstant(9, MVT::i32),
6374261991Sdim                      DAG.getConstant(13, MVT::i32),
6375261991Sdim                      DAG.getConstant(0, MVT::i32)
6376261991Sdim    };
6377261991Sdim
6378261991Sdim    Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
6379276479Sdim                           DAG.getVTList(MVT::i32, MVT::Other), Ops);
6380261991Sdim    OutChain = Cycles32.getValue(1);
6381261991Sdim  } else {
6382261991Sdim    // Intrinsic is defined to return 0 on unsupported platforms. Technically
6383261991Sdim    // there are older ARM CPUs that have implementation-specific ways of
6384261991Sdim    // obtaining this information (FIXME!).
6385261991Sdim    Cycles32 = DAG.getConstant(0, MVT::i32);
6386261991Sdim    OutChain = DAG.getEntryNode();
6387261991Sdim  }
6388261991Sdim
6389261991Sdim
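  // Widen to the i64 the intrinsic returns: the 32-bit count (or the zero
  // fallback) forms the low half and the high half is a constant zero.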
6390261991Sdim  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
6391261991Sdim                                 Cycles32, DAG.getConstant(0, MVT::i32));
6392261991Sdim  Results.push_back(Cycles64);
6393261991Sdim  Results.push_back(OutChain);
6394261991Sdim}
6395261991Sdim
6396207618SrdivackySDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6397193323Sed  switch (Op.getOpcode()) {
6398198090Srdivacky  default: llvm_unreachable("Don't know how to custom lower this!");
6399193323Sed  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
6400198892Srdivacky  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
6401193323Sed  case ISD::GlobalAddress:
6402276479Sdim    switch (Subtarget->getTargetTriple().getObjectFormat()) {
6403276479Sdim    default: llvm_unreachable("unknown object format");
6404276479Sdim    case Triple::COFF:
6405276479Sdim      return LowerGlobalAddressWindows(Op, DAG);
6406276479Sdim    case Triple::ELF:
6407276479Sdim      return LowerGlobalAddressELF(Op, DAG);
6408276479Sdim    case Triple::MachO:
6409276479Sdim      return LowerGlobalAddressDarwin(Op, DAG);
6410276479Sdim    }
6411221345Sdim  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
6412212904Sdim  case ISD::SELECT:        return LowerSELECT(Op, DAG);
6413199481Srdivacky  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
6414199481Srdivacky  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
6415193323Sed  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
6416207618Srdivacky  case ISD::VASTART:       return LowerVASTART(Op, DAG);
6417226633Sdim  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6418218893Sdim  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
6419193323Sed  case ISD::SINT_TO_FP:
6420193323Sed  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
6421193323Sed  case ISD::FP_TO_SINT:
6422193323Sed  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
6423193323Sed  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
6424208599Srdivacky  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
6425193323Sed  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
6426193323Sed  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
6427208599Srdivacky  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
6428208599Srdivacky  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
6429203954Srdivacky  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
6430203954Srdivacky                                                               Subtarget);
6431221345Sdim  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
6432194710Sed  case ISD::SHL:
6433193323Sed  case ISD::SRL:
6434194710Sed  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
6435199481Srdivacky  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
6436198892Srdivacky  case ISD::SRL_PARTS:
6437199481Srdivacky  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
6438202878Srdivacky  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
6439249423Sdim  case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
6440226633Sdim  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
6441234353Sdim  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
6442212904Sdim  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
6443194710Sed  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
6444234353Sdim  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
6445194710Sed  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
6446198090Srdivacky  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
6447212904Sdim  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
6448212904Sdim  case ISD::MUL:           return LowerMUL(Op, DAG);
6449218893Sdim  case ISD::SDIV:          return LowerSDIV(Op, DAG);
6450218893Sdim  case ISD::UDIV:          return LowerUDIV(Op, DAG);
6451226633Sdim  case ISD::ADDC:
6452226633Sdim  case ISD::ADDE:
6453226633Sdim  case ISD::SUBC:
6454226633Sdim  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
6455276479Sdim  case ISD::SADDO:
6456276479Sdim  case ISD::UADDO:
6457276479Sdim  case ISD::SSUBO:
6458276479Sdim  case ISD::USUBO:
6459276479Sdim    return LowerXALUO(Op, DAG);
6460226633Sdim  case ISD::ATOMIC_LOAD:
6461226633Sdim  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
6462261991Sdim  case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
6463261991Sdim  case ISD::SDIVREM:
6464261991Sdim  case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
6465276479Sdim  case ISD::DYNAMIC_STACKALLOC:
6466276479Sdim    if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
6467276479Sdim      return LowerDYNAMIC_STACKALLOC(Op, DAG);
6468276479Sdim    llvm_unreachable("Don't know how to custom lower this!");
6469280031Sdim  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
6470280031Sdim  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
6471193323Sed  }
6472193323Sed}
6473193323Sed
6474193323Sed/// ReplaceNodeResults - Replace the results of a node with an illegal result
6475193323Sed/// type with new values built out of custom code.
6476193323Sedvoid ARMTargetLowering::ReplaceNodeResults(SDNode *N,
6477193323Sed                                           SmallVectorImpl<SDValue>&Results,
6478207618Srdivacky                                           SelectionDAG &DAG) const {
6479207618Srdivacky  SDValue Res;
6480193323Sed  switch (N->getOpcode()) {
6481193323Sed  default:
6482198090Srdivacky    llvm_unreachable("Don't know how to custom expand this!");
6483218893Sdim  case ISD::BITCAST:
6484218893Sdim    Res = ExpandBITCAST(N, DAG);
6485207618Srdivacky    break;
6486193323Sed  case ISD::SRL:
6487207618Srdivacky  case ISD::SRA:
6488218893Sdim    Res = Expand64BitShift(N, DAG, Subtarget);
6489207618Srdivacky    break;
6490261991Sdim  case ISD::READCYCLECOUNTER:
6491261991Sdim    ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
6492261991Sdim    return;
6493193323Sed  }
6494207618Srdivacky  if (Res.getNode())
6495207618Srdivacky    Results.push_back(Res);
6496193323Sed}
6497193323Sed
6498193323Sed//===----------------------------------------------------------------------===//
6499193323Sed//                           ARM Scheduler Hooks
6500193323Sed//===----------------------------------------------------------------------===//
6501193323Sed
6502226633Sdim/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
6503226633Sdim/// registers the function context.
6504226633Sdimvoid ARMTargetLowering::
6505226633SdimSetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
6506226633Sdim                       MachineBasicBlock *DispatchBB, int FI) const {
6507280031Sdim  const TargetInstrInfo *TII =
6508280031Sdim      getTargetMachine().getSubtargetImpl()->getInstrInfo();
6509226633Sdim  DebugLoc dl = MI->getDebugLoc();
6510226633Sdim  MachineFunction *MF = MBB->getParent();
6511226633Sdim  MachineRegisterInfo *MRI = &MF->getRegInfo();
6512226633Sdim  MachineConstantPool *MCP = MF->getConstantPool();
6513226633Sdim  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
6514226633Sdim  const Function *F = MF->getFunction();
6515226633Sdim
6516226633Sdim  bool isThumb = Subtarget->isThumb();
6517226633Sdim  bool isThumb2 = Subtarget->isThumb2();
6518226633Sdim
6519226633Sdim  unsigned PCLabelId = AFI->createPICLabelUId();
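  // On ARM the PC reads as the address of the current instruction plus 8 in
  // ARM state and plus 4 in Thumb state, which is the adjustment the PIC
  // label arithmetic below must account for.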
6520226633Sdim  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
6521226633Sdim  ARMConstantPoolValue *CPV =
6522226633Sdim    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
6523226633Sdim  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
6524226633Sdim
6525280031Sdim  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
6526280031Sdim                                           : &ARM::GPRRegClass;
6527226633Sdim
6528226633Sdim  // Grab constant pool and fixed stack memory operands.
6529226633Sdim  MachineMemOperand *CPMMO =
6530226633Sdim    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
6531226633Sdim                             MachineMemOperand::MOLoad, 4, 4);
6532226633Sdim
6533226633Sdim  MachineMemOperand *FIMMOSt =
6534226633Sdim    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
6535226633Sdim                             MachineMemOperand::MOStore, 4, 4);
6536226633Sdim
6537226633Sdim  // Load the address of the dispatch MBB into the jump buffer.
6538226633Sdim  if (isThumb2) {
6539226633Sdim    // Incoming value: jbuf
6540226633Sdim    //   ldr.n  r5, LCPI1_1
6541226633Sdim    //   orr    r5, r5, #1
6542226633Sdim    //   add    r5, pc
6543226633Sdim    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
6544226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6545226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
6546226633Sdim                   .addConstantPoolIndex(CPI)
6547226633Sdim                   .addMemOperand(CPMMO));
6548226633Sdim    // Set the low bit because of thumb mode.
6549226633Sdim    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6550226633Sdim    AddDefaultCC(
6551226633Sdim      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
6552226633Sdim                     .addReg(NewVReg1, RegState::Kill)
6553226633Sdim                     .addImm(0x01)));
6554226633Sdim    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6555226633Sdim    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
6556226633Sdim      .addReg(NewVReg2, RegState::Kill)
6557226633Sdim      .addImm(PCLabelId);
6558226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
6559226633Sdim                   .addReg(NewVReg3, RegState::Kill)
6560226633Sdim                   .addFrameIndex(FI)
6561226633Sdim                   .addImm(36)  // &jbuf[1] :: pc
6562226633Sdim                   .addMemOperand(FIMMOSt));
6563226633Sdim  } else if (isThumb) {
6564226633Sdim    // Incoming value: jbuf
6565226633Sdim    //   ldr.n  r1, LCPI1_4
6566226633Sdim    //   add    r1, pc
6567226633Sdim    //   mov    r2, #1
6568226633Sdim    //   orrs   r1, r2
6569226633Sdim    //   add    r2, $jbuf, #+4 ; &jbuf[1]
6570226633Sdim    //   str    r1, [r2]
6571226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6572226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
6573226633Sdim                   .addConstantPoolIndex(CPI)
6574226633Sdim                   .addMemOperand(CPMMO));
6575226633Sdim    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6576226633Sdim    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
6577226633Sdim      .addReg(NewVReg1, RegState::Kill)
6578226633Sdim      .addImm(PCLabelId);
6579226633Sdim    // Set the low bit because of thumb mode.
6580226633Sdim    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6581226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
6582226633Sdim                   .addReg(ARM::CPSR, RegState::Define)
6583226633Sdim                   .addImm(1));
6584226633Sdim    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6585226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
6586226633Sdim                   .addReg(ARM::CPSR, RegState::Define)
6587226633Sdim                   .addReg(NewVReg2, RegState::Kill)
6588226633Sdim                   .addReg(NewVReg3, RegState::Kill));
6589226633Sdim    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6590280031Sdim    BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
6591280031Sdim            .addFrameIndex(FI)
6592280031Sdim            .addImm(36); // &jbuf[1] :: pc
6593226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
6594226633Sdim                   .addReg(NewVReg4, RegState::Kill)
6595226633Sdim                   .addReg(NewVReg5, RegState::Kill)
6596226633Sdim                   .addImm(0)
6597226633Sdim                   .addMemOperand(FIMMOSt));
6598226633Sdim  } else {
6599226633Sdim    // Incoming value: jbuf
6600226633Sdim    //   ldr  r1, LCPI1_1
6601226633Sdim    //   add  r1, pc, r1
6602226633Sdim    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
6603226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6604226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
6605226633Sdim                   .addConstantPoolIndex(CPI)
6606226633Sdim                   .addImm(0)
6607226633Sdim                   .addMemOperand(CPMMO));
6608226633Sdim    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6609226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
6610226633Sdim                   .addReg(NewVReg1, RegState::Kill)
6611226633Sdim                   .addImm(PCLabelId));
6612226633Sdim    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
6613226633Sdim                   .addReg(NewVReg2, RegState::Kill)
6614226633Sdim                   .addFrameIndex(FI)
6615226633Sdim                   .addImm(36)  // &jbuf[1] :: pc
6616226633Sdim                   .addMemOperand(FIMMOSt));
6617226633Sdim  }
6618226633Sdim}
6619226633Sdim
6620226633SdimMachineBasicBlock *ARMTargetLowering::
6621226633SdimEmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
6622280031Sdim  const TargetInstrInfo *TII =
6623280031Sdim      getTargetMachine().getSubtargetImpl()->getInstrInfo();
6624226633Sdim  DebugLoc dl = MI->getDebugLoc();
6625226633Sdim  MachineFunction *MF = MBB->getParent();
6626226633Sdim  MachineRegisterInfo *MRI = &MF->getRegInfo();
6627226633Sdim  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
6628226633Sdim  MachineFrameInfo *MFI = MF->getFrameInfo();
6629226633Sdim  int FI = MFI->getFunctionContextIndex();
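  // Overview: build a dispatch block that reloads the call-site index the
  // runtime stored into the function context, bounds-checks it against the
  // landing-pad jump table constructed below, traps if it is out of range,
  // and otherwise branches through the table to the right landing pad.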
6630226633Sdim
6631280031Sdim  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
6632280031Sdim                                                        : &ARM::GPRnopcRegClass;
6633226633Sdim
6634226633Sdim  // Get a mapping of the call site numbers to all of the landing pads they're
6635226633Sdim  // associated with.
6636226633Sdim  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
6637226633Sdim  unsigned MaxCSNum = 0;
6638226633Sdim  MachineModuleInfo &MMI = MF->getMMI();
6639234353Sdim  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
6640234353Sdim       ++BB) {
6641226633Sdim    if (!BB->isLandingPad()) continue;
6642226633Sdim
6643226633Sdim    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
6644226633Sdim    // pad.
6645226633Sdim    for (MachineBasicBlock::iterator
6646226633Sdim           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
6647226633Sdim      if (!II->isEHLabel()) continue;
6648226633Sdim
6649226633Sdim      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
6650226633Sdim      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
6651226633Sdim
6652226633Sdim      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
6653226633Sdim      for (SmallVectorImpl<unsigned>::iterator
6654226633Sdim             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
6655226633Sdim           CSI != CSE; ++CSI) {
6656226633Sdim        CallSiteNumToLPad[*CSI].push_back(BB);
6657226633Sdim        MaxCSNum = std::max(MaxCSNum, *CSI);
6658226633Sdim      }
6659221345Sdim      break;
6660221345Sdim    }
6661221345Sdim  }
6662221345Sdim
6663226633Sdim  // Get an ordered list of the machine basic blocks for the jump table.
6664226633Sdim  std::vector<MachineBasicBlock*> LPadList;
6665226633Sdim  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
6666226633Sdim  LPadList.reserve(CallSiteNumToLPad.size());
6667226633Sdim  for (unsigned I = 1; I <= MaxCSNum; ++I) {
6668226633Sdim    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
6669226633Sdim    for (SmallVectorImpl<MachineBasicBlock*>::iterator
6670226633Sdim           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
6671226633Sdim      LPadList.push_back(*II);
6672226633Sdim      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
6673226633Sdim    }
6674226633Sdim  }
6675226633Sdim
6676226633Sdim  assert(!LPadList.empty() &&
6677226633Sdim         "No landing pad destinations for the dispatch jump table!");
6678226633Sdim
6679226633Sdim  // Create the jump table and associated information.
6680226633Sdim  MachineJumpTableInfo *JTI =
6681226633Sdim    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
6682226633Sdim  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
6683226633Sdim  unsigned UId = AFI->createJumpTableUId();
6684249423Sdim  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
6685226633Sdim
6686226633Sdim  // Create the MBBs for the dispatch code.
6687226633Sdim
6688226633Sdim  // Shove the dispatch's address into the return slot in the function context.
6689226633Sdim  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
6690226633Sdim  DispatchBB->setIsLandingPad();
6691226633Sdim
6692226633Sdim  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
6693249423Sdim  unsigned trap_opcode;
6694249423Sdim  if (Subtarget->isThumb())
6695249423Sdim    trap_opcode = ARM::tTRAP;
6696249423Sdim  else
6697249423Sdim    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
6698249423Sdim
6699249423Sdim  BuildMI(TrapBB, dl, TII->get(trap_opcode));
6700226633Sdim  DispatchBB->addSuccessor(TrapBB);
6701226633Sdim
6702226633Sdim  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
6703226633Sdim  DispatchBB->addSuccessor(DispContBB);
6704226633Sdim
6705234353Sdim  // Insert the new MBBs into the function.
6706226633Sdim  MF->insert(MF->end(), DispatchBB);
6707226633Sdim  MF->insert(MF->end(), DispContBB);
6708226633Sdim  MF->insert(MF->end(), TrapBB);
6709226633Sdim
6710226633Sdim  // Insert code into the entry block that creates and registers the function
6711226633Sdim  // context.
6712226633Sdim  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
6713226633Sdim
6714226633Sdim  MachineMemOperand *FIMMOLd =
6715226633Sdim    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
6716226633Sdim                             MachineMemOperand::MOLoad |
6717226633Sdim                             MachineMemOperand::MOVolatile, 4, 4);
6718226633Sdim
6719243830Sdim  MachineInstrBuilder MIB;
6720243830Sdim  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
6721234353Sdim
6722243830Sdim  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
6723243830Sdim  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
6724243830Sdim
6725243830Sdim  // Add a register mask with no preserved registers.  This results in all
6726243830Sdim  // registers being marked as clobbered.
6727243830Sdim  MIB.addRegMask(RI.getNoPreservedMask());
6728243830Sdim
6729234353Sdim  unsigned NumLPads = LPadList.size();
6730226633Sdim  if (Subtarget->isThumb2()) {
6731226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6732226633Sdim    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
6733226633Sdim                   .addFrameIndex(FI)
6734226633Sdim                   .addImm(4)
6735226633Sdim                   .addMemOperand(FIMMOLd));
6736234353Sdim
6737234353Sdim    if (NumLPads < 256) {
6738234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
6739234353Sdim                     .addReg(NewVReg1)
6740234353Sdim                     .addImm(LPadList.size()));
6741234353Sdim    } else {
6742234353Sdim      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6743234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
6744234353Sdim                     .addImm(NumLPads & 0xFFFF));
6745234353Sdim
6746234353Sdim      unsigned VReg2 = VReg1;
6747234353Sdim      if ((NumLPads & 0xFFFF0000) != 0) {
6748234353Sdim        VReg2 = MRI->createVirtualRegister(TRC);
6749234353Sdim        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
6750234353Sdim                       .addReg(VReg1)
6751234353Sdim                       .addImm(NumLPads >> 16));
6752234353Sdim      }
6753234353Sdim
6754234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
6755234353Sdim                     .addReg(NewVReg1)
6756234353Sdim                     .addReg(VReg2));
6757234353Sdim    }
6758234353Sdim
6759226633Sdim    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
6760226633Sdim      .addMBB(TrapBB)
6761226633Sdim      .addImm(ARMCC::HI)
6762226633Sdim      .addReg(ARM::CPSR);
6763226633Sdim
6764234353Sdim    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6765234353Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
6766226633Sdim                   .addJumpTableIndex(MJTI)
6767226633Sdim                   .addImm(UId));
6768226633Sdim
6769234353Sdim    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6770226633Sdim    AddDefaultCC(
6771226633Sdim      AddDefaultPred(
6772234353Sdim        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
6773234353Sdim        .addReg(NewVReg3, RegState::Kill)
6774226633Sdim        .addReg(NewVReg1)
6775226633Sdim        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6776226633Sdim
6777226633Sdim    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
6778234353Sdim      .addReg(NewVReg4, RegState::Kill)
6779226633Sdim      .addReg(NewVReg1)
6780226633Sdim      .addJumpTableIndex(MJTI)
6781226633Sdim      .addImm(UId);
6782226633Sdim  } else if (Subtarget->isThumb()) {
6783226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6784226633Sdim    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
6785226633Sdim                   .addFrameIndex(FI)
6786226633Sdim                   .addImm(1)
6787226633Sdim                   .addMemOperand(FIMMOLd));
6788226633Sdim
6789234353Sdim    if (NumLPads < 256) {
6790234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
6791234353Sdim                     .addReg(NewVReg1)
6792234353Sdim                     .addImm(NumLPads));
6793234353Sdim    } else {
6794234353Sdim      MachineConstantPool *ConstantPool = MF->getConstantPool();
6795234353Sdim      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6796234353Sdim      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6797234353Sdim
6798234353Sdim      // MachineConstantPool wants an explicit alignment.
6799243830Sdim      unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
6800234353Sdim      if (Align == 0)
6801243830Sdim        Align = getDataLayout()->getTypeAllocSize(C->getType());
6802234353Sdim      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6803234353Sdim
6804234353Sdim      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6805234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
6806234353Sdim                     .addReg(VReg1, RegState::Define)
6807234353Sdim                     .addConstantPoolIndex(Idx));
6808234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
6809234353Sdim                     .addReg(NewVReg1)
6810234353Sdim                     .addReg(VReg1));
6811234353Sdim    }
6812234353Sdim
6813226633Sdim    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
6814226633Sdim      .addMBB(TrapBB)
6815226633Sdim      .addImm(ARMCC::HI)
6816226633Sdim      .addReg(ARM::CPSR);
6817226633Sdim
6818226633Sdim    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6819226633Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
6820226633Sdim                   .addReg(ARM::CPSR, RegState::Define)
6821226633Sdim                   .addReg(NewVReg1)
6822226633Sdim                   .addImm(2));
6823226633Sdim
6824226633Sdim    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6825226633Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
6826226633Sdim                   .addJumpTableIndex(MJTI)
6827226633Sdim                   .addImm(UId));
6828226633Sdim
6829226633Sdim    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6830226633Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
6831226633Sdim                   .addReg(ARM::CPSR, RegState::Define)
6832226633Sdim                   .addReg(NewVReg2, RegState::Kill)
6833226633Sdim                   .addReg(NewVReg3));
6834226633Sdim
6835226633Sdim    MachineMemOperand *JTMMOLd =
6836226633Sdim      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6837226633Sdim                               MachineMemOperand::MOLoad, 4, 4);
6838226633Sdim
6839226633Sdim    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6840226633Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
6841226633Sdim                   .addReg(NewVReg4, RegState::Kill)
6842226633Sdim                   .addImm(0)
6843226633Sdim                   .addMemOperand(JTMMOLd));
6844226633Sdim
6845249423Sdim    unsigned NewVReg6 = NewVReg5;
6846249423Sdim    if (RelocM == Reloc::PIC_) {
6847249423Sdim      NewVReg6 = MRI->createVirtualRegister(TRC);
6848249423Sdim      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
6849249423Sdim                     .addReg(ARM::CPSR, RegState::Define)
6850249423Sdim                     .addReg(NewVReg5, RegState::Kill)
6851249423Sdim                     .addReg(NewVReg3));
6852249423Sdim    }
6853226633Sdim
6854226633Sdim    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
6855226633Sdim      .addReg(NewVReg6, RegState::Kill)
6856226633Sdim      .addJumpTableIndex(MJTI)
6857226633Sdim      .addImm(UId);
6858226633Sdim  } else {
6859226633Sdim    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6860226633Sdim    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
6861226633Sdim                   .addFrameIndex(FI)
6862226633Sdim                   .addImm(4)
6863226633Sdim                   .addMemOperand(FIMMOLd));
6864234353Sdim
6865234353Sdim    if (NumLPads < 256) {
6866234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
6867234353Sdim                     .addReg(NewVReg1)
6868234353Sdim                     .addImm(NumLPads));
6869234353Sdim    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
6870234353Sdim      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6871234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
6872234353Sdim                     .addImm(NumLPads & 0xFFFF));
6873234353Sdim
6874234353Sdim      unsigned VReg2 = VReg1;
6875234353Sdim      if ((NumLPads & 0xFFFF0000) != 0) {
6876234353Sdim        VReg2 = MRI->createVirtualRegister(TRC);
6877234353Sdim        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
6878234353Sdim                       .addReg(VReg1)
6879234353Sdim                       .addImm(NumLPads >> 16));
6880234353Sdim      }
6881234353Sdim
6882234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6883234353Sdim                     .addReg(NewVReg1)
6884234353Sdim                     .addReg(VReg2));
6885234353Sdim    } else {
6886234353Sdim      MachineConstantPool *ConstantPool = MF->getConstantPool();
6887234353Sdim      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6888234353Sdim      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6889234353Sdim
6890234353Sdim      // MachineConstantPool wants an explicit alignment.
6891243830Sdim      unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
6892234353Sdim      if (Align == 0)
6893243830Sdim        Align = getDataLayout()->getTypeAllocSize(C->getType());
6894234353Sdim      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6895234353Sdim
6896234353Sdim      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6897234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
6898234353Sdim                     .addReg(VReg1, RegState::Define)
6899234353Sdim                     .addConstantPoolIndex(Idx)
6900234353Sdim                     .addImm(0));
6901234353Sdim      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6902234353Sdim                     .addReg(NewVReg1)
6903234353Sdim                     .addReg(VReg1, RegState::Kill));
6904234353Sdim    }
6905234353Sdim
6906226633Sdim    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
6907226633Sdim      .addMBB(TrapBB)
6908226633Sdim      .addImm(ARMCC::HI)
6909226633Sdim      .addReg(ARM::CPSR);
6910226633Sdim
6911234353Sdim    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6912226633Sdim    AddDefaultCC(
6913234353Sdim      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
6914226633Sdim                     .addReg(NewVReg1)
6915226633Sdim                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6916234353Sdim    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6917234353Sdim    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
6918226633Sdim                   .addJumpTableIndex(MJTI)
6919226633Sdim                   .addImm(UId));
6920226633Sdim
6921226633Sdim    MachineMemOperand *JTMMOLd =
6922226633Sdim      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6923226633Sdim                               MachineMemOperand::MOLoad, 4, 4);
6924234353Sdim    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6925226633Sdim    AddDefaultPred(
6926234353Sdim      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
6927234353Sdim      .addReg(NewVReg3, RegState::Kill)
6928234353Sdim      .addReg(NewVReg4)
6929226633Sdim      .addImm(0)
6930226633Sdim      .addMemOperand(JTMMOLd));
6931226633Sdim
6932249423Sdim    if (RelocM == Reloc::PIC_) {
6933249423Sdim      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
6934249423Sdim        .addReg(NewVReg5, RegState::Kill)
6935249423Sdim        .addReg(NewVReg4)
6936249423Sdim        .addJumpTableIndex(MJTI)
6937249423Sdim        .addImm(UId);
6938249423Sdim    } else {
6939249423Sdim      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
6940249423Sdim        .addReg(NewVReg5, RegState::Kill)
6941249423Sdim        .addJumpTableIndex(MJTI)
6942249423Sdim        .addImm(UId);
6943249423Sdim    }
6944226633Sdim  }
6945226633Sdim
6946226633Sdim  // Add the jump table entries as successors to the MBB.
6947243830Sdim  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
6948226633Sdim  for (std::vector<MachineBasicBlock*>::iterator
6949226633Sdim         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
6950226633Sdim    MachineBasicBlock *CurMBB = *I;
6951280031Sdim    if (SeenMBBs.insert(CurMBB).second)
6952226633Sdim      DispContBB->addSuccessor(CurMBB);
6953226633Sdim  }
6954226633Sdim
6955234353Sdim  // N.B. the order the invoke BBs are processed in doesn't matter here.
6956276479Sdim  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
6957234353Sdim  SmallVector<MachineBasicBlock*, 64> MBBLPads;
6958280031Sdim  for (MachineBasicBlock *BB : InvokeBBs) {
6959226633Sdim
6960226633Sdim    // Remove the landing pad successor from the invoke block and replace it
6961226633Sdim    // with the new dispatch block.
6962234353Sdim    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
6963234353Sdim                                                  BB->succ_end());
6964234353Sdim    while (!Successors.empty()) {
6965234353Sdim      MachineBasicBlock *SMBB = Successors.pop_back_val();
6966226633Sdim      if (SMBB->isLandingPad()) {
6967226633Sdim        BB->removeSuccessor(SMBB);
6968234353Sdim        MBBLPads.push_back(SMBB);
6969226633Sdim      }
6970226633Sdim    }
6971226633Sdim
6972226633Sdim    BB->addSuccessor(DispatchBB);
6973226633Sdim
6974226633Sdim    // Find the invoke call and mark all of the callee-saved registers as
6975226633Sdim    // 'implicitly defined' so that they're spilled. This prevents
6976226633Sdim    // instructions from being hoisted to before the EH block, where they
6977226633Sdim    // would never be executed.
6978226633Sdim    for (MachineBasicBlock::reverse_iterator
6979226633Sdim           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
6980234353Sdim      if (!II->isCall()) continue;
6981226633Sdim
6982226633Sdim      DenseMap<unsigned, bool> DefRegs;
6983226633Sdim      for (MachineInstr::mop_iterator
6984226633Sdim             OI = II->operands_begin(), OE = II->operands_end();
6985226633Sdim           OI != OE; ++OI) {
6986226633Sdim        if (!OI->isReg()) continue;
6987226633Sdim        DefRegs[OI->getReg()] = true;
6988226633Sdim      }
6989226633Sdim
6990249423Sdim      MachineInstrBuilder MIB(*MF, &*II);
6991226633Sdim
6992226633Sdim      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
6993234353Sdim        unsigned Reg = SavedRegs[i];
6994234353Sdim        if (Subtarget->isThumb2() &&
6995239462Sdim            !ARM::tGPRRegClass.contains(Reg) &&
6996239462Sdim            !ARM::hGPRRegClass.contains(Reg))
6997234353Sdim          continue;
6998239462Sdim        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
6999234353Sdim          continue;
7000239462Sdim        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
7001234353Sdim          continue;
7002234353Sdim        if (!DefRegs[Reg])
7003234353Sdim          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
7004226633Sdim      }
7005226633Sdim
7006226633Sdim      break;
7007226633Sdim    }
7008226633Sdim  }
7009226633Sdim
7010234353Sdim  // Mark all former landing pads as non-landing pads. The dispatch is the only
7011234353Sdim  // landing pad now.
7012234353Sdim  for (SmallVectorImpl<MachineBasicBlock*>::iterator
7013234353Sdim         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
7014234353Sdim    (*I)->setIsLandingPad(false);
7015234353Sdim
7016226633Sdim  // The instruction is gone now.
7017221345Sdim  MI->eraseFromParent();
7018226633Sdim
7019226633Sdim  return MBB;
7020221345Sdim}
7021221345Sdim
7022226633Sdimstatic
7023226633SdimMachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
7024226633Sdim  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
7025226633Sdim       E = MBB->succ_end(); I != E; ++I)
7026226633Sdim    if (*I != Succ)
7027226633Sdim      return *I;
7028226633Sdim  llvm_unreachable("Expecting a BB with two successors!");
7029226633Sdim}
7030226633Sdim
7031261991Sdim/// Return the load opcode for a given load size. If the load size is >= 8,
7032261991Sdim/// a NEON opcode will be returned.
7033261991Sdimstatic unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
7034261991Sdim  if (LdSize >= 8)
7035261991Sdim    return LdSize == 16 ? ARM::VLD1q32wb_fixed
7036261991Sdim                        : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
7037261991Sdim  if (IsThumb1)
7038261991Sdim    return LdSize == 4 ? ARM::tLDRi
7039261991Sdim                       : LdSize == 2 ? ARM::tLDRHi
7040261991Sdim                                     : LdSize == 1 ? ARM::tLDRBi : 0;
7041261991Sdim  if (IsThumb2)
7042261991Sdim    return LdSize == 4 ? ARM::t2LDR_POST
7043261991Sdim                       : LdSize == 2 ? ARM::t2LDRH_POST
7044261991Sdim                                     : LdSize == 1 ? ARM::t2LDRB_POST : 0;
7045261991Sdim  return LdSize == 4 ? ARM::LDR_POST_IMM
7046261991Sdim                     : LdSize == 2 ? ARM::LDRH_POST
7047261991Sdim                                   : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
7048261991Sdim}
7049261991Sdim
7050261991Sdim/// Return the store opcode for a given store size. If the store size is >= 8,
7051261991Sdim/// a NEON opcode will be returned.
7052261991Sdimstatic unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
7053261991Sdim  if (StSize >= 8)
7054261991Sdim    return StSize == 16 ? ARM::VST1q32wb_fixed
7055261991Sdim                        : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
7056261991Sdim  if (IsThumb1)
7057261991Sdim    return StSize == 4 ? ARM::tSTRi
7058261991Sdim                       : StSize == 2 ? ARM::tSTRHi
7059261991Sdim                                     : StSize == 1 ? ARM::tSTRBi : 0;
7060261991Sdim  if (IsThumb2)
7061261991Sdim    return StSize == 4 ? ARM::t2STR_POST
7062261991Sdim                       : StSize == 2 ? ARM::t2STRH_POST
7063261991Sdim                                     : StSize == 1 ? ARM::t2STRB_POST : 0;
7064261991Sdim  return StSize == 4 ? ARM::STR_POST_IMM
7065261991Sdim                     : StSize == 2 ? ARM::STRH_POST
7066261991Sdim                                   : StSize == 1 ? ARM::STRB_POST_IMM : 0;
7067261991Sdim}
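// The two opcode tables above mirror each other; a size that maps to 0 has
// no single-instruction form for that mode, and the callers below assert
// that the opcode they get back is non-zero.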
7068261991Sdim
7069261991Sdim/// Emit a post-increment load operation of the given size. The instructions
7070261991Sdim/// will be added to BB at Pos.
7071261991Sdimstatic void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
7072261991Sdim                       const TargetInstrInfo *TII, DebugLoc dl,
7073261991Sdim                       unsigned LdSize, unsigned Data, unsigned AddrIn,
7074261991Sdim                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
7075261991Sdim  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
7076261991Sdim  assert(LdOpc != 0 && "Should have a load opcode");
7077261991Sdim  if (LdSize >= 8) {
7078261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7079261991Sdim                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7080261991Sdim                       .addImm(0));
7081261991Sdim  } else if (IsThumb1) {
7082261991Sdim    // load + update AddrIn
7083261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7084261991Sdim                       .addReg(AddrIn).addImm(0));
7085261991Sdim    MachineInstrBuilder MIB =
7086261991Sdim        BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
7087261991Sdim    MIB = AddDefaultT1CC(MIB);
7088261991Sdim    MIB.addReg(AddrIn).addImm(LdSize);
7089261991Sdim    AddDefaultPred(MIB);
7090261991Sdim  } else if (IsThumb2) {
7091261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7092261991Sdim                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7093261991Sdim                       .addImm(LdSize));
7094261991Sdim  } else { // arm
7095261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
7096261991Sdim                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
7097261991Sdim                       .addReg(0).addImm(LdSize));
7098261991Sdim  }
7099261991Sdim}
7100261991Sdim
7101261991Sdim/// Emit a post-increment store operation of the given size. The instructions
7102261991Sdim/// will be added to BB at Pos.
7103261991Sdimstatic void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
7104261991Sdim                       const TargetInstrInfo *TII, DebugLoc dl,
7105261991Sdim                       unsigned StSize, unsigned Data, unsigned AddrIn,
7106261991Sdim                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
7107261991Sdim  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
7108261991Sdim  assert(StOpc != 0 && "Should have a store opcode");
7109261991Sdim  if (StSize >= 8) {
7110261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7111261991Sdim                       .addReg(AddrIn).addImm(0).addReg(Data));
7112261991Sdim  } else if (IsThumb1) {
7113261991Sdim    // store + update AddrIn
7114261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
7115261991Sdim                       .addReg(AddrIn).addImm(0));
7116261991Sdim    MachineInstrBuilder MIB =
7117261991Sdim        BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
7118261991Sdim    MIB = AddDefaultT1CC(MIB);
7119261991Sdim    MIB.addReg(AddrIn).addImm(StSize);
7120261991Sdim    AddDefaultPred(MIB);
7121261991Sdim  } else if (IsThumb2) {
7122261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7123261991Sdim                       .addReg(Data).addReg(AddrIn).addImm(StSize));
7124261991Sdim  } else { // arm
7125261991Sdim    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
7126261991Sdim                       .addReg(Data).addReg(AddrIn).addReg(0)
7127261991Sdim                       .addImm(StSize));
7128261991Sdim  }
7129261991Sdim}
7130261991Sdim
7131261991SdimMachineBasicBlock *
7132261991SdimARMTargetLowering::EmitStructByval(MachineInstr *MI,
7133261991Sdim                                   MachineBasicBlock *BB) const {
7134239462Sdim  // This pseudo instruction has 4 operands: dst, src, size, alignment.
7135239462Sdim  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
7136239462Sdim  // Otherwise, we will generate unrolled scalar copies.
7137280031Sdim  const TargetInstrInfo *TII =
7138280031Sdim      getTargetMachine().getSubtargetImpl()->getInstrInfo();
7139239462Sdim  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7140239462Sdim  MachineFunction::iterator It = BB;
7141239462Sdim  ++It;
7142239462Sdim
7143239462Sdim  unsigned dest = MI->getOperand(0).getReg();
7144239462Sdim  unsigned src = MI->getOperand(1).getReg();
7145239462Sdim  unsigned SizeVal = MI->getOperand(2).getImm();
7146239462Sdim  unsigned Align = MI->getOperand(3).getImm();
7147239462Sdim  DebugLoc dl = MI->getDebugLoc();
7148239462Sdim
7149239462Sdim  MachineFunction *MF = BB->getParent();
7150239462Sdim  MachineRegisterInfo &MRI = MF->getRegInfo();
7151261991Sdim  unsigned UnitSize = 0;
7152276479Sdim  const TargetRegisterClass *TRC = nullptr;
7153276479Sdim  const TargetRegisterClass *VecTRC = nullptr;
7154239462Sdim
7155261991Sdim  bool IsThumb1 = Subtarget->isThumb1Only();
7156261991Sdim  bool IsThumb2 = Subtarget->isThumb2();
7157239462Sdim
7158239462Sdim  if (Align & 1) {
7159239462Sdim    UnitSize = 1;
7160239462Sdim  } else if (Align & 2) {
7161239462Sdim    UnitSize = 2;
7162239462Sdim  } else {
7163239462Sdim    // Check whether we can use NEON instructions.
7164249423Sdim    if (!MF->getFunction()->getAttributes().
7165249423Sdim          hasAttribute(AttributeSet::FunctionIndex,
7166249423Sdim                       Attribute::NoImplicitFloat) &&
7167239462Sdim        Subtarget->hasNEON()) {
7168261991Sdim      if ((Align % 16 == 0) && SizeVal >= 16)
7169239462Sdim        UnitSize = 16;
7170261991Sdim      else if ((Align % 8 == 0) && SizeVal >= 8)
7171239462Sdim        UnitSize = 8;
7172239462Sdim    }
7173239462Sdim    // Can't use NEON instructions.
7174261991Sdim    if (UnitSize == 0)
7175239462Sdim      UnitSize = 4;
7176239462Sdim  }
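  // Worked example (illustrative numbers only): a 37-byte byval copy with
  // 16-byte alignment on a NEON-capable target picks UnitSize == 16, so
  // LoopSize == 32 and BytesLeft == 5 below: two q-register copies followed
  // by five byte copies, provided SizeVal is within the inline threshold.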
7177239462Sdim
7178261991Sdim  // Select the correct opcode and register class for the unit-size load/store.
7179261991Sdim  bool IsNeon = UnitSize >= 8;
7180280031Sdim  TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
7181261991Sdim  if (IsNeon)
7182280031Sdim    VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
7183280031Sdim                            : UnitSize == 8 ? &ARM::DPRRegClass
7184280031Sdim                                            : nullptr;
7185261991Sdim
7186239462Sdim  unsigned BytesLeft = SizeVal % UnitSize;
7187239462Sdim  unsigned LoopSize = SizeVal - BytesLeft;
7188239462Sdim
7189239462Sdim  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
7190239462Sdim    // Use LDR and STR to copy.
7191239462Sdim    // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
7192239462Sdim    // [destOut] = STR_POST(scratch, destIn, UnitSize)
7193239462Sdim    unsigned srcIn = src;
7194239462Sdim    unsigned destIn = dest;
7195239462Sdim    for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
7196239462Sdim      unsigned srcOut = MRI.createVirtualRegister(TRC);
7197239462Sdim      unsigned destOut = MRI.createVirtualRegister(TRC);
7198261991Sdim      unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
7199261991Sdim      emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
7200261991Sdim                 IsThumb1, IsThumb2);
7201261991Sdim      emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
7202261991Sdim                 IsThumb1, IsThumb2);
7203239462Sdim      srcIn = srcOut;
7204239462Sdim      destIn = destOut;
7205239462Sdim    }
7206239462Sdim
7207239462Sdim    // Handle the leftover bytes with LDRB and STRB.
7208239462Sdim    // [scratch, srcOut] = LDRB_POST(srcIn, 1)
7209239462Sdim    // [destOut] = STRB_POST(scratch, destIn, 1)
7210239462Sdim    for (unsigned i = 0; i < BytesLeft; i++) {
7211239462Sdim      unsigned srcOut = MRI.createVirtualRegister(TRC);
7212239462Sdim      unsigned destOut = MRI.createVirtualRegister(TRC);
7213261991Sdim      unsigned scratch = MRI.createVirtualRegister(TRC);
7214261991Sdim      emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
7215261991Sdim                 IsThumb1, IsThumb2);
7216261991Sdim      emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
7217261991Sdim                 IsThumb1, IsThumb2);
7218239462Sdim      srcIn = srcOut;
7219239462Sdim      destIn = destOut;
7220239462Sdim    }
7221239462Sdim    MI->eraseFromParent();   // The instruction is gone now.
7222239462Sdim    return BB;
7223239462Sdim  }
7224239462Sdim
7225239462Sdim  // Expand the pseudo op to a loop.
7226239462Sdim  // thisMBB:
7227239462Sdim  //   ...
7228239462Sdim  //   movw varEnd, # --> with thumb2
7229239462Sdim  //   movt varEnd, #
7230239462Sdim  //   ldrcp varEnd, idx --> without thumb2
7231239462Sdim  //   fallthrough --> loopMBB
7232239462Sdim  // loopMBB:
7233239462Sdim  //   PHI varPhi, varEnd, varLoop
7234239462Sdim  //   PHI srcPhi, src, srcLoop
7235239462Sdim  //   PHI destPhi, dst, destLoop
7236239462Sdim  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
7237239462Sdim  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
7238239462Sdim  //   subs varLoop, varPhi, #UnitSize
7239239462Sdim  //   bne loopMBB
7240239462Sdim  //   fallthrough --> exitMBB
7241239462Sdim  // exitMBB:
7242239462Sdim  //   epilogue to handle left-over bytes
7243239462Sdim  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
7244239462Sdim  //   [destOut] = STRB_POST(scratch, destLoop, 1)
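  //
  // As a rough sketch (illustrative only; register names are invented), with
  // UnitSize = 4 and LoopSize = 256 under Thumb2 the loop portion comes out
  // roughly as:
  //     movw  r2, #256
  //   loop:
  //     ldr   r3, [r0], #4
  //     str   r3, [r1], #4
  //     subs  r2, r2, #4
  //     bne   loop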
7245239462Sdim  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7246239462Sdim  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7247239462Sdim  MF->insert(It, loopMBB);
7248239462Sdim  MF->insert(It, exitMBB);
7249239462Sdim
7250239462Sdim  // Transfer the remainder of BB and its successor edges to exitMBB.
7251239462Sdim  exitMBB->splice(exitMBB->begin(), BB,
7252276479Sdim                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7253239462Sdim  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7254239462Sdim
7255239462Sdim  // Load an immediate to varEnd.
7256239462Sdim  unsigned varEnd = MRI.createVirtualRegister(TRC);
7257261991Sdim  if (IsThumb2) {
7258261991Sdim    unsigned Vtmp = varEnd;
7259239462Sdim    if ((LoopSize & 0xFFFF0000) != 0)
7260261991Sdim      Vtmp = MRI.createVirtualRegister(TRC);
7261261991Sdim    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
7262261991Sdim                       .addImm(LoopSize & 0xFFFF));
7263239462Sdim
7264239462Sdim    if ((LoopSize & 0xFFFF0000) != 0)
7265239462Sdim      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
7266261991Sdim                         .addReg(Vtmp).addImm(LoopSize >> 16));
7267239462Sdim  } else {
7268239462Sdim    MachineConstantPool *ConstantPool = MF->getConstantPool();
7269239462Sdim    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
7270239462Sdim    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
7271239462Sdim
7272239462Sdim    // MachineConstantPool wants an explicit alignment.
7273243830Sdim    unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
7274239462Sdim    if (Align == 0)
7275243830Sdim      Align = getDataLayout()->getTypeAllocSize(C->getType());
7276239462Sdim    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
7277239462Sdim
7278261991Sdim    if (IsThumb1)
7279261991Sdim      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
7280261991Sdim          varEnd, RegState::Define).addConstantPoolIndex(Idx));
7281261991Sdim    else
7282261991Sdim      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
7283261991Sdim          varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
7284239462Sdim  }
7285239462Sdim  BB->addSuccessor(loopMBB);
7286239462Sdim
7287239462Sdim  // Generate the loop body:
7288239462Sdim  //   varPhi = PHI(varLoop, varEnd)
7289239462Sdim  //   srcPhi = PHI(srcLoop, src)
7290239462Sdim  //   destPhi = PHI(destLoop, dst)
7291239462Sdim  MachineBasicBlock *entryBB = BB;
7292239462Sdim  BB = loopMBB;
7293239462Sdim  unsigned varLoop = MRI.createVirtualRegister(TRC);
7294239462Sdim  unsigned varPhi = MRI.createVirtualRegister(TRC);
7295239462Sdim  unsigned srcLoop = MRI.createVirtualRegister(TRC);
7296239462Sdim  unsigned srcPhi = MRI.createVirtualRegister(TRC);
7297239462Sdim  unsigned destLoop = MRI.createVirtualRegister(TRC);
7298239462Sdim  unsigned destPhi = MRI.createVirtualRegister(TRC);
7299239462Sdim
7300239462Sdim  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
7301239462Sdim    .addReg(varLoop).addMBB(loopMBB)
7302239462Sdim    .addReg(varEnd).addMBB(entryBB);
7303239462Sdim  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
7304239462Sdim    .addReg(srcLoop).addMBB(loopMBB)
7305239462Sdim    .addReg(src).addMBB(entryBB);
7306239462Sdim  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
7307239462Sdim    .addReg(destLoop).addMBB(loopMBB)
7308239462Sdim    .addReg(dest).addMBB(entryBB);
7309239462Sdim
7310239462Sdim  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
7311239462Sdim  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
7312261991Sdim  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
7313261991Sdim  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
7314261991Sdim             IsThumb1, IsThumb2);
7315261991Sdim  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
7316261991Sdim             IsThumb1, IsThumb2);
7317239462Sdim
7318261991Sdim  // Decrement loop variable by UnitSize.
7319261991Sdim  if (IsThumb1) {
7320261991Sdim    MachineInstrBuilder MIB =
7321261991Sdim        BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
7322261991Sdim    MIB = AddDefaultT1CC(MIB);
7323261991Sdim    MIB.addReg(varPhi).addImm(UnitSize);
7324261991Sdim    AddDefaultPred(MIB);
7325239462Sdim  } else {
7326261991Sdim    MachineInstrBuilder MIB =
7327261991Sdim        BuildMI(*BB, BB->end(), dl,
7328261991Sdim                TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
7329261991Sdim    AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
7330261991Sdim    MIB->getOperand(5).setReg(ARM::CPSR);
7331261991Sdim    MIB->getOperand(5).setIsDef(true);
7332239462Sdim  }
7333261991Sdim  BuildMI(*BB, BB->end(), dl,
7334261991Sdim          TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
7335261991Sdim      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
7336239462Sdim
7337239462Sdim  // loopMBB can loop back to loopMBB or fall through to exitMBB.
7338239462Sdim  BB->addSuccessor(loopMBB);
7339239462Sdim  BB->addSuccessor(exitMBB);
7340239462Sdim
7341239462Sdim  // Add epilogue to handle BytesLeft.
7342239462Sdim  BB = exitMBB;
7343239462Sdim  MachineInstr *StartOfExit = exitMBB->begin();
7344239462Sdim
7345239462Sdim  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
7346239462Sdim  //   [destOut] = STRB_POST(scratch, destLoop, 1)
7347239462Sdim  unsigned srcIn = srcLoop;
7348239462Sdim  unsigned destIn = destLoop;
7349239462Sdim  for (unsigned i = 0; i < BytesLeft; i++) {
7350239462Sdim    unsigned srcOut = MRI.createVirtualRegister(TRC);
7351239462Sdim    unsigned destOut = MRI.createVirtualRegister(TRC);
7352261991Sdim    unsigned scratch = MRI.createVirtualRegister(TRC);
7353261991Sdim    emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
7354261991Sdim               IsThumb1, IsThumb2);
7355261991Sdim    emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
7356261991Sdim               IsThumb1, IsThumb2);
7357239462Sdim    srcIn = srcOut;
7358239462Sdim    destIn = destOut;
7359239462Sdim  }
7360239462Sdim
7361239462Sdim  MI->eraseFromParent();   // The instruction is gone now.
7362239462Sdim  return BB;
7363239462Sdim}
7364239462Sdim
7365200581SrdivackyMachineBasicBlock *
7366276479SdimARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
7367276479Sdim                                       MachineBasicBlock *MBB) const {
7368276479Sdim  const TargetMachine &TM = getTargetMachine();
7369280031Sdim  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
7370276479Sdim  DebugLoc DL = MI->getDebugLoc();
7371276479Sdim
7372276479Sdim  assert(Subtarget->isTargetWindows() &&
7373276479Sdim         "__chkstk is only supported on Windows");
7374276479Sdim  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
7375276479Sdim
7376276479Sdim  // __chkstk takes the number of words to allocate on the stack in R4, and
7377276479Sdim  // returns the stack adjustment in number of bytes in R4.  This will not
7378276479Sdim  // clober any other registers (other than the obvious lr).
7379276479Sdim  //
7380276479Sdim  // Although, technically, IP should be considered a register which may be
7381276479Sdim  // clobbered, the call itself will not touch it.  Windows on ARM is a pure
7382276479Sdim  // thumb-2 environment, so there is no interworking required.  As a result, we
7383276479Sdim  // do not expect a veneer to be emitted by the linker, clobbering IP.
7384276479Sdim  //
7385276479Sdim  // Each module receives its own copy of __chkstk, so no import thunk is
7386276479Sdim  // required, again, ensuring that IP is not clobbered.
7387276479Sdim  //
7388276479Sdim  // Finally, although some linkers may theoretically provide a trampoline for
7389276479Sdim  // out of range calls (which is quite common due to a 32M range limitation of
7390276479Sdim  // branches for Thumb), we can generate the long-call version via
7391276479Sdim  // -mcmodel=large, alleviating the need for the trampoline which may clobber
7392276479Sdim  // IP.
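  //
  // Illustratively (a sketch, not taken from the original source), the
  // sequences built below come out roughly as:
  //   small/medium/default/kernel:  bl __chkstk
  //   large/jit-default:            movw+movt rN, __chkstk; blx rN
  // followed in both cases by:      sub.w sp, sp, r4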
7393276479Sdim
7394276479Sdim  switch (TM.getCodeModel()) {
7395276479Sdim  case CodeModel::Small:
7396276479Sdim  case CodeModel::Medium:
7397276479Sdim  case CodeModel::Default:
7398276479Sdim  case CodeModel::Kernel:
7399276479Sdim    BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
7400276479Sdim      .addImm((unsigned)ARMCC::AL).addReg(0)
7401276479Sdim      .addExternalSymbol("__chkstk")
7402276479Sdim      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7403276479Sdim      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
7404276479Sdim      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7405276479Sdim    break;
7406276479Sdim  case CodeModel::Large:
7407276479Sdim  case CodeModel::JITDefault: {
7408276479Sdim    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
7409276479Sdim    unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
7410276479Sdim
7411276479Sdim    BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
7412276479Sdim      .addExternalSymbol("__chkstk");
7413276479Sdim    BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
7414276479Sdim      .addImm((unsigned)ARMCC::AL).addReg(0)
7415276479Sdim      .addReg(Reg, RegState::Kill)
7416276479Sdim      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
7417276479Sdim      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
7418276479Sdim      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
7419276479Sdim    break;
7420276479Sdim  }
7421276479Sdim  }
7422276479Sdim
7423276479Sdim  AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
7424276479Sdim                                      ARM::SP)
7425276479Sdim                              .addReg(ARM::SP).addReg(ARM::R4)));
7426276479Sdim
7427276479Sdim  MI->eraseFromParent();
7428276479Sdim  return MBB;
7429276479Sdim}
7430276479Sdim
7431276479SdimMachineBasicBlock *
7432193323SedARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
7433207618Srdivacky                                               MachineBasicBlock *BB) const {
7434280031Sdim  const TargetInstrInfo *TII =
7435280031Sdim      getTargetMachine().getSubtargetImpl()->getInstrInfo();
7436193323Sed  DebugLoc dl = MI->getDebugLoc();
7437200581Srdivacky  bool isThumb2 = Subtarget->isThumb2();
7438193323Sed  switch (MI->getOpcode()) {
7439221345Sdim  default: {
7440200581Srdivacky    MI->dump();
7441198090Srdivacky    llvm_unreachable("Unexpected instr type to insert");
7442221345Sdim  }
7443226633Sdim  // The Thumb2 pre-indexed stores have the same MI operands, they just
7444226633Sdim  // define them differently in the .td files from the isel patterns, so
7445226633Sdim  // they need pseudos.
7446226633Sdim  case ARM::t2STR_preidx:
7447226633Sdim    MI->setDesc(TII->get(ARM::t2STR_PRE));
7448226633Sdim    return BB;
7449226633Sdim  case ARM::t2STRB_preidx:
7450226633Sdim    MI->setDesc(TII->get(ARM::t2STRB_PRE));
7451226633Sdim    return BB;
7452226633Sdim  case ARM::t2STRH_preidx:
7453226633Sdim    MI->setDesc(TII->get(ARM::t2STRH_PRE));
7454226633Sdim    return BB;
7455226633Sdim
7456226633Sdim  case ARM::STRi_preidx:
7457226633Sdim  case ARM::STRBi_preidx: {
7458226633Sdim    unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
7459226633Sdim      ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
7460226633Sdim    // Decode the offset.
7461226633Sdim    unsigned Offset = MI->getOperand(4).getImm();
7462226633Sdim    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
7463226633Sdim    Offset = ARM_AM::getAM2Offset(Offset);
7464226633Sdim    if (isSub)
7465226633Sdim      Offset = -Offset;
7466226633Sdim
7467226633Sdim    MachineMemOperand *MMO = *MI->memoperands_begin();
7468226633Sdim    BuildMI(*BB, MI, dl, TII->get(NewOpc))
7469226633Sdim      .addOperand(MI->getOperand(0))  // Rn_wb
7470226633Sdim      .addOperand(MI->getOperand(1))  // Rt
7471226633Sdim      .addOperand(MI->getOperand(2))  // Rn
7472226633Sdim      .addImm(Offset)                 // offset (skip GPR==zero_reg)
7473226633Sdim      .addOperand(MI->getOperand(5))  // pred
7474226633Sdim      .addOperand(MI->getOperand(6))
7475226633Sdim      .addMemOperand(MMO);
7476226633Sdim    MI->eraseFromParent();
7477226633Sdim    return BB;
7478226633Sdim  }
7479226633Sdim  case ARM::STRr_preidx:
7480226633Sdim  case ARM::STRBr_preidx:
7481226633Sdim  case ARM::STRH_preidx: {
7482226633Sdim    unsigned NewOpc;
7483226633Sdim    switch (MI->getOpcode()) {
7484226633Sdim    default: llvm_unreachable("unexpected opcode!");
7485226633Sdim    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
7486226633Sdim    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
7487226633Sdim    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
7488226633Sdim    }
7489226633Sdim    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
7490226633Sdim    for (unsigned i = 0; i < MI->getNumOperands(); ++i)
7491226633Sdim      MIB.addOperand(MI->getOperand(i));
7492226633Sdim    MI->eraseFromParent();
7493226633Sdim    return BB;
7494226633Sdim  }
7495200581Srdivacky
7496198090Srdivacky  case ARM::tMOVCCr_pseudo: {
7497193323Sed    // To "insert" a SELECT_CC instruction, we actually have to insert the
7498193323Sed    // diamond control-flow pattern.  The incoming instruction knows the
7499193323Sed    // destination vreg to set, the condition code register to branch on, the
7500193323Sed    // true/false values to select between, and a branch opcode to use.
7501193323Sed    const BasicBlock *LLVM_BB = BB->getBasicBlock();
7502193323Sed    MachineFunction::iterator It = BB;
7503193323Sed    ++It;
7504193323Sed
7505193323Sed    //  thisMBB:
7506193323Sed    //  ...
7507193323Sed    //   TrueVal = ...
7508193323Sed    //   cmpTY ccX, r1, r2
7509193323Sed    //   bCC copy1MBB
7510193323Sed    //   fallthrough --> copy0MBB
7511193323Sed    MachineBasicBlock *thisMBB  = BB;
7512193323Sed    MachineFunction *F = BB->getParent();
7513193323Sed    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
7514193323Sed    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
7515193323Sed    F->insert(It, copy0MBB);
7516193323Sed    F->insert(It, sinkMBB);
7517210299Sed
7518210299Sed    // Transfer the remainder of BB and its successor edges to sinkMBB.
7519210299Sed    sinkMBB->splice(sinkMBB->begin(), BB,
7520276479Sdim                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
7521210299Sed    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
7522210299Sed
7523193323Sed    BB->addSuccessor(copy0MBB);
7524193323Sed    BB->addSuccessor(sinkMBB);
7525193323Sed
7526210299Sed    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
7527210299Sed      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
7528210299Sed
7529193323Sed    //  copy0MBB:
7530193323Sed    //   %FalseValue = ...
7531193323Sed    //   # fallthrough to sinkMBB
7532193323Sed    BB = copy0MBB;
7533193323Sed
7534193323Sed    // Update machine-CFG edges
7535193323Sed    BB->addSuccessor(sinkMBB);
7536193323Sed
7537193323Sed    //  sinkMBB:
7538193323Sed    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
7539193323Sed    //  ...
7540193323Sed    BB = sinkMBB;
7541210299Sed    BuildMI(*BB, BB->begin(), dl,
7542210299Sed            TII->get(ARM::PHI), MI->getOperand(0).getReg())
7543193323Sed      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
7544193323Sed      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
7545193323Sed
7546210299Sed    MI->eraseFromParent();   // The pseudo instruction is gone now.
7547193323Sed    return BB;
7548193323Sed  }
7549198090Srdivacky
7550210299Sed  case ARM::BCCi64:
7551210299Sed  case ARM::BCCZi64: {
7552218893Sdim    // If there is an unconditional branch to the other successor, remove it.
7553276479Sdim    BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
7554218893Sdim
7555210299Sed    // Compare both parts that make up the double comparison separately for
7556210299Sed    // equality.
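    // For example (illustrative only), a branch on (seteq i64 %x, 0) becomes:
    //   cmp   xLo, #0
    //   cmpeq xHi, #0   ; the second compare is predicated on the first EQ
    //   beq   destMBB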
7557210299Sed    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
7558210299Sed
7559210299Sed    unsigned LHS1 = MI->getOperand(1).getReg();
7560210299Sed    unsigned LHS2 = MI->getOperand(2).getReg();
7561210299Sed    if (RHSisZero) {
7562210299Sed      AddDefaultPred(BuildMI(BB, dl,
7563210299Sed                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7564210299Sed                     .addReg(LHS1).addImm(0));
7565210299Sed      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7566210299Sed        .addReg(LHS2).addImm(0)
7567210299Sed        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
7568210299Sed    } else {
7569210299Sed      unsigned RHS1 = MI->getOperand(3).getReg();
7570210299Sed      unsigned RHS2 = MI->getOperand(4).getReg();
7571210299Sed      AddDefaultPred(BuildMI(BB, dl,
7572210299Sed                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
7573210299Sed                     .addReg(LHS1).addReg(RHS1));
7574210299Sed      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
7575210299Sed        .addReg(LHS2).addReg(RHS2)
7576210299Sed        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
7577210299Sed    }
7578210299Sed
7579210299Sed    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
7580210299Sed    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
7581210299Sed    if (MI->getOperand(0).getImm() == ARMCC::NE)
7582210299Sed      std::swap(destMBB, exitMBB);
7583210299Sed
7584210299Sed    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
7585210299Sed      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
7586226633Sdim    if (isThumb2)
7587226633Sdim      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
7588226633Sdim    else
7589226633Sdim      BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
7590210299Sed
7591210299Sed    MI->eraseFromParent();   // The pseudo instruction is gone now.
7592210299Sed    return BB;
7593210299Sed  }
7594226633Sdim
7595234353Sdim  case ARM::Int_eh_sjlj_setjmp:
7596234353Sdim  case ARM::Int_eh_sjlj_setjmp_nofp:
7597234353Sdim  case ARM::tInt_eh_sjlj_setjmp:
7598234353Sdim  case ARM::t2Int_eh_sjlj_setjmp:
7599234353Sdim  case ARM::t2Int_eh_sjlj_setjmp_nofp:
7600234353Sdim    EmitSjLjDispatchBlock(MI, BB);
7601234353Sdim    return BB;
7602234353Sdim
7603226633Sdim  case ARM::ABS:
7604226633Sdim  case ARM::t2ABS: {
7605226633Sdim    // To insert an ABS instruction, we have to insert the
7606226633Sdim    // diamond control-flow pattern.  The incoming instruction knows the
7607226633Sdim    // source vreg to test against 0 and the destination vreg to set.
7610226633Sdim    // It transforms
7611226633Sdim    //     V1 = ABS V0
7612226633Sdim    // into
7613226633Sdim    //     V2 = MOVS V0
7614226633Sdim    //     BCC                      (branch to SinkBB if V0 >= 0)
7615226633Sdim    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
7616234353Sdim    //     SinkBB: V1 = PHI(V2, V3)
7617226633Sdim    const BasicBlock *LLVM_BB = BB->getBasicBlock();
7618226633Sdim    MachineFunction::iterator BBI = BB;
7619226633Sdim    ++BBI;
7620226633Sdim    MachineFunction *Fn = BB->getParent();
7621226633Sdim    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
7622226633Sdim    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
7623226633Sdim    Fn->insert(BBI, RSBBB);
7624226633Sdim    Fn->insert(BBI, SinkBB);
7625226633Sdim
7626226633Sdim    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
7627226633Sdim    unsigned int ABSDstReg = MI->getOperand(0).getReg();
7628226633Sdim    bool isThumb2 = Subtarget->isThumb2();
7629226633Sdim    MachineRegisterInfo &MRI = Fn->getRegInfo();
7630226633Sdim    // In Thumb mode, S must not be specified if the source register is SP or
7631226633Sdim    // PC, or if the destination register is SP, so restrict the register class.
7632280031Sdim    unsigned NewRsbDstReg =
7633280031Sdim      MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
7634226633Sdim
7635226633Sdim    // Transfer the remainder of BB and its successor edges to sinkMBB.
7636226633Sdim    SinkBB->splice(SinkBB->begin(), BB,
7637276479Sdim                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
7638226633Sdim    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
7639226633Sdim
7640226633Sdim    BB->addSuccessor(RSBBB);
7641226633Sdim    BB->addSuccessor(SinkBB);
7642226633Sdim
7643226633Sdim    // RSBBB falls through to SinkBB
7644226633Sdim    RSBBB->addSuccessor(SinkBB);
7645226633Sdim
7646239462Sdim    // insert a cmp at the end of BB
7647239462Sdim    AddDefaultPred(BuildMI(BB, dl,
7648239462Sdim                           TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
7649239462Sdim                   .addReg(ABSSrcReg).addImm(0));
7650226633Sdim
7651226633Sdim    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
7652234353Sdim    BuildMI(BB, dl,
7653226633Sdim      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
7654226633Sdim      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
7655226633Sdim
7656226633Sdim    // insert rsbri in RSBBB
7657226633Sdim    // Note: BCC and rsbri will be converted into a predicated rsbmi
7658226633Sdim    // by the if-conversion pass.
7659234353Sdim    BuildMI(*RSBBB, RSBBB->begin(), dl,
7660226633Sdim      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
7661239462Sdim      .addReg(ABSSrcReg, RegState::Kill)
7662226633Sdim      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
7663226633Sdim
7664234353Sdim    // insert PHI in SinkBB,
7665226633Sdim    // reusing ABSDstReg so that uses of the ABS instruction are unchanged
7666226633Sdim    BuildMI(*SinkBB, SinkBB->begin(), dl,
7667226633Sdim      TII->get(ARM::PHI), ABSDstReg)
7668226633Sdim      .addReg(NewRsbDstReg).addMBB(RSBBB)
7669239462Sdim      .addReg(ABSSrcReg).addMBB(BB);
7670226633Sdim
7671226633Sdim    // remove ABS instruction
7672234353Sdim    MI->eraseFromParent();
7673226633Sdim
7674226633Sdim    // return last added BB
7675226633Sdim    return SinkBB;
7676193323Sed  }
7677239462Sdim  case ARM::COPY_STRUCT_BYVAL_I32:
7678239462Sdim    ++NumLoopByVals;
7679239462Sdim    return EmitStructByval(MI, BB);
7680276479Sdim  case ARM::WIN__CHKSTK:
7681276479Sdim    return EmitLowered__chkstk(MI, BB);
7682226633Sdim  }
7683193323Sed}
7684193323Sed
7685226633Sdimvoid ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
7686226633Sdim                                                      SDNode *Node) const {
7687234353Sdim  const MCInstrDesc *MCID = &MI->getDesc();
7688226633Sdim  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
7689226633Sdim  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
7690226633Sdim  // operand is still set to noreg. If needed, set the optional operand's
7691226633Sdim  // register to CPSR, and remove the redundant implicit def.
7692226633Sdim  //
7693234353Sdim  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
7694226633Sdim
7695226633Sdim  // Rename pseudo opcodes.
7696226633Sdim  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
7697226633Sdim  if (NewOpc) {
7698280031Sdim    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
7699280031Sdim        getTargetMachine().getSubtargetImpl()->getInstrInfo());
7700234353Sdim    MCID = &TII->get(NewOpc);
7701234353Sdim
7702234353Sdim    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
7703234353Sdim           "converted opcode should be the same except for cc_out");
7704234353Sdim
7705234353Sdim    MI->setDesc(*MCID);
7706234353Sdim
7707234353Sdim    // Add the optional cc_out operand
7708234353Sdim    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
7709226633Sdim  }
7710234353Sdim  unsigned ccOutIdx = MCID->getNumOperands() - 1;
7711226633Sdim
7712226633Sdim  // Any ARM instruction that sets the 's' bit should specify an optional
7713226633Sdim  // "cc_out" operand in the last operand position.
7714234353Sdim  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
7715226633Sdim    assert(!NewOpc && "Optional cc_out operand required");
7716226633Sdim    return;
7717226633Sdim  }
7718226633Sdim  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
7719226633Sdim  // since we already have an optional CPSR def.
7720226633Sdim  bool definesCPSR = false;
7721226633Sdim  bool deadCPSR = false;
7722234353Sdim  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
7723226633Sdim       i != e; ++i) {
7724226633Sdim    const MachineOperand &MO = MI->getOperand(i);
7725226633Sdim    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
7726226633Sdim      definesCPSR = true;
7727226633Sdim      if (MO.isDead())
7728226633Sdim        deadCPSR = true;
7729226633Sdim      MI->RemoveOperand(i);
7730226633Sdim      break;
7731226633Sdim    }
7732226633Sdim  }
7733226633Sdim  if (!definesCPSR) {
7734226633Sdim    assert(!NewOpc && "Optional cc_out operand required");
7735226633Sdim    return;
7736226633Sdim  }
7737226633Sdim  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
7738226633Sdim  if (deadCPSR) {
7739226633Sdim    assert(!MI->getOperand(ccOutIdx).getReg() &&
7740226633Sdim           "expect uninitialized optional cc_out operand");
7741226633Sdim    return;
7742226633Sdim  }
7743226633Sdim
7744226633Sdim  // If this instruction was defined with an optional CPSR def and its dag node
7745226633Sdim  // had a live implicit CPSR def, then activate the optional CPSR def.
7746226633Sdim  MachineOperand &MO = MI->getOperand(ccOutIdx);
7747226633Sdim  MO.setReg(ARM::CPSR);
7748226633Sdim  MO.setIsDef(true);
7749226633Sdim}
7750226633Sdim
7751193323Sed//===----------------------------------------------------------------------===//
7752193323Sed//                           ARM Optimization Hooks
7753193323Sed//===----------------------------------------------------------------------===//
7754193323Sed
7755239462Sdim// Helper function that checks if N is a null or all ones constant.
7756239462Sdimstatic inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
7757239462Sdim  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
7758239462Sdim  if (!C)
7759239462Sdim    return false;
7760239462Sdim  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
7761239462Sdim}
7762239462Sdim
7763243830Sdim// Return true if N is conditionally 0 or all ones.
7764243830Sdim// Detects these expressions where cc is an i1 value:
7765243830Sdim//
7766243830Sdim//   (select cc, 0, y)   [AllOnes=0]
7767243830Sdim//   (select cc, y, 0)   [AllOnes=0]
7768243830Sdim//   (zext cc)           [AllOnes=0]
7769243830Sdim//   (sext cc)           [AllOnes=0/1]
7770243830Sdim//   (select cc, -1, y)  [AllOnes=1]
7771243830Sdim//   (select cc, y, -1)  [AllOnes=1]
7772243830Sdim//
7773243830Sdim// Invert is set when N is the null/all ones constant when CC is false.
7774243830Sdim// OtherOp is set to the alternative value of N.
7775243830Sdimstatic bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
7776243830Sdim                                       SDValue &CC, bool &Invert,
7777243830Sdim                                       SDValue &OtherOp,
7778243830Sdim                                       SelectionDAG &DAG) {
7779243830Sdim  switch (N->getOpcode()) {
7780243830Sdim  default: return false;
7781243830Sdim  case ISD::SELECT: {
7782243830Sdim    CC = N->getOperand(0);
7783243830Sdim    SDValue N1 = N->getOperand(1);
7784243830Sdim    SDValue N2 = N->getOperand(2);
7785243830Sdim    if (isZeroOrAllOnes(N1, AllOnes)) {
7786243830Sdim      Invert = false;
7787243830Sdim      OtherOp = N2;
7788243830Sdim      return true;
7789243830Sdim    }
7790243830Sdim    if (isZeroOrAllOnes(N2, AllOnes)) {
7791243830Sdim      Invert = true;
7792243830Sdim      OtherOp = N1;
7793243830Sdim      return true;
7794243830Sdim    }
7795243830Sdim    return false;
7796243830Sdim  }
7797243830Sdim  case ISD::ZERO_EXTEND:
7798243830Sdim    // (zext cc) can never be the all ones value.
7799243830Sdim    if (AllOnes)
7800243830Sdim      return false;
7801243830Sdim    // Fall through.
7802243830Sdim  case ISD::SIGN_EXTEND: {
7803243830Sdim    EVT VT = N->getValueType(0);
7804243830Sdim    CC = N->getOperand(0);
7805243830Sdim    if (CC.getValueType() != MVT::i1)
7806243830Sdim      return false;
7807243830Sdim    Invert = !AllOnes;
7808243830Sdim    if (AllOnes)
7809243830Sdim      // When looking for an AllOnes constant, N is an sext, and the 'other'
7810243830Sdim      // value is 0.
7811243830Sdim      OtherOp = DAG.getConstant(0, VT);
7812243830Sdim    else if (N->getOpcode() == ISD::ZERO_EXTEND)
7813243830Sdim      // When looking for a 0 constant, N can be zext or sext.
7814243830Sdim      OtherOp = DAG.getConstant(1, VT);
7815243830Sdim    else
7816243830Sdim      OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
7817243830Sdim    return true;
7818243830Sdim  }
7819243830Sdim  }
7820243830Sdim}
7821243830Sdim
7822239462Sdim// Combine a constant select operand into its use:
7823239462Sdim//
7824243830Sdim//   (add (select cc, 0, c), x)  -> (select cc, x, (add x, c))
7825243830Sdim//   (sub x, (select cc, 0, c))  -> (select cc, x, (sub x, c))
7826243830Sdim//   (and (select cc, -1, c), x) -> (select cc, x, (and x, c))  [AllOnes=1]
7827243830Sdim//   (or  (select cc, 0, c), x)  -> (select cc, x, (or x, c))
7828243830Sdim//   (xor (select cc, 0, c), x)  -> (select cc, x, (xor x, c))
7829239462Sdim//
7830239462Sdim// The transform is rejected if the select doesn't have a constant operand that
7831243830Sdim// is null, or all ones when AllOnes is set.
7832239462Sdim//
7833243830Sdim// Also recognize sext/zext from i1:
7834243830Sdim//
7835243830Sdim//   (add (zext cc), x) -> (select cc, (add x, 1), x)
7836243830Sdim//   (add (sext cc), x) -> (select cc, (add x, -1), x)
7837243830Sdim//
7838243830Sdim// These transformations eventually create predicated instructions.
7839243830Sdim//
7840239462Sdim// @param N       The node to transform.
7841239462Sdim// @param Slct    The N operand that is a select.
7842239462Sdim// @param OtherOp The other N operand (x above).
7843239462Sdim// @param DCI     Context.
7844243830Sdim// @param AllOnes Require the select constant to be all ones instead of null.
7845239462Sdim// @returns The new node, or SDValue() on failure.
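//
// Worked example (illustrative only):
//   (add (select cc, 0, 4), x) -> (select cc, x, (add x, 4))
// so the add is performed only on the path where the select did not yield
// the identity value; this later becomes a single predicated add.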
7846193323Sedstatic
7847193323SedSDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
7848243830Sdim                            TargetLowering::DAGCombinerInfo &DCI,
7849243830Sdim                            bool AllOnes = false) {
7850193323Sed  SelectionDAG &DAG = DCI.DAG;
7851198090Srdivacky  EVT VT = N->getValueType(0);
7852243830Sdim  SDValue NonConstantVal;
7853243830Sdim  SDValue CCOp;
7854243830Sdim  bool SwapSelectOps;
7855243830Sdim  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
7856243830Sdim                                  NonConstantVal, DAG))
7857243830Sdim    return SDValue();
7858193323Sed
7859243830Sdim  // Slct is now know to be the desired identity constant when CC is true.
7860243830Sdim  SDValue TrueVal = OtherOp;
7861261991Sdim  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
7862243830Sdim                                 OtherOp, NonConstantVal);
7863243830Sdim  // Unless SwapSelectOps says CC should be false.
7864243830Sdim  if (SwapSelectOps)
7865243830Sdim    std::swap(TrueVal, FalseVal);
7866193323Sed
7867261991Sdim  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
7868243830Sdim                     CCOp, TrueVal, FalseVal);
7869243830Sdim}
7870193323Sed
7871243830Sdim// Attempt combineSelectAndUse on each operand of a commutative operator N.
7872243830Sdimstatic
7873243830SdimSDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
7874243830Sdim                                       TargetLowering::DAGCombinerInfo &DCI) {
7875243830Sdim  SDValue N0 = N->getOperand(0);
7876243830Sdim  SDValue N1 = N->getOperand(1);
7877243830Sdim  if (N0.getNode()->hasOneUse()) {
7878243830Sdim    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
7879243830Sdim    if (Result.getNode())
7880243830Sdim      return Result;
7881193323Sed  }
7882243830Sdim  if (N1.getNode()->hasOneUse()) {
7883243830Sdim    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
7884243830Sdim    if (Result.getNode())
7885243830Sdim      return Result;
7886243830Sdim  }
7887243830Sdim  return SDValue();
7888193323Sed}
7889193323Sed
7890224145Sdim// AddCombineToVPADDL - For a pair-wise add on NEON, use the vpaddl instruction
7891224145Sdim// (only after legalization).
7892224145Sdimstatic SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
7893224145Sdim                                 TargetLowering::DAGCombinerInfo &DCI,
7894224145Sdim                                 const ARMSubtarget *Subtarget) {
7895224145Sdim
7896224145Sdim  // Only perform optimization if after legalize, and if NEON is available. We
7897224145Sdim  // also expected both operands to be BUILD_VECTORs.
7898224145Sdim  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
7899224145Sdim      || N0.getOpcode() != ISD::BUILD_VECTOR
7900224145Sdim      || N1.getOpcode() != ISD::BUILD_VECTOR)
7901224145Sdim    return SDValue();
7902224145Sdim
7903224145Sdim  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
7904224145Sdim  EVT VT = N->getValueType(0);
7905224145Sdim  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
7906224145Sdim    return SDValue();
7907224145Sdim
7908224145Sdim  // Check that the vector operands are of the right form.
7909224145Sdim  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
7910224145Sdim  // operands, where N is the size of the formed vector.
7911224145Sdim  // Each EXTRACT_VECTOR should have the same input vector and odd or even
7912224145Sdim  // index such that we have a pair wise add pattern.
7913224145Sdim
7914224145Sdim  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
7915224145Sdim  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7916224145Sdim    return SDValue();
7917224145Sdim  SDValue Vec = N0->getOperand(0)->getOperand(0);
7918224145Sdim  SDNode *V = Vec.getNode();
7919224145Sdim  unsigned nextIndex = 0;
7920224145Sdim
7921224145Sdim  // For each operand of the BUILD_VECTORs feeding the ADD, check to see if
7922224145Sdim  // it is an EXTRACT_VECTOR of the same vector with the appropriate
7923224145Sdim  // even/odd index.
7924224145Sdim  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
7925224145Sdim    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
7926224145Sdim        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
7927224145Sdim
7928224145Sdim      SDValue ExtVec0 = N0->getOperand(i);
7929224145Sdim      SDValue ExtVec1 = N1->getOperand(i);
7930224145Sdim
7931224145Sdim      // First operand is the vector, verify it's the same.
7932224145Sdim      if (V != ExtVec0->getOperand(0).getNode() ||
7933224145Sdim          V != ExtVec1->getOperand(0).getNode())
7934224145Sdim        return SDValue();
7935224145Sdim
7936224145Sdim      // Second is the constant, verify it's correct.
7937224145Sdim      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
7938224145Sdim      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
7939224145Sdim
7940224145Sdim      // For the constants, we want to see all the even or all the odd indices.
7941224145Sdim      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
7942224145Sdim          || C1->getZExtValue() != nextIndex+1)
7943224145Sdim        return SDValue();
7944224145Sdim
7945224145Sdim      // Increment index.
7946224145Sdim      nextIndex+=2;
7947224145Sdim    } else
7948224145Sdim      return SDValue();
7949224145Sdim  }
7950224145Sdim
7951224145Sdim  // Create VPADDL node.
7952224145Sdim  SelectionDAG &DAG = DCI.DAG;
7953224145Sdim  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7954224145Sdim
7955224145Sdim  // Build operand list.
7956224145Sdim  SmallVector<SDValue, 8> Ops;
7957224145Sdim  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
7958224145Sdim                                TLI.getPointerTy()));
7959224145Sdim
7960224145Sdim  // Input is the vector.
7961224145Sdim  Ops.push_back(Vec);
7962224145Sdim
7963224145Sdim  // Get widened type and narrowed type.
7964224145Sdim  MVT widenType;
7965224145Sdim  unsigned numElem = VT.getVectorNumElements();
7966276479Sdim
7967276479Sdim  EVT inputLaneType = Vec.getValueType().getVectorElementType();
7968276479Sdim  switch (inputLaneType.getSimpleVT().SimpleTy) {
7969224145Sdim    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
7970224145Sdim    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
7971224145Sdim    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
7972224145Sdim    default:
7973234353Sdim      llvm_unreachable("Invalid vector element type for padd optimization.");
7974224145Sdim  }
7975224145Sdim
7976276479Sdim  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
7977276479Sdim  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
7978276479Sdim  return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
7979224145Sdim}
7980224145Sdim
7981243830Sdimstatic SDValue findMUL_LOHI(SDValue V) {
7982243830Sdim  if (V->getOpcode() == ISD::UMUL_LOHI ||
7983243830Sdim      V->getOpcode() == ISD::SMUL_LOHI)
7984243830Sdim    return V;
7985243830Sdim  return SDValue();
7986243830Sdim}
7987243830Sdim
7988243830Sdimstatic SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
7989243830Sdim                                     TargetLowering::DAGCombinerInfo &DCI,
7990243830Sdim                                     const ARMSubtarget *Subtarget) {
7991243830Sdim
7992243830Sdim  if (Subtarget->isThumb1Only()) return SDValue();
7993243830Sdim
7994243830Sdim  // Only perform the checks after legalize when the pattern is available.
7995243830Sdim  if (DCI.isBeforeLegalize()) return SDValue();
7996243830Sdim
7997243830Sdim  // Look for multiply add opportunities.
7998243830Sdim  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
7999243830Sdim  // each add node consumes a value from ISD::UMUL_LOHI and there is
8000243830Sdim  // a glue link from the first add to the second add.
8001243830Sdim  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
8002243830Sdim  // a S/UMLAL instruction.
8003243830Sdim  //          loAdd   UMUL_LOHI
8004243830Sdim  //            \    / :lo    \ :hi
8005243830Sdim  //             \  /          \          [no multiline comment]
8006243830Sdim  //              ADDC         |  hiAdd
8007243830Sdim  //                 \ :glue  /  /
8008243830Sdim  //                  \      /  /
8009243830Sdim  //                    ADDE
8010243830Sdim  //
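  // For example (illustrative only), this matches the expansion of
  //   i64 res = (i64)a * (i64)b + acc
  // and folds it into a single "umlal lo, hi, a, b" (or smlal for the signed
  // variant) instead of a separate multiply plus two chained adds.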
8011243830Sdim  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
8012243830Sdim  SDValue AddcOp0 = AddcNode->getOperand(0);
8013243830Sdim  SDValue AddcOp1 = AddcNode->getOperand(1);
8014243830Sdim
8015243830Sdim  // Check if the two operands are from the same mul_lohi node.
8016243830Sdim  if (AddcOp0.getNode() == AddcOp1.getNode())
8017243830Sdim    return SDValue();
8018243830Sdim
8019243830Sdim  assert(AddcNode->getNumValues() == 2 &&
8020243830Sdim         AddcNode->getValueType(0) == MVT::i32 &&
8021261991Sdim         "Expect ADDC with two result values. First: i32");
8022243830Sdim
8023261991Sdim  // Check that we have a glued ADDC node.
8024261991Sdim  if (AddcNode->getValueType(1) != MVT::Glue)
8025261991Sdim    return SDValue();
8026261991Sdim
8027243830Sdim  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
8028243830Sdim  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
8029243830Sdim      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
8030243830Sdim      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
8031243830Sdim      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
8032243830Sdim    return SDValue();
8033243830Sdim
8034243830Sdim  // Look for the glued ADDE.
8035243830Sdim  SDNode* AddeNode = AddcNode->getGluedUser();
8036276479Sdim  if (!AddeNode)
8037243830Sdim    return SDValue();
8038243830Sdim
8039243830Sdim  // Make sure it is really an ADDE.
8040243830Sdim  if (AddeNode->getOpcode() != ISD::ADDE)
8041243830Sdim    return SDValue();
8042243830Sdim
8043243830Sdim  assert(AddeNode->getNumOperands() == 3 &&
8044243830Sdim         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
8045243830Sdim         "ADDE node has the wrong inputs");
8046243830Sdim
8047243830Sdim  // Check for the triangle shape.
8048243830Sdim  SDValue AddeOp0 = AddeNode->getOperand(0);
8049243830Sdim  SDValue AddeOp1 = AddeNode->getOperand(1);
8050243830Sdim
8051243830Sdim  // Make sure that the ADDE operands are not coming from the same node.
8052243830Sdim  if (AddeOp0.getNode() == AddeOp1.getNode())
8053243830Sdim    return SDValue();
8054243830Sdim
8055243830Sdim  // Find the MUL_LOHI node walking up ADDE's operands.
8056243830Sdim  bool IsLeftOperandMUL = false;
8057243830Sdim  SDValue MULOp = findMUL_LOHI(AddeOp0);
8058243830Sdim  if (MULOp == SDValue())
8059243830Sdim   MULOp = findMUL_LOHI(AddeOp1);
8060243830Sdim  else
8061243830Sdim    IsLeftOperandMUL = true;
8062243830Sdim  if (MULOp == SDValue())
8063243830Sdim     return SDValue();
8064243830Sdim
8065243830Sdim  // Figure out the right opcode.
8066243830Sdim  unsigned Opc = MULOp->getOpcode();
8067243830Sdim  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
8068243830Sdim
8069243830Sdim  // Figure out the high and low input values to the MLAL node.
8070243830Sdim  SDValue* HiMul = &MULOp;
8071276479Sdim  SDValue* HiAdd = nullptr;
8072276479Sdim  SDValue* LoMul = nullptr;
8073276479Sdim  SDValue* LowAdd = nullptr;
8074243830Sdim
8075243830Sdim  if (IsLeftOperandMUL)
8076243830Sdim    HiAdd = &AddeOp1;
8077243830Sdim  else
8078243830Sdim    HiAdd = &AddeOp0;
8079243830Sdim
8081243830Sdim  if (AddcOp0->getOpcode() == Opc) {
8082243830Sdim    LoMul = &AddcOp0;
8083243830Sdim    LowAdd = &AddcOp1;
8084243830Sdim  }
8085243830Sdim  if (AddcOp1->getOpcode() == Opc) {
8086243830Sdim    LoMul = &AddcOp1;
8087243830Sdim    LowAdd = &AddcOp0;
8088243830Sdim  }
8089243830Sdim
8090276479Sdim  if (!LoMul)
8091243830Sdim    return SDValue();
8092243830Sdim
8093243830Sdim  if (LoMul->getNode() != HiMul->getNode())
8094243830Sdim    return SDValue();
8095243830Sdim
8096243830Sdim  // Create the merged node.
8097243830Sdim  SelectionDAG &DAG = DCI.DAG;
8098243830Sdim
8099243830Sdim  // Build operand list.
8100243830Sdim  SmallVector<SDValue, 8> Ops;
8101243830Sdim  Ops.push_back(LoMul->getOperand(0));
8102243830Sdim  Ops.push_back(LoMul->getOperand(1));
8103243830Sdim  Ops.push_back(*LowAdd);
8104243830Sdim  Ops.push_back(*HiAdd);
8105243830Sdim
8106261991Sdim  SDValue MLALNode =  DAG.getNode(FinalOpc, SDLoc(AddcNode),
8107276479Sdim                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);
8108243830Sdim
8109243830Sdim  // Replace the ADD nodes' uses with the MLAL node's values.
8110243830Sdim  SDValue HiMLALResult(MLALNode.getNode(), 1);
8111243830Sdim  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
8112243830Sdim
8113243830Sdim  SDValue LoMLALResult(MLALNode.getNode(), 0);
8114243830Sdim  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
8115243830Sdim
8116243830Sdim  // Return original node to notify the driver to stop replacing.
8117243830Sdim  SDValue resNode(AddcNode, 0);
8118243830Sdim  return resNode;
8119243830Sdim}
8120243830Sdim
8121243830Sdim/// PerformADDCCombine - Target-specific dag combine transform from
8122243830Sdim/// ISD::ADDC, ISD::ADDE, and ISD::U/SMUL_LOHI to MLAL.
8123243830Sdimstatic SDValue PerformADDCCombine(SDNode *N,
8124243830Sdim                                 TargetLowering::DAGCombinerInfo &DCI,
8125243830Sdim                                 const ARMSubtarget *Subtarget) {
8126243830Sdim
8128243830Sdim  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
8130243830Sdim
8131212904Sdim/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
8132212904Sdim/// operands N0 and N1.  This is a helper for PerformADDCombine that is
8133212904Sdim/// called with the default operands, and if that fails, with commuted
8134212904Sdim/// operands.
8135212904Sdimstatic SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
8136224145Sdim                                          TargetLowering::DAGCombinerInfo &DCI,
8137224145Sdim                                          const ARMSubtarget *Subtarget) {
8138224145Sdim
8139224145Sdim  // Attempt to create vpaddl for this add.
8140224145Sdim  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
8141224145Sdim  if (Result.getNode())
8142224145Sdim    return Result;
8143224145Sdim
8144193323Sed  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
8145243830Sdim  if (N0.getNode()->hasOneUse()) {
8146193323Sed    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
8147193323Sed    if (Result.getNode()) return Result;
8148193323Sed  }
8149193323Sed  return SDValue();
8150193323Sed}
8151193323Sed
8152212904Sdim/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
8153212904Sdim///
8154212904Sdimstatic SDValue PerformADDCombine(SDNode *N,
8155224145Sdim                                 TargetLowering::DAGCombinerInfo &DCI,
8156224145Sdim                                 const ARMSubtarget *Subtarget) {
8157212904Sdim  SDValue N0 = N->getOperand(0);
8158212904Sdim  SDValue N1 = N->getOperand(1);
8159212904Sdim
8160212904Sdim  // First try with the default operand order.
8161224145Sdim  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
8162212904Sdim  if (Result.getNode())
8163212904Sdim    return Result;
8164212904Sdim
8165212904Sdim  // If that didn't work, try again with the operands commuted.
8166224145Sdim  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
8167212904Sdim}
8168212904Sdim
8169193323Sed/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
8170212904Sdim///
8171193323Sedstatic SDValue PerformSUBCombine(SDNode *N,
8172193323Sed                                 TargetLowering::DAGCombinerInfo &DCI) {
8173212904Sdim  SDValue N0 = N->getOperand(0);
8174212904Sdim  SDValue N1 = N->getOperand(1);
8175193323Sed
8176193323Sed  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
8177243830Sdim  if (N1.getNode()->hasOneUse()) {
8178193323Sed    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
8179193323Sed    if (Result.getNode()) return Result;
8180193323Sed  }
8181193323Sed
8182193323Sed  return SDValue();
8183193323Sed}
8184193323Sed
8185221345Sdim/// PerformVMULCombine
8186221345Sdim/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
8187221345Sdim/// special multiplier accumulator forwarding.
8188221345Sdim///   vmul d3, d0, d2
8189221345Sdim///   vmla d3, d1, d2
8190221345Sdim/// is faster than
8191221345Sdim///   vadd d3, d0, d1
8192221345Sdim///   vmul d3, d3, d2
8193261991Sdim//  However, for (A + B) * (A + B),
8194261991Sdim//    vadd d2, d0, d1
8195261991Sdim//    vmul d3, d0, d2
8196261991Sdim//    vmla d3, d1, d2
8197261991Sdim//  is slower than
8198261991Sdim//    vadd d2, d0, d1
8199261991Sdim//    vmul d3, d2, d2
8200221345Sdimstatic SDValue PerformVMULCombine(SDNode *N,
8201221345Sdim                                  TargetLowering::DAGCombinerInfo &DCI,
8202221345Sdim                                  const ARMSubtarget *Subtarget) {
8203221345Sdim  if (!Subtarget->hasVMLxForwarding())
8204221345Sdim    return SDValue();
8205221345Sdim
8206221345Sdim  SelectionDAG &DAG = DCI.DAG;
8207221345Sdim  SDValue N0 = N->getOperand(0);
8208221345Sdim  SDValue N1 = N->getOperand(1);
8209221345Sdim  unsigned Opcode = N0.getOpcode();
8210221345Sdim  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
8211221345Sdim      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
8212224145Sdim    Opcode = N1.getOpcode();
8213221345Sdim    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
8214221345Sdim        Opcode != ISD::FADD && Opcode != ISD::FSUB)
8215221345Sdim      return SDValue();
8216221345Sdim    std::swap(N0, N1);
8217221345Sdim  }
8218221345Sdim
8219261991Sdim  if (N0 == N1)
8220261991Sdim    return SDValue();
8221261991Sdim
8222221345Sdim  EVT VT = N->getValueType(0);
8223261991Sdim  SDLoc DL(N);
8224221345Sdim  SDValue N00 = N0->getOperand(0);
8225221345Sdim  SDValue N01 = N0->getOperand(1);
8226221345Sdim  return DAG.getNode(Opcode, DL, VT,
8227221345Sdim                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
8228221345Sdim                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
8229221345Sdim}
8230221345Sdim
8231208599Srdivackystatic SDValue PerformMULCombine(SDNode *N,
8232208599Srdivacky                                 TargetLowering::DAGCombinerInfo &DCI,
8233208599Srdivacky                                 const ARMSubtarget *Subtarget) {
8234208599Srdivacky  SelectionDAG &DAG = DCI.DAG;
8235208599Srdivacky
8236208599Srdivacky  if (Subtarget->isThumb1Only())
8237208599Srdivacky    return SDValue();
8238208599Srdivacky
8239208599Srdivacky  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
8240208599Srdivacky    return SDValue();
8241208599Srdivacky
8242208599Srdivacky  EVT VT = N->getValueType(0);
8243221345Sdim  if (VT.is64BitVector() || VT.is128BitVector())
8244221345Sdim    return PerformVMULCombine(N, DCI, Subtarget);
8245208599Srdivacky  if (VT != MVT::i32)
8246208599Srdivacky    return SDValue();
8247208599Srdivacky
8248208599Srdivacky  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
8249208599Srdivacky  if (!C)
8250208599Srdivacky    return SDValue();
8251208599Srdivacky
8252234353Sdim  int64_t MulAmt = C->getSExtValue();
8253261991Sdim  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
8254234353Sdim
8255208599Srdivacky  ShiftAmt = ShiftAmt & (32 - 1);
8256208599Srdivacky  SDValue V = N->getOperand(0);
8257261991Sdim  SDLoc DL(N);
8258208599Srdivacky
8259208599Srdivacky  SDValue Res;
8260208599Srdivacky  MulAmt >>= ShiftAmt;
8261208599Srdivacky
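  // Worked example (illustrative only): MulAmt = 36 has two trailing zero
  // bits, so ShiftAmt = 2 and MulAmt becomes 9 = 2^3 + 1.  The code below
  // then forms (shl (add (shl x, 3), x), 2) instead of a mul.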
8262234353Sdim  if (MulAmt >= 0) {
8263234353Sdim    if (isPowerOf2_32(MulAmt - 1)) {
8264234353Sdim      // (mul x, 2^N + 1) => (add (shl x, N), x)
8265234353Sdim      Res = DAG.getNode(ISD::ADD, DL, VT,
8266234353Sdim                        V,
8267234353Sdim                        DAG.getNode(ISD::SHL, DL, VT,
8268234353Sdim                                    V,
8269234353Sdim                                    DAG.getConstant(Log2_32(MulAmt - 1),
8270234353Sdim                                                    MVT::i32)));
8271234353Sdim    } else if (isPowerOf2_32(MulAmt + 1)) {
8272234353Sdim      // (mul x, 2^N - 1) => (sub (shl x, N), x)
8273234353Sdim      Res = DAG.getNode(ISD::SUB, DL, VT,
8274234353Sdim                        DAG.getNode(ISD::SHL, DL, VT,
8275234353Sdim                                    V,
8276234353Sdim                                    DAG.getConstant(Log2_32(MulAmt + 1),
8277234353Sdim                                                    MVT::i32)),
8278234353Sdim                        V);
8279234353Sdim    } else
8280234353Sdim      return SDValue();
8281234353Sdim  } else {
8282234353Sdim    uint64_t MulAmtAbs = -MulAmt;
8283234353Sdim    if (isPowerOf2_32(MulAmtAbs + 1)) {
8284234353Sdim      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
8285234353Sdim      Res = DAG.getNode(ISD::SUB, DL, VT,
8286234353Sdim                        V,
8287234353Sdim                        DAG.getNode(ISD::SHL, DL, VT,
8288234353Sdim                                    V,
8289234353Sdim                                    DAG.getConstant(Log2_32(MulAmtAbs + 1),
8290234353Sdim                                                    MVT::i32)));
8291234353Sdim    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
8292234353Sdim      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
8293234353Sdim      Res = DAG.getNode(ISD::ADD, DL, VT,
8294234353Sdim                        V,
8295234353Sdim                        DAG.getNode(ISD::SHL, DL, VT,
8296234353Sdim                                    V,
8297234353Sdim                                    DAG.getConstant(Log2_32(MulAmtAbs-1),
8298234353Sdim                                                    MVT::i32)));
8299234353Sdim      Res = DAG.getNode(ISD::SUB, DL, VT,
8300234353Sdim                        DAG.getConstant(0, MVT::i32),Res);
8301234353Sdim
8302234353Sdim    } else
8303234353Sdim      return SDValue();
8304234353Sdim  }
8305234353Sdim
8306208599Srdivacky  if (ShiftAmt != 0)
8307234353Sdim    Res = DAG.getNode(ISD::SHL, DL, VT,
8308234353Sdim                      Res, DAG.getConstant(ShiftAmt, MVT::i32));
8309208599Srdivacky
8310208599Srdivacky  // Do not add new nodes to DAG combiner worklist.
8311208599Srdivacky  DCI.CombineTo(N, Res, false);
8312208599Srdivacky  return SDValue();
8313208599Srdivacky}
8314208599Srdivacky
8315218893Sdimstatic SDValue PerformANDCombine(SDNode *N,
8316234353Sdim                                 TargetLowering::DAGCombinerInfo &DCI,
8317234353Sdim                                 const ARMSubtarget *Subtarget) {
8318221345Sdim
8319218893Sdim  // Attempt to use immediate-form VBIC
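  // For example (illustrative only), a v2i32 AND with a splat of 0xffffff00
  // can be selected as "vbic.i32 dN, #0xff", since VBIC clears the bits of
  // its immediate: x & 0xffffff00 == x & ~0x000000ff.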
8320218893Sdim  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
8321261991Sdim  SDLoc dl(N);
8322218893Sdim  EVT VT = N->getValueType(0);
8323218893Sdim  SelectionDAG &DAG = DCI.DAG;
8324218893Sdim
8325221345Sdim  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8326221345Sdim    return SDValue();
8327221345Sdim
8328218893Sdim  APInt SplatBits, SplatUndef;
8329218893Sdim  unsigned SplatBitSize;
8330218893Sdim  bool HasAnyUndefs;
8331218893Sdim  if (BVN &&
8332218893Sdim      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
8333218893Sdim    if (SplatBitSize <= 64) {
8334218893Sdim      EVT VbicVT;
8335218893Sdim      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
8336218893Sdim                                      SplatUndef.getZExtValue(), SplatBitSize,
8337218893Sdim                                      DAG, VbicVT, VT.is128BitVector(),
8338218893Sdim                                      OtherModImm);
8339218893Sdim      if (Val.getNode()) {
8340218893Sdim        SDValue Input =
8341218893Sdim          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
8342218893Sdim        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
8343218893Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
8344218893Sdim      }
8345218893Sdim    }
8346218893Sdim  }
8347218893Sdim
8348234353Sdim  if (!Subtarget->isThumb1Only()) {
8349243830Sdim    // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
8350243830Sdim    SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
8351243830Sdim    if (Result.getNode())
8352243830Sdim      return Result;
8353234353Sdim  }
8354234353Sdim
8355218893Sdim  return SDValue();
8356218893Sdim}
8357218893Sdim
8358212904Sdim/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
8359212904Sdimstatic SDValue PerformORCombine(SDNode *N,
8360212904Sdim                                TargetLowering::DAGCombinerInfo &DCI,
8361212904Sdim                                const ARMSubtarget *Subtarget) {
8362218893Sdim  // Attempt to use immediate-form VORR
8363218893Sdim  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
8364261991Sdim  SDLoc dl(N);
8365218893Sdim  EVT VT = N->getValueType(0);
8366218893Sdim  SelectionDAG &DAG = DCI.DAG;
8367218893Sdim
8368221345Sdim  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8369221345Sdim    return SDValue();
8370221345Sdim
8371218893Sdim  APInt SplatBits, SplatUndef;
8372218893Sdim  unsigned SplatBitSize;
8373218893Sdim  bool HasAnyUndefs;
8374218893Sdim  if (BVN && Subtarget->hasNEON() &&
8375218893Sdim      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
8376218893Sdim    if (SplatBitSize <= 64) {
8377218893Sdim      EVT VorrVT;
8378218893Sdim      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
8379218893Sdim                                      SplatUndef.getZExtValue(), SplatBitSize,
8380218893Sdim                                      DAG, VorrVT, VT.is128BitVector(),
8381218893Sdim                                      OtherModImm);
8382218893Sdim      if (Val.getNode()) {
8383218893Sdim        SDValue Input =
8384218893Sdim          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
8385218893Sdim        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
8386218893Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
8387218893Sdim      }
8388218893Sdim    }
8389218893Sdim  }
8390218893Sdim
8391234353Sdim  if (!Subtarget->isThumb1Only()) {
8392243830Sdim    // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
8393243830Sdim    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
8394243830Sdim    if (Result.getNode())
8395243830Sdim      return Result;
8396234353Sdim  }
8397234353Sdim
8398239462Sdim  // The code below optimizes (or (and X, Y), Z).
8399239462Sdim  // The AND operand needs to have a single user to make these optimizations
8400239462Sdim  // profitable.
8401221345Sdim  SDValue N0 = N->getOperand(0);
8402239462Sdim  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
8403221345Sdim    return SDValue();
8404221345Sdim  SDValue N1 = N->getOperand(1);
8405221345Sdim
8406221345Sdim  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
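  // E.g. (illustrative): with A = splat 0x0000ffff, this computes
  // (A & B) | (~A & C) per lane -- the low halfword from B and the high
  // halfword from C -- which is exactly one VBSL with A as the select mask.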
8407221345Sdim  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
8408221345Sdim      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
8409221345Sdim    APInt SplatUndef;
8410221345Sdim    unsigned SplatBitSize;
8411221345Sdim    bool HasAnyUndefs;
8412221345Sdim
8413261991Sdim    APInt SplatBits0, SplatBits1;
8414221345Sdim    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
8415261991Sdim    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
8416261991Sdim    // Ensure that the second operands of both ANDs are constants.
8417221345Sdim    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
8418261991Sdim                                      HasAnyUndefs) && !HasAnyUndefs &&
8419261991Sdim        BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
8420261991Sdim                                      HasAnyUndefs) && !HasAnyUndefs) {
8421261991Sdim      // Ensure that the bit widths of the constants are the same and that
8422261991Sdim      // the splat arguments are logical inverses as per the pattern we
8423261991Sdim      // are trying to simplify.
8424261991Sdim      if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
8425261991Sdim          SplatBits0 == ~SplatBits1) {
8426261991Sdim        // Canonicalize the vector type to make instruction selection simpler.
8427261991Sdim        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
8428261991Sdim        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
8429261991Sdim                                     N0->getOperand(1), N0->getOperand(0),
8430261991Sdim                                     N1->getOperand(0));
8431261991Sdim        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
8432261991Sdim      }
8433261991Sdim    }
8437221345Sdim  }
8438221345Sdim
8439212904Sdim  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
8440212904Sdim  // reasonable.
8441212904Sdim
8442212904Sdim  // BFI is only available on V6T2+
8443212904Sdim  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
8444212904Sdim    return SDValue();
8445212904Sdim
8446261991Sdim  SDLoc DL(N);
8447212904Sdim  // 1) or (and A, mask), val => ARMbfi A, val, mask
8448212904Sdim  //      iff (val & mask) == val
8449212904Sdim  //
8450212904Sdim  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
8451212904Sdim  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
8452221345Sdim  //          && mask == ~mask2
8453212904Sdim  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
8454221345Sdim  //          && ~mask == mask2
8455212904Sdim  //  (i.e., copy a bitfield value into another bitfield of the same width)
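  //
  // Worked instance of case (1), values assumed for exposition: with
  // mask = 0xffff00ff and val = 0x5600, (val & ~mask) == val holds, so
  // "or (and A, 0xffff00ff), 0x5600" becomes ARMbfi A, 0x56, 0xffff00ff
  // after val is shifted right by countTrailingZeros(~mask) == 8, and
  // selects as "bfi rA, rV, #8, #8".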
8456212904Sdim
8457212904Sdim  if (VT != MVT::i32)
8458212904Sdim    return SDValue();
8459212904Sdim
8460218893Sdim  SDValue N00 = N0.getOperand(0);
8461212904Sdim
8462212904Sdim  // The value and the mask need to be constants so we can verify this is
8463212904Sdim  // actually a bitfield set. If the mask is 0xffff, we can do better
8464212904Sdim  // via a movt instruction, so don't use BFI in that case.
8465218893Sdim  SDValue MaskOp = N0.getOperand(1);
8466218893Sdim  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
8467218893Sdim  if (!MaskC)
8468212904Sdim    return SDValue();
8469218893Sdim  unsigned Mask = MaskC->getZExtValue();
8470212904Sdim  if (Mask == 0xffff)
8471212904Sdim    return SDValue();
8472212904Sdim  SDValue Res;
8473212904Sdim  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
8474218893Sdim  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8475218893Sdim  if (N1C) {
8476218893Sdim    unsigned Val = N1C->getZExtValue();
8477218893Sdim    if ((Val & ~Mask) != Val)
8478212904Sdim      return SDValue();
8479212904Sdim
8480218893Sdim    if (ARM::isBitFieldInvertedMask(Mask)) {
8481261991Sdim      Val >>= countTrailingZeros(~Mask);
8482212904Sdim
8483218893Sdim      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
8484218893Sdim                        DAG.getConstant(Val, MVT::i32),
8485218893Sdim                        DAG.getConstant(Mask, MVT::i32));
8486218893Sdim
8487218893Sdim      // Do not add new nodes to DAG combiner worklist.
8488218893Sdim      DCI.CombineTo(N, Res, false);
8489218893Sdim      return SDValue();
8490218893Sdim    }
8491212904Sdim  } else if (N1.getOpcode() == ISD::AND) {
8492212904Sdim    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
8493218893Sdim    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8494218893Sdim    if (!N11C)
8495212904Sdim      return SDValue();
8496218893Sdim    unsigned Mask2 = N11C->getZExtValue();
8497212904Sdim
8498221345Sdim    // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
8499221345Sdim    // pattern to match as-is.
8500212904Sdim    if (ARM::isBitFieldInvertedMask(Mask) &&
8501221345Sdim        (Mask == ~Mask2)) {
8502212904Sdim      // The pack halfword instruction works better for masks that fit it,
8503212904Sdim      // so use that when it's available.
8504212904Sdim      if (Subtarget->hasT2ExtractPack() &&
8505212904Sdim          (Mask == 0xffff || Mask == 0xffff0000))
8506212904Sdim        return SDValue();
8507212904Sdim      // 2a
8508261991Sdim      unsigned amt = countTrailingZeros(Mask2);
8509212904Sdim      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
8510221345Sdim                        DAG.getConstant(amt, MVT::i32));
8511218893Sdim      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
8512212904Sdim                        DAG.getConstant(Mask, MVT::i32));
8513212904Sdim      // Do not add new nodes to DAG combiner worklist.
8514212904Sdim      DCI.CombineTo(N, Res, false);
8515218893Sdim      return SDValue();
8516212904Sdim    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
8517221345Sdim               (~Mask == Mask2)) {
8518212904Sdim      // The pack halfword instruction works better for masks that fit it,
8519212904Sdim      // so use that when it's available.
8520212904Sdim      if (Subtarget->hasT2ExtractPack() &&
8521212904Sdim          (Mask2 == 0xffff || Mask2 == 0xffff0000))
8522212904Sdim        return SDValue();
8523212904Sdim      // 2b
8524261991Sdim      unsigned lsb = countTrailingZeros(Mask);
8525218893Sdim      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
8526212904Sdim                        DAG.getConstant(lsb, MVT::i32));
8527212904Sdim      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
8528221345Sdim                        DAG.getConstant(Mask2, MVT::i32));
8529212904Sdim      // Do not add new nodes to DAG combiner worklist.
8530212904Sdim      DCI.CombineTo(N, Res, false);
8531218893Sdim      return SDValue();
8532212904Sdim    }
8533212904Sdim  }
8534212904Sdim
8535218893Sdim  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
8536218893Sdim      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
8537218893Sdim      ARM::isBitFieldInvertedMask(~Mask)) {
8538218893Sdim    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
8539218893Sdim    // where lsb(mask) == #shamt and masked bits of B are known zero.
8540218893Sdim    SDValue ShAmt = N00.getOperand(1);
8541218893Sdim    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8542261991Sdim    unsigned LSB = countTrailingZeros(Mask);
8543218893Sdim    if (ShAmtC != LSB)
8544218893Sdim      return SDValue();
8545218893Sdim
8546218893Sdim    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
8547218893Sdim                      DAG.getConstant(~Mask, MVT::i32));
8548218893Sdim
8549218893Sdim    // Do not add new nodes to DAG combiner worklist.
8550218893Sdim    DCI.CombineTo(N, Res, false);
8551218893Sdim  }
8552218893Sdim
8553212904Sdim  return SDValue();
8554212904Sdim}
8555212904Sdim
8556234353Sdimstatic SDValue PerformXORCombine(SDNode *N,
8557234353Sdim                                 TargetLowering::DAGCombinerInfo &DCI,
8558234353Sdim                                 const ARMSubtarget *Subtarget) {
8559234353Sdim  EVT VT = N->getValueType(0);
8560234353Sdim  SelectionDAG &DAG = DCI.DAG;
8561234353Sdim
8562234353Sdim  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8563234353Sdim    return SDValue();
8564234353Sdim
8565234353Sdim  if (!Subtarget->isThumb1Only()) {
8566243830Sdim    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
8567243830Sdim    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
8568243830Sdim    if (Result.getNode())
8569243830Sdim      return Result;
8570234353Sdim  }
8571234353Sdim
8572234353Sdim  return SDValue();
8573234353Sdim}
8574234353Sdim
8575224145Sdim/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
8576224145Sdim/// the bits being cleared by the AND are not demanded by the BFI.
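/// For instance (illustrative): with InvMask = 0xffffff00 the BFI writes
/// only bits [7:0] (LSB == 0, Width == 8, Mask == 0xff); an inner AND with
/// Mask2 == 0xff then clears no demanded bits, (Mask & ~Mask2) == 0, so the
/// AND can be dropped.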
8577218893Sdimstatic SDValue PerformBFICombine(SDNode *N,
8578218893Sdim                                 TargetLowering::DAGCombinerInfo &DCI) {
8579218893Sdim  SDValue N1 = N->getOperand(1);
8580218893Sdim  if (N1.getOpcode() == ISD::AND) {
8581218893Sdim    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8582218893Sdim    if (!N11C)
8583218893Sdim      return SDValue();
8584224145Sdim    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
8585261991Sdim    unsigned LSB = countTrailingZeros(~InvMask);
8586261991Sdim    unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
8587280031Sdim    assert(Width <
8588280031Sdim               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
8589280031Sdim           "shift by the full bit width or more is undefined behavior");
8590280031Sdim    unsigned Mask = (1u << Width) - 1;
8591218893Sdim    unsigned Mask2 = N11C->getZExtValue();
8592224145Sdim    if ((Mask & (~Mask2)) == 0)
8593261991Sdim      return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
8594218893Sdim                             N->getOperand(0), N1.getOperand(0),
8595218893Sdim                             N->getOperand(2));
8596218893Sdim  }
8597218893Sdim  return SDValue();
8598218893Sdim}
8599218893Sdim
8600202878Srdivacky/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
8601202878Srdivacky/// ARMISD::VMOVRRD.
8602199481Srdivackystatic SDValue PerformVMOVRRDCombine(SDNode *N,
8603280031Sdim                                     TargetLowering::DAGCombinerInfo &DCI,
8604280031Sdim                                     const ARMSubtarget *Subtarget) {
8605218893Sdim  // vmovrrd(vmovdrr x, y) -> x,y
8606193323Sed  SDValue InDouble = N->getOperand(0);
8607280031Sdim  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
8608193323Sed    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
8609221345Sdim
8610221345Sdim  // vmovrrd(load f64) -> (load i32), (load i32)
8611221345Sdim  SDNode *InNode = InDouble.getNode();
8612221345Sdim  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
8613221345Sdim      InNode->getValueType(0) == MVT::f64 &&
8614221345Sdim      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
8615221345Sdim      !cast<LoadSDNode>(InNode)->isVolatile()) {
8616221345Sdim    // TODO: Should this be done for non-FrameIndex operands?
8617221345Sdim    LoadSDNode *LD = cast<LoadSDNode>(InNode);
8618221345Sdim
8619221345Sdim    SelectionDAG &DAG = DCI.DAG;
8620261991Sdim    SDLoc DL(LD);
8621221345Sdim    SDValue BasePtr = LD->getBasePtr();
8622221345Sdim    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
8623221345Sdim                                 LD->getPointerInfo(), LD->isVolatile(),
8624234353Sdim                                 LD->isNonTemporal(), LD->isInvariant(),
8625234353Sdim                                 LD->getAlignment());
8626221345Sdim
8627221345Sdim    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
8628221345Sdim                                    DAG.getConstant(4, MVT::i32));
8629221345Sdim    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
8630221345Sdim                                 LD->getPointerInfo(), LD->isVolatile(),
8631234353Sdim                                 LD->isNonTemporal(), LD->isInvariant(),
8632221345Sdim                                 std::min(4U, LD->getAlignment() / 2));
8633221345Sdim
8634221345Sdim    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
8635276479Sdim    if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
8636276479Sdim      std::swap(NewLD1, NewLD2);
8637221345Sdim    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
8638221345Sdim    return Result;
8639221345Sdim  }
8640221345Sdim
8641193323Sed  return SDValue();
8642193323Sed}
8643193323Sed
8644218893Sdim/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
8645218893Sdim/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
8646218893Sdimstatic SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
8647218893Sdim  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
8648218893Sdim  SDValue Op0 = N->getOperand(0);
8649218893Sdim  SDValue Op1 = N->getOperand(1);
8650218893Sdim  if (Op0.getOpcode() == ISD::BITCAST)
8651218893Sdim    Op0 = Op0.getOperand(0);
8652218893Sdim  if (Op1.getOpcode() == ISD::BITCAST)
8653218893Sdim    Op1 = Op1.getOperand(0);
8654218893Sdim  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
8655218893Sdim      Op0.getNode() == Op1.getNode() &&
8656218893Sdim      Op0.getResNo() == 0 && Op1.getResNo() == 1)
8657261991Sdim    return DAG.getNode(ISD::BITCAST, SDLoc(N),
8658218893Sdim                       N->getValueType(0), Op0.getOperand(0));
8659218893Sdim  return SDValue();
8660218893Sdim}
8661218893Sdim
8662218893Sdim/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
8663218893Sdim/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
8664218893Sdim/// i64 vector to have f64 elements, since the value can then be loaded
8665218893Sdim/// directly into a VFP register.
8666218893Sdimstatic bool hasNormalLoadOperand(SDNode *N) {
8667218893Sdim  unsigned NumElts = N->getValueType(0).getVectorNumElements();
8668218893Sdim  for (unsigned i = 0; i < NumElts; ++i) {
8669218893Sdim    SDNode *Elt = N->getOperand(i).getNode();
8670218893Sdim    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
8671218893Sdim      return true;
8672218893Sdim  }
8673218893Sdim  return false;
8674218893Sdim}
8675218893Sdim
8676218893Sdim/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
8677218893Sdim/// ISD::BUILD_VECTOR.
8678218893Sdimstatic SDValue PerformBUILD_VECTORCombine(SDNode *N,
8679280031Sdim                                          TargetLowering::DAGCombinerInfo &DCI,
8680280031Sdim                                          const ARMSubtarget *Subtarget) {
8681218893Sdim  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
8682218893Sdim  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
8683218893Sdim  // into a pair of GPRs, which is fine when the value is used as a scalar,
8684218893Sdim  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
8685218893Sdim  SelectionDAG &DAG = DCI.DAG;
8686218893Sdim  if (N->getNumOperands() == 2) {
8687218893Sdim    SDValue RV = PerformVMOVDRRCombine(N, DAG);
8688218893Sdim    if (RV.getNode())
8689218893Sdim      return RV;
8690218893Sdim  }
8691218893Sdim
8692218893Sdim  // Load i64 elements as f64 values so that type legalization does not split
8693218893Sdim  // them up into i32 values.
8694218893Sdim  EVT VT = N->getValueType(0);
8695218893Sdim  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
8696218893Sdim    return SDValue();
8697261991Sdim  SDLoc dl(N);
8698218893Sdim  SmallVector<SDValue, 8> Ops;
8699218893Sdim  unsigned NumElts = VT.getVectorNumElements();
8700218893Sdim  for (unsigned i = 0; i < NumElts; ++i) {
8701218893Sdim    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
8702218893Sdim    Ops.push_back(V);
8703218893Sdim    // Make the DAGCombiner fold the bitcast.
8704218893Sdim    DCI.AddToWorklist(V.getNode());
8705218893Sdim  }
8706218893Sdim  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
8707276479Sdim  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
8708218893Sdim  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
8709218893Sdim}
8710218893Sdim
8711261991Sdim/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
8712261991Sdimstatic SDValue
8713261991SdimPerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
8714261991Sdim  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
8715261991Sdim  // At that time, we may have inserted bitcasts from integer to float.
8716261991Sdim  // If these bitcasts have survived DAGCombine, change the lowering of this
8717261991Sdim  // BUILD_VECTOR in something more vector friendly, i.e., that does not
8718261991Sdim  // force to use floating point types.
8719261991Sdim
8720261991Sdim  // Make sure we can change the type of the vector.
8721261991Sdim  // This is possible iff:
8722261991Sdim  // 1. The vector is only used in a bitcast to an integer type. I.e.,
8723261991Sdim  //    1.1. Vector is used only once.
8724261991Sdim  //    1.2. Use is a bit convert to an integer type.
8725261991Sdim  // 2. The size of its operands is 32 bits (64-bit elements are not legal).
8726261991Sdim  EVT VT = N->getValueType(0);
8727261991Sdim  EVT EltVT = VT.getVectorElementType();
8728261991Sdim
8729261991Sdim  // Check 1.1. and 2.
8730261991Sdim  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
8731261991Sdim    return SDValue();
8732261991Sdim
8733261991Sdim  // By construction, the input type must be float.
8734261991Sdim  assert(EltVT == MVT::f32 && "Unexpected type!");
8735261991Sdim
8736261991Sdim  // Check 1.2.
8737261991Sdim  SDNode *Use = *N->use_begin();
8738261991Sdim  if (Use->getOpcode() != ISD::BITCAST ||
8739261991Sdim      Use->getValueType(0).isFloatingPoint())
8740261991Sdim    return SDValue();
8741261991Sdim
8742261991Sdim  // Check profitability.
8743261991Sdim  // The model: if more than half of the relevant operands are bitcast from
8744261991Sdim  // i32, turn the build_vector into a sequence of insert_vector_elt.
8745261991Sdim  // Relevant operands are everything that is not statically
8746261991Sdim  // (i.e., at compile time) bitcasted.
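  //
  // E.g. (illustrative): for a two-element build_vector where both operands
  // are (bitcast i32 ...), NumOfBitCastedElts == 2 exceeds
  // NumOfRelevantElts / 2 == 1, so the rewrite below is performed.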
8747261991Sdim  unsigned NumOfBitCastedElts = 0;
8748261991Sdim  unsigned NumElts = VT.getVectorNumElements();
8749261991Sdim  unsigned NumOfRelevantElts = NumElts;
8750261991Sdim  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
8751261991Sdim    SDValue Elt = N->getOperand(Idx);
8752261991Sdim    if (Elt->getOpcode() == ISD::BITCAST) {
8753261991Sdim      // Assume only bit cast to i32 will go away.
8754261991Sdim      if (Elt->getOperand(0).getValueType() == MVT::i32)
8755261991Sdim        ++NumOfBitCastedElts;
8756261991Sdim    } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
8757261991Sdim      // Constants are statically casted, thus do not count them as
8758261991Sdim      // relevant operands.
8759261991Sdim      --NumOfRelevantElts;
8760261991Sdim  }
8761261991Sdim
8762261991Sdim  // Check if more than half of the elements require a non-free bitcast.
8763261991Sdim  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
8764261991Sdim    return SDValue();
8765261991Sdim
8766261991Sdim  SelectionDAG &DAG = DCI.DAG;
8767261991Sdim  // Create the new vector type.
8768261991Sdim  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
8769261991Sdim  // Check if the type is legal.
8770261991Sdim  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8771261991Sdim  if (!TLI.isTypeLegal(VecVT))
8772261991Sdim    return SDValue();
8773261991Sdim
8774261991Sdim  // Combine:
8775261991Sdim  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
8776261991Sdim  // => BITCAST INSERT_VECTOR_ELT
8777261991Sdim  //                      (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
8778261991Sdim  //                      (BITCAST EN), N.
8779261991Sdim  SDValue Vec = DAG.getUNDEF(VecVT);
8780261991Sdim  SDLoc dl(N);
8781261991Sdim  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
8782261991Sdim    SDValue V = N->getOperand(Idx);
8783261991Sdim    if (V.getOpcode() == ISD::UNDEF)
8784261991Sdim      continue;
8785261991Sdim    if (V.getOpcode() == ISD::BITCAST &&
8786261991Sdim        V->getOperand(0).getValueType() == MVT::i32)
8787261991Sdim      // Fold obvious case.
8788261991Sdim      V = V.getOperand(0);
8789261991Sdim    else {
8790276479Sdim      V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
8791261991Sdim      // Make the DAGCombiner fold the bitcasts.
8792261991Sdim      DCI.AddToWorklist(V.getNode());
8793261991Sdim    }
8794261991Sdim    SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32);
8795261991Sdim    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
8796261991Sdim  }
8797261991Sdim  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
8798261991Sdim  // Make the DAGCombiner fold the bitcasts.
8799261991Sdim  DCI.AddToWorklist(Vec.getNode());
8800261991Sdim  return Vec;
8801261991Sdim}
8802261991Sdim
8803218893Sdim/// PerformInsertEltCombine - Target-specific dag combine xforms for
8804218893Sdim/// ISD::INSERT_VECTOR_ELT.
8805218893Sdimstatic SDValue PerformInsertEltCombine(SDNode *N,
8806218893Sdim                                       TargetLowering::DAGCombinerInfo &DCI) {
8807218893Sdim  // Bitcast an i64 load inserted into a vector to f64.
8808218893Sdim  // Otherwise, the i64 value will be legalized to a pair of i32 values.
8809218893Sdim  EVT VT = N->getValueType(0);
8810218893Sdim  SDNode *Elt = N->getOperand(1).getNode();
8811218893Sdim  if (VT.getVectorElementType() != MVT::i64 ||
8812218893Sdim      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
8813218893Sdim    return SDValue();
8814218893Sdim
8815218893Sdim  SelectionDAG &DAG = DCI.DAG;
8816261991Sdim  SDLoc dl(N);
8817218893Sdim  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
8818218893Sdim                                 VT.getVectorNumElements());
8819218893Sdim  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
8820218893Sdim  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
8821218893Sdim  // Make the DAGCombiner fold the bitcasts.
8822218893Sdim  DCI.AddToWorklist(Vec.getNode());
8823218893Sdim  DCI.AddToWorklist(V.getNode());
8824218893Sdim  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
8825218893Sdim                               Vec, V, N->getOperand(2));
8826218893Sdim  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
8827218893Sdim}
8828218893Sdim
8829218893Sdim/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
8830218893Sdim/// ISD::VECTOR_SHUFFLE.
8831218893Sdimstatic SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
8832218893Sdim  // The LLVM shufflevector instruction does not require the shuffle mask
8833218893Sdim  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
8834218893Sdim  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
8835218893Sdim  // operands do not match the mask length, they are extended by concatenating
8836218893Sdim  // them with undef vectors.  That is probably the right thing for other
8837218893Sdim  // targets, but for NEON it is better to concatenate two double-register
8838218893Sdim  // size vector operands into a single quad-register size vector.  Do that
8839218893Sdim  // transformation here:
8840218893Sdim  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
8841218893Sdim  //   shuffle(concat(v1, v2), undef)
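  //
  // E.g. (illustrative): with 4-element inputs widened to 8 lanes, a mask
  // element of 9 (lane 1 of the second concat) is remapped below to
  // HalfElts + 9 - NumElts == 4 + 9 - 8 == 5, i.e. lane 1 of v2 within the
  // single concat(v1, v2).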
8842218893Sdim  SDValue Op0 = N->getOperand(0);
8843218893Sdim  SDValue Op1 = N->getOperand(1);
8844218893Sdim  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
8845218893Sdim      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
8846218893Sdim      Op0.getNumOperands() != 2 ||
8847218893Sdim      Op1.getNumOperands() != 2)
8848218893Sdim    return SDValue();
8849218893Sdim  SDValue Concat0Op1 = Op0.getOperand(1);
8850218893Sdim  SDValue Concat1Op1 = Op1.getOperand(1);
8851218893Sdim  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
8852218893Sdim      Concat1Op1.getOpcode() != ISD::UNDEF)
8853218893Sdim    return SDValue();
8854218893Sdim  // Skip the transformation if any of the types are illegal.
8855218893Sdim  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8856218893Sdim  EVT VT = N->getValueType(0);
8857218893Sdim  if (!TLI.isTypeLegal(VT) ||
8858218893Sdim      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
8859218893Sdim      !TLI.isTypeLegal(Concat1Op1.getValueType()))
8860218893Sdim    return SDValue();
8861218893Sdim
8862261991Sdim  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
8863218893Sdim                                  Op0.getOperand(0), Op1.getOperand(0));
8864218893Sdim  // Translate the shuffle mask.
8865218893Sdim  SmallVector<int, 16> NewMask;
8866218893Sdim  unsigned NumElts = VT.getVectorNumElements();
8867218893Sdim  unsigned HalfElts = NumElts/2;
8868218893Sdim  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8869218893Sdim  for (unsigned n = 0; n < NumElts; ++n) {
8870218893Sdim    int MaskElt = SVN->getMaskElt(n);
8871218893Sdim    int NewElt = -1;
8872218893Sdim    if (MaskElt < (int)HalfElts)
8873218893Sdim      NewElt = MaskElt;
8874218893Sdim    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
8875218893Sdim      NewElt = HalfElts + MaskElt - NumElts;
8876218893Sdim    NewMask.push_back(NewElt);
8877218893Sdim  }
8878261991Sdim  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
8879218893Sdim                              DAG.getUNDEF(VT), NewMask.data());
8880218893Sdim}
8881218893Sdim
8882218893Sdim/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
8883218893Sdim/// NEON load/store intrinsics to merge base address updates.
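/// For example (illustrative): "vld1.32 {d16}, [r0]" followed by
/// "add r0, r0, #8" (the access size) can be merged into the
/// post-incrementing form "vld1.32 {d16}, [r0]!".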
8884218893Sdimstatic SDValue CombineBaseUpdate(SDNode *N,
8885218893Sdim                                 TargetLowering::DAGCombinerInfo &DCI) {
8886218893Sdim  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
8887218893Sdim    return SDValue();
8888218893Sdim
8889218893Sdim  SelectionDAG &DAG = DCI.DAG;
8890218893Sdim  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
8891218893Sdim                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
8892218893Sdim  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
8893218893Sdim  SDValue Addr = N->getOperand(AddrOpIdx);
8894218893Sdim
8895218893Sdim  // Search for a use of the address operand that is an increment.
8896218893Sdim  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
8897218893Sdim         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
8898218893Sdim    SDNode *User = *UI;
8899218893Sdim    if (User->getOpcode() != ISD::ADD ||
8900218893Sdim        UI.getUse().getResNo() != Addr.getResNo())
8901218893Sdim      continue;
8902218893Sdim
8903218893Sdim    // Check that the add is independent of the load/store.  Otherwise, folding
8904218893Sdim    // it would create a cycle.
8905218893Sdim    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
8906218893Sdim      continue;
8907218893Sdim
8908218893Sdim    // Find the new opcode for the updating load/store.
8909218893Sdim    bool isLoad = true;
8910218893Sdim    bool isLaneOp = false;
8911218893Sdim    unsigned NewOpc = 0;
8912218893Sdim    unsigned NumVecs = 0;
8913218893Sdim    if (isIntrinsic) {
8914218893Sdim      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8915218893Sdim      switch (IntNo) {
8916234353Sdim      default: llvm_unreachable("unexpected intrinsic for Neon base update");
8917218893Sdim      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
8918218893Sdim        NumVecs = 1; break;
8919218893Sdim      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
8920218893Sdim        NumVecs = 2; break;
8921218893Sdim      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
8922218893Sdim        NumVecs = 3; break;
8923218893Sdim      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
8924218893Sdim        NumVecs = 4; break;
8925218893Sdim      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
8926218893Sdim        NumVecs = 2; isLaneOp = true; break;
8927218893Sdim      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
8928218893Sdim        NumVecs = 3; isLaneOp = true; break;
8929218893Sdim      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
8930218893Sdim        NumVecs = 4; isLaneOp = true; break;
8931218893Sdim      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
8932218893Sdim        NumVecs = 1; isLoad = false; break;
8933218893Sdim      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
8934218893Sdim        NumVecs = 2; isLoad = false; break;
8935218893Sdim      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
8936218893Sdim        NumVecs = 3; isLoad = false; break;
8937218893Sdim      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
8938218893Sdim        NumVecs = 4; isLoad = false; break;
8939218893Sdim      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
8940218893Sdim        NumVecs = 2; isLoad = false; isLaneOp = true; break;
8941218893Sdim      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
8942218893Sdim        NumVecs = 3; isLoad = false; isLaneOp = true; break;
8943218893Sdim      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
8944218893Sdim        NumVecs = 4; isLoad = false; isLaneOp = true; break;
8945218893Sdim      }
8946218893Sdim    } else {
8947218893Sdim      isLaneOp = true;
8948218893Sdim      switch (N->getOpcode()) {
8949234353Sdim      default: llvm_unreachable("unexpected opcode for Neon base update");
8950218893Sdim      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
8951218893Sdim      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
8952218893Sdim      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
8953218893Sdim      }
8954218893Sdim    }
8955218893Sdim
8956218893Sdim    // Find the size of memory referenced by the load/store.
8957218893Sdim    EVT VecTy;
8958218893Sdim    if (isLoad)
8959218893Sdim      VecTy = N->getValueType(0);
8960221345Sdim    else
8961218893Sdim      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
8962218893Sdim    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
8963218893Sdim    if (isLaneOp)
8964218893Sdim      NumBytes /= VecTy.getVectorNumElements();
8965218893Sdim
8966218893Sdim    // If the increment is a constant, it must match the memory ref size.
8967218893Sdim    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
8968218893Sdim    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
8969218893Sdim      uint64_t IncVal = CInc->getZExtValue();
8970218893Sdim      if (IncVal != NumBytes)
8971218893Sdim        continue;
8972218893Sdim    } else if (NumBytes >= 3 * 16) {
8973218893Sdim      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
8974218893Sdim      // separate instructions that make it harder to use a non-constant update.
8975218893Sdim      continue;
8976218893Sdim    }
8977218893Sdim
8978218893Sdim    // Create the new updating load/store node.
8979218893Sdim    EVT Tys[6];
8980218893Sdim    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
8981218893Sdim    unsigned n;
8982218893Sdim    for (n = 0; n < NumResultVecs; ++n)
8983218893Sdim      Tys[n] = VecTy;
8984218893Sdim    Tys[n++] = MVT::i32;
8985218893Sdim    Tys[n] = MVT::Other;
8986280031Sdim    SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
8987218893Sdim    SmallVector<SDValue, 8> Ops;
8988218893Sdim    Ops.push_back(N->getOperand(0)); // incoming chain
8989218893Sdim    Ops.push_back(N->getOperand(AddrOpIdx));
8990218893Sdim    Ops.push_back(Inc);
8991218893Sdim    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
8992218893Sdim      Ops.push_back(N->getOperand(i));
8993218893Sdim    }
8994218893Sdim    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
8995261991Sdim    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
8996276479Sdim                                           Ops, MemInt->getMemoryVT(),
8997218893Sdim                                           MemInt->getMemOperand());
8998218893Sdim
8999218893Sdim    // Update the uses.
9000218893Sdim    std::vector<SDValue> NewResults;
9001218893Sdim    for (unsigned i = 0; i < NumResultVecs; ++i) {
9002218893Sdim      NewResults.push_back(SDValue(UpdN.getNode(), i));
9003218893Sdim    }
9004218893Sdim    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
9005218893Sdim    DCI.CombineTo(N, NewResults);
9006218893Sdim    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
9007218893Sdim
9008218893Sdim    break;
9009221345Sdim  }
9010218893Sdim  return SDValue();
9011218893Sdim}
9012218893Sdim
9013218893Sdim/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
9014218893Sdim/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
9015218893Sdim/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
9016218893Sdim/// return true.
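/// For example (illustrative): if every vector result of a vld2lane is used
/// only by VDUPLANE nodes selecting the loaded lane, the pair can instead be
/// loaded and broadcast directly as "vld2.32 {d16[], d17[]}, [r0]".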
9017218893Sdimstatic bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
9018218893Sdim  SelectionDAG &DAG = DCI.DAG;
9019218893Sdim  EVT VT = N->getValueType(0);
9020218893Sdim  // vldN-dup instructions only support 64-bit vectors for N > 1.
9021218893Sdim  if (!VT.is64BitVector())
9022218893Sdim    return false;
9023218893Sdim
9024218893Sdim  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
9025218893Sdim  SDNode *VLD = N->getOperand(0).getNode();
9026218893Sdim  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
9027218893Sdim    return false;
9028218893Sdim  unsigned NumVecs = 0;
9029218893Sdim  unsigned NewOpc = 0;
9030218893Sdim  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
9031218893Sdim  if (IntNo == Intrinsic::arm_neon_vld2lane) {
9032218893Sdim    NumVecs = 2;
9033218893Sdim    NewOpc = ARMISD::VLD2DUP;
9034218893Sdim  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
9035218893Sdim    NumVecs = 3;
9036218893Sdim    NewOpc = ARMISD::VLD3DUP;
9037218893Sdim  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
9038218893Sdim    NumVecs = 4;
9039218893Sdim    NewOpc = ARMISD::VLD4DUP;
9040218893Sdim  } else {
9041218893Sdim    return false;
9042218893Sdim  }
9043218893Sdim
9044218893Sdim  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
9045218893Sdim  // numbers match the load.
9046218893Sdim  unsigned VLDLaneNo =
9047218893Sdim    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
9048218893Sdim  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
9049218893Sdim       UI != UE; ++UI) {
9050218893Sdim    // Ignore uses of the chain result.
9051218893Sdim    if (UI.getUse().getResNo() == NumVecs)
9052218893Sdim      continue;
9053218893Sdim    SDNode *User = *UI;
9054218893Sdim    if (User->getOpcode() != ARMISD::VDUPLANE ||
9055218893Sdim        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
9056218893Sdim      return false;
9057218893Sdim  }
9058218893Sdim
9059218893Sdim  // Create the vldN-dup node.
9060218893Sdim  EVT Tys[5];
9061218893Sdim  unsigned n;
9062218893Sdim  for (n = 0; n < NumVecs; ++n)
9063218893Sdim    Tys[n] = VT;
9064218893Sdim  Tys[n] = MVT::Other;
9065280031Sdim  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
9066218893Sdim  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
9067218893Sdim  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
9068261991Sdim  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
9069276479Sdim                                           Ops, VLDMemInt->getMemoryVT(),
9070218893Sdim                                           VLDMemInt->getMemOperand());
9071218893Sdim
9072218893Sdim  // Update the uses.
9073218893Sdim  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
9074218893Sdim       UI != UE; ++UI) {
9075218893Sdim    unsigned ResNo = UI.getUse().getResNo();
9076218893Sdim    // Ignore uses of the chain result.
9077218893Sdim    if (ResNo == NumVecs)
9078218893Sdim      continue;
9079218893Sdim    SDNode *User = *UI;
9080218893Sdim    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
9081218893Sdim  }
9082218893Sdim
9083218893Sdim  // Now the vldN-lane intrinsic is dead except for its chain result.
9084218893Sdim  // Update uses of the chain.
9085218893Sdim  std::vector<SDValue> VLDDupResults;
9086218893Sdim  for (unsigned n = 0; n < NumVecs; ++n)
9087218893Sdim    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
9088218893Sdim  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
9089218893Sdim  DCI.CombineTo(VLD, VLDDupResults);
9090218893Sdim
9091218893Sdim  return true;
9092218893Sdim}
9093218893Sdim
9094210299Sed/// PerformVDUPLANECombine - Target-specific dag combine xforms for
9095210299Sed/// ARMISD::VDUPLANE.
9096210299Sedstatic SDValue PerformVDUPLANECombine(SDNode *N,
9097210299Sed                                      TargetLowering::DAGCombinerInfo &DCI) {
9098210299Sed  SDValue Op = N->getOperand(0);
9099210299Sed
9100218893Sdim  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
9101218893Sdim  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
9102218893Sdim  if (CombineVLDDUP(N, DCI))
9103218893Sdim    return SDValue(N, 0);
9104218893Sdim
9105218893Sdim  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
9106218893Sdim  // redundant.  Ignore bit_converts for now; element sizes are checked below.
9107218893Sdim  while (Op.getOpcode() == ISD::BITCAST)
9108210299Sed    Op = Op.getOperand(0);
9109210299Sed  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
9110210299Sed    return SDValue();
9111210299Sed
9112210299Sed  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
9113210299Sed  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
9114210299Sed  // The canonical VMOV for a zero vector uses a 32-bit element size.
9115210299Sed  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9116210299Sed  unsigned EltBits;
9117210299Sed  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
9118210299Sed    EltSize = 8;
9119218893Sdim  EVT VT = N->getValueType(0);
9120210299Sed  if (EltSize > VT.getVectorElementType().getSizeInBits())
9121210299Sed    return SDValue();
9122210299Sed
9123261991Sdim  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
9124210299Sed}
9125210299Sed
9126280031Sdim/// PerformSTORECombine - Target-specific dag combine xforms for
9127280031Sdim/// ISD::STORE.
9128280031Sdimstatic SDValue PerformSTORECombine(SDNode *N,
9129280031Sdim                                   TargetLowering::DAGCombinerInfo &DCI) {
9130280031Sdim  StoreSDNode *St = cast<StoreSDNode>(N);
9131280031Sdim  if (St->isVolatile())
9132280031Sdim    return SDValue();
9133280031Sdim
9134280031Sdim  // Optimize trunc store (of multiple scalars) to shuffle and store.  First,
9135280031Sdim  // pack all of the elements in one place.  Next, store to memory in fewer
9136280031Sdim  // chunks.
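  //
  // E.g. (illustrative): a truncating store of v4i16 as v4i8 bitcasts the
  // source to v8i8, shuffles the low byte of each i16 down to lanes 0-3 (on
  // little-endian), and stores the packed 32 bits with a single i32 store.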
9137280031Sdim  SDValue StVal = St->getValue();
9138280031Sdim  EVT VT = StVal.getValueType();
9139280031Sdim  if (St->isTruncatingStore() && VT.isVector()) {
9140280031Sdim    SelectionDAG &DAG = DCI.DAG;
9141280031Sdim    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9142280031Sdim    EVT StVT = St->getMemoryVT();
9143280031Sdim    unsigned NumElems = VT.getVectorNumElements();
9144280031Sdim    assert(StVT != VT && "Cannot truncate to the same type");
9145280031Sdim    unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
9146280031Sdim    unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
9147280031Sdim
9148280031Sdim    // From, To sizes and ElemCount must be pow of two
9149280031Sdim    // The From and To element sizes and the element count must be powers of two.
9150280031Sdim
9151280031Sdim    // We are going to use the original vector elements for storing, so the
9152280031Sdim    // total source width must be a multiple of the truncated element size.
9153280031Sdim    if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
9154280031Sdim
9155280031Sdim    unsigned SizeRatio  = FromEltSz / ToEltSz;
9156280031Sdim    assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
9157280031Sdim
9158280031Sdim    // Create a type on which we perform the shuffle.
9159280031Sdim    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
9160280031Sdim                                     NumElems*SizeRatio);
9161280031Sdim    assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
9162280031Sdim
9163280031Sdim    SDLoc DL(St);
9164280031Sdim    SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
9165280031Sdim    SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
9166280031Sdim    for (unsigned i = 0; i < NumElems; ++i)
9167280031Sdim      ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
9168280031Sdim
9169280031Sdim    // Can't shuffle using an illegal type.
9170280031Sdim    if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
9171280031Sdim
9172280031Sdim    SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
9173280031Sdim                                DAG.getUNDEF(WideVec.getValueType()),
9174280031Sdim                                ShuffleVec.data());
9175280031Sdim    // At this point all of the data is stored at the bottom of the
9176280031Sdim    // register. We now need to save it to mem.
9177280031Sdim
9178280031Sdim    // Find the largest store unit
9179280031Sdim    MVT StoreType = MVT::i8;
9180280031Sdim    for (MVT Tp : MVT::integer_valuetypes()) {
9181280031Sdim      if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
9182280031Sdim        StoreType = Tp;
9183280031Sdim    }
9184280031Sdim    // Didn't find a legal store type.
9185280031Sdim    if (!TLI.isTypeLegal(StoreType))
9186280031Sdim      return SDValue();
9187280031Sdim
9188280031Sdim    // Bitcast the original vector into a vector of store-size units
9189280031Sdim    EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
9190280031Sdim            StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
9191280031Sdim    assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
9192280031Sdim    SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
9193280031Sdim    SmallVector<SDValue, 8> Chains;
9194280031Sdim    SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
9195280031Sdim                                        TLI.getPointerTy());
9196280031Sdim    SDValue BasePtr = St->getBasePtr();
9197280031Sdim
9198280031Sdim    // Perform one or more big stores into memory.
9199280031Sdim    unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
9200280031Sdim    for (unsigned I = 0; I < E; I++) {
9201280031Sdim      SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
9202280031Sdim                                   StoreType, ShuffWide,
9203280031Sdim                                   DAG.getIntPtrConstant(I));
9204280031Sdim      SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
9205280031Sdim                                St->getPointerInfo(), St->isVolatile(),
9206280031Sdim                                St->isNonTemporal(), St->getAlignment());
9207280031Sdim      BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9208280031Sdim                            Increment);
9209280031Sdim      Chains.push_back(Ch);
9210280031Sdim    }
9211280031Sdim    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9212280031Sdim  }
9213280031Sdim
9214280031Sdim  if (!ISD::isNormalStore(St))
9215280031Sdim    return SDValue();
9216280031Sdim
9217280031Sdim  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
9218280031Sdim  // ARM stores of arguments in the same cache line.
9219280031Sdim  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
9220280031Sdim      StVal.getNode()->hasOneUse()) {
9221280031Sdim    SelectionDAG  &DAG = DCI.DAG;
9222280031Sdim    bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
9223280031Sdim    SDLoc DL(St);
9224280031Sdim    SDValue BasePtr = St->getBasePtr();
9225280031Sdim    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
9226280031Sdim                                  StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
9227280031Sdim                                  BasePtr, St->getPointerInfo(), St->isVolatile(),
9228280031Sdim                                  St->isNonTemporal(), St->getAlignment());
9229280031Sdim
9230280031Sdim    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
9231280031Sdim                                    DAG.getConstant(4, MVT::i32));
9232280031Sdim    return DAG.getStore(NewST1.getValue(0), DL,
9233280031Sdim                        StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
9234280031Sdim                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
9235280031Sdim                        St->isNonTemporal(),
9236280031Sdim                        std::min(4U, St->getAlignment() / 2));
9237280031Sdim  }
9238280031Sdim
9239280031Sdim  if (StVal.getValueType() == MVT::i64 &&
9240280031Sdim      StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9241280031Sdim
9242280031Sdim    // Bitcast an i64 store extracted from a vector to f64.
9243280031Sdim    // Otherwise, the i64 value will be legalized to a pair of i32 values.
9244280031Sdim    SelectionDAG &DAG = DCI.DAG;
9245280031Sdim    SDLoc dl(StVal);
9246280031Sdim    SDValue IntVec = StVal.getOperand(0);
9247280031Sdim    EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
9248280031Sdim                                   IntVec.getValueType().getVectorNumElements());
9249280031Sdim    SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
9250280031Sdim    SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
9251280031Sdim                                 Vec, StVal.getOperand(1));
9252280031Sdim    dl = SDLoc(N);
9253280031Sdim    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
9254280031Sdim    // Make the DAGCombiner fold the bitcasts.
9255280031Sdim    DCI.AddToWorklist(Vec.getNode());
9256280031Sdim    DCI.AddToWorklist(ExtElt.getNode());
9257280031Sdim    DCI.AddToWorklist(V.getNode());
9258280031Sdim    return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
9259280031Sdim                        St->getPointerInfo(), St->isVolatile(),
9260280031Sdim                        St->isNonTemporal(), St->getAlignment(),
9261280031Sdim                        St->getAAInfo());
9262280031Sdim  }
9263280031Sdim
9264280031Sdim  return SDValue();
9265280031Sdim}
9266280031Sdim
9267224145Sdim// isConstVecPow2 - Return true if each vector element is a power of 2, all
9268224145Sdim// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
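// E.g. (illustrative): <float 8.0, float 8.0> converts exactly to 8 == 2^3
// in every lane, so C is set to 8 and Log2_64(C) == 3 later becomes the
// #fbits operand of the fixed-point VCVT.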
9269224145Sdimstatic bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) {
9271224145Sdim  integerPart cN;
9272224145Sdim  integerPart c0 = 0;
9273224145Sdim  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
9274224145Sdim       I != E; I++) {
9275224145Sdim    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
9276224145Sdim    if (!C)
9277224145Sdim      return false;
9278224145Sdim
9279224145Sdim    bool isExact;
9280224145Sdim    APFloat APF = C->getValueAPF();
9281224145Sdim    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
9282224145Sdim        != APFloat::opOK || !isExact)
9283224145Sdim      return false;
9284224145Sdim
9285224145Sdim    c0 = (I == 0) ? cN : c0;
9286224145Sdim    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
9287224145Sdim      return false;
9288224145Sdim  }
9289224145Sdim  C = c0;
9290224145Sdim  return true;
9291224145Sdim}
9292224145Sdim
9293224145Sdim/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
9294224145Sdim/// can replace combinations of VMUL and VCVT (floating-point to integer)
9295224145Sdim/// when the VMUL has a constant operand that is a power of 2.
9296224145Sdim///
9297224145Sdim/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
9298224145Sdim///  vmul.f32        d16, d17, d16
9299224145Sdim///  vcvt.s32.f32    d16, d16
9300224145Sdim/// becomes:
9301224145Sdim///  vcvt.s32.f32    d16, d16, #3
9302224145Sdimstatic SDValue PerformVCVTCombine(SDNode *N,
9303224145Sdim                                  TargetLowering::DAGCombinerInfo &DCI,
9304224145Sdim                                  const ARMSubtarget *Subtarget) {
9305224145Sdim  SelectionDAG &DAG = DCI.DAG;
9306224145Sdim  SDValue Op = N->getOperand(0);
9307224145Sdim
9308224145Sdim  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
9309224145Sdim      Op.getOpcode() != ISD::FMUL)
9310224145Sdim    return SDValue();
9311224145Sdim
9312224145Sdim  uint64_t C;
9313224145Sdim  SDValue N0 = Op->getOperand(0);
9314224145Sdim  SDValue ConstVec = Op->getOperand(1);
9315224145Sdim  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
9316224145Sdim
9317224145Sdim  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
9318224145Sdim      !isConstVecPow2(ConstVec, isSigned, C))
9319224145Sdim    return SDValue();
9320224145Sdim
9321261991Sdim  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
9322261991Sdim  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
9323280031Sdim  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9324280031Sdim  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
9325280031Sdim      NumLanes > 4) {
9326261991Sdim    // These instructions only exist converting from f32 to i32. We can handle
9327261991Sdim    // smaller integers by generating an extra truncate, but larger ones would
9328280031Sdim    // be lossy. We also can't handle more than 4 lanes, since these instructions
9329280031Sdim    // only support v2i32/v4i32 types.
9330261991Sdim    return SDValue();
9331261991Sdim  }
9332261991Sdim
9333224145Sdim  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
9334224145Sdim    Intrinsic::arm_neon_vcvtfp2fxu;
9335261991Sdim  SDValue FixConv =  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
9336261991Sdim                                 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9337261991Sdim                                 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
9338261991Sdim                                 DAG.getConstant(Log2_64(C), MVT::i32));
9339261991Sdim
9340261991Sdim  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
9341261991Sdim    FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);
9342261991Sdim
9343261991Sdim  return FixConv;
9344224145Sdim}
9345224145Sdim
9346224145Sdim/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
9347224145Sdim/// can replace combinations of VCVT (integer to floating-point) and VDIV
9348224145Sdim/// when the VDIV has a constant operand that is a power of 2.
9349224145Sdim///
9350224145Sdim/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
9351224145Sdim///  vcvt.f32.s32    d16, d16
9352224145Sdim///  vdiv.f32        d16, d17, d16
9353224145Sdim/// becomes:
9354224145Sdim///  vcvt.f32.s32    d16, d16, #3
9355224145Sdimstatic SDValue PerformVDIVCombine(SDNode *N,
9356224145Sdim                                  TargetLowering::DAGCombinerInfo &DCI,
9357224145Sdim                                  const ARMSubtarget *Subtarget) {
9358224145Sdim  SelectionDAG &DAG = DCI.DAG;
9359224145Sdim  SDValue Op = N->getOperand(0);
9360224145Sdim  unsigned OpOpcode = Op.getNode()->getOpcode();
9361224145Sdim
9362224145Sdim  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
9363224145Sdim      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
9364224145Sdim    return SDValue();
9365224145Sdim
9366224145Sdim  uint64_t C;
9367224145Sdim  SDValue ConstVec = N->getOperand(1);
9368224145Sdim  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
9369224145Sdim
9370224145Sdim  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
9371224145Sdim      !isConstVecPow2(ConstVec, isSigned, C))
9372224145Sdim    return SDValue();
9373224145Sdim
9374261991Sdim  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
9375261991Sdim  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
9376261991Sdim  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
9377261991Sdim    // These instructions only exist converting from i32 to f32. We can handle
9378261991Sdim    // smaller integers by generating an extra extend, but larger ones would
9379261991Sdim    // be lossy.
9380261991Sdim    return SDValue();
9381261991Sdim  }
9382261991Sdim
9383261991Sdim  SDValue ConvInput = Op.getOperand(0);
9384261991Sdim  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9385261991Sdim  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
9386261991Sdim    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
9387261991Sdim                            SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9388261991Sdim                            ConvInput);
9389261991Sdim
9390224145Sdim  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
9391224145Sdim    Intrinsic::arm_neon_vcvtfxu2fp;
9392261991Sdim  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
9393224145Sdim                     Op.getValueType(),
9394224145Sdim                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
9395261991Sdim                     ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
9396224145Sdim}
9397224145Sdim
9398224145Sdim/// getVShiftImm - Check if this is a valid build_vector for the immediate
9399194710Sed/// operand of a vector shift operation, where all the elements of the
9400194710Sed/// build_vector must have the same constant integer value.
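/// For example (illustrative): the splat <i16 5, i16 5, i16 5, i16 5>
/// yields Cnt == 5, while a build_vector whose elements differ fails the
/// isConstantSplat check and is rejected.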
9401194710Sedstatic bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
9402194710Sed  // Ignore bit_converts.
9403218893Sdim  while (Op.getOpcode() == ISD::BITCAST)
9404194710Sed    Op = Op.getOperand(0);
9405194710Sed  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9406194710Sed  APInt SplatBits, SplatUndef;
9407194710Sed  unsigned SplatBitSize;
9408194710Sed  bool HasAnyUndefs;
9409194710Sed  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
9410194710Sed                                      HasAnyUndefs, ElementBits) ||
9411194710Sed      SplatBitSize > ElementBits)
9412194710Sed    return false;
9413194710Sed  Cnt = SplatBits.getSExtValue();
9414194710Sed  return true;
9415194710Sed}
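
// Illustrative example (hypothetical operand, not from the original code):
// for a v8i16 shift whose count operand is (BUILD_VECTOR 3,3,3,3,3,3,3,3),
// getVShiftImm(Op, /*ElementBits=*/16, Cnt) recognizes the constant splat
// and sets Cnt = 3.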
9416194710Sed
9417194710Sed/// isVShiftLImm - Check if this is a valid build_vector for the immediate
9418194710Sed/// operand of a vector shift left operation.  That value must be in the range:
9419194710Sed///   0 <= Value < ElementBits for a left shift; or
9420194710Sed///   0 <= Value <= ElementBits for a long left shift.
9421198090Srdivackystatic bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
9422194710Sed  assert(VT.isVector() && "vector shift count is not a vector type");
9423194710Sed  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9424194710Sed  if (!getVShiftImm(Op, ElementBits, Cnt))
9425194710Sed    return false;
9426194710Sed  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
9427194710Sed}
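
// For example (hypothetical values): with 32-bit elements, isVShiftLImm
// accepts counts 0..31 for an ordinary left shift, and 0..32 when isLong is
// set, since the long shift (vshll) also allows a count equal to the
// element size.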
9428194710Sed
9429194710Sed/// isVShiftRImm - Check if this is a valid build_vector for the immediate
9430194710Sed/// operand of a vector shift right operation.  For a shift opcode, the value
9431194710Sed/// is positive, but for an intrinsic the value count must be negative. The
9432194710Sed/// absolute value must be in the range:
9433194710Sed///   1 <= |Value| <= ElementBits for a right shift; or
9434194710Sed///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
9435198090Srdivackystatic bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
9436194710Sed                         int64_t &Cnt) {
9437194710Sed  assert(VT.isVector() && "vector shift count is not a vector type");
9438194710Sed  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9439194710Sed  if (!getVShiftImm(Op, ElementBits, Cnt))
9440194710Sed    return false;
9441194710Sed  if (isIntrinsic)
9442194710Sed    Cnt = -Cnt;
9443194710Sed  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
9444194710Sed}
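
// For example (hypothetical values): a narrowing-shift intrinsic on v8i16
// with a splatted count of -5 passes isVShiftRImm(..., /*isNarrow=*/true,
// /*isIntrinsic=*/true, Cnt): the count is negated to 5, which lies in the
// required range 1 <= 5 <= ElementBits/2 = 8.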
9445194710Sed
9446194710Sed/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
9447194710Sedstatic SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
9448194710Sed  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9449194710Sed  switch (IntNo) {
9450194710Sed  default:
9451194710Sed    // Don't do anything for most intrinsics.
9452194710Sed    break;
9453194710Sed
9454194710Sed  // Vector shifts: check for immediate versions and lower them.
9455194710Sed  // Note: This is done during DAG combining instead of DAG legalizing because
9456194710Sed  // the build_vectors for 64-bit vector element shift counts are generally
9457194710Sed  // not legal, and it is hard to see their values after they get legalized to
9458194710Sed  // loads from a constant pool.
9459194710Sed  case Intrinsic::arm_neon_vshifts:
9460194710Sed  case Intrinsic::arm_neon_vshiftu:
9461194710Sed  case Intrinsic::arm_neon_vrshifts:
9462194710Sed  case Intrinsic::arm_neon_vrshiftu:
9463194710Sed  case Intrinsic::arm_neon_vrshiftn:
9464194710Sed  case Intrinsic::arm_neon_vqshifts:
9465194710Sed  case Intrinsic::arm_neon_vqshiftu:
9466194710Sed  case Intrinsic::arm_neon_vqshiftsu:
9467194710Sed  case Intrinsic::arm_neon_vqshiftns:
9468194710Sed  case Intrinsic::arm_neon_vqshiftnu:
9469194710Sed  case Intrinsic::arm_neon_vqshiftnsu:
9470194710Sed  case Intrinsic::arm_neon_vqrshiftns:
9471194710Sed  case Intrinsic::arm_neon_vqrshiftnu:
9472194710Sed  case Intrinsic::arm_neon_vqrshiftnsu: {
9473198090Srdivacky    EVT VT = N->getOperand(1).getValueType();
9474194710Sed    int64_t Cnt;
9475194710Sed    unsigned VShiftOpc = 0;
9476194710Sed
9477194710Sed    switch (IntNo) {
9478194710Sed    case Intrinsic::arm_neon_vshifts:
9479194710Sed    case Intrinsic::arm_neon_vshiftu:
9480194710Sed      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
9481194710Sed        VShiftOpc = ARMISD::VSHL;
9482194710Sed        break;
9483194710Sed      }
9484194710Sed      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
9485194710Sed        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
9486194710Sed                     ARMISD::VSHRs : ARMISD::VSHRu);
9487194710Sed        break;
9488194710Sed      }
9489194710Sed      return SDValue();
9490194710Sed
9491194710Sed    case Intrinsic::arm_neon_vrshifts:
9492194710Sed    case Intrinsic::arm_neon_vrshiftu:
9493194710Sed      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
9494194710Sed        break;
9495194710Sed      return SDValue();
9496194710Sed
9497194710Sed    case Intrinsic::arm_neon_vqshifts:
9498194710Sed    case Intrinsic::arm_neon_vqshiftu:
9499194710Sed      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
9500194710Sed        break;
9501194710Sed      return SDValue();
9502194710Sed
9503194710Sed    case Intrinsic::arm_neon_vqshiftsu:
9504194710Sed      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
9505194710Sed        break;
9506198090Srdivacky      llvm_unreachable("invalid shift count for vqshlu intrinsic");
9507194710Sed
9508194710Sed    case Intrinsic::arm_neon_vrshiftn:
9509194710Sed    case Intrinsic::arm_neon_vqshiftns:
9510194710Sed    case Intrinsic::arm_neon_vqshiftnu:
9511194710Sed    case Intrinsic::arm_neon_vqshiftnsu:
9512194710Sed    case Intrinsic::arm_neon_vqrshiftns:
9513194710Sed    case Intrinsic::arm_neon_vqrshiftnu:
9514194710Sed    case Intrinsic::arm_neon_vqrshiftnsu:
9515194710Sed      // Narrowing shifts require an immediate right shift.
9516194710Sed      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
9517194710Sed        break;
9518210299Sed      llvm_unreachable("invalid shift count for narrowing vector shift "
9519210299Sed                       "intrinsic");
9520194710Sed
9521194710Sed    default:
9522198090Srdivacky      llvm_unreachable("unhandled vector shift");
9523194710Sed    }
9524194710Sed
9525194710Sed    switch (IntNo) {
9526194710Sed    case Intrinsic::arm_neon_vshifts:
9527194710Sed    case Intrinsic::arm_neon_vshiftu:
9528194710Sed      // Opcode already set above.
9529194710Sed      break;
9530194710Sed    case Intrinsic::arm_neon_vrshifts:
9531194710Sed      VShiftOpc = ARMISD::VRSHRs; break;
9532194710Sed    case Intrinsic::arm_neon_vrshiftu:
9533194710Sed      VShiftOpc = ARMISD::VRSHRu; break;
9534194710Sed    case Intrinsic::arm_neon_vrshiftn:
9535194710Sed      VShiftOpc = ARMISD::VRSHRN; break;
9536194710Sed    case Intrinsic::arm_neon_vqshifts:
9537194710Sed      VShiftOpc = ARMISD::VQSHLs; break;
9538194710Sed    case Intrinsic::arm_neon_vqshiftu:
9539194710Sed      VShiftOpc = ARMISD::VQSHLu; break;
9540194710Sed    case Intrinsic::arm_neon_vqshiftsu:
9541194710Sed      VShiftOpc = ARMISD::VQSHLsu; break;
9542194710Sed    case Intrinsic::arm_neon_vqshiftns:
9543194710Sed      VShiftOpc = ARMISD::VQSHRNs; break;
9544194710Sed    case Intrinsic::arm_neon_vqshiftnu:
9545194710Sed      VShiftOpc = ARMISD::VQSHRNu; break;
9546194710Sed    case Intrinsic::arm_neon_vqshiftnsu:
9547194710Sed      VShiftOpc = ARMISD::VQSHRNsu; break;
9548194710Sed    case Intrinsic::arm_neon_vqrshiftns:
9549194710Sed      VShiftOpc = ARMISD::VQRSHRNs; break;
9550194710Sed    case Intrinsic::arm_neon_vqrshiftnu:
9551194710Sed      VShiftOpc = ARMISD::VQRSHRNu; break;
9552194710Sed    case Intrinsic::arm_neon_vqrshiftnsu:
9553194710Sed      VShiftOpc = ARMISD::VQRSHRNsu; break;
9554194710Sed    }
9555194710Sed
9556261991Sdim    return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
9557194710Sed                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
9558194710Sed  }
9559194710Sed
9560194710Sed  case Intrinsic::arm_neon_vshiftins: {
9561198090Srdivacky    EVT VT = N->getOperand(1).getValueType();
9562194710Sed    int64_t Cnt;
9563194710Sed    unsigned VShiftOpc = 0;
9564194710Sed
9565194710Sed    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
9566194710Sed      VShiftOpc = ARMISD::VSLI;
9567194710Sed    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
9568194710Sed      VShiftOpc = ARMISD::VSRI;
9569194710Sed    else {
9570198090Srdivacky      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
9571194710Sed    }
9572194710Sed
9573261991Sdim    return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
9574194710Sed                       N->getOperand(1), N->getOperand(2),
9575194710Sed                       DAG.getConstant(Cnt, MVT::i32));
9576194710Sed  }
9577194710Sed
9578194710Sed  case Intrinsic::arm_neon_vqrshifts:
9579194710Sed  case Intrinsic::arm_neon_vqrshiftu:
9580194710Sed    // No immediate versions of these to check for.
9581194710Sed    break;
9582194710Sed  }
9583194710Sed
9584194710Sed  return SDValue();
9585194710Sed}
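
// Illustrative sketch of the combine above (hypothetical IR):
//   %r = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a,
//                             <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
// becomes (ARMISD::VSHL %a, 2), which selects to "vshl.i32 q0, q0, #2"
// instead of a register shift with a materialized count vector.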
9586194710Sed
9587194710Sed/// PerformShiftCombine - Checks for immediate versions of vector shifts and
9588194710Sed/// lowers them.  As with the vector shift intrinsics, this is done during DAG
9589194710Sed/// combining instead of DAG legalizing because the build_vectors for 64-bit
9590194710Sed/// vector element shift counts are generally not legal, and it is hard to see
9591194710Sed/// their values after they get legalized to loads from a constant pool.
9592194710Sedstatic SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
9593194710Sed                                   const ARMSubtarget *ST) {
9594198090Srdivacky  EVT VT = N->getValueType(0);
9595234353Sdim  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
9596234353Sdim    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
9597234353Sdim    // 16 bits of x are zero. This optimizes rev + lsr #16 to rev16.
9598234353Sdim    SDValue N1 = N->getOperand(1);
9599234353Sdim    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
9600234353Sdim      SDValue N0 = N->getOperand(0);
9601234353Sdim      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
9602234353Sdim          DAG.MaskedValueIsZero(N0.getOperand(0),
9603234353Sdim                                APInt::getHighBitsSet(32, 16)))
9604261991Sdim        return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
9605234353Sdim    }
9606234353Sdim  }
9607194710Sed
9608194710Sed  // Nothing to be done for scalar shifts.
9609218893Sdim  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9610218893Sdim  if (!VT.isVector() || !TLI.isTypeLegal(VT))
9611194710Sed    return SDValue();
9612194710Sed
9613194710Sed  assert(ST->hasNEON() && "unexpected vector shift");
9614194710Sed  int64_t Cnt;
9615194710Sed
9616194710Sed  switch (N->getOpcode()) {
9617198090Srdivacky  default: llvm_unreachable("unexpected shift opcode");
9618194710Sed
9619194710Sed  case ISD::SHL:
9620194710Sed    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
9621261991Sdim      return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),
9622194710Sed                         DAG.getConstant(Cnt, MVT::i32));
9623194710Sed    break;
9624194710Sed
9625194710Sed  case ISD::SRA:
9626194710Sed  case ISD::SRL:
9627194710Sed    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
9628194710Sed      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
9629194710Sed                            ARMISD::VSHRs : ARMISD::VSHRu);
9630261991Sdim      return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),
9631194710Sed                         DAG.getConstant(Cnt, MVT::i32));
9632194710Sed    }
9633194710Sed  }
9634194710Sed  return SDValue();
9635194710Sed}
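
// Illustrative effect of the bswap canonicalization above (hypothetical
// registers): when the high 16 bits of x are known zero,
//   rev r0, r0          @ bswap
//   lsr r0, r0, #16
// is rewritten via ISD::ROTR so that instruction selection emits
//   rev16 r0, r0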
9636194710Sed
9637194710Sed/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
9638194710Sed/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
9639194710Sedstatic SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
9640194710Sed                                    const ARMSubtarget *ST) {
9641194710Sed  SDValue N0 = N->getOperand(0);
9642194710Sed
9643194710Sed  // Check for sign- and zero-extensions of vector extract operations of 8-
9644194710Sed  // and 16-bit vector elements.  NEON supports these directly.  They are
9645194710Sed  // handled during DAG combining because type legalization will promote them
9646194710Sed  // to 32-bit types and it is messy to recognize the operations after that.
9647194710Sed  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9648194710Sed    SDValue Vec = N0.getOperand(0);
9649194710Sed    SDValue Lane = N0.getOperand(1);
9650198090Srdivacky    EVT VT = N->getValueType(0);
9651198090Srdivacky    EVT EltVT = N0.getValueType();
9652194710Sed    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9653194710Sed
9654194710Sed    if (VT == MVT::i32 &&
9655194710Sed        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
9656218893Sdim        TLI.isTypeLegal(Vec.getValueType()) &&
9657218893Sdim        isa<ConstantSDNode>(Lane)) {
9658194710Sed
9659194710Sed      unsigned Opc = 0;
9660194710Sed      switch (N->getOpcode()) {
9661198090Srdivacky      default: llvm_unreachable("unexpected opcode");
9662194710Sed      case ISD::SIGN_EXTEND:
9663194710Sed        Opc = ARMISD::VGETLANEs;
9664194710Sed        break;
9665194710Sed      case ISD::ZERO_EXTEND:
9666194710Sed      case ISD::ANY_EXTEND:
9667194710Sed        Opc = ARMISD::VGETLANEu;
9668194710Sed        break;
9669194710Sed      }
9670261991Sdim      return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
9671194710Sed    }
9672194710Sed  }
9673194710Sed
9674194710Sed  return SDValue();
9675194710Sed}
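
// Illustrative example (hypothetical operands): for
//   (sign_extend (extract_vector_elt v8i16 %vec, 3)) to i32
// this combine emits (ARMISD::VGETLANEs %vec, 3), which selects to a single
// "vmov.s16 r0, d0[3]" instead of an extract followed by a separate sxth.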
9676194710Sed
9677204642Srdivacky/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
9678204642Srdivacky/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
9679204642Srdivackystatic SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
9680204642Srdivacky                                       const ARMSubtarget *ST) {
9681204642Srdivacky  // If the target supports NEON, try to use vmax/vmin instructions for f32
9682212904Sdim  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
9683204642Srdivacky  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
9684204642Srdivacky  // a NaN; only do the transformation when it matches that behavior.
9685204642Srdivacky
9686204642Srdivacky  // For now only do this when using NEON for FP operations; if using VFP, it
9687204642Srdivacky  // is not obvious that the benefit outweighs the cost of switching to the
9688204642Srdivacky  // NEON pipeline.
9689204642Srdivacky  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
9690204642Srdivacky      N->getValueType(0) != MVT::f32)
9691204642Srdivacky    return SDValue();
9692204642Srdivacky
9693204642Srdivacky  SDValue CondLHS = N->getOperand(0);
9694204642Srdivacky  SDValue CondRHS = N->getOperand(1);
9695204642Srdivacky  SDValue LHS = N->getOperand(2);
9696204642Srdivacky  SDValue RHS = N->getOperand(3);
9697204642Srdivacky  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
9698204642Srdivacky
9699204642Srdivacky  unsigned Opcode = 0;
9700204642Srdivacky  bool IsReversed;
9701204642Srdivacky  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
9702204642Srdivacky    IsReversed = false; // x CC y ? x : y
9703204642Srdivacky  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
9704204642Srdivacky    IsReversed = true; // x CC y ? y : x
9705204642Srdivacky  } else {
9706204642Srdivacky    return SDValue();
9707204642Srdivacky  }
9708204642Srdivacky
9709204642Srdivacky  bool IsUnordered;
9710204642Srdivacky  switch (CC) {
9711204642Srdivacky  default: break;
9712204642Srdivacky  case ISD::SETOLT:
9713204642Srdivacky  case ISD::SETOLE:
9714204642Srdivacky  case ISD::SETLT:
9715204642Srdivacky  case ISD::SETLE:
9716204642Srdivacky  case ISD::SETULT:
9717204642Srdivacky  case ISD::SETULE:
9718204642Srdivacky    // If LHS is NaN, an ordered comparison will be false and the result will
9719204642Srdivacky    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
9720204642Srdivacky    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
9721204642Srdivacky    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
9722204642Srdivacky    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
9723204642Srdivacky      break;
9724204642Srdivacky    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
9725204642Srdivacky    // will return -0, so vmin can only be used for unsafe math or if one of
9726204642Srdivacky    // the operands is known to be nonzero.
9727204642Srdivacky    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
9728234353Sdim        !DAG.getTarget().Options.UnsafeFPMath &&
9729204642Srdivacky        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
9730204642Srdivacky      break;
9731204642Srdivacky    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
9732204642Srdivacky    break;
9733204642Srdivacky
9734204642Srdivacky  case ISD::SETOGT:
9735204642Srdivacky  case ISD::SETOGE:
9736204642Srdivacky  case ISD::SETGT:
9737204642Srdivacky  case ISD::SETGE:
9738204642Srdivacky  case ISD::SETUGT:
9739204642Srdivacky  case ISD::SETUGE:
9740204642Srdivacky    // If LHS is NaN, an ordered comparison will be false and the result will
9741204642Srdivacky    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
9742204642Srdivacky    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
9743204642Srdivacky    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
9744204642Srdivacky    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
9745204642Srdivacky      break;
9746204642Srdivacky    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
9747204642Srdivacky    // will return +0, so vmax can only be used for unsafe math or if one of
9748204642Srdivacky    // the operands is known to be nonzero.
9749204642Srdivacky    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
9750234353Sdim        !DAG.getTarget().Options.UnsafeFPMath &&
9751204642Srdivacky        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
9752204642Srdivacky      break;
9753204642Srdivacky    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
9754204642Srdivacky    break;
9755204642Srdivacky  }
9756204642Srdivacky
9757204642Srdivacky  if (!Opcode)
9758204642Srdivacky    return SDValue();
9759261991Sdim  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
9760204642Srdivacky}
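
// Illustrative example (hypothetical values): with NEON used for
// single-precision FP and operands known not to be NaN,
//   (select_cc x, y, x, y, setolt)      ; i.e. x < y ? x : y
// is rewritten to (ARMISD::FMIN x, y) and selects to "vmin.f32".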
9761204642Srdivacky
9762224145Sdim/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
9763224145SdimSDValue
9764224145SdimARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
9765224145Sdim  SDValue Cmp = N->getOperand(4);
9766224145Sdim  if (Cmp.getOpcode() != ARMISD::CMPZ)
9767224145Sdim    // Only looking at EQ and NE cases.
9768224145Sdim    return SDValue();
9769224145Sdim
9770224145Sdim  EVT VT = N->getValueType(0);
9771261991Sdim  SDLoc dl(N);
9772224145Sdim  SDValue LHS = Cmp.getOperand(0);
9773224145Sdim  SDValue RHS = Cmp.getOperand(1);
9774224145Sdim  SDValue FalseVal = N->getOperand(0);
9775224145Sdim  SDValue TrueVal = N->getOperand(1);
9776224145Sdim  SDValue ARMcc = N->getOperand(2);
9777226633Sdim  ARMCC::CondCodes CC =
9778226633Sdim    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
9779224145Sdim
9780224145Sdim  // Simplify
9781224145Sdim  //   mov     r1, r0
9782224145Sdim  //   cmp     r1, x
9783224145Sdim  //   mov     r0, y
9784224145Sdim  //   moveq   r0, x
9785224145Sdim  // to
9786224145Sdim  //   cmp     r0, x
9787224145Sdim  //   movne   r0, y
9788224145Sdim  //
9789224145Sdim  //   mov     r1, r0
9790224145Sdim  //   cmp     r1, x
9791224145Sdim  //   mov     r0, x
9792224145Sdim  //   movne   r0, y
9793224145Sdim  // to
9794224145Sdim  //   cmp     r0, x
9795224145Sdim  //   movne   r0, y
9796224145Sdim  // FIXME: Turn this into a target-neutral optimization?
9797224145Sdim  SDValue Res;
9798226633Sdim  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
9799224145Sdim    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
9800224145Sdim                      N->getOperand(3), Cmp);
9801224145Sdim  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
9802224145Sdim    SDValue ARMcc;
9803224145Sdim    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
9804224145Sdim    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
9805224145Sdim                      N->getOperand(3), NewCmp);
9806224145Sdim  }
9807224145Sdim
9808224145Sdim  if (Res.getNode()) {
9809224145Sdim    APInt KnownZero, KnownOne;
9810276479Sdim    DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
9811224145Sdim    // Capture demanded bits information that would be otherwise lost.
9812224145Sdim    if (KnownZero == 0xfffffffe)
9813224145Sdim      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
9814224145Sdim                        DAG.getValueType(MVT::i1));
9815224145Sdim    else if (KnownZero == 0xffffff00)
9816224145Sdim      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
9817224145Sdim                        DAG.getValueType(MVT::i8));
9818224145Sdim    else if (KnownZero == 0xffff0000)
9819224145Sdim      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
9820224145Sdim                        DAG.getValueType(MVT::i16));
9821224145Sdim  }
9822224145Sdim
9823224145Sdim  return Res;
9824224145Sdim}
9825224145Sdim
9826193323SedSDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
9827193323Sed                                             DAGCombinerInfo &DCI) const {
9828193323Sed  switch (N->getOpcode()) {
9829193323Sed  default: break;
9830243830Sdim  case ISD::ADDC:       return PerformADDCCombine(N, DCI, Subtarget);
9831224145Sdim  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
9832204642Srdivacky  case ISD::SUB:        return PerformSUBCombine(N, DCI);
9833208599Srdivacky  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
9834212904Sdim  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
9835234353Sdim  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
9836234353Sdim  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
9837218893Sdim  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
9838280031Sdim  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
9839218893Sdim  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
9840218893Sdim  case ISD::STORE:      return PerformSTORECombine(N, DCI);
9841280031Sdim  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
9842218893Sdim  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
9843218893Sdim  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
9844210299Sed  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
9845224145Sdim  case ISD::FP_TO_SINT:
9846224145Sdim  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
9847224145Sdim  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
9848204642Srdivacky  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
9849194710Sed  case ISD::SHL:
9850194710Sed  case ISD::SRA:
9851204642Srdivacky  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
9852194710Sed  case ISD::SIGN_EXTEND:
9853194710Sed  case ISD::ZERO_EXTEND:
9854204642Srdivacky  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
9855204642Srdivacky  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
9856224145Sdim  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
9857218893Sdim  case ARMISD::VLD2DUP:
9858218893Sdim  case ARMISD::VLD3DUP:
9859218893Sdim  case ARMISD::VLD4DUP:
9860218893Sdim    return CombineBaseUpdate(N, DCI);
9861261991Sdim  case ARMISD::BUILD_VECTOR:
9862261991Sdim    return PerformARMBUILD_VECTORCombine(N, DCI);
9863218893Sdim  case ISD::INTRINSIC_VOID:
9864218893Sdim  case ISD::INTRINSIC_W_CHAIN:
9865218893Sdim    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9866218893Sdim    case Intrinsic::arm_neon_vld1:
9867218893Sdim    case Intrinsic::arm_neon_vld2:
9868218893Sdim    case Intrinsic::arm_neon_vld3:
9869218893Sdim    case Intrinsic::arm_neon_vld4:
9870218893Sdim    case Intrinsic::arm_neon_vld2lane:
9871218893Sdim    case Intrinsic::arm_neon_vld3lane:
9872218893Sdim    case Intrinsic::arm_neon_vld4lane:
9873218893Sdim    case Intrinsic::arm_neon_vst1:
9874218893Sdim    case Intrinsic::arm_neon_vst2:
9875218893Sdim    case Intrinsic::arm_neon_vst3:
9876218893Sdim    case Intrinsic::arm_neon_vst4:
9877218893Sdim    case Intrinsic::arm_neon_vst2lane:
9878218893Sdim    case Intrinsic::arm_neon_vst3lane:
9879218893Sdim    case Intrinsic::arm_neon_vst4lane:
9880218893Sdim      return CombineBaseUpdate(N, DCI);
9881218893Sdim    default: break;
9882218893Sdim    }
9883218893Sdim    break;
9884193323Sed  }
9885193323Sed  return SDValue();
9886193323Sed}
9887193323Sed
9888218893Sdimbool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
9889218893Sdim                                                          EVT VT) const {
9890218893Sdim  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
9891218893Sdim}
9892218893Sdim
9893280031Sdimbool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
9894280031Sdim                                                       unsigned,
9895280031Sdim                                                       unsigned,
9896280031Sdim                                                       bool *Fast) const {
9897243830Sdim  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
9898243830Sdim  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
9899198090Srdivacky
9900198090Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
9901198090Srdivacky  default:
9902198090Srdivacky    return false;
9903198090Srdivacky  case MVT::i8:
9904198090Srdivacky  case MVT::i16:
9905249423Sdim  case MVT::i32: {
9906243830Sdim    // Unaligned accesses can use (for example) LDRB, LDRH, LDR.
9907249423Sdim    if (AllowsUnaligned) {
9908249423Sdim      if (Fast)
9909249423Sdim        *Fast = Subtarget->hasV7Ops();
9910249423Sdim      return true;
9911249423Sdim    }
9912249423Sdim    return false;
9913249423Sdim  }
9914239462Sdim  case MVT::f64:
9915249423Sdim  case MVT::v2f64: {
9916243830Sdim    // For any little-endian target with NEON, we can support unaligned ld/st
9917243830Sdim    // of D and Q (e.g. {D0,D1}) registers by using vld1.8/vst1.8.
9918276479Sdim    // A big-endian target may also explicitly support unaligned accesses.
9919249423Sdim    if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
9920249423Sdim      if (Fast)
9921249423Sdim        *Fast = true;
9922249423Sdim      return true;
9923249423Sdim    }
9924249423Sdim    return false;
9925198090Srdivacky  }
9926249423Sdim  }
9927198090Srdivacky}
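
// For example (hypothetical queries): an unaligned i16 load is reported
// legal whenever SCTLR.A-style unaligned accesses are allowed (and fast on
// v7 cores), while an unaligned v2f64 access is legal on a NEON target via
// the element-wise vld1/vst1 forms described above.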
9928198090Srdivacky
9929234353Sdimstatic bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
9930234353Sdim                       unsigned AlignCheck) {
9931234353Sdim  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
9932234353Sdim          (DstAlign == 0 || DstAlign % AlignCheck == 0));
9933234353Sdim}
9934234353Sdim
9935234353SdimEVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
9936234353Sdim                                           unsigned DstAlign, unsigned SrcAlign,
9937249423Sdim                                           bool IsMemset, bool ZeroMemset,
9938234353Sdim                                           bool MemcpyStrSrc,
9939234353Sdim                                           MachineFunction &MF) const {
9940234353Sdim  const Function *F = MF.getFunction();
9941234353Sdim
9942234353Sdim  // See if we can use NEON instructions for this...
9943249423Sdim  if ((!IsMemset || ZeroMemset) &&
9944249423Sdim      Subtarget->hasNEON() &&
9945249423Sdim      !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
9946249423Sdim                                       Attribute::NoImplicitFloat)) {
9947249423Sdim    bool Fast;
9948249423Sdim    if (Size >= 16 &&
9949249423Sdim        (memOpAlign(SrcAlign, DstAlign, 16) ||
9950280031Sdim         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
9951249423Sdim      return MVT::v2f64;
9952249423Sdim    } else if (Size >= 8 &&
9953249423Sdim               (memOpAlign(SrcAlign, DstAlign, 8) ||
9954280031Sdim                (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
9955280031Sdim                 Fast))) {
9956249423Sdim      return MVT::f64;
9957234353Sdim    }
9958234353Sdim  }
9959234353Sdim
9960234353Sdim  // Lowering to i32/i16 if the size permits.
9961249423Sdim  if (Size >= 4)
9962234353Sdim    return MVT::i32;
9963249423Sdim  else if (Size >= 2)
9964234353Sdim    return MVT::i16;
9965234353Sdim
9966234353Sdim  // Let the target-independent logic figure it out.
9967234353Sdim  return MVT::Other;
9968234353Sdim}
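
// Illustrative outcome (hypothetical memcpy): a 16-byte copy whose operands
// are 16-byte aligned (or on a target where misaligned v2f64 access is fast)
// returns MVT::v2f64 here, so the copy lowers to one vld1/vst1 pair rather
// than four word-sized loads and stores.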
9969234353Sdim
9970249423Sdimbool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9971249423Sdim  if (Val.getOpcode() != ISD::LOAD)
9972249423Sdim    return false;
9973249423Sdim
9974249423Sdim  EVT VT1 = Val.getValueType();
9975249423Sdim  if (!VT1.isSimple() || !VT1.isInteger() ||
9976249423Sdim      !VT2.isSimple() || !VT2.isInteger())
9977249423Sdim    return false;
9978249423Sdim
9979249423Sdim  switch (VT1.getSimpleVT().SimpleTy) {
9980249423Sdim  default: break;
9981249423Sdim  case MVT::i1:
9982249423Sdim  case MVT::i8:
9983249423Sdim  case MVT::i16:
9984249423Sdim    // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
9985249423Sdim    return true;
9986249423Sdim  }
9987249423Sdim
9988249423Sdim  return false;
9989249423Sdim}
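
// For example (hypothetical use): zero-extending the result of an i8 load
// to i32 is free because LDRB already clears bits 31:8, so the combiner can
// prefer the extended form without penalty.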
9990249423Sdim
9991261991Sdimbool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
9992261991Sdim  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
9993261991Sdim    return false;
9994261991Sdim
9995261991Sdim  if (!isTypeLegal(EVT::getEVT(Ty1)))
9996261991Sdim    return false;
9997261991Sdim
9998261991Sdim  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
9999261991Sdim
10000261991Sdim  // Assuming the caller doesn't have a zeroext or signext return parameter,
10001261991Sdim  // truncation all the way down to i1 is valid.
10002261991Sdim  return true;
10003261991Sdim}
10004261991Sdim
10005261991Sdim
10006198090Srdivackystatic bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
10007198090Srdivacky  if (V < 0)
10008198090Srdivacky    return false;
10009198090Srdivacky
10010198090Srdivacky  unsigned Scale = 1;
10011198090Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
10012198090Srdivacky  default: return false;
10013198090Srdivacky  case MVT::i1:
10014198090Srdivacky  case MVT::i8:
10015198090Srdivacky    // Scale == 1;
10016198090Srdivacky    break;
10017198090Srdivacky  case MVT::i16:
10018198090Srdivacky    // Scale == 2;
10019198090Srdivacky    Scale = 2;
10020198090Srdivacky    break;
10021198090Srdivacky  case MVT::i32:
10022198090Srdivacky    // Scale == 4;
10023198090Srdivacky    Scale = 4;
10024198090Srdivacky    break;
10025198090Srdivacky  }
10026198090Srdivacky
10027198090Srdivacky  if ((V & (Scale - 1)) != 0)
10028198090Srdivacky    return false;
10029198090Srdivacky  V /= Scale;
10030198090Srdivacky  return V == (V & ((1LL << 5) - 1));
10031198090Srdivacky}
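
// For example (hypothetical offsets): a Thumb1 i32 access accepts immediate
// offsets 0, 4, ..., 124 (a 5-bit field scaled by 4), so [r1, #124] is
// legal while #126 (misaligned) and #128 (out of range) are not.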
10032198090Srdivacky
10033198090Srdivackystatic bool isLegalT2AddressImmediate(int64_t V, EVT VT,
10034198090Srdivacky                                      const ARMSubtarget *Subtarget) {
10035198090Srdivacky  bool isNeg = false;
10036198090Srdivacky  if (V < 0) {
10037198090Srdivacky    isNeg = true;
10038198090Srdivacky    V = -V;
10039198090Srdivacky  }
10040198090Srdivacky
10041198090Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
10042198090Srdivacky  default: return false;
10043198090Srdivacky  case MVT::i1:
10044198090Srdivacky  case MVT::i8:
10045198090Srdivacky  case MVT::i16:
10046198090Srdivacky  case MVT::i32:
10047198090Srdivacky    // + imm12 or - imm8
10048198090Srdivacky    if (isNeg)
10049198090Srdivacky      return V == (V & ((1LL << 8) - 1));
10050198090Srdivacky    return V == (V & ((1LL << 12) - 1));
10051198090Srdivacky  case MVT::f32:
10052198090Srdivacky  case MVT::f64:
10053198090Srdivacky    // Same as ARM mode. FIXME: NEON?
10054198090Srdivacky    if (!Subtarget->hasVFP2())
10055198090Srdivacky      return false;
10056198090Srdivacky    if ((V & 3) != 0)
10057198090Srdivacky      return false;
10058198090Srdivacky    V >>= 2;
10059198090Srdivacky    return V == (V & ((1LL << 8) - 1));
10060198090Srdivacky  }
10061198090Srdivacky}
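
// For example (hypothetical offsets): a Thumb2 i32 access allows
// [r, #0..4095] or [r, #-255..-1], while a VFP f32/f64 access allows a
// multiple of 4 with magnitude up to 1020 (an 8-bit field scaled by 4).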
10062198090Srdivacky
10063193323Sed/// isLegalAddressImmediate - Return true if the integer value can be used
10064193323Sed/// as the offset of the target addressing mode for load / store of the
10065193323Sed/// given type.
10066198090Srdivackystatic bool isLegalAddressImmediate(int64_t V, EVT VT,
10067193323Sed                                    const ARMSubtarget *Subtarget) {
10068193323Sed  if (V == 0)
10069193323Sed    return true;
10070193323Sed
10071193323Sed  if (!VT.isSimple())
10072193323Sed    return false;
10073193323Sed
10074198090Srdivacky  if (Subtarget->isThumb1Only())
10075198090Srdivacky    return isLegalT1AddressImmediate(V, VT);
10076198090Srdivacky  else if (Subtarget->isThumb2())
10077198090Srdivacky    return isLegalT2AddressImmediate(V, VT, Subtarget);
10078193323Sed
10079198090Srdivacky  // ARM mode.
10080193323Sed  if (V < 0)
10081193323Sed    V = -V;
10082198090Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
10083193323Sed  default: return false;
10084193323Sed  case MVT::i1:
10085193323Sed  case MVT::i8:
10086193323Sed  case MVT::i32:
10087193323Sed    // +- imm12
10088193323Sed    return V == (V & ((1LL << 12) - 1));
10089193323Sed  case MVT::i16:
10090193323Sed    // +- imm8
10091193323Sed    return V == (V & ((1LL << 8) - 1));
10092193323Sed  case MVT::f32:
10093193323Sed  case MVT::f64:
10094198090Srdivacky    if (!Subtarget->hasVFP2()) // FIXME: NEON?
10095193323Sed      return false;
10096193323Sed    if ((V & 3) != 0)
10097193323Sed      return false;
10098193323Sed    V >>= 2;
10099193323Sed    return V == (V & ((1LL << 8) - 1));
10100193323Sed  }
10101193323Sed}
10102193323Sed
10103198090Srdivackybool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
10104198090Srdivacky                                                      EVT VT) const {
10105198090Srdivacky  int Scale = AM.Scale;
10106198090Srdivacky  if (Scale < 0)
10107198090Srdivacky    return false;
10108198090Srdivacky
10109198090Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
10110198090Srdivacky  default: return false;
10111198090Srdivacky  case MVT::i1:
10112198090Srdivacky  case MVT::i8:
10113198090Srdivacky  case MVT::i16:
10114198090Srdivacky  case MVT::i32:
10115198090Srdivacky    if (Scale == 1)
10116198090Srdivacky      return true;
10117198090Srdivacky    // r + r << imm
10118198090Srdivacky    Scale = Scale & ~1;
10119198090Srdivacky    return Scale == 2 || Scale == 4 || Scale == 8;
10120198090Srdivacky  case MVT::i64:
10121198090Srdivacky    // r + r
10122198090Srdivacky    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
10123198090Srdivacky      return true;
10124198090Srdivacky    return false;
10125198090Srdivacky  case MVT::isVoid:
10126198090Srdivacky    // Note, we allow "void" uses (basically, uses that aren't loads or
10127198090Srdivacky    // stores), because ARM allows folding a scale into many arithmetic
10128198090Srdivacky    // operations.  This should be made more precise and revisited later.
10129198090Srdivacky
10130198090Srdivacky    // Allow r << imm, but the imm has to be a multiple of two.
10131198090Srdivacky    if (Scale & 1) return false;
10132198090Srdivacky    return isPowerOf2_32(Scale);
10133198090Srdivacky  }
10134198090Srdivacky}
10135198090Srdivacky
10136193323Sed/// isLegalAddressingMode - Return true if the addressing mode represented
10137193323Sed/// by AM is legal for this target, for a load/store of the specified type.
10138193323Sedbool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
10139226633Sdim                                              Type *Ty) const {
10140198090Srdivacky  EVT VT = getValueType(Ty, true);
10141193323Sed  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
10142193323Sed    return false;
10143193323Sed
10144193323Sed  // Can never fold addr of global into load/store.
10145193323Sed  if (AM.BaseGV)
10146193323Sed    return false;
10147193323Sed
10148193323Sed  switch (AM.Scale) {
10149193323Sed  case 0:  // no scale reg, must be "r+i" or "r", or "i".
10150193323Sed    break;
10151193323Sed  case 1:
10152198090Srdivacky    if (Subtarget->isThumb1Only())
10153193323Sed      return false;
10154193323Sed    // FALL THROUGH.
10155193323Sed  default:
10156193323Sed    // ARM doesn't support any R+R*scale+imm addr modes.
10157193323Sed    if (AM.BaseOffs)
10158193323Sed      return false;
10159193323Sed
10160193323Sed    if (!VT.isSimple())
10161193323Sed      return false;
10162193323Sed
10163198090Srdivacky    if (Subtarget->isThumb2())
10164198090Srdivacky      return isLegalT2ScaledAddressingMode(AM, VT);
10165198090Srdivacky
10166193323Sed    int Scale = AM.Scale;
10167198090Srdivacky    switch (VT.getSimpleVT().SimpleTy) {
10168193323Sed    default: return false;
10169193323Sed    case MVT::i1:
10170193323Sed    case MVT::i8:
10171193323Sed    case MVT::i32:
10172193323Sed      if (Scale < 0) Scale = -Scale;
10173193323Sed      if (Scale == 1)
10174193323Sed        return true;
10175193323Sed      // r + r << imm
10176193323Sed      return isPowerOf2_32(Scale & ~1);
10177193323Sed    case MVT::i16:
10178198090Srdivacky    case MVT::i64:
10179193323Sed      // r + r
10180193323Sed      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
10181193323Sed        return true;
10182193323Sed      return false;
10183193323Sed
10184193323Sed    case MVT::isVoid:
10185193323Sed      // Note, we allow "void" uses (basically, uses that aren't loads or
10186193323Sed      // stores), because ARM allows folding a scale into many arithmetic
10187193323Sed      // operations.  This should be made more precise and revisited later.
10188193323Sed
10189193323Sed      // Allow r << imm, but the imm has to be a multiple of two.
10190198090Srdivacky      if (Scale & 1) return false;
10191198090Srdivacky      return isPowerOf2_32(Scale);
10192193323Sed    }
10193193323Sed  }
10194193323Sed  return true;
10195193323Sed}
10196193323Sed
10197199481Srdivacky/// isLegalICmpImmediate - Return true if the specified immediate is a legal
10198199481Srdivacky/// icmp immediate, that is, the target has icmp instructions which can compare
10199199481Srdivacky/// a register against the immediate without having to materialize the
10200199481Srdivacky/// immediate into a register.
10201199481Srdivackybool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
10202234353Sdim  // Thumb2 and ARM modes can use cmn for negative immediates.
10203199481Srdivacky  if (!Subtarget->isThumb())
10204234353Sdim    return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
10205199481Srdivacky  if (Subtarget->isThumb2())
10206234353Sdim    return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
10207234353Sdim  // Thumb1 doesn't have cmn and supports only 8-bit unsigned immediates.
10208199481Srdivacky  return Imm >= 0 && Imm <= 255;
10209199481Srdivacky}
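
// For example (hypothetical immediates): in ARM mode an icmp against -7 is
// legal because |-7| = 7 encodes as a modified immediate and the compare
// can be selected as CMN; Thumb1 is limited to CMP with 0..255.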
10210199481Srdivacky
10211239462Sdim/// isLegalAddImmediate - Return true if the specified immediate is a legal add
10212239462Sdim/// *or sub* immediate, that is, the target has add or sub instructions which
10213239462Sdim/// can add the immediate to a register without having to materialize the
10214223017Sdim/// immediate into a register.
10215223017Sdimbool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
10216239462Sdim  // Same encoding for add/sub, just flip the sign.
10217239462Sdim  int64_t AbsImm = llvm::abs64(Imm);
10218239462Sdim  if (!Subtarget->isThumb())
10219239462Sdim    return ARM_AM::getSOImmVal(AbsImm) != -1;
10220239462Sdim  if (Subtarget->isThumb2())
10221239462Sdim    return ARM_AM::getT2SOImmVal(AbsImm) != -1;
10222239462Sdim  // Thumb1 only has 8-bit unsigned immediates.
10223239462Sdim  return AbsImm >= 0 && AbsImm <= 255;
10224223017Sdim}
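
// For example (hypothetical immediates): adding -0xFF00 is legal in ARM
// mode because |Imm| = 0xFF00 is an 8-bit value rotated by an even amount,
// and the add can be flipped to a sub; Thumb1 accepts only 0..255.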
10225223017Sdim
10226198090Srdivackystatic bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
10227195340Sed                                      bool isSEXTLoad, SDValue &Base,
10228195340Sed                                      SDValue &Offset, bool &isInc,
10229195340Sed                                      SelectionDAG &DAG) {
10230193323Sed  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
10231193323Sed    return false;
10232193323Sed
10233193323Sed  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
10234193323Sed    // AddressingMode 3
10235193323Sed    Base = Ptr->getOperand(0);
10236193323Sed    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10237193323Sed      int RHSC = (int)RHS->getZExtValue();
10238193323Sed      if (RHSC < 0 && RHSC > -256) {
10239195340Sed        assert(Ptr->getOpcode() == ISD::ADD);
10240193323Sed        isInc = false;
10241193323Sed        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
10242193323Sed        return true;
10243193323Sed      }
10244193323Sed    }
10245193323Sed    isInc = (Ptr->getOpcode() == ISD::ADD);
10246193323Sed    Offset = Ptr->getOperand(1);
10247193323Sed    return true;
10248193323Sed  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
10249193323Sed    // AddressingMode 2
10250193323Sed    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10251193323Sed      int RHSC = (int)RHS->getZExtValue();
10252193323Sed      if (RHSC < 0 && RHSC > -0x1000) {
10253195340Sed        assert(Ptr->getOpcode() == ISD::ADD);
10254193323Sed        isInc = false;
10255193323Sed        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
10256193323Sed        Base = Ptr->getOperand(0);
10257193323Sed        return true;
10258193323Sed      }
10259193323Sed    }
10260193323Sed
10261193323Sed    if (Ptr->getOpcode() == ISD::ADD) {
10262193323Sed      isInc = true;
10263226633Sdim      ARM_AM::ShiftOpc ShOpcVal =
10264226633Sdim        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
10265193323Sed      if (ShOpcVal != ARM_AM::no_shift) {
10266193323Sed        Base = Ptr->getOperand(1);
10267193323Sed        Offset = Ptr->getOperand(0);
10268193323Sed      } else {
10269193323Sed        Base = Ptr->getOperand(0);
10270193323Sed        Offset = Ptr->getOperand(1);
10271193323Sed      }
10272193323Sed      return true;
10273193323Sed    }
10274193323Sed
10275193323Sed    isInc = (Ptr->getOpcode() == ISD::ADD);
10276193323Sed    Base = Ptr->getOperand(0);
10277193323Sed    Offset = Ptr->getOperand(1);
10278193323Sed    return true;
10279193323Sed  }
10280193323Sed
10281199481Srdivacky  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
10282193323Sed  return false;
10283193323Sed}
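
// Illustrative match (hypothetical DAG): for an i32 load whose address is
// (add %base, -8), this sets Base = %base, Offset = constant 8 and
// isInc = false, which the callers below turn into a pre/post-indexed
// access such as "ldr rt, [rn, #-8]!".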
10284193323Sed
10285198090Srdivackystatic bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
10286195340Sed                                     bool isSEXTLoad, SDValue &Base,
10287195340Sed                                     SDValue &Offset, bool &isInc,
10288195340Sed                                     SelectionDAG &DAG) {
10289195340Sed  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
10290195340Sed    return false;
10291195340Sed
10292195340Sed  Base = Ptr->getOperand(0);
10293195340Sed  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
10294195340Sed    int RHSC = (int)RHS->getZExtValue();
10295195340Sed    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
10296195340Sed      assert(Ptr->getOpcode() == ISD::ADD);
10297195340Sed      isInc = false;
10298195340Sed      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
10299195340Sed      return true;
10300195340Sed    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
10301195340Sed      isInc = Ptr->getOpcode() == ISD::ADD;
10302195340Sed      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
10303195340Sed      return true;
10304195340Sed    }
10305195340Sed  }
10306195340Sed
10307195340Sed  return false;
10308195340Sed}
10309195340Sed
10310193323Sed/// getPreIndexedAddressParts - returns true by value, base pointer and
10311193323Sed/// offset pointer and addressing mode by reference if the node's address
10312193323Sed/// can be legally represented as a pre-indexed load / store address.
10313193323Sedbool
10314193323SedARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
10315193323Sed                                             SDValue &Offset,
10316193323Sed                                             ISD::MemIndexedMode &AM,
10317193323Sed                                             SelectionDAG &DAG) const {
10318195340Sed  if (Subtarget->isThumb1Only())
10319193323Sed    return false;
10320193323Sed
10321198090Srdivacky  EVT VT;
10322193323Sed  SDValue Ptr;
10323193323Sed  bool isSEXTLoad = false;
10324193323Sed  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10325193323Sed    Ptr = LD->getBasePtr();
10326193323Sed    VT  = LD->getMemoryVT();
10327193323Sed    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
10328193323Sed  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10329193323Sed    Ptr = ST->getBasePtr();
10330193323Sed    VT  = ST->getMemoryVT();
10331193323Sed  } else
10332193323Sed    return false;
10333193323Sed
10334193323Sed  bool isInc;
10335195340Sed  bool isLegal = false;
10336195340Sed  if (Subtarget->isThumb2())
10337195340Sed    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
10338195340Sed                                       Offset, isInc, DAG);
10339198090Srdivacky  else
10340195340Sed    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
10341195340Sed                                        Offset, isInc, DAG);
10342195340Sed  if (!isLegal)
10343195340Sed    return false;
10344195340Sed
10345195340Sed  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
10346195340Sed  return true;
10347193323Sed}
10348193323Sed
10349193323Sed/// getPostIndexedAddressParts - returns true by value, base pointer and
10350193323Sed/// offset pointer and addressing mode by reference if this node can be
10351193323Sed/// combined with a load / store to form a post-indexed load / store.
10352193323Sedbool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
10353193323Sed                                                   SDValue &Base,
10354193323Sed                                                   SDValue &Offset,
10355193323Sed                                                   ISD::MemIndexedMode &AM,
10356193323Sed                                                   SelectionDAG &DAG) const {
10357195340Sed  if (Subtarget->isThumb1Only())
10358193323Sed    return false;
10359193323Sed
10360198090Srdivacky  EVT VT;
10361193323Sed  SDValue Ptr;
10362193323Sed  bool isSEXTLoad = false;
10363193323Sed  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10364193323Sed    VT  = LD->getMemoryVT();
10365208599Srdivacky    Ptr = LD->getBasePtr();
10366193323Sed    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
10367193323Sed  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10368193323Sed    VT  = ST->getMemoryVT();
10369208599Srdivacky    Ptr = ST->getBasePtr();
10370193323Sed  } else
10371193323Sed    return false;
10372193323Sed
10373193323Sed  bool isInc;
10374195340Sed  bool isLegal = false;
10375195340Sed  if (Subtarget->isThumb2())
10376195340Sed    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
10377208599Srdivacky                                       isInc, DAG);
10378198090Srdivacky  else
10379195340Sed    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
10380195340Sed                                        isInc, DAG);
10381195340Sed  if (!isLegal)
10382195340Sed    return false;
10383195340Sed
10384208599Srdivacky  if (Ptr != Base) {
10385208599Srdivacky    // Swap base ptr and offset to catch more post-index load / store when
10386208599Srdivacky    // it's legal. In Thumb2 mode, offset must be an immediate.
10387208599Srdivacky    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
10388208599Srdivacky        !Subtarget->isThumb2())
10389208599Srdivacky      std::swap(Base, Offset);
10390208599Srdivacky
10391208599Srdivacky    // Post-indexed load / store update the base pointer.
10392208599Srdivacky    if (Ptr != Base)
10393208599Srdivacky      return false;
10394208599Srdivacky  }
10395208599Srdivacky
10396195340Sed  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
10397195340Sed  return true;
10398193323Sed}
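
// For example (hypothetical nodes): given a load from %p followed by
// (add %x, %p), the base and offset are swapped (ARM mode only) so that %p
// is the base, letting the pair fold into a post-indexed load that
// updates %p.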
10399193323Sed
10400276479Sdimvoid ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
10401276479Sdim                                                      APInt &KnownZero,
10402276479Sdim                                                      APInt &KnownOne,
10403276479Sdim                                                      const SelectionDAG &DAG,
10404276479Sdim                                                      unsigned Depth) const {
10405261991Sdim  unsigned BitWidth = KnownOne.getBitWidth();
10406261991Sdim  KnownZero = KnownOne = APInt(BitWidth, 0);
10407193323Sed  switch (Op.getOpcode()) {
10408193323Sed  default: break;
10409261991Sdim  case ARMISD::ADDC:
10410261991Sdim  case ARMISD::ADDE:
10411261991Sdim  case ARMISD::SUBC:
10412261991Sdim  case ARMISD::SUBE:
10413261991Sdim    // These nodes' second result is a boolean.
10414261991Sdim    if (Op.getResNo() == 0)
10415261991Sdim      break;
10416261991Sdim    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
10417261991Sdim    break;
10418193323Sed  case ARMISD::CMOV: {
10419193323Sed    // Bits are known zero/one if known on the LHS and RHS.
10420276479Sdim    DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
10421193323Sed    if (KnownZero == 0 && KnownOne == 0) return;
10422193323Sed
10423193323Sed    APInt KnownZeroRHS, KnownOneRHS;
10424276479Sdim    DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
10425193323Sed    KnownZero &= KnownZeroRHS;
10426193323Sed    KnownOne  &= KnownOneRHS;
10427193323Sed    return;
10428193323Sed  }
10429276479Sdim  case ISD::INTRINSIC_W_CHAIN: {
10430276479Sdim    ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
10431276479Sdim    Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
10432276479Sdim    switch (IntID) {
10433276479Sdim    default: return;
10434276479Sdim    case Intrinsic::arm_ldaex:
10435276479Sdim    case Intrinsic::arm_ldrex: {
10436276479Sdim      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
10437276479Sdim      unsigned MemBits = VT.getScalarType().getSizeInBits();
10438276479Sdim      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
10439276479Sdim      return;
10440276479Sdim    }
10441276479Sdim    }
10442193323Sed  }
10443276479Sdim  }
10444193323Sed}
10445193323Sed
10446193323Sed//===----------------------------------------------------------------------===//
10447193323Sed//                           ARM Inline Assembly Support
10448193323Sed//===----------------------------------------------------------------------===//
10449193323Sed
10450218893Sdimbool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
10451218893Sdim  // Looking for "rev" which is V6+.
10452218893Sdim  if (!Subtarget->hasV6Ops())
10453218893Sdim    return false;
10454218893Sdim
10455218893Sdim  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
10456218893Sdim  std::string AsmStr = IA->getAsmString();
10457218893Sdim  SmallVector<StringRef, 4> AsmPieces;
10458218893Sdim  SplitString(AsmStr, AsmPieces, ";\n");
10459218893Sdim
10460218893Sdim  switch (AsmPieces.size()) {
10461218893Sdim  default: return false;
10462218893Sdim  case 1:
10463218893Sdim    AsmStr = AsmPieces[0];
10464218893Sdim    AsmPieces.clear();
10465218893Sdim    SplitString(AsmStr, AsmPieces, " \t,");
10466218893Sdim
10467218893Sdim    // rev $0, $1
10468218893Sdim    if (AsmPieces.size() == 3 &&
10469218893Sdim        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
10470218893Sdim        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
10471226633Sdim      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
10472218893Sdim      if (Ty && Ty->getBitWidth() == 32)
10473218893Sdim        return IntrinsicLowering::LowerToByteSwap(CI);
10474218893Sdim    }
10475218893Sdim    break;
10476218893Sdim  }
10477218893Sdim
10478218893Sdim  return false;
10479218893Sdim}
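
// Illustrative input (hypothetical IR): a 32-bit inline-asm call such as
//   %out = call i32 asm "rev $0, $1", "=l,l"(i32 %in)
// is recognized above and replaced with @llvm.bswap.i32, so later passes
// can see through it.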
10480218893Sdim
10481193323Sed/// getConstraintType - Given a constraint letter, return the type of
10482193323Sed/// constraint it is for this target.
10483193323SedARMTargetLowering::ConstraintType
10484193323SedARMTargetLowering::getConstraintType(const std::string &Constraint) const {
10485193323Sed  if (Constraint.size() == 1) {
10486193323Sed    switch (Constraint[0]) {
10487193323Sed    default:  break;
10488193323Sed    case 'l': return C_RegisterClass;
10489193323Sed    case 'w': return C_RegisterClass;
10490224145Sdim    case 'h': return C_RegisterClass;
10491224145Sdim    case 'x': return C_RegisterClass;
10492224145Sdim    case 't': return C_RegisterClass;
10493224145Sdim    case 'j': return C_Other; // Constant for movw.
10494226633Sdim      // An address with a single base register. Due to the way we
10495226633Sdim      // currently handle addresses, it is the same as an 'r' memory constraint.
10496226633Sdim    case 'Q': return C_Memory;
10497193323Sed    }
10498224145Sdim  } else if (Constraint.size() == 2) {
10499224145Sdim    switch (Constraint[0]) {
10500224145Sdim    default: break;
10501224145Sdim    // All 'U+' constraints are addresses.
10502224145Sdim    case 'U': return C_Memory;
10503224145Sdim    }
10504193323Sed  }
10505193323Sed  return TargetLowering::getConstraintType(Constraint);
10506193323Sed}
10507193323Sed
10508218893Sdim/// Examine constraint type and operand type and determine a weight value.
10509218893Sdim/// This object must already have been set up with the operand type
10510218893Sdim/// and the current alternative constraint selected.
10511218893SdimTargetLowering::ConstraintWeight
10512218893SdimARMTargetLowering::getSingleConstraintMatchWeight(
10513218893Sdim    AsmOperandInfo &info, const char *constraint) const {
10514218893Sdim  ConstraintWeight weight = CW_Invalid;
10515218893Sdim  Value *CallOperandVal = info.CallOperandVal;
10516218893Sdim  // If we don't have a value, we can't do a match,
10517218893Sdim  // but allow it at the lowest weight.
10518276479Sdim  if (!CallOperandVal)
10519218893Sdim    return CW_Default;
10520226633Sdim  Type *type = CallOperandVal->getType();
10521218893Sdim  // Look at the constraint type.
10522218893Sdim  switch (*constraint) {
10523218893Sdim  default:
10524218893Sdim    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
10525218893Sdim    break;
10526218893Sdim  case 'l':
10527218893Sdim    if (type->isIntegerTy()) {
10528218893Sdim      if (Subtarget->isThumb())
10529218893Sdim        weight = CW_SpecificReg;
10530218893Sdim      else
10531218893Sdim        weight = CW_Register;
10532218893Sdim    }
10533218893Sdim    break;
10534218893Sdim  case 'w':
10535218893Sdim    if (type->isFloatingPointTy())
10536218893Sdim      weight = CW_Register;
10537218893Sdim    break;
10538218893Sdim  }
10539218893Sdim  return weight;
10540218893Sdim}
10541218893Sdim
10542224145Sdimtypedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
10543224145SdimRCPair
10544193323SedARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
10545261991Sdim                                                MVT VT) const {
10546193323Sed  if (Constraint.size() == 1) {
10547202375Srdivacky    // GCC ARM Constraint Letters
10548193323Sed    switch (Constraint[0]) {
10549224145Sdim    case 'l': // Low regs or general regs.
10550202375Srdivacky      if (Subtarget->isThumb())
10551239462Sdim        return RCPair(0U, &ARM::tGPRRegClass);
10552239462Sdim      return RCPair(0U, &ARM::GPRRegClass);
10553224145Sdim    case 'h': // High regs or no regs.
10554224145Sdim      if (Subtarget->isThumb())
10555239462Sdim        return RCPair(0U, &ARM::hGPRRegClass);
10556224145Sdim      break;
10557193323Sed    case 'r':
10558280031Sdim      if (Subtarget->isThumb1Only())
10559280031Sdim        return RCPair(0U, &ARM::tGPRRegClass);
10560239462Sdim      return RCPair(0U, &ARM::GPRRegClass);
10561193323Sed    case 'w':
10562261991Sdim      if (VT == MVT::Other)
10563261991Sdim        break;
10564193323Sed      if (VT == MVT::f32)
10565239462Sdim        return RCPair(0U, &ARM::SPRRegClass);
10566201360Srdivacky      if (VT.getSizeInBits() == 64)
10567239462Sdim        return RCPair(0U, &ARM::DPRRegClass);
10568200581Srdivacky      if (VT.getSizeInBits() == 128)
10569239462Sdim        return RCPair(0U, &ARM::QPRRegClass);
10570193323Sed      break;
10571224145Sdim    case 'x':
10572261991Sdim      if (VT == MVT::Other)
10573261991Sdim        break;
10574224145Sdim      if (VT == MVT::f32)
10575239462Sdim        return RCPair(0U, &ARM::SPR_8RegClass);
10576224145Sdim      if (VT.getSizeInBits() == 64)
10577239462Sdim        return RCPair(0U, &ARM::DPR_8RegClass);
10578224145Sdim      if (VT.getSizeInBits() == 128)
10579239462Sdim        return RCPair(0U, &ARM::QPR_8RegClass);
10580224145Sdim      break;
10581224145Sdim    case 't':
10582224145Sdim      if (VT == MVT::f32)
10583239462Sdim        return RCPair(0U, &ARM::SPRRegClass);
10584224145Sdim      break;
10585193323Sed    }
10586193323Sed  }
10587205218Srdivacky  if (StringRef("{cc}").equals_lower(Constraint))
10588239462Sdim    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
10589205218Srdivacky
10590193323Sed  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
10591193323Sed}
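// For instance (hypothetical operands), a 'w' operand of type f32 such as
//   asm("vadd.f32 %0, %1, %2" : "=w"(a) : "w"(b), "w"(c));
// is allocated from SPR above, while the same constraint with a 64-bit or
// 128-bit NEON type is allocated from DPR or QPR respectively.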
10592193323Sed
10593193323Sed/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
10594193323Sed/// vector.  If it is invalid, don't add anything to Ops.
10595193323Sedvoid ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
10596223017Sdim                                                     std::string &Constraint,
10597193323Sed                                                     std::vector<SDValue> &Ops,
10598193323Sed                                                     SelectionDAG &DAG) const {
10599276479Sdim  SDValue Result;
10600193323Sed
10601223017Sdim  // Currently only support length 1 constraints.
10602223017Sdim  if (Constraint.length() != 1) return;
10603223017Sdim
10604223017Sdim  char ConstraintLetter = Constraint[0];
10605223017Sdim  switch (ConstraintLetter) {
10606193323Sed  default: break;
10607224145Sdim  case 'j':
10608193323Sed  case 'I': case 'J': case 'K': case 'L':
10609193323Sed  case 'M': case 'N': case 'O':
10610193323Sed    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
10611193323Sed    if (!C)
10612193323Sed      return;
10613193323Sed
10614193323Sed    int64_t CVal64 = C->getSExtValue();
10615193323Sed    int CVal = (int) CVal64;
10616193323Sed    // None of these constraints allow values larger than 32 bits.  Check
10617193323Sed    // that the value fits in an int.
10618193323Sed    if (CVal != CVal64)
10619193323Sed      return;
10620193323Sed
10621223017Sdim    switch (ConstraintLetter) {
10622224145Sdim      case 'j':
10623226633Sdim        // Constant suitable for movw, must be between 0 and
10624226633Sdim        // 65535.
10625226633Sdim        if (Subtarget->hasV6T2Ops())
10626226633Sdim          if (CVal >= 0 && CVal <= 65535)
10627226633Sdim            break;
10628226633Sdim        return;
10629193323Sed      case 'I':
10630198090Srdivacky        if (Subtarget->isThumb1Only()) {
10631198090Srdivacky          // This must be a constant between 0 and 255, for ADD
10632198090Srdivacky          // immediates.
10633193323Sed          if (CVal >= 0 && CVal <= 255)
10634193323Sed            break;
10635198090Srdivacky        } else if (Subtarget->isThumb2()) {
10636198090Srdivacky          // A constant that can be used as an immediate value in a
10637198090Srdivacky          // data-processing instruction.
10638198090Srdivacky          if (ARM_AM::getT2SOImmVal(CVal) != -1)
10639198090Srdivacky            break;
10640193323Sed        } else {
10641193323Sed          // A constant that can be used as an immediate value in a
10642193323Sed          // data-processing instruction.
10643193323Sed          if (ARM_AM::getSOImmVal(CVal) != -1)
10644193323Sed            break;
10645193323Sed        }
10646193323Sed        return;
10647193323Sed
10648193323Sed      case 'J':
10649198090Srdivacky        if (Subtarget->isThumb()) {  // FIXME thumb2
10650193323Sed          // This must be a constant between -255 and -1, for negated ADD
10651193323Sed          // immediates. This can be used in GCC with an "n" modifier that
10652193323Sed          // prints the negated value, for use with SUB instructions. It is
10653193323Sed          // not useful otherwise but is implemented for compatibility.
10654193323Sed          if (CVal >= -255 && CVal <= -1)
10655193323Sed            break;
10656193323Sed        } else {
10657193323Sed          // This must be a constant between -4095 and 4095. It is not clear
10658193323Sed          // what this constraint is intended for. Implemented for
10659193323Sed          // compatibility with GCC.
10660193323Sed          if (CVal >= -4095 && CVal <= 4095)
10661193323Sed            break;
10662193323Sed        }
10663193323Sed        return;
10664193323Sed
10665193323Sed      case 'K':
10666198090Srdivacky        if (Subtarget->isThumb1Only()) {
10667193323Sed          // A 32-bit value where only one byte has a nonzero value. Exclude
10668193323Sed          // zero to match GCC. This constraint is used by GCC internally for
10669193323Sed          // constants that can be loaded with a move/shift combination.
10670193323Sed          // It is not useful otherwise but is implemented for compatibility.
10671193323Sed          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
10672193323Sed            break;
10673198090Srdivacky        } else if (Subtarget->isThumb2()) {
10674198090Srdivacky          // A constant whose bitwise inverse can be used as an immediate
10675198090Srdivacky          // value in a data-processing instruction. This can be used in GCC
10676198090Srdivacky          // with a "B" modifier that prints the inverted value, for use with
10677198090Srdivacky          // BIC and MVN instructions. It is not useful otherwise but is
10678198090Srdivacky          // implemented for compatibility.
10679198090Srdivacky          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
10680198090Srdivacky            break;
10681193323Sed        } else {
10682193323Sed          // A constant whose bitwise inverse can be used as an immediate
10683193323Sed          // value in a data-processing instruction. This can be used in GCC
10684193323Sed          // with a "B" modifier that prints the inverted value, for use with
10685193323Sed          // BIC and MVN instructions. It is not useful otherwise but is
10686193323Sed          // implemented for compatibility.
10687193323Sed          if (ARM_AM::getSOImmVal(~CVal) != -1)
10688193323Sed            break;
10689193323Sed        }
10690193323Sed        return;
10691193323Sed
10692193323Sed      case 'L':
10693198090Srdivacky        if (Subtarget->isThumb1Only()) {
10694193323Sed          // This must be a constant between -7 and 7,
10695193323Sed          // for 3-operand ADD/SUB immediate instructions.
10696193323Sed          if (CVal >= -7 && CVal <= 7)
10697193323Sed            break;
10698198090Srdivacky        } else if (Subtarget->isThumb2()) {
10699198090Srdivacky          // A constant whose negation can be used as an immediate value in a
10700198090Srdivacky          // data-processing instruction. This can be used in GCC with an "n"
10701198090Srdivacky          // modifier that prints the negated value, for use with SUB
10702198090Srdivacky          // instructions. It is not useful otherwise but is implemented for
10703198090Srdivacky          // compatibility.
10704198090Srdivacky          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
10705198090Srdivacky            break;
10706193323Sed        } else {
10707193323Sed          // A constant whose negation can be used as an immediate value in a
10708193323Sed          // data-processing instruction. This can be used in GCC with an "n"
10709193323Sed          // modifier that prints the negated value, for use with SUB
10710193323Sed          // instructions. It is not useful otherwise but is implemented for
10711193323Sed          // compatibility.
10712193323Sed          if (ARM_AM::getSOImmVal(-CVal) != -1)
10713193323Sed            break;
10714193323Sed        }
10715193323Sed        return;
10716193323Sed
10717193323Sed      case 'M':
10718198090Srdivacky        if (Subtarget->isThumb()) { // FIXME thumb2
10719193323Sed          // This must be a multiple of 4 between 0 and 1020, for
10720193323Sed          // ADD sp + immediate.
10721193323Sed          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
10722193323Sed            break;
10723193323Sed        } else {
10724193323Sed          // A power of two or a constant between 0 and 32.  This is used in
10725193323Sed          // GCC for the shift amount on shifted register operands, but it is
10726193323Sed          // useful in general for any shift amounts.
10727193323Sed          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
10728193323Sed            break;
10729193323Sed        }
10730193323Sed        return;
10731193323Sed
10732193323Sed      case 'N':
10733198090Srdivacky        if (Subtarget->isThumb()) {  // FIXME thumb2
10734193323Sed          // This must be a constant between 0 and 31, for shift amounts.
10735193323Sed          if (CVal >= 0 && CVal <= 31)
10736193323Sed            break;
10737193323Sed        }
10738193323Sed        return;
10739193323Sed
10740193323Sed      case 'O':
10741198090Srdivacky        if (Subtarget->isThumb()) {  // FIXME thumb2
10742193323Sed          // This must be a multiple of 4 between -508 and 508, for
10743193323Sed          // ADD/SUB sp = sp + immediate.
10744193323Sed          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
10745193323Sed            break;
10746193323Sed        }
10747193323Sed        return;
10748193323Sed    }
10749193323Sed    Result = DAG.getTargetConstant(CVal, Op.getValueType());
10750193323Sed    break;
10751193323Sed  }
10752193323Sed
10753193323Sed  if (Result.getNode()) {
10754193323Sed    Ops.push_back(Result);
10755193323Sed    return;
10756193323Sed  }
10757210299Sed  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10758193323Sed}
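// Worked example (hypothetical values) for constraint 'I' in ARM mode:
// 0xFF00 is 0xFF rotated into position, so getSOImmVal(0xFF00) != -1 and a
// target constant is produced; 0x12345 spans more than 8 significant bits,
// cannot be encoded as a rotated 8-bit value, and is rejected, so nothing
// is added to Ops.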
10759198090Srdivacky
10760261991SdimSDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
10761261991Sdim  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
10762261991Sdim  unsigned Opcode = Op->getOpcode();
10763261991Sdim  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
10764280031Sdim         "Invalid opcode for Div/Rem lowering");
10765261991Sdim  bool isSigned = (Opcode == ISD::SDIVREM);
10766261991Sdim  EVT VT = Op->getValueType(0);
10767261991Sdim  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
10768261991Sdim
10769261991Sdim  RTLIB::Libcall LC;
10770261991Sdim  switch (VT.getSimpleVT().SimpleTy) {
10771261991Sdim  default: llvm_unreachable("Unexpected request for libcall!");
10772280031Sdim  case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
10773280031Sdim  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
10774280031Sdim  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
10775280031Sdim  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
10776261991Sdim  }
10777261991Sdim
10778261991Sdim  SDValue InChain = DAG.getEntryNode();
10779261991Sdim
10780261991Sdim  TargetLowering::ArgListTy Args;
10781261991Sdim  TargetLowering::ArgListEntry Entry;
10782261991Sdim  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
10783261991Sdim    EVT ArgVT = Op->getOperand(i).getValueType();
10784261991Sdim    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
10785261991Sdim    Entry.Node = Op->getOperand(i);
10786261991Sdim    Entry.Ty = ArgTy;
10787261991Sdim    Entry.isSExt = isSigned;
10788261991Sdim    Entry.isZExt = !isSigned;
10789261991Sdim    Args.push_back(Entry);
10790261991Sdim  }
10791261991Sdim
10792261991Sdim  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
10793261991Sdim                                         getPointerTy());
10794261991Sdim
10795280031Sdim  Type *RetTy = StructType::get(Ty, Ty, nullptr);
10796261991Sdim
10797261991Sdim  SDLoc dl(Op);
10798276479Sdim  TargetLowering::CallLoweringInfo CLI(DAG);
10799276479Sdim  CLI.setDebugLoc(dl).setChain(InChain)
10800276479Sdim    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
10801276479Sdim    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
10802276479Sdim
10803261991Sdim  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
10804261991Sdim  return CallInfo.first;
10805261991Sdim}
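// Note: the AEABI runtime functions selected above (__aeabi_idivmod,
// __aeabi_uidivmod and their 64-bit variants) return the quotient and
// remainder together (in {r0, r1}, or {r0-r3} for the 64-bit forms), which
// is why the call is given the two-element aggregate return type built
// above.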
10806261991Sdim
10807276479SdimSDValue
10808276479SdimARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
10809276479Sdim  assert(Subtarget->isTargetWindows() && "unsupported target platform");
10810276479Sdim  SDLoc DL(Op);
10811276479Sdim
10812276479Sdim  // Get the inputs.
10813276479Sdim  SDValue Chain = Op.getOperand(0);
10814276479Sdim  SDValue Size  = Op.getOperand(1);
10815276479Sdim
10816276479Sdim  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
10817276479Sdim                              DAG.getConstant(2, MVT::i32));
10818276479Sdim
10819276479Sdim  SDValue Flag;
10820276479Sdim  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
10821276479Sdim  Flag = Chain.getValue(1);
10822276479Sdim
10823276479Sdim  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10824276479Sdim  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
10825276479Sdim
10826276479Sdim  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
10827276479Sdim  Chain = NewSP.getValue(1);
10828276479Sdim
10829276479Sdim  SDValue Ops[2] = { NewSP, Chain };
10830276479Sdim  return DAG.getMergeValues(Ops, DL);
10831276479Sdim}
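// Note: the Windows-on-ARM stack probe helper behind ARMISD::WIN__CHKSTK
// expects the allocation size in 4-byte words in r4 (hence the SRL-by-2
// above); once the probe has run, the adjusted stack pointer is read back
// from SP as the result of the DYNAMIC_STACKALLOC.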
10832276479Sdim
10833280031SdimSDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10834280031Sdim  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
10835280031Sdim         "Unexpected type for custom-lowering FP_EXTEND");
10836280031Sdim
10837280031Sdim  RTLIB::Libcall LC;
10838280031Sdim  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
10839280031Sdim
10840280031Sdim  SDValue SrcVal = Op.getOperand(0);
10841280031Sdim  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
10842280031Sdim                     /*isSigned*/ false, SDLoc(Op)).first;
10843280031Sdim}
10844280031Sdim
10845280031SdimSDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10846280031Sdim  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
10847280031Sdim         Subtarget->isFPOnlySP() &&
10848280031Sdim         "Unexpected type for custom-lowering FP_ROUND");
10849280031Sdim
10850280031Sdim  RTLIB::Libcall LC;
10851280031Sdim  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
10852280031Sdim
10853280031Sdim  SDValue SrcVal = Op.getOperand(0);
10854280031Sdim  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
10855280031Sdim                     /*isSigned*/ false, SDLoc(Op)).first;
10856280031Sdim}
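// On FP-only-SP subtargets (single-precision-only FPUs such as Cortex-M4's
// FPv4-SP), f64 is not a legal type, so both hooks above fall back to the
// soft-float conversion libcalls (e.g. __aeabi_f2d / __aeabi_d2f under the
// ARM EABI).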
10857280031Sdim
10858198090Srdivackybool
10859198090SrdivackyARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
10860198090Srdivacky  // The ARM target isn't yet aware of offsets.
10861198090Srdivacky  return false;
10862198090Srdivacky}
10863198892Srdivacky
10864212904Sdimbool ARM::isBitFieldInvertedMask(unsigned v) {
10865212904Sdim  if (v == 0xffffffff)
10866261991Sdim    return false;
10867261991Sdim
10868212904Sdim  // there can be 1's on either or both "outsides", all the "inside"
10869212904Sdim  // bits must be 0's
10870261991Sdim  unsigned TO = CountTrailingOnes_32(v);
10871261991Sdim  unsigned LO = CountLeadingOnes_32(v);
10872261991Sdim  v = (v >> TO) << TO;
10873261991Sdim  v = (v << LO) >> LO;
10874261991Sdim  return v == 0;
10875212904Sdim}
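// Worked example: v = 0xFF0000FF has TO = 8 trailing ones and LO = 8
// leading ones; stripping both leaves 0, so this is a valid inverted
// bit-field mask (the kind of mask BFC/BFI can implement). v = 0xFF00FF00
// has TO = 0 and LO = 8, and stripping the leading ones leaves 0x0000FF00,
// so it is rejected.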
10876212904Sdim
10877198892Srdivacky/// isFPImmLegal - Returns true if the target can instruction select the
10878198892Srdivacky/// specified FP immediate natively. If false, the legalizer will
10879198892Srdivacky/// materialize the FP immediate as a load from a constant pool.
10880198892Srdivackybool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
10881198892Srdivacky  if (!Subtarget->hasVFP3())
10882198892Srdivacky    return false;
10883198892Srdivacky  if (VT == MVT::f32)
10884226633Sdim    return ARM_AM::getFP32Imm(Imm) != -1;
10885280031Sdim  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
10886226633Sdim    return ARM_AM::getFP64Imm(Imm) != -1;
10887198892Srdivacky  return false;
10888198892Srdivacky}
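// For example, with VFP3 the values 1.0, -0.5 and 31.0 fit the VMOV
// immediate encoding (roughly (-1)^s * (16 + imm4)/16 * 2^exp with a small
// exponent range) and can be materialized directly, whereas a value such
// as 0.1 is not representable and still comes from the constant pool.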
10889218893Sdim
10890218893Sdim/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
10891218893Sdim/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
10892218893Sdim/// specified in the intrinsic calls.
10893218893Sdimbool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
10894218893Sdim                                           const CallInst &I,
10895218893Sdim                                           unsigned Intrinsic) const {
10896218893Sdim  switch (Intrinsic) {
10897218893Sdim  case Intrinsic::arm_neon_vld1:
10898218893Sdim  case Intrinsic::arm_neon_vld2:
10899218893Sdim  case Intrinsic::arm_neon_vld3:
10900218893Sdim  case Intrinsic::arm_neon_vld4:
10901218893Sdim  case Intrinsic::arm_neon_vld2lane:
10902218893Sdim  case Intrinsic::arm_neon_vld3lane:
10903218893Sdim  case Intrinsic::arm_neon_vld4lane: {
10904218893Sdim    Info.opc = ISD::INTRINSIC_W_CHAIN;
10905218893Sdim    // Conservatively set memVT to the entire set of vectors loaded.
10906243830Sdim    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
10907218893Sdim    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
10908218893Sdim    Info.ptrVal = I.getArgOperand(0);
10909218893Sdim    Info.offset = 0;
10910218893Sdim    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
10911218893Sdim    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
10912218893Sdim    Info.vol = false; // volatile loads with NEON intrinsics not supported
10913218893Sdim    Info.readMem = true;
10914218893Sdim    Info.writeMem = false;
10915218893Sdim    return true;
10916218893Sdim  }
10917218893Sdim  case Intrinsic::arm_neon_vst1:
10918218893Sdim  case Intrinsic::arm_neon_vst2:
10919218893Sdim  case Intrinsic::arm_neon_vst3:
10920218893Sdim  case Intrinsic::arm_neon_vst4:
10921218893Sdim  case Intrinsic::arm_neon_vst2lane:
10922218893Sdim  case Intrinsic::arm_neon_vst3lane:
10923218893Sdim  case Intrinsic::arm_neon_vst4lane: {
10924218893Sdim    Info.opc = ISD::INTRINSIC_VOID;
10925218893Sdim    // Conservatively set memVT to the entire set of vectors stored.
10926218893Sdim    unsigned NumElts = 0;
10927218893Sdim    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
10928226633Sdim      Type *ArgTy = I.getArgOperand(ArgI)->getType();
10929218893Sdim      if (!ArgTy->isVectorTy())
10930218893Sdim        break;
10931243830Sdim      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
10932218893Sdim    }
10933218893Sdim    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
10934218893Sdim    Info.ptrVal = I.getArgOperand(0);
10935218893Sdim    Info.offset = 0;
10936218893Sdim    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
10937218893Sdim    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
10938218893Sdim    Info.vol = false; // volatile stores with NEON intrinsics not supported
10939218893Sdim    Info.readMem = false;
10940218893Sdim    Info.writeMem = true;
10941218893Sdim    return true;
10942218893Sdim  }
10943276479Sdim  case Intrinsic::arm_ldaex:
10944261991Sdim  case Intrinsic::arm_ldrex: {
10945261991Sdim    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
10946261991Sdim    Info.opc = ISD::INTRINSIC_W_CHAIN;
10947261991Sdim    Info.memVT = MVT::getVT(PtrTy->getElementType());
10948261991Sdim    Info.ptrVal = I.getArgOperand(0);
10949261991Sdim    Info.offset = 0;
10950261991Sdim    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
10951261991Sdim    Info.vol = true;
10952261991Sdim    Info.readMem = true;
10953261991Sdim    Info.writeMem = false;
10954261991Sdim    return true;
10955261991Sdim  }
10956276479Sdim  case Intrinsic::arm_stlex:
10957261991Sdim  case Intrinsic::arm_strex: {
10958261991Sdim    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
10959261991Sdim    Info.opc = ISD::INTRINSIC_W_CHAIN;
10960261991Sdim    Info.memVT = MVT::getVT(PtrTy->getElementType());
10961261991Sdim    Info.ptrVal = I.getArgOperand(1);
10962261991Sdim    Info.offset = 0;
10963261991Sdim    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
10964261991Sdim    Info.vol = true;
10965261991Sdim    Info.readMem = false;
10966261991Sdim    Info.writeMem = true;
10967261991Sdim    return true;
10968261991Sdim  }
10969276479Sdim  case Intrinsic::arm_stlexd:
10970223017Sdim  case Intrinsic::arm_strexd: {
10971223017Sdim    Info.opc = ISD::INTRINSIC_W_CHAIN;
10972223017Sdim    Info.memVT = MVT::i64;
10973223017Sdim    Info.ptrVal = I.getArgOperand(2);
10974223017Sdim    Info.offset = 0;
10975223017Sdim    Info.align = 8;
10976224145Sdim    Info.vol = true;
10977223017Sdim    Info.readMem = false;
10978223017Sdim    Info.writeMem = true;
10979223017Sdim    return true;
10980223017Sdim  }
10981276479Sdim  case Intrinsic::arm_ldaexd:
10982223017Sdim  case Intrinsic::arm_ldrexd: {
10983223017Sdim    Info.opc = ISD::INTRINSIC_W_CHAIN;
10984223017Sdim    Info.memVT = MVT::i64;
10985223017Sdim    Info.ptrVal = I.getArgOperand(0);
10986223017Sdim    Info.offset = 0;
10987223017Sdim    Info.align = 8;
10988224145Sdim    Info.vol = true;
10989223017Sdim    Info.readMem = true;
10990223017Sdim    Info.writeMem = false;
10991223017Sdim    return true;
10992223017Sdim  }
10993218893Sdim  default:
10994218893Sdim    break;
10995218893Sdim  }
10996218893Sdim
10997218893Sdim  return false;
10998218893Sdim}
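// Worked example (hypothetical call): for
//   %r = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %p, i32 2)
// the result type occupies 16 bytes, so NumElts = 2 and Info.memVT is
// v2i64, conservatively covering both loaded vectors; Info.align = 2 is
// taken from the trailing alignment argument.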
10999276479Sdim
11000276479Sdim/// \brief Returns true if it is beneficial to convert a load of a constant
11001276479Sdim/// to just the constant itself.
11002276479Sdimbool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
11003276479Sdim                                                          Type *Ty) const {
11004276479Sdim  assert(Ty->isIntegerTy());
11005276479Sdim
11006276479Sdim  unsigned Bits = Ty->getPrimitiveSizeInBits();
11007276479Sdim  if (Bits == 0 || Bits > 32)
11008276479Sdim    return false;
11009276479Sdim  return true;
11010276479Sdim}
11011276479Sdim
11012280031Sdimbool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
11013280031Sdim
11014280031SdimInstruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
11015280031Sdim                                        ARM_MB::MemBOpt Domain) const {
11016280031Sdim  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11017280031Sdim
11018280031Sdim  // First, if the target has no DMB, see what fallback we can use.
11019280031Sdim  if (!Subtarget->hasDataBarrier()) {
11020280031Sdim    // Some ARMv6 CPUs can support data barriers with an MCR instruction.
11021280031Sdim    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
11022280031Sdim    // here.
11023280031Sdim    if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
11024280031Sdim      Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
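      // The operands below encode "mcr p15, #0, rX, c7, c10, #5", the
      // CP15 data memory barrier operation.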
11025280031Sdim      Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
11026280031Sdim                        Builder.getInt32(0), Builder.getInt32(7),
11027280031Sdim                        Builder.getInt32(10), Builder.getInt32(5)};
11028280031Sdim      return Builder.CreateCall(MCR, args);
11029280031Sdim    } else {
11030280031Sdim      // Instead of using barriers, atomic accesses on these subtargets use
11031280031Sdim      // libcalls.
11032280031Sdim      llvm_unreachable("makeDMB on a target so old that it has no barriers");
11033280031Sdim    }
11034280031Sdim  } else {
11035280031Sdim    Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
11036280031Sdim    // Only a full system barrier exists in the M-class architectures.
11037280031Sdim    Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
11038280031Sdim    Constant *CDomain = Builder.getInt32(Domain);
11039280031Sdim    return Builder.CreateCall(DMB, CDomain);
11040276479Sdim  }
11041280031Sdim}
11042276479Sdim
11043280031Sdim// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11044280031SdimInstruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11045280031Sdim                                         AtomicOrdering Ord, bool IsStore,
11046280031Sdim                                         bool IsLoad) const {
11047280031Sdim  if (!getInsertFencesForAtomic())
11048280031Sdim    return nullptr;
11049280031Sdim
11050280031Sdim  switch (Ord) {
11051280031Sdim  case NotAtomic:
11052280031Sdim  case Unordered:
11053280031Sdim    llvm_unreachable("Invalid fence: unordered/non-atomic");
11054280031Sdim  case Monotonic:
11055280031Sdim  case Acquire:
11056280031Sdim    return nullptr; // Nothing to do
11057280031Sdim  case SequentiallyConsistent:
11058280031Sdim    if (!IsStore)
11059280031Sdim      return nullptr; // Nothing to do
11060280031Sdim    /*FALLTHROUGH*/
11061280031Sdim  case Release:
11062280031Sdim  case AcquireRelease:
11063280031Sdim    // FIXME: add a comment with a link to documentation justifying this.
11064280031Sdim    if (Subtarget->isSwift())
11065280031Sdim      return makeDMB(Builder, ARM_MB::ISHST);
11066280031Sdim    else
11067280031Sdim      return makeDMB(Builder, ARM_MB::ISH);
11068280031Sdim  }
11069280031Sdim  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
11070276479Sdim}
11071276479Sdim
11072280031SdimInstruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11073280031Sdim                                          AtomicOrdering Ord, bool IsStore,
11074280031Sdim                                          bool IsLoad) const {
11075280031Sdim  if (!getInsertFencesForAtomic())
11076280031Sdim    return nullptr;
11077280031Sdim
11078280031Sdim  switch (Ord) {
11079280031Sdim  case NotAtomic:
11080280031Sdim  case Unordered:
11081280031Sdim    llvm_unreachable("Invalid fence: unordered/non-atomic");
11082280031Sdim  case Monotonic:
11083280031Sdim  case Release:
11084280031Sdim    return nullptr; // Nothing to do
11085280031Sdim  case Acquire:
11086280031Sdim  case AcquireRelease:
11087280031Sdim  case SequentiallyConsistent:
11088280031Sdim    return makeDMB(Builder, ARM_MB::ISH);
11089280031Sdim  }
11090280031Sdim  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
11091280031Sdim}
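// Taken together, the two hooks implement the mapping from the page cited
// above: e.g. a seq_cst store becomes "dmb; str; dmb" and a seq_cst load
// becomes "ldr; dmb" on subtargets that rely on fences rather than
// acquire/release instructions.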
11092280031Sdim
11093280031Sdim// Loads and stores less than 64 bits are already atomic; ones above that
11094280031Sdim// are doomed anyway, so defer to the default libcall and blame the OS when
11095280031Sdim// things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
11096280031Sdim// anything for those.
11097280031Sdimbool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
11098280031Sdim  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
11099280031Sdim  return (Size == 64) && !Subtarget->isMClass();
11100280031Sdim}
11101280031Sdim
11102280031Sdim// Loads and stores less than 64 bits are already atomic; ones above that
11103280031Sdim// are doomed anyway, so defer to the default libcall and blame the OS when
11104280031Sdim// things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
11105280031Sdim// anything for those.
11106280031Sdim// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
11107280031Sdim// guarantee, see DDI0406C ARM architecture reference manual,
11108280031Sdim// sections A8.8.72-74 LDRD)
11109280031Sdimbool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
11110280031Sdim  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
11111280031Sdim  return (Size == 64) && !Subtarget->isMClass();
11112280031Sdim}
11113280031Sdim
11114280031Sdim// For the real atomic operations, we have ldrex/strex up to 32 bits,
11115280031Sdim// and up to 64 bits on the non-M profiles
11116280031Sdimbool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
11117280031Sdim  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
11118280031Sdim  return Size <= (Subtarget->isMClass() ? 32U : 64U);
11119280031Sdim}
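// For example, a 64-bit atomicrmw add is expanded to an ldrexd/strexd loop
// on A- and R-profile cores, while on M-profile (which lacks
// ldrexd/strexd) it is left to become an atomic libcall.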
11120280031Sdim
11121280031Sdim// This has so far only been implemented for MachO.
11122280031Sdimbool ARMTargetLowering::useLoadStackGuardNode() const {
11123280031Sdim  return Subtarget->isTargetMachO();
11124280031Sdim}
11125280031Sdim
11126280031Sdimbool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
11127280031Sdim                                                  unsigned &Cost) const {
11128280031Sdim  // If we do not have NEON, vector types are not natively supported.
11129280031Sdim  if (!Subtarget->hasNEON())
11130280031Sdim    return false;
11131280031Sdim
11132280031Sdim  // Floating-point values and vector values map to the same register file.
11133280031Sdim  // Therefore, although we could do a store extract of a vector type, it is
11134280031Sdim  // better to leave it as a float, as we have more freedom in the addressing
11135280031Sdim  // mode for those.
11136280031Sdim  if (VectorTy->isFPOrFPVectorTy())
11137280031Sdim    return false;
11138280031Sdim
11139280031Sdim  // If the index is unknown at compile time, this is very expensive to lower
11140280031Sdim  // and it is not possible to combine the store with the extract.
11141280031Sdim  if (!isa<ConstantInt>(Idx))
11142280031Sdim    return false;
11143280031Sdim
11144280031Sdim  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
11145280031Sdim  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
11146280031Sdim  // We can do a store + vector extract on any vector that fits perfectly in a D
11147280031Sdim  // or Q register.
11148280031Sdim  if (BitWidth == 64 || BitWidth == 128) {
11149280031Sdim    Cost = 0;
11150280031Sdim    return true;
11151280031Sdim  }
11152280031Sdim  return false;
11153280031Sdim}
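// For example, storing a constant lane of a <4 x i32> value, i.e.
// store(extractelement <4 x i32> %v, i32 1), can be selected as a single
// lane-indexed VST1.32, so the hook reports the combine as free (Cost = 0).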
11154280031Sdim
11155276479SdimValue *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
11156276479Sdim                                         AtomicOrdering Ord) const {
11157276479Sdim  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11158276479Sdim  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
11159280031Sdim  bool IsAcquire = isAtLeastAcquire(Ord);
11160276479Sdim
11161276479Sdim  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
11162276479Sdim  // intrinsic must return {i32, i32} and we have to recombine them into a
11163276479Sdim  // single i64 here.
11164276479Sdim  if (ValTy->getPrimitiveSizeInBits() == 64) {
11165276479Sdim    Intrinsic::ID Int =
11166276479Sdim        IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
11167276479Sdim    Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
11168276479Sdim
11169276479Sdim    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11170276479Sdim    Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
11171276479Sdim
11172276479Sdim    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
11173276479Sdim    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
11174276479Sdim    if (!Subtarget->isLittle())
11175276479Sdim      std::swap(Lo, Hi);
11176276479Sdim    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
11177276479Sdim    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
11178276479Sdim    return Builder.CreateOr(
11179276479Sdim        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
11180276479Sdim  }
11181276479Sdim
11182276479Sdim  Type *Tys[] = { Addr->getType() };
11183276479Sdim  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
11184276479Sdim  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
11185276479Sdim
11186276479Sdim  return Builder.CreateTruncOrBitCast(
11187276479Sdim      Builder.CreateCall(Ldrex, Addr),
11188276479Sdim      cast<PointerType>(Addr->getType())->getElementType());
11189276479Sdim}
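// For a 64-bit acquire load, the sequence built above is, roughly:
//   %lohi  = call { i32, i32 } @llvm.arm.ldaexd(i8* %addr)
//   %lo64  = zext i32 %lo to i64
//   %hi64  = zext i32 %hi to i64
//   %val64 = or of %lo64 with %hi64 shifted left by 32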
11190276479Sdim
11191276479SdimValue *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
11192276479Sdim                                               Value *Addr,
11193276479Sdim                                               AtomicOrdering Ord) const {
11194276479Sdim  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11195280031Sdim  bool IsRelease = isAtLeastRelease(Ord);
11196276479Sdim
11197276479Sdim  // Since the intrinsics must have legal type, the i64 intrinsics take two
11198276479Sdim  // parameters: "i32, i32". We must marshal Val into the appropriate form
11199276479Sdim  // before the call.
11200276479Sdim  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
11201276479Sdim    Intrinsic::ID Int =
11202276479Sdim        IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
11203276479Sdim    Function *Strex = Intrinsic::getDeclaration(M, Int);
11204276479Sdim    Type *Int32Ty = Type::getInt32Ty(M->getContext());
11205276479Sdim
11206276479Sdim    Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
11207276479Sdim    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
11208276479Sdim    if (!Subtarget->isLittle())
11209276479Sdim      std::swap(Lo, Hi);
11210276479Sdim    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11211276479Sdim    return Builder.CreateCall3(Strex, Lo, Hi, Addr);
11212276479Sdim  }
11213276479Sdim
11214276479Sdim  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
11215276479Sdim  Type *Tys[] = { Addr->getType() };
11216276479Sdim  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
11217276479Sdim
11218276479Sdim  return Builder.CreateCall2(
11219276479Sdim      Strex, Builder.CreateZExtOrBitCast(
11220276479Sdim                 Val, Strex->getFunctionType()->getParamType(0)),
11221276479Sdim      Addr);
11222276479Sdim}
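// The value returned here is the i32 status flag of strex/stlex: 0 if the
// store succeeded, 1 if the exclusive reservation was lost and the
// enclosing cmpxchg/atomicrmw loop must retry.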
11223276479Sdim
11224276479Sdimenum HABaseType {
11225276479Sdim  HA_UNKNOWN = 0,
11226276479Sdim  HA_FLOAT,
11227276479Sdim  HA_DOUBLE,
11228276479Sdim  HA_VECT64,
11229276479Sdim  HA_VECT128
11230276479Sdim};
11231276479Sdim
11232276479Sdimstatic bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
11233276479Sdim                                   uint64_t &Members) {
11234276479Sdim  if (const StructType *ST = dyn_cast<StructType>(Ty)) {
11235276479Sdim    for (unsigned i = 0; i < ST->getNumElements(); ++i) {
11236276479Sdim      uint64_t SubMembers = 0;
11237276479Sdim      if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
11238276479Sdim        return false;
11239276479Sdim      Members += SubMembers;
11240276479Sdim    }
11241276479Sdim  } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
11242276479Sdim    uint64_t SubMembers = 0;
11243276479Sdim    if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
11244276479Sdim      return false;
11245276479Sdim    Members += SubMembers * AT->getNumElements();
11246276479Sdim  } else if (Ty->isFloatTy()) {
11247276479Sdim    if (Base != HA_UNKNOWN && Base != HA_FLOAT)
11248276479Sdim      return false;
11249276479Sdim    Members = 1;
11250276479Sdim    Base = HA_FLOAT;
11251276479Sdim  } else if (Ty->isDoubleTy()) {
11252276479Sdim    if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
11253276479Sdim      return false;
11254276479Sdim    Members = 1;
11255276479Sdim    Base = HA_DOUBLE;
11256276479Sdim  } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
11257276479Sdim    Members = 1;
11258276479Sdim    switch (Base) {
11259276479Sdim    case HA_FLOAT:
11260276479Sdim    case HA_DOUBLE:
11261276479Sdim      return false;
11262276479Sdim    case HA_VECT64:
11263276479Sdim      return VT->getBitWidth() == 64;
11264276479Sdim    case HA_VECT128:
11265276479Sdim      return VT->getBitWidth() == 128;
11266276479Sdim    case HA_UNKNOWN:
11267276479Sdim      switch (VT->getBitWidth()) {
11268276479Sdim      case 64:
11269276479Sdim        Base = HA_VECT64;
11270276479Sdim        return true;
11271276479Sdim      case 128:
11272276479Sdim        Base = HA_VECT128;
11273276479Sdim        return true;
11274276479Sdim      default:
11275276479Sdim        return false;
11276276479Sdim      }
11277276479Sdim    }
11278276479Sdim  }
11279276479Sdim
11280276479Sdim  return (Members > 0 && Members <= 4);
11281276479Sdim}
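// Worked example: struct { float x; float y[2]; } recurses to Base =
// HA_FLOAT with Members = 3 and is accepted; struct { float x; double y; }
// is rejected because the base types conflict; and float[5] is rejected by
// the final Members <= 4 check.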
11282276479Sdim
11283280400Sdim/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or an
11284280400Sdim/// integer array such as [N x i32] or [N x i64]. This allows front-ends to
11285280400Sdim/// skip emitting padding when passing according to AAPCS rules.
11286276479Sdimbool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
11287276479Sdim    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
11288276479Sdim  if (getEffectiveCallingConv(CallConv, isVarArg) !=
11289276479Sdim      CallingConv::ARM_AAPCS_VFP)
11290276479Sdim    return false;
11291276479Sdim
11292276479Sdim  HABaseType Base = HA_UNKNOWN;
11293276479Sdim  uint64_t Members = 0;
11294280400Sdim  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
11295280400Sdim  DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
11296280400Sdim
11297280400Sdim  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
11298280400Sdim  return IsHA || IsIntArray;
11299276479Sdim}
11300