//===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H

#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CodeGen.h"
#include <optional>
#include <utility>

namespace llvm {

class ARMSubtarget;
class DataLayout;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class InstrItineraryData;
class Instruction;
class MachineBasicBlock;
class MachineInstr;
class SelectionDAG;
class TargetLibraryInfo;
class TargetMachine;
class TargetRegisterInfo;
class VectorType;

  namespace ARMISD {

  // ARM Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops and target ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    Wrapper,    // Wrapper - A wrapper node for TargetConstantPool,
                // TargetExternalSymbol, and TargetGlobalAddress.
    WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
                // PIC mode.
    WrapperJT,  // WrapperJT - A wrapper node for TargetJumpTable

    // Add pseudo op to model memcpy for struct byval.
    COPY_STRUCT_BYVAL,

    CALL,        // Function call.
    CALL_PRED,   // Function call that's predicable.
    CALL_NOLINK, // Function call with branch not branch-and-link.
    tSECALL,     // CMSE non-secure function call.
    t2CALL_BTI,  // Thumb function call followed by BTI instruction.
    BRCOND,      // Conditional branch.
    BR_JT,       // Jumptable branch.
    BR2_JT,      // Jumptable branch (2 level - jumptable entry is a jump).
    RET_GLUE,    // Return with a flag operand.
    SERET_GLUE,  // CMSE Entry function return with a flag operand.
    INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.

    PIC_ADD, // Add with a PC operand and a PIC label.

    ASRL, // MVE long arithmetic shift right.
    LSRL, // MVE long shift right.
    LSLL, // MVE long shift left.

    CMP,      // ARM compare instructions.
    CMN,      // ARM CMN instructions.
    CMPZ,     // ARM compare that sets only Z flag.
    CMPFP,    // ARM VFP compare instruction, sets FPSCR.
    CMPFPE,   // ARM VFP signalling compare instruction, sets FPSCR.
    CMPFPw0,  // ARM VFP compare against zero instruction, sets FPSCR.
    CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
              // FPSCR.
    FMSTAT,   // ARM fmstat instruction.

    CMOV, // ARM conditional move instructions.
    SUBS, // Flag-setting subtraction.

    SSAT, // Signed saturation
    USAT, // Unsigned saturation

    BCC_i64,

    SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
    SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
    RRX,      // V = RRX X, Flag     -> srl X, 1 + shift in carry flag.

    ADDC, // Add with carry
    ADDE, // Add using carry
    SUBC, // Sub with carry
    SUBE, // Sub using carry
    LSLS, // Shift left producing carry

    VMOVRRD, // double to two gprs.
    VMOVDRR, // Two gprs to double.
    VMOVSR,  // move gpr to single, used for f32 literal constructed in a gpr

    EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
    EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
    EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.

    TC_RETURN, // Tail call return pseudo.

    THREAD_POINTER,

    DYN_ALLOC, // Dynamic allocation on the stack.

    MEMBARRIER_MCR, // Memory barrier (MCR)

    PRELOAD, // Preload

    WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
    WIN__DBZCHK, // Windows' divide by zero check

    WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
    WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
    LOOP_DEC, // Really a part of LE, performs the sub
    LE,       // Low-overhead loops, Loop End

    PREDICATE_CAST,  // Predicate cast for MVE i1 types
    VECTOR_REG_CAST, // Reinterpret the current contents of a vector register

    MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
    MVEZEXT,  //  or truncating two/four vectors into one. Eventually becomes
    MVETRUNC, //  stack store/load sequence, if not optimized to anything else.

    VCMP,  // Vector compare.
    VCMPZ, // Vector compare to zero.
    VTST,  // Vector test bits.

    // Vector shift by vector
    VSHLs, // ...left/right by signed
    VSHLu, // ...left/right by unsigned

    // Vector shift by immediate:
    VSHLIMM,  // ...left
    VSHRsIMM, // ...right (signed)
    VSHRuIMM, // ...right (unsigned)

    // Vector rounding shift by immediate:
    VRSHRsIMM, // ...right (signed)
    VRSHRuIMM, // ...right (unsigned)
    VRSHRNIMM, // ...right narrow

    // Vector saturating shift by immediate:
    VQSHLsIMM,   // ...left (signed)
    VQSHLuIMM,   // ...left (unsigned)
    VQSHLsuIMM,  // ...left (signed to unsigned)
    VQSHRNsIMM,  // ...right narrow (signed)
    VQSHRNuIMM,  // ...right narrow (unsigned)
    VQSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector saturating rounding shift by immediate:
    VQRSHRNsIMM,  // ...right narrow (signed)
    VQRSHRNuIMM,  // ...right narrow (unsigned)
    VQRSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector shift and insert:
    VSLIIMM, // ...left
    VSRIIMM, // ...right

    // Vector get lane (VMOV scalar to ARM core register)
    // (These are used for 8- and 16-bit element types only.)
    VGETLANEu, // zero-extend vector extract element
    VGETLANEs, // sign-extend vector extract element

    // Vector move immediate and move negated immediate:
    VMOVIMM,
    VMVNIMM,

    // Vector move f32 immediate:
    VMOVFPIMM,

    // Move H <-> R, clearing top 16 bits
    VMOVrh,
    VMOVhr,

    // Vector duplicate:
    VDUP,
    VDUPLANE,

    // Vector shuffles:
    VEXT,   // extract
    VREV64, // reverse elements within 64-bit doublewords
    VREV32, // reverse elements within 32-bit words
    VREV16, // reverse elements within 16-bit halfwords
    VZIP,   // zip (interleave)
    VUZP,   // unzip (deinterleave)
    VTRN,   // transpose
    VTBL1,  // 1-register shuffle with mask
    VTBL2,  // 2-register shuffle with mask
    VMOVN,  // MVE vmovn

    // MVE Saturating truncates
    VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
    VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)

    // MVE float <> half converts
    VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
           // lanes
    VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes

    // MVE VIDUP instruction, taking a start value and increment.
    VIDUP,

    // Vector multiply long:
    VMULLs, // ...signed
    VMULLu, // ...unsigned

    VQDMULH, // MVE vqdmulh instruction

    // MVE reductions
    VADDVs,  // sign- or zero-extend the elements of a vector to i32,
    VADDVu,  //   add them all together, and return an i32 of their sum
    VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
    VADDVpu,
    VADDLVs,  // sign- or zero-extend elements to i64 and sum, returning
    VADDLVu,  //   the low and high 32-bit halves of the sum
    VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
    VADDLVAu, //   provided as low and high halves
    VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
    VADDLVpu,
    VADDLVAps, // Same as VADDLVA[su] but with a v4i1 predicate mask
    VADDLVApu,
    VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply
    VMLAVu, //   them and add the results together, returning an i32 of their sum
    VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
    VMLAVpu,
    VMLALVs,  // Same as VMLAV but with i64, returning the low and
    VMLALVu,  //   high 32-bit halves of the sum
    VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
    VMLALVpu,
    VMLALVAs,  // Same as VMLALV but also add an input accumulator
    VMLALVAu,  //   provided as low and high halves
    VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
    VMLALVApu,
    VMINVu, // Find minimum unsigned value of a vector and register
    VMINVs, // Find minimum signed value of a vector and register
    VMAXVu, // Find maximum unsigned value of a vector and register
    VMAXVs, // Find maximum signed value of a vector and register

    SMULWB,  // Signed multiply word by half word, bottom
    SMULWT,  // Signed multiply word by half word, top
    UMLAL,   // 64-bit Unsigned Accumulate Multiply
    SMLAL,   // 64-bit Signed Accumulate Multiply
    UMAAL,   // 64-bit Unsigned Accumulate Accumulate Multiply
    SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
    SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
    SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
    SMLALTT, // 64-bit signed accumulate multiply top, top 16
    SMLALD,  // Signed multiply accumulate long dual
    SMLALDX, // Signed multiply accumulate long dual exchange
    SMLSLD,  // Signed multiply subtract long dual
    SMLSLDX, // Signed multiply subtract long dual exchange
    SMMLAR,  // Signed multiply long, round and add
    SMMLSR,  // Signed multiply long, subtract and round

    // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
    // stands for.
    QADD8b,
    QSUB8b,
    QADD16b,
    QSUB16b,
    UQADD8b,
    UQSUB8b,
    UQADD16b,
    UQSUB16b,

    // Operands of the standard BUILD_VECTOR node are not legalized, which
    // is fine if BUILD_VECTORs are always lowered to shuffles or other
    // operations, but for ARM some BUILD_VECTORs are legal as-is and their
    // operands need to be legalized.  Define an ARM-specific version of
    // BUILD_VECTOR for this purpose.
    BUILD_VECTOR,

    // Bit-field insert
    BFI,

    // Vector OR with immediate
    VORRIMM,
    // Vector AND with NOT of immediate
    VBICIMM,

    // Pseudo vector bitwise select
    VBSP,

    // Pseudo-instruction representing a memory copy using ldm/stm
    // instructions.
    MEMCPY,

    // Pseudo-instruction representing a memory copy using a tail predicated
    // loop
    MEMCPYLOOP,
    // Pseudo-instruction representing a memset using a tail predicated
    // loop
    MEMSETLOOP,

    // V8.1-M Mainline condition select
    CSINV, // Conditional select invert.
    CSNEG, // Conditional select negate.
    CSINC, // Conditional select increment.

    // Vector load N-element structure to all lanes:
    VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
    VLD2DUP,
    VLD3DUP,
    VLD4DUP,

    // NEON loads with post-increment base updates:
    VLD1_UPD,
    VLD2_UPD,
    VLD3_UPD,
    VLD4_UPD,
    VLD2LN_UPD,
    VLD3LN_UPD,
    VLD4LN_UPD,
    VLD1DUP_UPD,
    VLD2DUP_UPD,
    VLD3DUP_UPD,
    VLD4DUP_UPD,
    VLD1x2_UPD,
    VLD1x3_UPD,
    VLD1x4_UPD,

    // NEON stores with post-increment base updates:
    VST1_UPD,
    VST2_UPD,
    VST3_UPD,
    VST4_UPD,
    VST2LN_UPD,
    VST3LN_UPD,
    VST4LN_UPD,
    VST1x2_UPD,
    VST1x3_UPD,
    VST1x4_UPD,

    // Load/Store of dual registers
    LDRD,
    STRD
  };

  } // end namespace ARMISD

  namespace ARM {
  /// Possible values of current rounding mode, which is specified in bits
  /// 23:22 of FPSCR.
  enum Rounding {
    RN = 0,    // Round to Nearest
    RP = 1,    // Round towards Plus infinity
    RM = 2,    // Round towards Minus infinity
    RZ = 3,    // Round towards Zero
    rmMask = 3 // Bit mask selecting rounding mode
  };

  // Bit position of rounding mode bits in FPSCR.
  const unsigned RoundingBitsPos = 22;
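  // Example (illustrative, using a hypothetical raw FPSCR value Fpscr): the
  // rounding-mode field can be read and rewritten with the definitions above:
  //   unsigned Mode = (Fpscr >> RoundingBitsPos) & rmMask; // RN, RP, RM or RZ
  //   Fpscr = (Fpscr & ~(rmMask << RoundingBitsPos)) | (RZ << RoundingBitsPos);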

  // Bits of floating-point status. These are NZCV flags, QC bit and cumulative
  // FP exception bits.
  const unsigned FPStatusBits = 0xf800009f;

  // Some bits in the FPSCR are not yet defined.  They must be preserved when
  // modifying the contents.
  const unsigned FPReservedBits = 0x00006060;
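  // Example (illustrative, with hypothetical Old/Desired values): an FPSCR
  // update must carry the reserved bits over in a read-modify-write:
  //   unsigned New = (Old & FPReservedBits) | (Desired & ~FPReservedBits);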
  } // namespace ARM

  /// Define some predicates that are used for node matching.
  namespace ARM {

    bool isBitFieldInvertedMask(unsigned v);
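    // Example (illustrative): 0xffff00ff is a bit-field inverted mask, since
    // its complement, 0x0000ff00, is a single contiguous run of set bits of
    // the shape consumed by bit-field instructions such as BFI and BFC.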

  } // end namespace ARM

  //===--------------------------------------------------------------------===//
  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface

  class ARMTargetLowering : public TargetLowering {
  public:
    explicit ARMTargetLowering(const TargetMachine &TM,
                               const ARMSubtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // ARM does not support scalar condition selects on vectors.
      return (Kind != ScalarCondVectorVal);
    }

    bool isReadOnly(const GlobalValue *GV) const;

    /// getSetCCResultType - Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                       SDNode *Node) const override;

    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &OriginalDemandedBits,
                                           const APInt &OriginalDemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type. Returns whether the
    /// access is "fast" via the Fast out-parameter.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                        Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
    bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;
    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    Type *shouldConvertSplatType(ShuffleVectorInst *SVI) const override;

    bool isFNegFree(EVT VT) const override;

    bool isVectorLoadExtDesirable(SDValue ExtVal) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// Returns true if the addressing mode represented by AM is legal
    /// for the Thumb1 target, for a load/store of the specified type.
    bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// isLegalICmpImmediate - Return true if the specified immediate is a
    /// legal icmp immediate, that is, the target has icmp instructions which
    /// can compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is a
    /// legal add immediate, that is, the target has add instructions which
    /// can add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// getPreIndexedAddressParts - Returns true by value, and returns the base
    /// pointer, the offset, and the addressing mode by reference, if the
    /// node's address can be legally represented as a pre-indexed load / store
    /// address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// getPostIndexedAddressParts - Returns true by value, and returns the
    /// base pointer, the offset, and the addressing mode by reference, if this
    /// node can be combined with a load / store to form a post-indexed load /
    /// store.
    bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                    SDValue &Offset, ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;

    void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
    /// true it means one of the asm constraints of the inline asm instruction
    /// being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      if (ConstraintCode.size() == 2) {
        if (ConstraintCode[0] == 'U') {
          switch (ConstraintCode[1]) {
          default:
            break;
          case 'm':
            return InlineAsm::ConstraintCode::Um;
          case 'n':
            return InlineAsm::ConstraintCode::Un;
          case 'q':
            return InlineAsm::ConstraintCode::Uq;
          case 's':
            return InlineAsm::ConstraintCode::Us;
          case 't':
            return InlineAsm::ConstraintCode::Ut;
          case 'v':
            return InlineAsm::ConstraintCode::Uv;
          case 'y':
            return InlineAsm::ConstraintCode::Uy;
          }
        }
      }
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }
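    // Illustrative note (an assumption about typical usage, not from this
    // header): these memory constraints originate in inline asm, e.g.
    //   asm volatile("ldrex %0, %1" : "=r"(Val) : "Q"(*Ptr));
    // where "Q" requires an address held in a single base register with no
    // offset.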

    const ARMSubtarget *getSubtarget() const {
      return Subtarget;
    }

    /// getRegClassFor - Return the register class that should be used for the
    /// specified value type.
    const TargetRegisterClass *
    getRegClassFor(MVT VT, bool isDivergent = false) const override;

    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                Align &PrefAlign) const override;

    /// createFastISel - This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    bool preferZeroCompareBranch() const override { return true; }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    /// isFPImmLegal - Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize = false) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override {
      // Using overflow ops for overflow checks only should be beneficial on
      // ARM.
      return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
    }

    bool shouldReassociateReduction(unsigned Opc, EVT VT) const override {
      return Opc != ISD::VECREDUCE_ADD;
    }

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    Instruction *makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const;
    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                          AtomicOrdering Ord) const override;
    Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                                AtomicOrdering Ord) const override;

    void
    emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    unsigned getMaxSupportedInterleaveFactor() const override;

    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    bool useLoadStackGuardNode() const override;

    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;

    bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                   unsigned &Cost) const override;

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override {
      // Do not merge to larger than i32.
      return (MemVT.getSizeInBits() <= 32);
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override;
    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    bool supportSwiftError() const override {
      return true;
    }

    bool hasStandaloneRem(EVT VT) const override {
      return HasStandaloneRem;
    }

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
    CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;

    /// Returns true if \p VecTy is a legal interleaved access type. This
    /// function checks the vector element type and the overall width of the
    /// vector.
    bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy,
                                      Align Alignment,
                                      const DataLayout &DL) const;

    bool isMulAddWithConstProfitable(SDValue AddNode,
                                     SDValue ConstNode) const override;

    bool alignLoopsWithOptSize() const override;

    /// Returns the number of interleaved accesses that will be generated when
    /// lowering accesses of the given type.
    unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                       const DataLayout &DL) const;

    void finalizeLowering(MachineFunction &MF) const override;

    /// Return the correct alignment for the current calling convention.
    Align getABIAlignmentForCallingConv(Type *ArgTy,
                                        const DataLayout &DL) const override;

    bool isDesirableToCommuteWithShift(const SDNode *N,
                                       CombineLevel Level) const override;

    bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

    bool isComplexDeinterleavingSupported() const override;
    bool isComplexDeinterleavingOperationSupported(
        ComplexDeinterleavingOperation Operation, Type *Ty) const override;

    Value *createComplexDeinterleavingIR(
        IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
        ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
        Value *Accumulator = nullptr) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
    /// make the right decision when generating code for different targets.
    const ARMSubtarget *Subtarget;

    const TargetRegisterInfo *RegInfo;

    const InstrItineraryData *Itins;

    // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
    // check.
    bool InsertFencesForAtomic;

    bool HasStandaloneRem = true;

    void addTypeForNEON(MVT VT, MVT PromotedLdStVT);
    void addDRTypeForNEON(MVT VT);
    void addQRTypeForNEON(MVT VT);
    std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                              SDValue &ARMcc) const;

    using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;

    void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
                          SDValue &Arg, RegsToPassVector &RegsToPass,
                          CCValAssign &VA, CCValAssign &NextVA,
                          SDValue &StackPtr,
                          SmallVectorImpl<SDValue> &MemOpChains,
                          bool IsTailCall,
                          int SPDiff) const;
    SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                 SDValue &Root, SelectionDAG &DAG,
                                 const SDLoc &dl) const;

    CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
                                            bool isVarArg) const;
    CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
                                  bool isVarArg) const;
    std::pair<SDValue, MachinePointerInfo>
    computeAddrForCallArg(const SDLoc &dl, SelectionDAG &DAG,
                          const CCValAssign &VA, SDValue StackPtr,
                          bool IsTailCall, int SPDiff) const;
    SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
                                const ARMSubtarget *Subtarget) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                    const ARMSubtarget *Subtarget) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;
    SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                 SelectionDAG &DAG,
                                 TLSModel::Model model) const;
    SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                            const ARMSubtarget *ST) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *ST) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
    void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
                           SmallVectorImpl<SDValue> &Results) const;
    SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *Subtarget) const;
    SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
                                   SDValue &Chain) const;
    SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    SDValue MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT, MVT ValVT,
                      SDValue Val) const;
    SDValue MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT,
                        MVT ValVT, SDValue Val) const;

    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                            SDValue ThisVal) const;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }

    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
        const override;

    SDValue joinRegisterPartsIntoValue(
        SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
        unsigned NumParts, MVT PartVT, EVT ValueVT,
        std::optional<CallingConv::ID> CC) const override;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;

    int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl,
                       SDValue &Chain, const Value *OrigArg,
                       unsigned InRegsParamRecordIdx, int ArgOffset,
                       unsigned ArgSize) const;

    void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                              const SDLoc &dl, SDValue &Chain,
                              unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
                              bool ForceMutable = false) const;

    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// HandleByVal - Target-specific cleanup for ByVal support.
    void HandleByVal(CCState *, unsigned &, Align) const override;

    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
    /// for tail call optimization. Targets which want to do tail call
    /// optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
        const bool isIndirect) const;

    bool CanLowerReturn(CallingConv::ID CallConv,
                        MachineFunction &MF, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    bool shouldConsiderGEPOffsetSplit() const override { return true; }

    bool isUnsupportedFloatingType(EVT VT) const;

    SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                    SDValue ARMcc, SDValue CCR, SDValue Cmp,
                    SelectionDAG &DAG) const;
    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                      SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                      const SDLoc &dl, bool Signaling = false) const;
    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;

    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitStructByval(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    void addMVEVectorTypes(bool HasMVEFP);
    void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
    void setAllExpand(MVT VT);
  };

  enum VMOVModImmType {
    VMOVModImm,
    VMVNModImm,
    MVEVMVNModImm,
    OtherModImm
  };

  namespace ARM {

    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);

  } // end namespace ARM

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H