//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
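// As an illustrative sketch of this convention (the operand names and orders
// shown here are indicative only, not a normative contract):
//
//    FADD_PRED(pg, zn, zm)                  inactive lanes are undefined
//    FABS_MERGE_PASSTHRU(pg, zn, passthru)  inactive lanes take 'passthru'
//    SRAD_MERGE_OP1(pg, zn, shift)          inactive lanes take 'zn' (OP1)
//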
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  COALESCER_BARRIER,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed in blocks and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned/Signed sum long across vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
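
// For illustration only (a sketch of how these constants relate, not part of
// the interface): given a raw FPCR value, the current rounding mode can be
// extracted as
//   unsigned RMode = (FPCR >> AArch64::RoundingBitsPos) & AArch64::rmMask;
// yielding one of the Rounding enumerators above (RN, RP, RM or RZ).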

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the Known bitset.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB,
                                 bool HasTile) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // If the NoImplicitFloat attribute is set, do not merge stores into a
    // value wider than 64 bits, as that would require FP/SIMD registers.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
    const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call, this emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Entry tells whether this is before/after the Call, which is
  /// necessary because PSTATE.SM is only queried once.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue,
                              SDValue PStateSM, bool Entry) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for fixed-length vectors that do not fit within
  // a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                  SelectionDAG &DAG) const;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
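
  // As an illustrative sketch only (the intermediate types here are chosen for
  // the example and are an assumption, not a specification): bitcasting an
  // unpacked nxv2f32 to a packed nxv2i64 may proceed as
  //   nxv2f32 --REINTERPRET_CAST--> nxv4f32 --BITCAST--> nxv2i64
  // i.e. first transition to the packed type with the same element type, then
  // bitcast between the two packed types.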

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif