1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSubtarget.h"
19#include "LoongArchTargetMachine.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/RuntimeLibcalls.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
29#include "llvm/Support/CodeGen.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/KnownBits.h"
33#include "llvm/Support/MathExtras.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42                                  cl::desc("Trap on integer division by zero."),
43                                  cl::init(false));
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46                                                 const LoongArchSubtarget &STI)
47    : TargetLowering(TM), Subtarget(STI) {
48
49  MVT GRLenVT = Subtarget.getGRLenVT();
50
51  // Set up the register classes.
52
53  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54  if (Subtarget.hasBasicF())
55    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56  if (Subtarget.hasBasicD())
57    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59  static const MVT::SimpleValueType LSXVTs[] = {
60      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61  static const MVT::SimpleValueType LASXVTs[] = {
62      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64  if (Subtarget.hasExtLSX())
65    for (MVT VT : LSXVTs)
66      addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68  if (Subtarget.hasExtLASX())
69    for (MVT VT : LASXVTs)
70      addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72  // Set operations for LA32 and LA64.
73
74  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75                   MVT::i1, Promote);
76
77  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81  setOperationAction(ISD::ROTL, GRLenVT, Expand);
82  setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83
84  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85                      ISD::JumpTable, ISD::GlobalTLSAddress},
86                     GRLenVT, Custom);
87
88  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89
90  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92  setOperationAction(ISD::VASTART, MVT::Other, Custom);
93  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94
95  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96  setOperationAction(ISD::TRAP, MVT::Other, Legal);
97
98  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101
102  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103  // we get to know which of sll and revb.2h is faster.
104  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
106
107  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109  // and i32 could still be byte-swapped relatively cheaply.
110  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111
112  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117
118  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120
121  // Set operations for LA64 only.
122
123  if (Subtarget.is64Bit()) {
124    setOperationAction(ISD::SHL, MVT::i32, Custom);
125    setOperationAction(ISD::SRA, MVT::i32, Custom);
126    setOperationAction(ISD::SRL, MVT::i32, Custom);
127    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
128    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
129    setOperationAction(ISD::ROTR, MVT::i32, Custom);
130    setOperationAction(ISD::ROTL, MVT::i32, Custom);
131    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
132    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
133    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
134    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
135    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
136    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
137    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
138    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
139
140    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
141    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
142  }
143
144  // Set operations for LA32 only.
145
146  if (!Subtarget.is64Bit()) {
147    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
148    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
149    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
150    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
151    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
152
153    // Set libcalls.
154    setLibcallName(RTLIB::MUL_I128, nullptr);
155    // The MULO libcall is not part of libgcc, only compiler-rt.
156    setLibcallName(RTLIB::MULO_I64, nullptr);
157  }
158
159  // The MULO libcall is not part of libgcc, only compiler-rt.
160  setLibcallName(RTLIB::MULO_I128, nullptr);
161
162  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
163
164  static const ISD::CondCode FPCCToExpand[] = {
165      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
166      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
167
168  // Set operations for 'F' feature.
169
170  if (Subtarget.hasBasicF()) {
171    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172
173    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
174    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
175    setOperationAction(ISD::FMA, MVT::f32, Legal);
176    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
177    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
178    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
179    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
180    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
181    setOperationAction(ISD::FSIN, MVT::f32, Expand);
182    setOperationAction(ISD::FCOS, MVT::f32, Expand);
183    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
184    setOperationAction(ISD::FPOW, MVT::f32, Expand);
185    setOperationAction(ISD::FREM, MVT::f32, Expand);
186
187    if (Subtarget.is64Bit())
188      setOperationAction(ISD::FRINT, MVT::f32, Legal);
189
190    if (!Subtarget.hasBasicD()) {
191      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
192      if (Subtarget.is64Bit()) {
193        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
194        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
195      }
196    }
197  }
198
199  // Set operations for 'D' feature.
200
201  if (Subtarget.hasBasicD()) {
202    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205
206    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
207    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
208    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
209    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
210    setOperationAction(ISD::FMA, MVT::f64, Legal);
211    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
212    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
213    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
214    setOperationAction(ISD::FSIN, MVT::f64, Expand);
215    setOperationAction(ISD::FCOS, MVT::f64, Expand);
216    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
217    setOperationAction(ISD::FPOW, MVT::f64, Expand);
218    setOperationAction(ISD::FREM, MVT::f64, Expand);
219
220    if (Subtarget.is64Bit())
221      setOperationAction(ISD::FRINT, MVT::f64, Legal);
222  }
223
224  // Set operations for 'LSX' feature.
225
226  if (Subtarget.hasExtLSX()) {
227    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
228      // Expand all truncating stores and extending loads.
229      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230        setTruncStoreAction(VT, InnerVT, Expand);
231        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
232        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
233        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234      }
235      // By default everything must be expanded. Then we will selectively turn
236      // on ones that can be effectively codegen'd.
237      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
238        setOperationAction(Op, VT, Expand);
239    }
240
241    for (MVT VT : LSXVTs) {
242      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
243      setOperationAction(ISD::BITCAST, VT, Legal);
244      setOperationAction(ISD::UNDEF, VT, Legal);
245
246      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
247      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
248      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
249
250      setOperationAction(ISD::SETCC, VT, Legal);
251      setOperationAction(ISD::VSELECT, VT, Legal);
252    }
253    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
254      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
255      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
256      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
257                         Legal);
258      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
259                         VT, Legal);
260      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
261      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
262      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
263      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
264      setCondCodeAction(
265          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
266          Expand);
267    }
268    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
269      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
270      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
271    }
272    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
273      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
274      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
275      setOperationAction(ISD::FMA, VT, Legal);
276      setOperationAction(ISD::FSQRT, VT, Legal);
277      setOperationAction(ISD::FNEG, VT, Legal);
278      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
279                         ISD::SETUGE, ISD::SETUGT},
280                        VT, Expand);
281    }
282  }
283
284  // Set operations for 'LASX' feature.
285
286  if (Subtarget.hasExtLASX()) {
287    for (MVT VT : LASXVTs) {
288      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
289      setOperationAction(ISD::BITCAST, VT, Legal);
290      setOperationAction(ISD::UNDEF, VT, Legal);
291
292      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
293      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
294      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
295
296      setOperationAction(ISD::SETCC, VT, Legal);
297      setOperationAction(ISD::VSELECT, VT, Legal);
298    }
299    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
300      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
301      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
302      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
303                         Legal);
304      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
305                         VT, Legal);
306      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
307      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
308      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
309      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
310      setCondCodeAction(
311          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
312          Expand);
313    }
314    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
315      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
316      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
317    }
318    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
319      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
320      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
321      setOperationAction(ISD::FMA, VT, Legal);
322      setOperationAction(ISD::FSQRT, VT, Legal);
323      setOperationAction(ISD::FNEG, VT, Legal);
324      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
325                         ISD::SETUGE, ISD::SETUGT},
326                        VT, Expand);
327    }
328  }
329
330  // Set DAG combine for LA32 and LA64.
331
332  setTargetDAGCombine(ISD::AND);
333  setTargetDAGCombine(ISD::OR);
334  setTargetDAGCombine(ISD::SRL);
335
336  // Set DAG combine for 'LSX' feature.
337
338  if (Subtarget.hasExtLSX())
339    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
340
341  // Compute derived properties from the register classes.
342  computeRegisterProperties(Subtarget.getRegisterInfo());
343
344  setStackPointerRegisterToSaveRestore(LoongArch::R3);
345
346  setBooleanContents(ZeroOrOneBooleanContent);
347  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
348
349  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
350
351  setMinCmpXchgSizeInBits(32);
352
353  // Function alignments.
354  setMinFunctionAlignment(Align(4));
355  // Set preferred alignments.
356  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
357  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
358  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
359}
360
361bool LoongArchTargetLowering::isOffsetFoldingLegal(
362    const GlobalAddressSDNode *GA) const {
363  // In order to maximise the opportunity for common subexpression elimination,
364  // keep a separate ADD node for the global address offset instead of folding
365  // it in the global address node. Later peephole optimisations may choose to
366  // fold it back in when profitable.
367  return false;
368}
369
370SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
371                                                SelectionDAG &DAG) const {
372  switch (Op.getOpcode()) {
373  case ISD::ATOMIC_FENCE:
374    return lowerATOMIC_FENCE(Op, DAG);
375  case ISD::EH_DWARF_CFA:
376    return lowerEH_DWARF_CFA(Op, DAG);
377  case ISD::GlobalAddress:
378    return lowerGlobalAddress(Op, DAG);
379  case ISD::GlobalTLSAddress:
380    return lowerGlobalTLSAddress(Op, DAG);
381  case ISD::INTRINSIC_WO_CHAIN:
382    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
383  case ISD::INTRINSIC_W_CHAIN:
384    return lowerINTRINSIC_W_CHAIN(Op, DAG);
385  case ISD::INTRINSIC_VOID:
386    return lowerINTRINSIC_VOID(Op, DAG);
387  case ISD::BlockAddress:
388    return lowerBlockAddress(Op, DAG);
389  case ISD::JumpTable:
390    return lowerJumpTable(Op, DAG);
391  case ISD::SHL_PARTS:
392    return lowerShiftLeftParts(Op, DAG);
393  case ISD::SRA_PARTS:
394    return lowerShiftRightParts(Op, DAG, true);
395  case ISD::SRL_PARTS:
396    return lowerShiftRightParts(Op, DAG, false);
397  case ISD::ConstantPool:
398    return lowerConstantPool(Op, DAG);
399  case ISD::FP_TO_SINT:
400    return lowerFP_TO_SINT(Op, DAG);
401  case ISD::BITCAST:
402    return lowerBITCAST(Op, DAG);
403  case ISD::UINT_TO_FP:
404    return lowerUINT_TO_FP(Op, DAG);
405  case ISD::SINT_TO_FP:
406    return lowerSINT_TO_FP(Op, DAG);
407  case ISD::VASTART:
408    return lowerVASTART(Op, DAG);
409  case ISD::FRAMEADDR:
410    return lowerFRAMEADDR(Op, DAG);
411  case ISD::RETURNADDR:
412    return lowerRETURNADDR(Op, DAG);
413  case ISD::WRITE_REGISTER:
414    return lowerWRITE_REGISTER(Op, DAG);
415  case ISD::INSERT_VECTOR_ELT:
416    return lowerINSERT_VECTOR_ELT(Op, DAG);
417  case ISD::EXTRACT_VECTOR_ELT:
418    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
419  case ISD::BUILD_VECTOR:
420    return lowerBUILD_VECTOR(Op, DAG);
421  case ISD::VECTOR_SHUFFLE:
422    return lowerVECTOR_SHUFFLE(Op, DAG);
423  }
424  return SDValue();
425}
426
427SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
428                                                     SelectionDAG &DAG) const {
429  // TODO: custom shuffle.
430  return SDValue();
431}
432
433static bool isConstantOrUndef(const SDValue Op) {
434  if (Op->isUndef())
435    return true;
436  if (isa<ConstantSDNode>(Op))
437    return true;
438  if (isa<ConstantFPSDNode>(Op))
439    return true;
440  return false;
441}
442
443static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
444  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
445    if (isConstantOrUndef(Op->getOperand(i)))
446      return true;
447  return false;
448}
449
450SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
451                                                   SelectionDAG &DAG) const {
452  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
453  EVT ResTy = Op->getValueType(0);
454  SDLoc DL(Op);
455  APInt SplatValue, SplatUndef;
456  unsigned SplatBitSize;
457  bool HasAnyUndefs;
458  bool Is128Vec = ResTy.is128BitVector();
459  bool Is256Vec = ResTy.is256BitVector();
460
461  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
462      (!Subtarget.hasExtLASX() || !Is256Vec))
463    return SDValue();
464
465  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
466                            /*MinSplatBits=*/8) &&
467      SplatBitSize <= 64) {
468    // We can only cope with 8, 16, 32, or 64-bit elements.
469    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
470        SplatBitSize != 64)
471      return SDValue();
472
473    EVT ViaVecTy;
474
475    switch (SplatBitSize) {
476    default:
477      return SDValue();
478    case 8:
479      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
480      break;
481    case 16:
482      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
483      break;
484    case 32:
485      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
486      break;
487    case 64:
488      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
489      break;
490    }
491
492    // SelectionDAG::getConstant will promote SplatValue appropriately.
493    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
494
495    // Bitcast to the type we originally wanted.
496    if (ViaVecTy != ResTy)
497      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
498
499    return Result;
500  }
501
502  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
503    return Op;
504
505  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
506    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
507    // The resulting code is the same length as the expansion, but it doesn't
508    // use memory operations.
509    EVT ResTy = Node->getValueType(0);
510
511    assert(ResTy.isVector());
512
513    unsigned NumElts = ResTy.getVectorNumElements();
514    SDValue Vector = DAG.getUNDEF(ResTy);
515    for (unsigned i = 0; i < NumElts; ++i) {
516      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
517                           Node->getOperand(i),
518                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
519    }
520    return Vector;
521  }
522
523  return SDValue();
524}
525
526SDValue
527LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
528                                                 SelectionDAG &DAG) const {
529  EVT VecTy = Op->getOperand(0)->getValueType(0);
530  SDValue Idx = Op->getOperand(1);
531  EVT EltTy = VecTy.getVectorElementType();
532  unsigned NumElts = VecTy.getVectorNumElements();
533
534  if (isa<ConstantSDNode>(Idx) &&
535      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
536       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
537    return Op;
538
539  return SDValue();
540}
541
542SDValue
543LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
544                                                SelectionDAG &DAG) const {
545  if (isa<ConstantSDNode>(Op->getOperand(2)))
546    return Op;
547  return SDValue();
548}
549
550SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
551                                                   SelectionDAG &DAG) const {
552  SDLoc DL(Op);
553  SyncScope::ID FenceSSID =
554      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
555
556  // singlethread fences only synchronize with signal handlers on the same
557  // thread and thus only need to preserve instruction order, not actually
558  // enforce memory ordering.
559  if (FenceSSID == SyncScope::SingleThread)
560    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
561    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
562
563  return Op;
564}
565
566SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
567                                                     SelectionDAG &DAG) const {
568
569  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
570    DAG.getContext()->emitError(
571        "On LA64, only 64-bit registers can be written.");
572    return Op.getOperand(0);
573  }
574
575  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
576    DAG.getContext()->emitError(
577        "On LA32, only 32-bit registers can be written.");
578    return Op.getOperand(0);
579  }
580
581  return Op;
582}
583
584SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
585                                                SelectionDAG &DAG) const {
586  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
587    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
588                                "be a constant integer");
589    return SDValue();
590  }
591
592  MachineFunction &MF = DAG.getMachineFunction();
593  MF.getFrameInfo().setFrameAddressIsTaken(true);
594  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
595  EVT VT = Op.getValueType();
596  SDLoc DL(Op);
597  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
598  unsigned Depth = Op.getConstantOperandVal(0);
599  int GRLenInBytes = Subtarget.getGRLen() / 8;
600
601  while (Depth--) {
602    int Offset = -(GRLenInBytes * 2);
603    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
604                              DAG.getIntPtrConstant(Offset, DL));
605    FrameAddr =
606        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
607  }
608  return FrameAddr;
609}
610
611SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
612                                                 SelectionDAG &DAG) const {
613  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
614    return SDValue();
615
616  // Currently only support lowering return address for current frame.
617  if (Op.getConstantOperandVal(0) != 0) {
618    DAG.getContext()->emitError(
619        "return address can only be determined for the current frame");
620    return SDValue();
621  }
622
623  MachineFunction &MF = DAG.getMachineFunction();
624  MF.getFrameInfo().setReturnAddressIsTaken(true);
625  MVT GRLenVT = Subtarget.getGRLenVT();
626
627  // Return the value of the return address register, marking it an implicit
628  // live-in.
629  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
630                              getRegClassFor(GRLenVT));
631  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
632}
633
634SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
635                                                   SelectionDAG &DAG) const {
636  MachineFunction &MF = DAG.getMachineFunction();
637  auto Size = Subtarget.getGRLen() / 8;
638  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
639  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
640}
641
642SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
643                                              SelectionDAG &DAG) const {
644  MachineFunction &MF = DAG.getMachineFunction();
645  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
646
647  SDLoc DL(Op);
648  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
649                                 getPointerTy(MF.getDataLayout()));
650
651  // vastart just stores the address of the VarArgsFrameIndex slot into the
652  // memory location argument.
653  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
654  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
655                      MachinePointerInfo(SV));
656}
657
658SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
659                                                 SelectionDAG &DAG) const {
660  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
661         !Subtarget.hasBasicD() && "unexpected target features");
662
663  SDLoc DL(Op);
664  SDValue Op0 = Op.getOperand(0);
665  if (Op0->getOpcode() == ISD::AND) {
666    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
667    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
668      return Op;
669  }
670
671  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
672      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
673      Op0.getConstantOperandVal(2) == UINT64_C(0))
674    return Op;
675
676  if (Op0.getOpcode() == ISD::AssertZext &&
677      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
678    return Op;
679
680  EVT OpVT = Op0.getValueType();
681  EVT RetVT = Op.getValueType();
682  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
683  MakeLibCallOptions CallOptions;
684  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
685  SDValue Chain = SDValue();
686  SDValue Result;
687  std::tie(Result, Chain) =
688      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
689  return Result;
690}
691
692SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
693                                                 SelectionDAG &DAG) const {
694  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
695         !Subtarget.hasBasicD() && "unexpected target features");
696
697  SDLoc DL(Op);
698  SDValue Op0 = Op.getOperand(0);
699
700  if ((Op0.getOpcode() == ISD::AssertSext ||
701       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
702      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
703    return Op;
704
705  EVT OpVT = Op0.getValueType();
706  EVT RetVT = Op.getValueType();
707  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
708  MakeLibCallOptions CallOptions;
709  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
710  SDValue Chain = SDValue();
711  SDValue Result;
712  std::tie(Result, Chain) =
713      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
714  return Result;
715}
716
717SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
718                                              SelectionDAG &DAG) const {
719
720  SDLoc DL(Op);
721  SDValue Op0 = Op.getOperand(0);
722
723  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
724      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
725    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
726    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
727  }
728  return Op;
729}
730
731SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
732                                                 SelectionDAG &DAG) const {
733
734  SDLoc DL(Op);
735
736  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
737      !Subtarget.hasBasicD()) {
738    SDValue Dst =
739        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
740    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
741  }
742
743  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
744  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
745  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
746}
747
748static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
749                             SelectionDAG &DAG, unsigned Flags) {
750  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
751}
752
753static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
754                             SelectionDAG &DAG, unsigned Flags) {
755  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
756                                   Flags);
757}
758
759static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
760                             SelectionDAG &DAG, unsigned Flags) {
761  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
762                                   N->getOffset(), Flags);
763}
764
765static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
766                             SelectionDAG &DAG, unsigned Flags) {
767  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
768}
769
770template <class NodeTy>
771SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
772                                         CodeModel::Model M,
773                                         bool IsLocal) const {
774  SDLoc DL(N);
775  EVT Ty = getPointerTy(DAG.getDataLayout());
776  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
777
778  switch (M) {
779  default:
780    report_fatal_error("Unsupported code model");
781
782  case CodeModel::Large: {
783    assert(Subtarget.is64Bit() && "Large code model requires LA64");
784
785    // This is not actually used, but is necessary for successfully matching
786    // the PseudoLA_*_LARGE nodes.
787    SDValue Tmp = DAG.getConstant(0, DL, Ty);
788    if (IsLocal)
789      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
790      // eventually becomes the desired 5-insn code sequence.
791      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
792                                        Tmp, Addr),
793                     0);
794
795    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
796    // becomes the desired 5-insn code sequence.
797    return SDValue(
798        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
799        0);
800  }
801
802  case CodeModel::Small:
803  case CodeModel::Medium:
804    if (IsLocal)
805      // This generates the pattern (PseudoLA_PCREL sym), which expands to
806      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
807      return SDValue(
808          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
809
810    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
811    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
812    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
813                   0);
814  }
815}
816
817SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
818                                                   SelectionDAG &DAG) const {
819  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
820                 DAG.getTarget().getCodeModel());
821}
822
823SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
824                                                SelectionDAG &DAG) const {
825  return getAddr(cast<JumpTableSDNode>(Op), DAG,
826                 DAG.getTarget().getCodeModel());
827}
828
829SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
830                                                   SelectionDAG &DAG) const {
831  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
832                 DAG.getTarget().getCodeModel());
833}
834
835SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
836                                                    SelectionDAG &DAG) const {
837  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
838  assert(N->getOffset() == 0 && "unexpected offset in global node");
839  auto CM = DAG.getTarget().getCodeModel();
840  const GlobalValue *GV = N->getGlobal();
841
842  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
843    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
844      CM = *GCM;
845  }
846
847  return getAddr(N, DAG, CM, GV->isDSOLocal());
848}
849
850SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
851                                                  SelectionDAG &DAG,
852                                                  unsigned Opc,
853                                                  bool Large) const {
854  SDLoc DL(N);
855  EVT Ty = getPointerTy(DAG.getDataLayout());
856  MVT GRLenVT = Subtarget.getGRLenVT();
857
858  // This is not actually used, but is necessary for successfully matching the
859  // PseudoLA_*_LARGE nodes.
860  SDValue Tmp = DAG.getConstant(0, DL, Ty);
861  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
862  SDValue Offset = Large
863                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
864                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
865
866  // Add the thread pointer.
867  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
868                     DAG.getRegister(LoongArch::R2, GRLenVT));
869}
870
871SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
872                                                   SelectionDAG &DAG,
873                                                   unsigned Opc,
874                                                   bool Large) const {
875  SDLoc DL(N);
876  EVT Ty = getPointerTy(DAG.getDataLayout());
877  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
878
879  // This is not actually used, but is necessary for successfully matching the
880  // PseudoLA_*_LARGE nodes.
881  SDValue Tmp = DAG.getConstant(0, DL, Ty);
882
883  // Use a PC-relative addressing mode to access the dynamic GOT address.
884  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
885  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
886                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
887
888  // Prepare argument list to generate call.
889  ArgListTy Args;
890  ArgListEntry Entry;
891  Entry.Node = Load;
892  Entry.Ty = CallTy;
893  Args.push_back(Entry);
894
895  // Setup call to __tls_get_addr.
896  TargetLowering::CallLoweringInfo CLI(DAG);
897  CLI.setDebugLoc(DL)
898      .setChain(DAG.getEntryNode())
899      .setLibCallee(CallingConv::C, CallTy,
900                    DAG.getExternalSymbol("__tls_get_addr", Ty),
901                    std::move(Args));
902
903  return LowerCallTo(CLI).first;
904}
905
906SDValue
907LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
908                                               SelectionDAG &DAG) const {
909  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
910      CallingConv::GHC)
911    report_fatal_error("In GHC calling convention TLS is not supported");
912
913  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
914  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
915
916  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
917  assert(N->getOffset() == 0 && "unexpected offset in global node");
918
919  SDValue Addr;
920  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
921  case TLSModel::GeneralDynamic:
922    // In this model, application code calls the dynamic linker function
923    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
924    // runtime.
925    Addr = getDynamicTLSAddr(N, DAG,
926                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
927                                   : LoongArch::PseudoLA_TLS_GD,
928                             Large);
929    break;
930  case TLSModel::LocalDynamic:
931    // Same as GeneralDynamic, except for assembly modifiers and relocation
932    // records.
933    Addr = getDynamicTLSAddr(N, DAG,
934                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
935                                   : LoongArch::PseudoLA_TLS_LD,
936                             Large);
937    break;
938  case TLSModel::InitialExec:
939    // This model uses the GOT to resolve TLS offsets.
940    Addr = getStaticTLSAddr(N, DAG,
941                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
942                                  : LoongArch::PseudoLA_TLS_IE,
943                            Large);
944    break;
945  case TLSModel::LocalExec:
946    // This model is used when static linking as the TLS offsets are resolved
947    // during program linking.
948    //
949    // This node doesn't need an extra argument for the large code model.
950    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
951    break;
952  }
953
954  return Addr;
955}
956
957template <unsigned N>
958static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
959                                    SelectionDAG &DAG, bool IsSigned = false) {
960  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
961  // Check the ImmArg.
962  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
963      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
964    DAG.getContext()->emitError(Op->getOperationName(0) +
965                                ": argument out of range.");
966    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
967  }
968  return SDValue();
969}
970
971SDValue
972LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
973                                                 SelectionDAG &DAG) const {
974  SDLoc DL(Op);
975  switch (Op.getConstantOperandVal(0)) {
976  default:
977    return SDValue(); // Don't custom lower most intrinsics.
978  case Intrinsic::thread_pointer: {
979    EVT PtrVT = getPointerTy(DAG.getDataLayout());
980    return DAG.getRegister(LoongArch::R2, PtrVT);
981  }
982  case Intrinsic::loongarch_lsx_vpickve2gr_d:
983  case Intrinsic::loongarch_lsx_vpickve2gr_du:
984  case Intrinsic::loongarch_lsx_vreplvei_d:
985  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
986    return checkIntrinsicImmArg<1>(Op, 2, DAG);
987  case Intrinsic::loongarch_lsx_vreplvei_w:
988  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
989  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
990  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
991  case Intrinsic::loongarch_lasx_xvpickve_d:
992  case Intrinsic::loongarch_lasx_xvpickve_d_f:
993    return checkIntrinsicImmArg<2>(Op, 2, DAG);
994  case Intrinsic::loongarch_lasx_xvinsve0_d:
995    return checkIntrinsicImmArg<2>(Op, 3, DAG);
996  case Intrinsic::loongarch_lsx_vsat_b:
997  case Intrinsic::loongarch_lsx_vsat_bu:
998  case Intrinsic::loongarch_lsx_vrotri_b:
999  case Intrinsic::loongarch_lsx_vsllwil_h_b:
1000  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1001  case Intrinsic::loongarch_lsx_vsrlri_b:
1002  case Intrinsic::loongarch_lsx_vsrari_b:
1003  case Intrinsic::loongarch_lsx_vreplvei_h:
1004  case Intrinsic::loongarch_lasx_xvsat_b:
1005  case Intrinsic::loongarch_lasx_xvsat_bu:
1006  case Intrinsic::loongarch_lasx_xvrotri_b:
1007  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1008  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1009  case Intrinsic::loongarch_lasx_xvsrlri_b:
1010  case Intrinsic::loongarch_lasx_xvsrari_b:
1011  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1012  case Intrinsic::loongarch_lasx_xvpickve_w:
1013  case Intrinsic::loongarch_lasx_xvpickve_w_f:
1014    return checkIntrinsicImmArg<3>(Op, 2, DAG);
1015  case Intrinsic::loongarch_lasx_xvinsve0_w:
1016    return checkIntrinsicImmArg<3>(Op, 3, DAG);
1017  case Intrinsic::loongarch_lsx_vsat_h:
1018  case Intrinsic::loongarch_lsx_vsat_hu:
1019  case Intrinsic::loongarch_lsx_vrotri_h:
1020  case Intrinsic::loongarch_lsx_vsllwil_w_h:
1021  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1022  case Intrinsic::loongarch_lsx_vsrlri_h:
1023  case Intrinsic::loongarch_lsx_vsrari_h:
1024  case Intrinsic::loongarch_lsx_vreplvei_b:
1025  case Intrinsic::loongarch_lasx_xvsat_h:
1026  case Intrinsic::loongarch_lasx_xvsat_hu:
1027  case Intrinsic::loongarch_lasx_xvrotri_h:
1028  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
1029  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
1030  case Intrinsic::loongarch_lasx_xvsrlri_h:
1031  case Intrinsic::loongarch_lasx_xvsrari_h:
1032  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
1033    return checkIntrinsicImmArg<4>(Op, 2, DAG);
1034  case Intrinsic::loongarch_lsx_vsrlni_b_h:
1035  case Intrinsic::loongarch_lsx_vsrani_b_h:
1036  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
1037  case Intrinsic::loongarch_lsx_vsrarni_b_h:
1038  case Intrinsic::loongarch_lsx_vssrlni_b_h:
1039  case Intrinsic::loongarch_lsx_vssrani_b_h:
1040  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
1041  case Intrinsic::loongarch_lsx_vssrani_bu_h:
1042  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
1043  case Intrinsic::loongarch_lsx_vssrarni_b_h:
1044  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
1045  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
1046  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
1047  case Intrinsic::loongarch_lasx_xvsrani_b_h:
1048  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
1049  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
1050  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
1051  case Intrinsic::loongarch_lasx_xvssrani_b_h:
1052  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
1053  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
1054  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
1055  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
1056  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
1057  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
1058    return checkIntrinsicImmArg<4>(Op, 3, DAG);
1059  case Intrinsic::loongarch_lsx_vsat_w:
1060  case Intrinsic::loongarch_lsx_vsat_wu:
1061  case Intrinsic::loongarch_lsx_vrotri_w:
1062  case Intrinsic::loongarch_lsx_vsllwil_d_w:
1063  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1064  case Intrinsic::loongarch_lsx_vsrlri_w:
1065  case Intrinsic::loongarch_lsx_vsrari_w:
1066  case Intrinsic::loongarch_lsx_vslei_bu:
1067  case Intrinsic::loongarch_lsx_vslei_hu:
1068  case Intrinsic::loongarch_lsx_vslei_wu:
1069  case Intrinsic::loongarch_lsx_vslei_du:
1070  case Intrinsic::loongarch_lsx_vslti_bu:
1071  case Intrinsic::loongarch_lsx_vslti_hu:
1072  case Intrinsic::loongarch_lsx_vslti_wu:
1073  case Intrinsic::loongarch_lsx_vslti_du:
1074  case Intrinsic::loongarch_lsx_vbsll_v:
1075  case Intrinsic::loongarch_lsx_vbsrl_v:
1076  case Intrinsic::loongarch_lasx_xvsat_w:
1077  case Intrinsic::loongarch_lasx_xvsat_wu:
1078  case Intrinsic::loongarch_lasx_xvrotri_w:
1079  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1080  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1081  case Intrinsic::loongarch_lasx_xvsrlri_w:
1082  case Intrinsic::loongarch_lasx_xvsrari_w:
1083  case Intrinsic::loongarch_lasx_xvslei_bu:
1084  case Intrinsic::loongarch_lasx_xvslei_hu:
1085  case Intrinsic::loongarch_lasx_xvslei_wu:
1086  case Intrinsic::loongarch_lasx_xvslei_du:
1087  case Intrinsic::loongarch_lasx_xvslti_bu:
1088  case Intrinsic::loongarch_lasx_xvslti_hu:
1089  case Intrinsic::loongarch_lasx_xvslti_wu:
1090  case Intrinsic::loongarch_lasx_xvslti_du:
1091  case Intrinsic::loongarch_lasx_xvbsll_v:
1092  case Intrinsic::loongarch_lasx_xvbsrl_v:
1093    return checkIntrinsicImmArg<5>(Op, 2, DAG);
1094  case Intrinsic::loongarch_lsx_vseqi_b:
1095  case Intrinsic::loongarch_lsx_vseqi_h:
1096  case Intrinsic::loongarch_lsx_vseqi_w:
1097  case Intrinsic::loongarch_lsx_vseqi_d:
1098  case Intrinsic::loongarch_lsx_vslei_b:
1099  case Intrinsic::loongarch_lsx_vslei_h:
1100  case Intrinsic::loongarch_lsx_vslei_w:
1101  case Intrinsic::loongarch_lsx_vslei_d:
1102  case Intrinsic::loongarch_lsx_vslti_b:
1103  case Intrinsic::loongarch_lsx_vslti_h:
1104  case Intrinsic::loongarch_lsx_vslti_w:
1105  case Intrinsic::loongarch_lsx_vslti_d:
1106  case Intrinsic::loongarch_lasx_xvseqi_b:
1107  case Intrinsic::loongarch_lasx_xvseqi_h:
1108  case Intrinsic::loongarch_lasx_xvseqi_w:
1109  case Intrinsic::loongarch_lasx_xvseqi_d:
1110  case Intrinsic::loongarch_lasx_xvslei_b:
1111  case Intrinsic::loongarch_lasx_xvslei_h:
1112  case Intrinsic::loongarch_lasx_xvslei_w:
1113  case Intrinsic::loongarch_lasx_xvslei_d:
1114  case Intrinsic::loongarch_lasx_xvslti_b:
1115  case Intrinsic::loongarch_lasx_xvslti_h:
1116  case Intrinsic::loongarch_lasx_xvslti_w:
1117  case Intrinsic::loongarch_lasx_xvslti_d:
1118    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1119  case Intrinsic::loongarch_lsx_vsrlni_h_w:
1120  case Intrinsic::loongarch_lsx_vsrani_h_w:
1121  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1122  case Intrinsic::loongarch_lsx_vsrarni_h_w:
1123  case Intrinsic::loongarch_lsx_vssrlni_h_w:
1124  case Intrinsic::loongarch_lsx_vssrani_h_w:
1125  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1126  case Intrinsic::loongarch_lsx_vssrani_hu_w:
1127  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1128  case Intrinsic::loongarch_lsx_vssrarni_h_w:
1129  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1130  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1131  case Intrinsic::loongarch_lsx_vfrstpi_b:
1132  case Intrinsic::loongarch_lsx_vfrstpi_h:
1133  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1134  case Intrinsic::loongarch_lasx_xvsrani_h_w:
1135  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1136  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1137  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1138  case Intrinsic::loongarch_lasx_xvssrani_h_w:
1139  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1140  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1141  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1142  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1143  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1144  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1145  case Intrinsic::loongarch_lasx_xvfrstpi_b:
1146  case Intrinsic::loongarch_lasx_xvfrstpi_h:
1147    return checkIntrinsicImmArg<5>(Op, 3, DAG);
1148  case Intrinsic::loongarch_lsx_vsat_d:
1149  case Intrinsic::loongarch_lsx_vsat_du:
1150  case Intrinsic::loongarch_lsx_vrotri_d:
1151  case Intrinsic::loongarch_lsx_vsrlri_d:
1152  case Intrinsic::loongarch_lsx_vsrari_d:
1153  case Intrinsic::loongarch_lasx_xvsat_d:
1154  case Intrinsic::loongarch_lasx_xvsat_du:
1155  case Intrinsic::loongarch_lasx_xvrotri_d:
1156  case Intrinsic::loongarch_lasx_xvsrlri_d:
1157  case Intrinsic::loongarch_lasx_xvsrari_d:
1158    return checkIntrinsicImmArg<6>(Op, 2, DAG);
1159  case Intrinsic::loongarch_lsx_vsrlni_w_d:
1160  case Intrinsic::loongarch_lsx_vsrani_w_d:
1161  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1162  case Intrinsic::loongarch_lsx_vsrarni_w_d:
1163  case Intrinsic::loongarch_lsx_vssrlni_w_d:
1164  case Intrinsic::loongarch_lsx_vssrani_w_d:
1165  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1166  case Intrinsic::loongarch_lsx_vssrani_wu_d:
1167  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1168  case Intrinsic::loongarch_lsx_vssrarni_w_d:
1169  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1170  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1171  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1172  case Intrinsic::loongarch_lasx_xvsrani_w_d:
1173  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1174  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1175  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1176  case Intrinsic::loongarch_lasx_xvssrani_w_d:
1177  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1178  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1179  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1180  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1181  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1182  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1183    return checkIntrinsicImmArg<6>(Op, 3, DAG);
1184  case Intrinsic::loongarch_lsx_vsrlni_d_q:
1185  case Intrinsic::loongarch_lsx_vsrani_d_q:
1186  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1187  case Intrinsic::loongarch_lsx_vsrarni_d_q:
1188  case Intrinsic::loongarch_lsx_vssrlni_d_q:
1189  case Intrinsic::loongarch_lsx_vssrani_d_q:
1190  case Intrinsic::loongarch_lsx_vssrlni_du_q:
1191  case Intrinsic::loongarch_lsx_vssrani_du_q:
1192  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1193  case Intrinsic::loongarch_lsx_vssrarni_d_q:
1194  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1195  case Intrinsic::loongarch_lsx_vssrarni_du_q:
1196  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1197  case Intrinsic::loongarch_lasx_xvsrani_d_q:
1198  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1199  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1200  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1201  case Intrinsic::loongarch_lasx_xvssrani_d_q:
1202  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1203  case Intrinsic::loongarch_lasx_xvssrani_du_q:
1204  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1205  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1206  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1207  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1208    return checkIntrinsicImmArg<7>(Op, 3, DAG);
1209  case Intrinsic::loongarch_lsx_vnori_b:
1210  case Intrinsic::loongarch_lsx_vshuf4i_b:
1211  case Intrinsic::loongarch_lsx_vshuf4i_h:
1212  case Intrinsic::loongarch_lsx_vshuf4i_w:
1213  case Intrinsic::loongarch_lasx_xvnori_b:
1214  case Intrinsic::loongarch_lasx_xvshuf4i_b:
1215  case Intrinsic::loongarch_lasx_xvshuf4i_h:
1216  case Intrinsic::loongarch_lasx_xvshuf4i_w:
1217  case Intrinsic::loongarch_lasx_xvpermi_d:
1218    return checkIntrinsicImmArg<8>(Op, 2, DAG);
1219  case Intrinsic::loongarch_lsx_vshuf4i_d:
1220  case Intrinsic::loongarch_lsx_vpermi_w:
1221  case Intrinsic::loongarch_lsx_vbitseli_b:
1222  case Intrinsic::loongarch_lsx_vextrins_b:
1223  case Intrinsic::loongarch_lsx_vextrins_h:
1224  case Intrinsic::loongarch_lsx_vextrins_w:
1225  case Intrinsic::loongarch_lsx_vextrins_d:
1226  case Intrinsic::loongarch_lasx_xvshuf4i_d:
1227  case Intrinsic::loongarch_lasx_xvpermi_w:
1228  case Intrinsic::loongarch_lasx_xvpermi_q:
1229  case Intrinsic::loongarch_lasx_xvbitseli_b:
1230  case Intrinsic::loongarch_lasx_xvextrins_b:
1231  case Intrinsic::loongarch_lasx_xvextrins_h:
1232  case Intrinsic::loongarch_lasx_xvextrins_w:
1233  case Intrinsic::loongarch_lasx_xvextrins_d:
1234    return checkIntrinsicImmArg<8>(Op, 3, DAG);
1235  case Intrinsic::loongarch_lsx_vrepli_b:
1236  case Intrinsic::loongarch_lsx_vrepli_h:
1237  case Intrinsic::loongarch_lsx_vrepli_w:
1238  case Intrinsic::loongarch_lsx_vrepli_d:
1239  case Intrinsic::loongarch_lasx_xvrepli_b:
1240  case Intrinsic::loongarch_lasx_xvrepli_h:
1241  case Intrinsic::loongarch_lasx_xvrepli_w:
1242  case Intrinsic::loongarch_lasx_xvrepli_d:
1243    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1244  case Intrinsic::loongarch_lsx_vldi:
1245  case Intrinsic::loongarch_lasx_xvldi:
1246    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1247  }
1248}
1249
1250// Helper function that emits error message for intrinsics with chain and return
1251// merge values of a UNDEF and the chain.
1252static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1253                                                  StringRef ErrorMsg,
1254                                                  SelectionDAG &DAG) {
1255  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1256  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1257                            SDLoc(Op));
1258}
1259
1260SDValue
1261LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1262                                                SelectionDAG &DAG) const {
1263  SDLoc DL(Op);
1264  MVT GRLenVT = Subtarget.getGRLenVT();
1265  EVT VT = Op.getValueType();
1266  SDValue Chain = Op.getOperand(0);
1267  const StringRef ErrorMsgOOR = "argument out of range";
1268  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1269  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1270
1271  switch (Op.getConstantOperandVal(1)) {
1272  default:
1273    return Op;
1274  case Intrinsic::loongarch_crc_w_b_w:
1275  case Intrinsic::loongarch_crc_w_h_w:
1276  case Intrinsic::loongarch_crc_w_w_w:
1277  case Intrinsic::loongarch_crc_w_d_w:
1278  case Intrinsic::loongarch_crcc_w_b_w:
1279  case Intrinsic::loongarch_crcc_w_h_w:
1280  case Intrinsic::loongarch_crcc_w_w_w:
1281  case Intrinsic::loongarch_crcc_w_d_w:
1282    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1283  case Intrinsic::loongarch_csrrd_w:
1284  case Intrinsic::loongarch_csrrd_d: {
1285    unsigned Imm = Op.getConstantOperandVal(2);
1286    return !isUInt<14>(Imm)
1287               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1288               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1289                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1290  }
1291  case Intrinsic::loongarch_csrwr_w:
1292  case Intrinsic::loongarch_csrwr_d: {
1293    unsigned Imm = Op.getConstantOperandVal(3);
1294    return !isUInt<14>(Imm)
1295               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1296               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1297                             {Chain, Op.getOperand(2),
1298                              DAG.getConstant(Imm, DL, GRLenVT)});
1299  }
1300  case Intrinsic::loongarch_csrxchg_w:
1301  case Intrinsic::loongarch_csrxchg_d: {
1302    unsigned Imm = Op.getConstantOperandVal(4);
1303    return !isUInt<14>(Imm)
1304               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1305               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1306                             {Chain, Op.getOperand(2), Op.getOperand(3),
1307                              DAG.getConstant(Imm, DL, GRLenVT)});
1308  }
1309  case Intrinsic::loongarch_iocsrrd_d: {
1310    return DAG.getNode(
1311        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1312        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1313  }
1314#define IOCSRRD_CASE(NAME, NODE)                                               \
1315  case Intrinsic::loongarch_##NAME: {                                          \
1316    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
1317                       {Chain, Op.getOperand(2)});                             \
1318  }
1319    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1320    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1321    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1322#undef IOCSRRD_CASE
1323  case Intrinsic::loongarch_cpucfg: {
1324    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1325                       {Chain, Op.getOperand(2)});
1326  }
1327  case Intrinsic::loongarch_lddir_d: {
1328    unsigned Imm = Op.getConstantOperandVal(3);
1329    return !isUInt<8>(Imm)
1330               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1331               : Op;
1332  }
1333  case Intrinsic::loongarch_movfcsr2gr: {
1334    if (!Subtarget.hasBasicF())
1335      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1336    unsigned Imm = Op.getConstantOperandVal(2);
1337    return !isUInt<2>(Imm)
1338               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1339               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1340                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1341  }
1342  case Intrinsic::loongarch_lsx_vld:
1343  case Intrinsic::loongarch_lsx_vldrepl_b:
1344  case Intrinsic::loongarch_lasx_xvld:
1345  case Intrinsic::loongarch_lasx_xvldrepl_b:
1346    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1347               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1348               : SDValue();
1349  case Intrinsic::loongarch_lsx_vldrepl_h:
1350  case Intrinsic::loongarch_lasx_xvldrepl_h:
1351    return !isShiftedInt<11, 1>(
1352               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1353               ? emitIntrinsicWithChainErrorMessage(
1354                     Op, "argument out of range or not a multiple of 2", DAG)
1355               : SDValue();
1356  case Intrinsic::loongarch_lsx_vldrepl_w:
1357  case Intrinsic::loongarch_lasx_xvldrepl_w:
1358    return !isShiftedInt<10, 2>(
1359               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1360               ? emitIntrinsicWithChainErrorMessage(
1361                     Op, "argument out of range or not a multiple of 4", DAG)
1362               : SDValue();
1363  case Intrinsic::loongarch_lsx_vldrepl_d:
1364  case Intrinsic::loongarch_lasx_xvldrepl_d:
1365    return !isShiftedInt<9, 3>(
1366               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1367               ? emitIntrinsicWithChainErrorMessage(
1368                     Op, "argument out of range or not a multiple of 8", DAG)
1369               : SDValue();
1370  }
1371}
1372
1373// Helper function that emits error message for intrinsics with void return
1374// value and return the chain.
1375static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1376                                         SelectionDAG &DAG) {
1377
1378  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1379  return Op.getOperand(0);
1380}
1381
1382SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1383                                                     SelectionDAG &DAG) const {
1384  SDLoc DL(Op);
1385  MVT GRLenVT = Subtarget.getGRLenVT();
1386  SDValue Chain = Op.getOperand(0);
1387  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1388  SDValue Op2 = Op.getOperand(2);
1389  const StringRef ErrorMsgOOR = "argument out of range";
1390  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1391  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1392  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1393
1394  switch (IntrinsicEnum) {
1395  default:
1396    // TODO: Add more Intrinsics.
1397    return SDValue();
1398  case Intrinsic::loongarch_cacop_d:
1399  case Intrinsic::loongarch_cacop_w: {
1400    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1401      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1402    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1403      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1404    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1405    unsigned Imm1 = Op2->getAsZExtVal();
1406    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1407    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1408      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1409    return Op;
1410  }
1411  case Intrinsic::loongarch_dbar: {
1412    unsigned Imm = Op2->getAsZExtVal();
1413    return !isUInt<15>(Imm)
1414               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1415               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1416                             DAG.getConstant(Imm, DL, GRLenVT));
1417  }
1418  case Intrinsic::loongarch_ibar: {
1419    unsigned Imm = Op2->getAsZExtVal();
1420    return !isUInt<15>(Imm)
1421               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1422               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1423                             DAG.getConstant(Imm, DL, GRLenVT));
1424  }
1425  case Intrinsic::loongarch_break: {
1426    unsigned Imm = Op2->getAsZExtVal();
1427    return !isUInt<15>(Imm)
1428               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1429               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1430                             DAG.getConstant(Imm, DL, GRLenVT));
1431  }
1432  case Intrinsic::loongarch_movgr2fcsr: {
1433    if (!Subtarget.hasBasicF())
1434      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1435    unsigned Imm = Op2->getAsZExtVal();
1436    return !isUInt<2>(Imm)
1437               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1438               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1439                             DAG.getConstant(Imm, DL, GRLenVT),
1440                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1441                                         Op.getOperand(3)));
1442  }
1443  case Intrinsic::loongarch_syscall: {
1444    unsigned Imm = Op2->getAsZExtVal();
1445    return !isUInt<15>(Imm)
1446               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1447               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1448                             DAG.getConstant(Imm, DL, GRLenVT));
1449  }
1450#define IOCSRWR_CASE(NAME, NODE)                                               \
1451  case Intrinsic::loongarch_##NAME: {                                          \
1452    SDValue Op3 = Op.getOperand(3);                                            \
1453    return Subtarget.is64Bit()                                                 \
1454               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
1455                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
1456                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
1457               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
1458                             Op3);                                             \
1459  }
1460    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1461    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1462    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1463#undef IOCSRWR_CASE
1464  case Intrinsic::loongarch_iocsrwr_d: {
1465    return !Subtarget.is64Bit()
1466               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1467               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1468                             Op2,
1469                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1470                                         Op.getOperand(3)));
1471  }
1472#define ASRT_LE_GT_CASE(NAME)                                                  \
1473  case Intrinsic::loongarch_##NAME: {                                          \
1474    return !Subtarget.is64Bit()                                                \
1475               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
1476               : Op;                                                           \
1477  }
1478    ASRT_LE_GT_CASE(asrtle_d)
1479    ASRT_LE_GT_CASE(asrtgt_d)
1480#undef ASRT_LE_GT_CASE
1481  case Intrinsic::loongarch_ldpte_d: {
1482    unsigned Imm = Op.getConstantOperandVal(3);
1483    return !Subtarget.is64Bit()
1484               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1485           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1486                             : Op;
1487  }
1488  case Intrinsic::loongarch_lsx_vst:
1489  case Intrinsic::loongarch_lasx_xvst:
1490    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1491               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1492               : SDValue();
1493  case Intrinsic::loongarch_lasx_xvstelm_b:
1494    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1495            !isUInt<5>(Op.getConstantOperandVal(5)))
1496               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1497               : SDValue();
1498  case Intrinsic::loongarch_lsx_vstelm_b:
1499    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1500            !isUInt<4>(Op.getConstantOperandVal(5)))
1501               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1502               : SDValue();
1503  case Intrinsic::loongarch_lasx_xvstelm_h:
1504    return (!isShiftedInt<8, 1>(
1505                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1506            !isUInt<4>(Op.getConstantOperandVal(5)))
1507               ? emitIntrinsicErrorMessage(
1508                     Op, "argument out of range or not a multiple of 2", DAG)
1509               : SDValue();
1510  case Intrinsic::loongarch_lsx_vstelm_h:
1511    return (!isShiftedInt<8, 1>(
1512                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1513            !isUInt<3>(Op.getConstantOperandVal(5)))
1514               ? emitIntrinsicErrorMessage(
1515                     Op, "argument out of range or not a multiple of 2", DAG)
1516               : SDValue();
1517  case Intrinsic::loongarch_lasx_xvstelm_w:
1518    return (!isShiftedInt<8, 2>(
1519                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1520            !isUInt<3>(Op.getConstantOperandVal(5)))
1521               ? emitIntrinsicErrorMessage(
1522                     Op, "argument out of range or not a multiple of 4", DAG)
1523               : SDValue();
1524  case Intrinsic::loongarch_lsx_vstelm_w:
1525    return (!isShiftedInt<8, 2>(
1526                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1527            !isUInt<2>(Op.getConstantOperandVal(5)))
1528               ? emitIntrinsicErrorMessage(
1529                     Op, "argument out of range or not a multiple of 4", DAG)
1530               : SDValue();
1531  case Intrinsic::loongarch_lasx_xvstelm_d:
1532    return (!isShiftedInt<8, 3>(
1533                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1534            !isUInt<2>(Op.getConstantOperandVal(5)))
1535               ? emitIntrinsicErrorMessage(
1536                     Op, "argument out of range or not a multiple of 8", DAG)
1537               : SDValue();
1538  case Intrinsic::loongarch_lsx_vstelm_d:
1539    return (!isShiftedInt<8, 3>(
1540                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1541            !isUInt<1>(Op.getConstantOperandVal(5)))
1542               ? emitIntrinsicErrorMessage(
1543                     Op, "argument out of range or not a multiple of 8", DAG)
1544               : SDValue();
1545  }
1546}
1547
1548SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1549                                                     SelectionDAG &DAG) const {
1550  SDLoc DL(Op);
1551  SDValue Lo = Op.getOperand(0);
1552  SDValue Hi = Op.getOperand(1);
1553  SDValue Shamt = Op.getOperand(2);
1554  EVT VT = Lo.getValueType();
1555
1556  // if Shamt-GRLen < 0: // Shamt < GRLen
1557  //   Lo = Lo << Shamt
1558  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1559  // else:
1560  //   Lo = 0
1561  //   Hi = Lo << (Shamt-GRLen)
1562
1563  SDValue Zero = DAG.getConstant(0, DL, VT);
1564  SDValue One = DAG.getConstant(1, DL, VT);
1565  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1566  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1567  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1568  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1569
1570  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1571  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1572  SDValue ShiftRightLo =
1573      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1574  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1575  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1576  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1577
1578  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1579
1580  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1581  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1582
1583  SDValue Parts[2] = {Lo, Hi};
1584  return DAG.getMergeValues(Parts, DL);
1585}
1586
1587SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1588                                                      SelectionDAG &DAG,
1589                                                      bool IsSRA) const {
1590  SDLoc DL(Op);
1591  SDValue Lo = Op.getOperand(0);
1592  SDValue Hi = Op.getOperand(1);
1593  SDValue Shamt = Op.getOperand(2);
1594  EVT VT = Lo.getValueType();
1595
1596  // SRA expansion:
1597  //   if Shamt-GRLen < 0: // Shamt < GRLen
1598  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1599  //     Hi = Hi >>s Shamt
1600  //   else:
1601  //     Lo = Hi >>s (Shamt-GRLen);
1602  //     Hi = Hi >>s (GRLen-1)
1603  //
1604  // SRL expansion:
1605  //   if Shamt-GRLen < 0: // Shamt < GRLen
1606  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1607  //     Hi = Hi >>u Shamt
1608  //   else:
1609  //     Lo = Hi >>u (Shamt-GRLen);
1610  //     Hi = 0;
1611
1612  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1613
1614  SDValue Zero = DAG.getConstant(0, DL, VT);
1615  SDValue One = DAG.getConstant(1, DL, VT);
1616  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1617  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1618  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1619  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1620
1621  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1622  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1623  SDValue ShiftLeftHi =
1624      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1625  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1626  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1627  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1628  SDValue HiFalse =
1629      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1630
1631  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1632
1633  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1634  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1635
1636  SDValue Parts[2] = {Lo, Hi};
1637  return DAG.getMergeValues(Parts, DL);
1638}
1639
1640// Returns the opcode of the target-specific SDNode that implements the 32-bit
1641// form of the given Opcode.
1642static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1643  switch (Opcode) {
1644  default:
1645    llvm_unreachable("Unexpected opcode");
1646  case ISD::SHL:
1647    return LoongArchISD::SLL_W;
1648  case ISD::SRA:
1649    return LoongArchISD::SRA_W;
1650  case ISD::SRL:
1651    return LoongArchISD::SRL_W;
1652  case ISD::ROTR:
1653    return LoongArchISD::ROTR_W;
1654  case ISD::ROTL:
1655    return LoongArchISD::ROTL_W;
1656  case ISD::CTTZ:
1657    return LoongArchISD::CTZ_W;
1658  case ISD::CTLZ:
1659    return LoongArchISD::CLZ_W;
1660  }
1661}
1662
1663// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1664// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1665// otherwise be promoted to i64, making it difficult to select the
1666// SLL_W/.../*W later one because the fact the operation was originally of
1667// type i8/i16/i32 is lost.
1668static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1669                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
1670  SDLoc DL(N);
1671  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1672  SDValue NewOp0, NewRes;
1673
1674  switch (NumOp) {
1675  default:
1676    llvm_unreachable("Unexpected NumOp");
1677  case 1: {
1678    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1679    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1680    break;
1681  }
1682  case 2: {
1683    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1684    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1685    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1686    break;
1687  }
1688    // TODO:Handle more NumOp.
1689  }
1690
1691  // ReplaceNodeResults requires we maintain the same type for the return
1692  // value.
1693  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1694}
1695
1696// Helper function that emits error message for intrinsics with/without chain
1697// and return a UNDEF or and the chain as the results.
1698static void emitErrorAndReplaceIntrinsicResults(
1699    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1700    StringRef ErrorMsg, bool WithChain = true) {
1701  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1702  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1703  if (!WithChain)
1704    return;
1705  Results.push_back(N->getOperand(0));
1706}
1707
1708template <unsigned N>
1709static void
1710replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1711                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1712                         unsigned ResOp) {
1713  const StringRef ErrorMsgOOR = "argument out of range";
1714  unsigned Imm = Node->getConstantOperandVal(2);
1715  if (!isUInt<N>(Imm)) {
1716    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1717                                        /*WithChain=*/false);
1718    return;
1719  }
1720  SDLoc DL(Node);
1721  SDValue Vec = Node->getOperand(1);
1722
1723  SDValue PickElt =
1724      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1725                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1726                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
1727  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1728                                PickElt.getValue(0)));
1729}
1730
1731static void replaceVecCondBranchResults(SDNode *N,
1732                                        SmallVectorImpl<SDValue> &Results,
1733                                        SelectionDAG &DAG,
1734                                        const LoongArchSubtarget &Subtarget,
1735                                        unsigned ResOp) {
1736  SDLoc DL(N);
1737  SDValue Vec = N->getOperand(1);
1738
1739  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1740  Results.push_back(
1741      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1742}
1743
1744static void
1745replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1746                                 SelectionDAG &DAG,
1747                                 const LoongArchSubtarget &Subtarget) {
1748  switch (N->getConstantOperandVal(0)) {
1749  default:
1750    llvm_unreachable("Unexpected Intrinsic.");
1751  case Intrinsic::loongarch_lsx_vpickve2gr_b:
1752    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1753                                LoongArchISD::VPICK_SEXT_ELT);
1754    break;
1755  case Intrinsic::loongarch_lsx_vpickve2gr_h:
1756  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1757    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1758                                LoongArchISD::VPICK_SEXT_ELT);
1759    break;
1760  case Intrinsic::loongarch_lsx_vpickve2gr_w:
1761    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1762                                LoongArchISD::VPICK_SEXT_ELT);
1763    break;
1764  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1765    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1766                                LoongArchISD::VPICK_ZEXT_ELT);
1767    break;
1768  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1769  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1770    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1771                                LoongArchISD::VPICK_ZEXT_ELT);
1772    break;
1773  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1774    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1775                                LoongArchISD::VPICK_ZEXT_ELT);
1776    break;
1777  case Intrinsic::loongarch_lsx_bz_b:
1778  case Intrinsic::loongarch_lsx_bz_h:
1779  case Intrinsic::loongarch_lsx_bz_w:
1780  case Intrinsic::loongarch_lsx_bz_d:
1781  case Intrinsic::loongarch_lasx_xbz_b:
1782  case Intrinsic::loongarch_lasx_xbz_h:
1783  case Intrinsic::loongarch_lasx_xbz_w:
1784  case Intrinsic::loongarch_lasx_xbz_d:
1785    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1786                                LoongArchISD::VALL_ZERO);
1787    break;
1788  case Intrinsic::loongarch_lsx_bz_v:
1789  case Intrinsic::loongarch_lasx_xbz_v:
1790    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1791                                LoongArchISD::VANY_ZERO);
1792    break;
1793  case Intrinsic::loongarch_lsx_bnz_b:
1794  case Intrinsic::loongarch_lsx_bnz_h:
1795  case Intrinsic::loongarch_lsx_bnz_w:
1796  case Intrinsic::loongarch_lsx_bnz_d:
1797  case Intrinsic::loongarch_lasx_xbnz_b:
1798  case Intrinsic::loongarch_lasx_xbnz_h:
1799  case Intrinsic::loongarch_lasx_xbnz_w:
1800  case Intrinsic::loongarch_lasx_xbnz_d:
1801    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1802                                LoongArchISD::VALL_NONZERO);
1803    break;
1804  case Intrinsic::loongarch_lsx_bnz_v:
1805  case Intrinsic::loongarch_lasx_xbnz_v:
1806    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1807                                LoongArchISD::VANY_NONZERO);
1808    break;
1809  }
1810}
1811
1812void LoongArchTargetLowering::ReplaceNodeResults(
1813    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1814  SDLoc DL(N);
1815  EVT VT = N->getValueType(0);
1816  switch (N->getOpcode()) {
1817  default:
1818    llvm_unreachable("Don't know how to legalize this operation");
1819  case ISD::SHL:
1820  case ISD::SRA:
1821  case ISD::SRL:
1822  case ISD::ROTR:
1823    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1824           "Unexpected custom legalisation");
1825    if (N->getOperand(1).getOpcode() != ISD::Constant) {
1826      Results.push_back(customLegalizeToWOp(N, DAG, 2));
1827      break;
1828    }
1829    break;
1830  case ISD::ROTL:
1831    ConstantSDNode *CN;
1832    if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1833      Results.push_back(customLegalizeToWOp(N, DAG, 2));
1834      break;
1835    }
1836    break;
1837  case ISD::FP_TO_SINT: {
1838    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1839           "Unexpected custom legalisation");
1840    SDValue Src = N->getOperand(0);
1841    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1842    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1843        TargetLowering::TypeSoftenFloat) {
1844      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1845      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1846      return;
1847    }
1848    // If the FP type needs to be softened, emit a library call using the 'si'
1849    // version. If we left it to default legalization we'd end up with 'di'.
1850    RTLIB::Libcall LC;
1851    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1852    MakeLibCallOptions CallOptions;
1853    EVT OpVT = Src.getValueType();
1854    CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1855    SDValue Chain = SDValue();
1856    SDValue Result;
1857    std::tie(Result, Chain) =
1858        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1859    Results.push_back(Result);
1860    break;
1861  }
1862  case ISD::BITCAST: {
1863    SDValue Src = N->getOperand(0);
1864    EVT SrcVT = Src.getValueType();
1865    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1866        Subtarget.hasBasicF()) {
1867      SDValue Dst =
1868          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1869      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1870    }
1871    break;
1872  }
1873  case ISD::FP_TO_UINT: {
1874    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1875           "Unexpected custom legalisation");
1876    auto &TLI = DAG.getTargetLoweringInfo();
1877    SDValue Tmp1, Tmp2;
1878    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1879    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1880    break;
1881  }
1882  case ISD::BSWAP: {
1883    SDValue Src = N->getOperand(0);
1884    assert((VT == MVT::i16 || VT == MVT::i32) &&
1885           "Unexpected custom legalization");
1886    MVT GRLenVT = Subtarget.getGRLenVT();
1887    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1888    SDValue Tmp;
1889    switch (VT.getSizeInBits()) {
1890    default:
1891      llvm_unreachable("Unexpected operand width");
1892    case 16:
1893      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1894      break;
1895    case 32:
1896      // Only LA64 will get to here due to the size mismatch between VT and
1897      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
1898      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1899      break;
1900    }
1901    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1902    break;
1903  }
1904  case ISD::BITREVERSE: {
1905    SDValue Src = N->getOperand(0);
1906    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1907           "Unexpected custom legalization");
1908    MVT GRLenVT = Subtarget.getGRLenVT();
1909    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1910    SDValue Tmp;
1911    switch (VT.getSizeInBits()) {
1912    default:
1913      llvm_unreachable("Unexpected operand width");
1914    case 8:
1915      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1916      break;
1917    case 32:
1918      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1919      break;
1920    }
1921    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1922    break;
1923  }
1924  case ISD::CTLZ:
1925  case ISD::CTTZ: {
1926    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1927           "Unexpected custom legalisation");
1928    Results.push_back(customLegalizeToWOp(N, DAG, 1));
1929    break;
1930  }
1931  case ISD::INTRINSIC_W_CHAIN: {
1932    SDValue Chain = N->getOperand(0);
1933    SDValue Op2 = N->getOperand(2);
1934    MVT GRLenVT = Subtarget.getGRLenVT();
1935    const StringRef ErrorMsgOOR = "argument out of range";
1936    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1937    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1938
1939    switch (N->getConstantOperandVal(1)) {
1940    default:
1941      llvm_unreachable("Unexpected Intrinsic.");
1942    case Intrinsic::loongarch_movfcsr2gr: {
1943      if (!Subtarget.hasBasicF()) {
1944        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1945        return;
1946      }
1947      unsigned Imm = Op2->getAsZExtVal();
1948      if (!isUInt<2>(Imm)) {
1949        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1950        return;
1951      }
1952      SDValue MOVFCSR2GRResults = DAG.getNode(
1953          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1954          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1955      Results.push_back(
1956          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1957      Results.push_back(MOVFCSR2GRResults.getValue(1));
1958      break;
1959    }
1960#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
1961  case Intrinsic::loongarch_##NAME: {                                          \
1962    SDValue NODE = DAG.getNode(                                                \
1963        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1964        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
1965         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1966    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1967    Results.push_back(NODE.getValue(1));                                       \
1968    break;                                                                     \
1969  }
1970      CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1971      CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1972      CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1973      CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1974      CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1975      CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1976#undef CRC_CASE_EXT_BINARYOP
1977
1978#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
1979  case Intrinsic::loongarch_##NAME: {                                          \
1980    SDValue NODE = DAG.getNode(                                                \
1981        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1982        {Chain, Op2,                                                           \
1983         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1984    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1985    Results.push_back(NODE.getValue(1));                                       \
1986    break;                                                                     \
1987  }
1988      CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1989      CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1990#undef CRC_CASE_EXT_UNARYOP
1991#define CSR_CASE(ID)                                                           \
1992  case Intrinsic::loongarch_##ID: {                                            \
1993    if (!Subtarget.is64Bit())                                                  \
1994      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
1995    break;                                                                     \
1996  }
1997      CSR_CASE(csrrd_d);
1998      CSR_CASE(csrwr_d);
1999      CSR_CASE(csrxchg_d);
2000      CSR_CASE(iocsrrd_d);
2001#undef CSR_CASE
2002    case Intrinsic::loongarch_csrrd_w: {
2003      unsigned Imm = Op2->getAsZExtVal();
2004      if (!isUInt<14>(Imm)) {
2005        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2006        return;
2007      }
2008      SDValue CSRRDResults =
2009          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2010                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2011      Results.push_back(
2012          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2013      Results.push_back(CSRRDResults.getValue(1));
2014      break;
2015    }
2016    case Intrinsic::loongarch_csrwr_w: {
2017      unsigned Imm = N->getConstantOperandVal(3);
2018      if (!isUInt<14>(Imm)) {
2019        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2020        return;
2021      }
2022      SDValue CSRWRResults =
2023          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2024                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2025                       DAG.getConstant(Imm, DL, GRLenVT)});
2026      Results.push_back(
2027          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2028      Results.push_back(CSRWRResults.getValue(1));
2029      break;
2030    }
2031    case Intrinsic::loongarch_csrxchg_w: {
2032      unsigned Imm = N->getConstantOperandVal(4);
2033      if (!isUInt<14>(Imm)) {
2034        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2035        return;
2036      }
2037      SDValue CSRXCHGResults = DAG.getNode(
2038          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2039          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2040           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2041           DAG.getConstant(Imm, DL, GRLenVT)});
2042      Results.push_back(
2043          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2044      Results.push_back(CSRXCHGResults.getValue(1));
2045      break;
2046    }
2047#define IOCSRRD_CASE(NAME, NODE)                                               \
2048  case Intrinsic::loongarch_##NAME: {                                          \
2049    SDValue IOCSRRDResults =                                                   \
2050        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
2051                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2052    Results.push_back(                                                         \
2053        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
2054    Results.push_back(IOCSRRDResults.getValue(1));                             \
2055    break;                                                                     \
2056  }
2057      IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2058      IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2059      IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2060#undef IOCSRRD_CASE
2061    case Intrinsic::loongarch_cpucfg: {
2062      SDValue CPUCFGResults =
2063          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2064                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2065      Results.push_back(
2066          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2067      Results.push_back(CPUCFGResults.getValue(1));
2068      break;
2069    }
2070    case Intrinsic::loongarch_lddir_d: {
2071      if (!Subtarget.is64Bit()) {
2072        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2073        return;
2074      }
2075      break;
2076    }
2077    }
2078    break;
2079  }
2080  case ISD::READ_REGISTER: {
2081    if (Subtarget.is64Bit())
2082      DAG.getContext()->emitError(
2083          "On LA64, only 64-bit registers can be read.");
2084    else
2085      DAG.getContext()->emitError(
2086          "On LA32, only 32-bit registers can be read.");
2087    Results.push_back(DAG.getUNDEF(VT));
2088    Results.push_back(N->getOperand(0));
2089    break;
2090  }
2091  case ISD::INTRINSIC_WO_CHAIN: {
2092    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2093    break;
2094  }
2095  }
2096}
2097
2098static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2099                                 TargetLowering::DAGCombinerInfo &DCI,
2100                                 const LoongArchSubtarget &Subtarget) {
2101  if (DCI.isBeforeLegalizeOps())
2102    return SDValue();
2103
2104  SDValue FirstOperand = N->getOperand(0);
2105  SDValue SecondOperand = N->getOperand(1);
2106  unsigned FirstOperandOpc = FirstOperand.getOpcode();
2107  EVT ValTy = N->getValueType(0);
2108  SDLoc DL(N);
2109  uint64_t lsb, msb;
2110  unsigned SMIdx, SMLen;
2111  ConstantSDNode *CN;
2112  SDValue NewOperand;
2113  MVT GRLenVT = Subtarget.getGRLenVT();
2114
2115  // Op's second operand must be a shifted mask.
2116  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2117      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2118    return SDValue();
2119
2120  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2121    // Pattern match BSTRPICK.
2122    //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
2123    //  => BSTRPICK $dst, $src, msb, lsb
2124    //  where msb = lsb + len - 1
2125
2126    // The second operand of the shift must be an immediate.
2127    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2128      return SDValue();
2129
2130    lsb = CN->getZExtValue();
2131
2132    // Return if the shifted mask does not start at bit 0 or the sum of its
2133    // length and lsb exceeds the word's size.
2134    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2135      return SDValue();
2136
2137    NewOperand = FirstOperand.getOperand(0);
2138  } else {
2139    // Pattern match BSTRPICK.
2140    //  $dst = and $src, (2**len- 1) , if len > 12
2141    //  => BSTRPICK $dst, $src, msb, lsb
2142    //  where lsb = 0 and msb = len - 1
2143
2144    // If the mask is <= 0xfff, andi can be used instead.
2145    if (CN->getZExtValue() <= 0xfff)
2146      return SDValue();
2147
2148    // Return if the MSB exceeds.
2149    if (SMIdx + SMLen > ValTy.getSizeInBits())
2150      return SDValue();
2151
2152    if (SMIdx > 0) {
2153      // Omit if the constant has more than 2 uses. This a conservative
2154      // decision. Whether it is a win depends on the HW microarchitecture.
2155      // However it should always be better for 1 and 2 uses.
2156      if (CN->use_size() > 2)
2157        return SDValue();
2158      // Return if the constant can be composed by a single LU12I.W.
2159      if ((CN->getZExtValue() & 0xfff) == 0)
2160        return SDValue();
2161      // Return if the constand can be composed by a single ADDI with
2162      // the zero register.
2163      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2164        return SDValue();
2165    }
2166
2167    lsb = SMIdx;
2168    NewOperand = FirstOperand;
2169  }
2170
2171  msb = lsb + SMLen - 1;
2172  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2173                            DAG.getConstant(msb, DL, GRLenVT),
2174                            DAG.getConstant(lsb, DL, GRLenVT));
2175  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2176    return NR0;
2177  // Try to optimize to
2178  //   bstrpick $Rd, $Rs, msb, lsb
2179  //   slli     $Rd, $Rd, lsb
2180  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2181                     DAG.getConstant(lsb, DL, GRLenVT));
2182}
2183
2184static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2185                                 TargetLowering::DAGCombinerInfo &DCI,
2186                                 const LoongArchSubtarget &Subtarget) {
2187  if (DCI.isBeforeLegalizeOps())
2188    return SDValue();
2189
2190  // $dst = srl (and $src, Mask), Shamt
2191  // =>
2192  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2193  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2194  //
2195
2196  SDValue FirstOperand = N->getOperand(0);
2197  ConstantSDNode *CN;
2198  EVT ValTy = N->getValueType(0);
2199  SDLoc DL(N);
2200  MVT GRLenVT = Subtarget.getGRLenVT();
2201  unsigned MaskIdx, MaskLen;
2202  uint64_t Shamt;
2203
2204  // The first operand must be an AND and the second operand of the AND must be
2205  // a shifted mask.
2206  if (FirstOperand.getOpcode() != ISD::AND ||
2207      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2208      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2209    return SDValue();
2210
2211  // The second operand (shift amount) must be an immediate.
2212  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2213    return SDValue();
2214
2215  Shamt = CN->getZExtValue();
2216  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2217    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2218                       FirstOperand->getOperand(0),
2219                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2220                       DAG.getConstant(Shamt, DL, GRLenVT));
2221
2222  return SDValue();
2223}
2224
2225static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2226                                TargetLowering::DAGCombinerInfo &DCI,
2227                                const LoongArchSubtarget &Subtarget) {
2228  MVT GRLenVT = Subtarget.getGRLenVT();
2229  EVT ValTy = N->getValueType(0);
2230  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2231  ConstantSDNode *CN0, *CN1;
2232  SDLoc DL(N);
2233  unsigned ValBits = ValTy.getSizeInBits();
2234  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2235  unsigned Shamt;
2236  bool SwapAndRetried = false;
2237
2238  if (DCI.isBeforeLegalizeOps())
2239    return SDValue();
2240
2241  if (ValBits != 32 && ValBits != 64)
2242    return SDValue();
2243
2244Retry:
2245  // 1st pattern to match BSTRINS:
2246  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
2247  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2248  //  =>
2249  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2250  if (N0.getOpcode() == ISD::AND &&
2251      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2252      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2253      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2254      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2255      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2256      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2257      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2258      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2259      (MaskIdx0 + MaskLen0 <= ValBits)) {
2260    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2261    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2262                       N1.getOperand(0).getOperand(0),
2263                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2264                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
2265  }
2266
2267  // 2nd pattern to match BSTRINS:
2268  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
2269  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2270  //  =>
2271  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2272  if (N0.getOpcode() == ISD::AND &&
2273      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2274      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2275      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2276      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2277      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2278      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2279      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2280      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2281      (MaskIdx0 + MaskLen0 <= ValBits)) {
2282    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2283    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2284                       N1.getOperand(0).getOperand(0),
2285                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2286                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
2287  }
2288
2289  // 3rd pattern to match BSTRINS:
2290  //  R = or (and X, mask0), (and Y, mask1)
2291  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2292  //  =>
2293  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2294  //  where msb = lsb + size - 1
2295  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2296      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2297      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2298      (MaskIdx0 + MaskLen0 <= 64) &&
2299      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2300      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2301    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2302    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2303                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2304                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2305                       DAG.getConstant(ValBits == 32
2306                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2307                                           : (MaskIdx0 + MaskLen0 - 1),
2308                                       DL, GRLenVT),
2309                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
2310  }
2311
2312  // 4th pattern to match BSTRINS:
2313  //  R = or (and X, mask), (shl Y, shamt)
2314  //  where mask = (2**shamt - 1)
2315  //  =>
2316  //  R = BSTRINS X, Y, ValBits - 1, shamt
2317  //  where ValBits = 32 or 64
2318  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2319      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2320      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2321      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2322      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2323      (MaskIdx0 + MaskLen0 <= ValBits)) {
2324    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2325    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2326                       N1.getOperand(0),
2327                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
2328                       DAG.getConstant(Shamt, DL, GRLenVT));
2329  }
2330
2331  // 5th pattern to match BSTRINS:
2332  //  R = or (and X, mask), const
2333  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
2334  //  =>
2335  //  R = BSTRINS X, (const >> lsb), msb, lsb
2336  //  where msb = lsb + size - 1
2337  if (N0.getOpcode() == ISD::AND &&
2338      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2339      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2340      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2341      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2342    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2343    return DAG.getNode(
2344        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2345        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2346        DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2347                                      : (MaskIdx0 + MaskLen0 - 1),
2348                        DL, GRLenVT),
2349        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2350  }
2351
2352  // 6th pattern.
2353  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2354  // by the incoming bits are known to be zero.
2355  // =>
2356  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2357  //
2358  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
2359  // pattern is more common than the 1st. So we put the 1st before the 6th in
2360  // order to match as many nodes as possible.
2361  ConstantSDNode *CNMask, *CNShamt;
2362  unsigned MaskIdx, MaskLen;
2363  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2364      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2365      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2366      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2367      CNShamt->getZExtValue() + MaskLen <= ValBits) {
2368    Shamt = CNShamt->getZExtValue();
2369    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2370    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2371      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2372      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2373                         N1.getOperand(0).getOperand(0),
2374                         DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2375                         DAG.getConstant(Shamt, DL, GRLenVT));
2376    }
2377  }
2378
2379  // 7th pattern.
2380  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2381  // overwritten by the incoming bits are known to be zero.
2382  // =>
2383  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2384  //
2385  // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2386  // before the 7th in order to match as many nodes as possible.
2387  if (N1.getOpcode() == ISD::AND &&
2388      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2389      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2390      N1.getOperand(0).getOpcode() == ISD::SHL &&
2391      (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2392      CNShamt->getZExtValue() == MaskIdx) {
2393    APInt ShMask(ValBits, CNMask->getZExtValue());
2394    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2395      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2396      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2397                         N1.getOperand(0).getOperand(0),
2398                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2399                         DAG.getConstant(MaskIdx, DL, GRLenVT));
2400    }
2401  }
2402
2403  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2404  if (!SwapAndRetried) {
2405    std::swap(N0, N1);
2406    SwapAndRetried = true;
2407    goto Retry;
2408  }
2409
2410  SwapAndRetried = false;
2411Retry2:
2412  // 8th pattern.
2413  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2414  // the incoming bits are known to be zero.
2415  // =>
2416  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2417  //
2418  // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2419  // we put it here in order to match as many nodes as possible or generate less
2420  // instructions.
2421  if (N1.getOpcode() == ISD::AND &&
2422      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2423      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2424    APInt ShMask(ValBits, CNMask->getZExtValue());
2425    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2426      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2427      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2428                         DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2429                                     N1->getOperand(0),
2430                                     DAG.getConstant(MaskIdx, DL, GRLenVT)),
2431                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2432                         DAG.getConstant(MaskIdx, DL, GRLenVT));
2433    }
2434  }
2435  // Swap N0/N1 and retry.
2436  if (!SwapAndRetried) {
2437    std::swap(N0, N1);
2438    SwapAndRetried = true;
2439    goto Retry2;
2440  }
2441
2442  return SDValue();
2443}
2444
2445// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2446static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2447                                      TargetLowering::DAGCombinerInfo &DCI,
2448                                      const LoongArchSubtarget &Subtarget) {
2449  if (DCI.isBeforeLegalizeOps())
2450    return SDValue();
2451
2452  SDValue Src = N->getOperand(0);
2453  if (Src.getOpcode() != LoongArchISD::REVB_2W)
2454    return SDValue();
2455
2456  return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2457                     Src.getOperand(0));
2458}
2459
2460template <unsigned N>
2461static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2462                                       SelectionDAG &DAG,
2463                                       const LoongArchSubtarget &Subtarget,
2464                                       bool IsSigned = false) {
2465  SDLoc DL(Node);
2466  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2467  // Check the ImmArg.
2468  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2469      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2470    DAG.getContext()->emitError(Node->getOperationName(0) +
2471                                ": argument out of range.");
2472    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2473  }
2474  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2475}
2476
2477template <unsigned N>
2478static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2479                                   SelectionDAG &DAG, bool IsSigned = false) {
2480  SDLoc DL(Node);
2481  EVT ResTy = Node->getValueType(0);
2482  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2483
2484  // Check the ImmArg.
2485  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2486      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2487    DAG.getContext()->emitError(Node->getOperationName(0) +
2488                                ": argument out of range.");
2489    return DAG.getNode(ISD::UNDEF, DL, ResTy);
2490  }
2491  return DAG.getConstant(
2492      APInt(ResTy.getScalarType().getSizeInBits(),
2493            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2494      DL, ResTy);
2495}
2496
2497static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2498  SDLoc DL(Node);
2499  EVT ResTy = Node->getValueType(0);
2500  SDValue Vec = Node->getOperand(2);
2501  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2502  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2503}
2504
2505static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2506  SDLoc DL(Node);
2507  EVT ResTy = Node->getValueType(0);
2508  SDValue One = DAG.getConstant(1, DL, ResTy);
2509  SDValue Bit =
2510      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2511
2512  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2513                     DAG.getNOT(DL, Bit, ResTy));
2514}
2515
2516template <unsigned N>
2517static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2518  SDLoc DL(Node);
2519  EVT ResTy = Node->getValueType(0);
2520  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2521  // Check the unsigned ImmArg.
2522  if (!isUInt<N>(CImm->getZExtValue())) {
2523    DAG.getContext()->emitError(Node->getOperationName(0) +
2524                                ": argument out of range.");
2525    return DAG.getNode(ISD::UNDEF, DL, ResTy);
2526  }
2527
2528  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2529  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2530
2531  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2532}
2533
2534template <unsigned N>
2535static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2536  SDLoc DL(Node);
2537  EVT ResTy = Node->getValueType(0);
2538  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2539  // Check the unsigned ImmArg.
2540  if (!isUInt<N>(CImm->getZExtValue())) {
2541    DAG.getContext()->emitError(Node->getOperationName(0) +
2542                                ": argument out of range.");
2543    return DAG.getNode(ISD::UNDEF, DL, ResTy);
2544  }
2545
2546  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2547  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2548  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2549}
2550
2551template <unsigned N>
2552static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2553  SDLoc DL(Node);
2554  EVT ResTy = Node->getValueType(0);
2555  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2556  // Check the unsigned ImmArg.
2557  if (!isUInt<N>(CImm->getZExtValue())) {
2558    DAG.getContext()->emitError(Node->getOperationName(0) +
2559                                ": argument out of range.");
2560    return DAG.getNode(ISD::UNDEF, DL, ResTy);
2561  }
2562
2563  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2564  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2565  return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2566}
2567
2568static SDValue
2569performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2570                                 TargetLowering::DAGCombinerInfo &DCI,
2571                                 const LoongArchSubtarget &Subtarget) {
2572  SDLoc DL(N);
2573  switch (N->getConstantOperandVal(0)) {
2574  default:
2575    break;
2576  case Intrinsic::loongarch_lsx_vadd_b:
2577  case Intrinsic::loongarch_lsx_vadd_h:
2578  case Intrinsic::loongarch_lsx_vadd_w:
2579  case Intrinsic::loongarch_lsx_vadd_d:
2580  case Intrinsic::loongarch_lasx_xvadd_b:
2581  case Intrinsic::loongarch_lasx_xvadd_h:
2582  case Intrinsic::loongarch_lasx_xvadd_w:
2583  case Intrinsic::loongarch_lasx_xvadd_d:
2584    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2585                       N->getOperand(2));
2586  case Intrinsic::loongarch_lsx_vaddi_bu:
2587  case Intrinsic::loongarch_lsx_vaddi_hu:
2588  case Intrinsic::loongarch_lsx_vaddi_wu:
2589  case Intrinsic::loongarch_lsx_vaddi_du:
2590  case Intrinsic::loongarch_lasx_xvaddi_bu:
2591  case Intrinsic::loongarch_lasx_xvaddi_hu:
2592  case Intrinsic::loongarch_lasx_xvaddi_wu:
2593  case Intrinsic::loongarch_lasx_xvaddi_du:
2594    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2595                       lowerVectorSplatImm<5>(N, 2, DAG));
2596  case Intrinsic::loongarch_lsx_vsub_b:
2597  case Intrinsic::loongarch_lsx_vsub_h:
2598  case Intrinsic::loongarch_lsx_vsub_w:
2599  case Intrinsic::loongarch_lsx_vsub_d:
2600  case Intrinsic::loongarch_lasx_xvsub_b:
2601  case Intrinsic::loongarch_lasx_xvsub_h:
2602  case Intrinsic::loongarch_lasx_xvsub_w:
2603  case Intrinsic::loongarch_lasx_xvsub_d:
2604    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2605                       N->getOperand(2));
2606  case Intrinsic::loongarch_lsx_vsubi_bu:
2607  case Intrinsic::loongarch_lsx_vsubi_hu:
2608  case Intrinsic::loongarch_lsx_vsubi_wu:
2609  case Intrinsic::loongarch_lsx_vsubi_du:
2610  case Intrinsic::loongarch_lasx_xvsubi_bu:
2611  case Intrinsic::loongarch_lasx_xvsubi_hu:
2612  case Intrinsic::loongarch_lasx_xvsubi_wu:
2613  case Intrinsic::loongarch_lasx_xvsubi_du:
2614    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2615                       lowerVectorSplatImm<5>(N, 2, DAG));
2616  case Intrinsic::loongarch_lsx_vneg_b:
2617  case Intrinsic::loongarch_lsx_vneg_h:
2618  case Intrinsic::loongarch_lsx_vneg_w:
2619  case Intrinsic::loongarch_lsx_vneg_d:
2620  case Intrinsic::loongarch_lasx_xvneg_b:
2621  case Intrinsic::loongarch_lasx_xvneg_h:
2622  case Intrinsic::loongarch_lasx_xvneg_w:
2623  case Intrinsic::loongarch_lasx_xvneg_d:
2624    return DAG.getNode(
2625        ISD::SUB, DL, N->getValueType(0),
2626        DAG.getConstant(
2627            APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2628                  /*isSigned=*/true),
2629            SDLoc(N), N->getValueType(0)),
2630        N->getOperand(1));
2631  case Intrinsic::loongarch_lsx_vmax_b:
2632  case Intrinsic::loongarch_lsx_vmax_h:
2633  case Intrinsic::loongarch_lsx_vmax_w:
2634  case Intrinsic::loongarch_lsx_vmax_d:
2635  case Intrinsic::loongarch_lasx_xvmax_b:
2636  case Intrinsic::loongarch_lasx_xvmax_h:
2637  case Intrinsic::loongarch_lasx_xvmax_w:
2638  case Intrinsic::loongarch_lasx_xvmax_d:
2639    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2640                       N->getOperand(2));
2641  case Intrinsic::loongarch_lsx_vmax_bu:
2642  case Intrinsic::loongarch_lsx_vmax_hu:
2643  case Intrinsic::loongarch_lsx_vmax_wu:
2644  case Intrinsic::loongarch_lsx_vmax_du:
2645  case Intrinsic::loongarch_lasx_xvmax_bu:
2646  case Intrinsic::loongarch_lasx_xvmax_hu:
2647  case Intrinsic::loongarch_lasx_xvmax_wu:
2648  case Intrinsic::loongarch_lasx_xvmax_du:
2649    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2650                       N->getOperand(2));
2651  case Intrinsic::loongarch_lsx_vmaxi_b:
2652  case Intrinsic::loongarch_lsx_vmaxi_h:
2653  case Intrinsic::loongarch_lsx_vmaxi_w:
2654  case Intrinsic::loongarch_lsx_vmaxi_d:
2655  case Intrinsic::loongarch_lasx_xvmaxi_b:
2656  case Intrinsic::loongarch_lasx_xvmaxi_h:
2657  case Intrinsic::loongarch_lasx_xvmaxi_w:
2658  case Intrinsic::loongarch_lasx_xvmaxi_d:
2659    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2660                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2661  case Intrinsic::loongarch_lsx_vmaxi_bu:
2662  case Intrinsic::loongarch_lsx_vmaxi_hu:
2663  case Intrinsic::loongarch_lsx_vmaxi_wu:
2664  case Intrinsic::loongarch_lsx_vmaxi_du:
2665  case Intrinsic::loongarch_lasx_xvmaxi_bu:
2666  case Intrinsic::loongarch_lasx_xvmaxi_hu:
2667  case Intrinsic::loongarch_lasx_xvmaxi_wu:
2668  case Intrinsic::loongarch_lasx_xvmaxi_du:
2669    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2670                       lowerVectorSplatImm<5>(N, 2, DAG));
2671  case Intrinsic::loongarch_lsx_vmin_b:
2672  case Intrinsic::loongarch_lsx_vmin_h:
2673  case Intrinsic::loongarch_lsx_vmin_w:
2674  case Intrinsic::loongarch_lsx_vmin_d:
2675  case Intrinsic::loongarch_lasx_xvmin_b:
2676  case Intrinsic::loongarch_lasx_xvmin_h:
2677  case Intrinsic::loongarch_lasx_xvmin_w:
2678  case Intrinsic::loongarch_lasx_xvmin_d:
2679    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2680                       N->getOperand(2));
2681  case Intrinsic::loongarch_lsx_vmin_bu:
2682  case Intrinsic::loongarch_lsx_vmin_hu:
2683  case Intrinsic::loongarch_lsx_vmin_wu:
2684  case Intrinsic::loongarch_lsx_vmin_du:
2685  case Intrinsic::loongarch_lasx_xvmin_bu:
2686  case Intrinsic::loongarch_lasx_xvmin_hu:
2687  case Intrinsic::loongarch_lasx_xvmin_wu:
2688  case Intrinsic::loongarch_lasx_xvmin_du:
2689    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2690                       N->getOperand(2));
2691  case Intrinsic::loongarch_lsx_vmini_b:
2692  case Intrinsic::loongarch_lsx_vmini_h:
2693  case Intrinsic::loongarch_lsx_vmini_w:
2694  case Intrinsic::loongarch_lsx_vmini_d:
2695  case Intrinsic::loongarch_lasx_xvmini_b:
2696  case Intrinsic::loongarch_lasx_xvmini_h:
2697  case Intrinsic::loongarch_lasx_xvmini_w:
2698  case Intrinsic::loongarch_lasx_xvmini_d:
2699    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2700                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2701  case Intrinsic::loongarch_lsx_vmini_bu:
2702  case Intrinsic::loongarch_lsx_vmini_hu:
2703  case Intrinsic::loongarch_lsx_vmini_wu:
2704  case Intrinsic::loongarch_lsx_vmini_du:
2705  case Intrinsic::loongarch_lasx_xvmini_bu:
2706  case Intrinsic::loongarch_lasx_xvmini_hu:
2707  case Intrinsic::loongarch_lasx_xvmini_wu:
2708  case Intrinsic::loongarch_lasx_xvmini_du:
2709    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2710                       lowerVectorSplatImm<5>(N, 2, DAG));
2711  case Intrinsic::loongarch_lsx_vmul_b:
2712  case Intrinsic::loongarch_lsx_vmul_h:
2713  case Intrinsic::loongarch_lsx_vmul_w:
2714  case Intrinsic::loongarch_lsx_vmul_d:
2715  case Intrinsic::loongarch_lasx_xvmul_b:
2716  case Intrinsic::loongarch_lasx_xvmul_h:
2717  case Intrinsic::loongarch_lasx_xvmul_w:
2718  case Intrinsic::loongarch_lasx_xvmul_d:
2719    return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2720                       N->getOperand(2));
2721  case Intrinsic::loongarch_lsx_vmadd_b:
2722  case Intrinsic::loongarch_lsx_vmadd_h:
2723  case Intrinsic::loongarch_lsx_vmadd_w:
2724  case Intrinsic::loongarch_lsx_vmadd_d:
2725  case Intrinsic::loongarch_lasx_xvmadd_b:
2726  case Intrinsic::loongarch_lasx_xvmadd_h:
2727  case Intrinsic::loongarch_lasx_xvmadd_w:
2728  case Intrinsic::loongarch_lasx_xvmadd_d: {
2729    EVT ResTy = N->getValueType(0);
2730    return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2731                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2732                                   N->getOperand(3)));
2733  }
2734  case Intrinsic::loongarch_lsx_vmsub_b:
2735  case Intrinsic::loongarch_lsx_vmsub_h:
2736  case Intrinsic::loongarch_lsx_vmsub_w:
2737  case Intrinsic::loongarch_lsx_vmsub_d:
2738  case Intrinsic::loongarch_lasx_xvmsub_b:
2739  case Intrinsic::loongarch_lasx_xvmsub_h:
2740  case Intrinsic::loongarch_lasx_xvmsub_w:
2741  case Intrinsic::loongarch_lasx_xvmsub_d: {
2742    EVT ResTy = N->getValueType(0);
2743    return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2744                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2745                                   N->getOperand(3)));
2746  }
2747  case Intrinsic::loongarch_lsx_vdiv_b:
2748  case Intrinsic::loongarch_lsx_vdiv_h:
2749  case Intrinsic::loongarch_lsx_vdiv_w:
2750  case Intrinsic::loongarch_lsx_vdiv_d:
2751  case Intrinsic::loongarch_lasx_xvdiv_b:
2752  case Intrinsic::loongarch_lasx_xvdiv_h:
2753  case Intrinsic::loongarch_lasx_xvdiv_w:
2754  case Intrinsic::loongarch_lasx_xvdiv_d:
2755    return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2756                       N->getOperand(2));
2757  case Intrinsic::loongarch_lsx_vdiv_bu:
2758  case Intrinsic::loongarch_lsx_vdiv_hu:
2759  case Intrinsic::loongarch_lsx_vdiv_wu:
2760  case Intrinsic::loongarch_lsx_vdiv_du:
2761  case Intrinsic::loongarch_lasx_xvdiv_bu:
2762  case Intrinsic::loongarch_lasx_xvdiv_hu:
2763  case Intrinsic::loongarch_lasx_xvdiv_wu:
2764  case Intrinsic::loongarch_lasx_xvdiv_du:
2765    return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2766                       N->getOperand(2));
2767  case Intrinsic::loongarch_lsx_vmod_b:
2768  case Intrinsic::loongarch_lsx_vmod_h:
2769  case Intrinsic::loongarch_lsx_vmod_w:
2770  case Intrinsic::loongarch_lsx_vmod_d:
2771  case Intrinsic::loongarch_lasx_xvmod_b:
2772  case Intrinsic::loongarch_lasx_xvmod_h:
2773  case Intrinsic::loongarch_lasx_xvmod_w:
2774  case Intrinsic::loongarch_lasx_xvmod_d:
2775    return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2776                       N->getOperand(2));
2777  case Intrinsic::loongarch_lsx_vmod_bu:
2778  case Intrinsic::loongarch_lsx_vmod_hu:
2779  case Intrinsic::loongarch_lsx_vmod_wu:
2780  case Intrinsic::loongarch_lsx_vmod_du:
2781  case Intrinsic::loongarch_lasx_xvmod_bu:
2782  case Intrinsic::loongarch_lasx_xvmod_hu:
2783  case Intrinsic::loongarch_lasx_xvmod_wu:
2784  case Intrinsic::loongarch_lasx_xvmod_du:
2785    return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2786                       N->getOperand(2));
2787  case Intrinsic::loongarch_lsx_vand_v:
2788  case Intrinsic::loongarch_lasx_xvand_v:
2789    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2790                       N->getOperand(2));
2791  case Intrinsic::loongarch_lsx_vor_v:
2792  case Intrinsic::loongarch_lasx_xvor_v:
2793    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2794                       N->getOperand(2));
2795  case Intrinsic::loongarch_lsx_vxor_v:
2796  case Intrinsic::loongarch_lasx_xvxor_v:
2797    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2798                       N->getOperand(2));
2799  case Intrinsic::loongarch_lsx_vnor_v:
2800  case Intrinsic::loongarch_lasx_xvnor_v: {
2801    SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2802                              N->getOperand(2));
2803    return DAG.getNOT(DL, Res, Res->getValueType(0));
2804  }
2805  case Intrinsic::loongarch_lsx_vandi_b:
2806  case Intrinsic::loongarch_lasx_xvandi_b:
2807    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2808                       lowerVectorSplatImm<8>(N, 2, DAG));
2809  case Intrinsic::loongarch_lsx_vori_b:
2810  case Intrinsic::loongarch_lasx_xvori_b:
2811    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2812                       lowerVectorSplatImm<8>(N, 2, DAG));
2813  case Intrinsic::loongarch_lsx_vxori_b:
2814  case Intrinsic::loongarch_lasx_xvxori_b:
2815    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2816                       lowerVectorSplatImm<8>(N, 2, DAG));
2817  case Intrinsic::loongarch_lsx_vsll_b:
2818  case Intrinsic::loongarch_lsx_vsll_h:
2819  case Intrinsic::loongarch_lsx_vsll_w:
2820  case Intrinsic::loongarch_lsx_vsll_d:
2821  case Intrinsic::loongarch_lasx_xvsll_b:
2822  case Intrinsic::loongarch_lasx_xvsll_h:
2823  case Intrinsic::loongarch_lasx_xvsll_w:
2824  case Intrinsic::loongarch_lasx_xvsll_d:
2825    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2826                       truncateVecElts(N, DAG));
2827  case Intrinsic::loongarch_lsx_vslli_b:
2828  case Intrinsic::loongarch_lasx_xvslli_b:
2829    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2830                       lowerVectorSplatImm<3>(N, 2, DAG));
2831  case Intrinsic::loongarch_lsx_vslli_h:
2832  case Intrinsic::loongarch_lasx_xvslli_h:
2833    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2834                       lowerVectorSplatImm<4>(N, 2, DAG));
2835  case Intrinsic::loongarch_lsx_vslli_w:
2836  case Intrinsic::loongarch_lasx_xvslli_w:
2837    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2838                       lowerVectorSplatImm<5>(N, 2, DAG));
2839  case Intrinsic::loongarch_lsx_vslli_d:
2840  case Intrinsic::loongarch_lasx_xvslli_d:
2841    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2842                       lowerVectorSplatImm<6>(N, 2, DAG));
2843  case Intrinsic::loongarch_lsx_vsrl_b:
2844  case Intrinsic::loongarch_lsx_vsrl_h:
2845  case Intrinsic::loongarch_lsx_vsrl_w:
2846  case Intrinsic::loongarch_lsx_vsrl_d:
2847  case Intrinsic::loongarch_lasx_xvsrl_b:
2848  case Intrinsic::loongarch_lasx_xvsrl_h:
2849  case Intrinsic::loongarch_lasx_xvsrl_w:
2850  case Intrinsic::loongarch_lasx_xvsrl_d:
2851    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2852                       truncateVecElts(N, DAG));
2853  case Intrinsic::loongarch_lsx_vsrli_b:
2854  case Intrinsic::loongarch_lasx_xvsrli_b:
2855    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2856                       lowerVectorSplatImm<3>(N, 2, DAG));
2857  case Intrinsic::loongarch_lsx_vsrli_h:
2858  case Intrinsic::loongarch_lasx_xvsrli_h:
2859    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2860                       lowerVectorSplatImm<4>(N, 2, DAG));
2861  case Intrinsic::loongarch_lsx_vsrli_w:
2862  case Intrinsic::loongarch_lasx_xvsrli_w:
2863    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2864                       lowerVectorSplatImm<5>(N, 2, DAG));
2865  case Intrinsic::loongarch_lsx_vsrli_d:
2866  case Intrinsic::loongarch_lasx_xvsrli_d:
2867    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2868                       lowerVectorSplatImm<6>(N, 2, DAG));
2869  case Intrinsic::loongarch_lsx_vsra_b:
2870  case Intrinsic::loongarch_lsx_vsra_h:
2871  case Intrinsic::loongarch_lsx_vsra_w:
2872  case Intrinsic::loongarch_lsx_vsra_d:
2873  case Intrinsic::loongarch_lasx_xvsra_b:
2874  case Intrinsic::loongarch_lasx_xvsra_h:
2875  case Intrinsic::loongarch_lasx_xvsra_w:
2876  case Intrinsic::loongarch_lasx_xvsra_d:
2877    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2878                       truncateVecElts(N, DAG));
2879  case Intrinsic::loongarch_lsx_vsrai_b:
2880  case Intrinsic::loongarch_lasx_xvsrai_b:
2881    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2882                       lowerVectorSplatImm<3>(N, 2, DAG));
2883  case Intrinsic::loongarch_lsx_vsrai_h:
2884  case Intrinsic::loongarch_lasx_xvsrai_h:
2885    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2886                       lowerVectorSplatImm<4>(N, 2, DAG));
2887  case Intrinsic::loongarch_lsx_vsrai_w:
2888  case Intrinsic::loongarch_lasx_xvsrai_w:
2889    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2890                       lowerVectorSplatImm<5>(N, 2, DAG));
2891  case Intrinsic::loongarch_lsx_vsrai_d:
2892  case Intrinsic::loongarch_lasx_xvsrai_d:
2893    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2894                       lowerVectorSplatImm<6>(N, 2, DAG));
2895  case Intrinsic::loongarch_lsx_vclz_b:
2896  case Intrinsic::loongarch_lsx_vclz_h:
2897  case Intrinsic::loongarch_lsx_vclz_w:
2898  case Intrinsic::loongarch_lsx_vclz_d:
2899  case Intrinsic::loongarch_lasx_xvclz_b:
2900  case Intrinsic::loongarch_lasx_xvclz_h:
2901  case Intrinsic::loongarch_lasx_xvclz_w:
2902  case Intrinsic::loongarch_lasx_xvclz_d:
2903    return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2904  case Intrinsic::loongarch_lsx_vpcnt_b:
2905  case Intrinsic::loongarch_lsx_vpcnt_h:
2906  case Intrinsic::loongarch_lsx_vpcnt_w:
2907  case Intrinsic::loongarch_lsx_vpcnt_d:
2908  case Intrinsic::loongarch_lasx_xvpcnt_b:
2909  case Intrinsic::loongarch_lasx_xvpcnt_h:
2910  case Intrinsic::loongarch_lasx_xvpcnt_w:
2911  case Intrinsic::loongarch_lasx_xvpcnt_d:
2912    return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2913  case Intrinsic::loongarch_lsx_vbitclr_b:
2914  case Intrinsic::loongarch_lsx_vbitclr_h:
2915  case Intrinsic::loongarch_lsx_vbitclr_w:
2916  case Intrinsic::loongarch_lsx_vbitclr_d:
2917  case Intrinsic::loongarch_lasx_xvbitclr_b:
2918  case Intrinsic::loongarch_lasx_xvbitclr_h:
2919  case Intrinsic::loongarch_lasx_xvbitclr_w:
2920  case Intrinsic::loongarch_lasx_xvbitclr_d:
2921    return lowerVectorBitClear(N, DAG);
2922  case Intrinsic::loongarch_lsx_vbitclri_b:
2923  case Intrinsic::loongarch_lasx_xvbitclri_b:
2924    return lowerVectorBitClearImm<3>(N, DAG);
2925  case Intrinsic::loongarch_lsx_vbitclri_h:
2926  case Intrinsic::loongarch_lasx_xvbitclri_h:
2927    return lowerVectorBitClearImm<4>(N, DAG);
2928  case Intrinsic::loongarch_lsx_vbitclri_w:
2929  case Intrinsic::loongarch_lasx_xvbitclri_w:
2930    return lowerVectorBitClearImm<5>(N, DAG);
2931  case Intrinsic::loongarch_lsx_vbitclri_d:
2932  case Intrinsic::loongarch_lasx_xvbitclri_d:
2933    return lowerVectorBitClearImm<6>(N, DAG);
2934  case Intrinsic::loongarch_lsx_vbitset_b:
2935  case Intrinsic::loongarch_lsx_vbitset_h:
2936  case Intrinsic::loongarch_lsx_vbitset_w:
2937  case Intrinsic::loongarch_lsx_vbitset_d:
2938  case Intrinsic::loongarch_lasx_xvbitset_b:
2939  case Intrinsic::loongarch_lasx_xvbitset_h:
2940  case Intrinsic::loongarch_lasx_xvbitset_w:
2941  case Intrinsic::loongarch_lasx_xvbitset_d: {
2942    EVT VecTy = N->getValueType(0);
2943    SDValue One = DAG.getConstant(1, DL, VecTy);
2944    return DAG.getNode(
2945        ISD::OR, DL, VecTy, N->getOperand(1),
2946        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2947  }
2948  case Intrinsic::loongarch_lsx_vbitseti_b:
2949  case Intrinsic::loongarch_lasx_xvbitseti_b:
2950    return lowerVectorBitSetImm<3>(N, DAG);
2951  case Intrinsic::loongarch_lsx_vbitseti_h:
2952  case Intrinsic::loongarch_lasx_xvbitseti_h:
2953    return lowerVectorBitSetImm<4>(N, DAG);
2954  case Intrinsic::loongarch_lsx_vbitseti_w:
2955  case Intrinsic::loongarch_lasx_xvbitseti_w:
2956    return lowerVectorBitSetImm<5>(N, DAG);
2957  case Intrinsic::loongarch_lsx_vbitseti_d:
2958  case Intrinsic::loongarch_lasx_xvbitseti_d:
2959    return lowerVectorBitSetImm<6>(N, DAG);
2960  case Intrinsic::loongarch_lsx_vbitrev_b:
2961  case Intrinsic::loongarch_lsx_vbitrev_h:
2962  case Intrinsic::loongarch_lsx_vbitrev_w:
2963  case Intrinsic::loongarch_lsx_vbitrev_d:
2964  case Intrinsic::loongarch_lasx_xvbitrev_b:
2965  case Intrinsic::loongarch_lasx_xvbitrev_h:
2966  case Intrinsic::loongarch_lasx_xvbitrev_w:
2967  case Intrinsic::loongarch_lasx_xvbitrev_d: {
2968    EVT VecTy = N->getValueType(0);
2969    SDValue One = DAG.getConstant(1, DL, VecTy);
2970    return DAG.getNode(
2971        ISD::XOR, DL, VecTy, N->getOperand(1),
2972        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2973  }
2974  case Intrinsic::loongarch_lsx_vbitrevi_b:
2975  case Intrinsic::loongarch_lasx_xvbitrevi_b:
2976    return lowerVectorBitRevImm<3>(N, DAG);
2977  case Intrinsic::loongarch_lsx_vbitrevi_h:
2978  case Intrinsic::loongarch_lasx_xvbitrevi_h:
2979    return lowerVectorBitRevImm<4>(N, DAG);
2980  case Intrinsic::loongarch_lsx_vbitrevi_w:
2981  case Intrinsic::loongarch_lasx_xvbitrevi_w:
2982    return lowerVectorBitRevImm<5>(N, DAG);
2983  case Intrinsic::loongarch_lsx_vbitrevi_d:
2984  case Intrinsic::loongarch_lasx_xvbitrevi_d:
2985    return lowerVectorBitRevImm<6>(N, DAG);
2986  case Intrinsic::loongarch_lsx_vfadd_s:
2987  case Intrinsic::loongarch_lsx_vfadd_d:
2988  case Intrinsic::loongarch_lasx_xvfadd_s:
2989  case Intrinsic::loongarch_lasx_xvfadd_d:
2990    return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2991                       N->getOperand(2));
2992  case Intrinsic::loongarch_lsx_vfsub_s:
2993  case Intrinsic::loongarch_lsx_vfsub_d:
2994  case Intrinsic::loongarch_lasx_xvfsub_s:
2995  case Intrinsic::loongarch_lasx_xvfsub_d:
2996    return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2997                       N->getOperand(2));
2998  case Intrinsic::loongarch_lsx_vfmul_s:
2999  case Intrinsic::loongarch_lsx_vfmul_d:
3000  case Intrinsic::loongarch_lasx_xvfmul_s:
3001  case Intrinsic::loongarch_lasx_xvfmul_d:
3002    return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3003                       N->getOperand(2));
3004  case Intrinsic::loongarch_lsx_vfdiv_s:
3005  case Intrinsic::loongarch_lsx_vfdiv_d:
3006  case Intrinsic::loongarch_lasx_xvfdiv_s:
3007  case Intrinsic::loongarch_lasx_xvfdiv_d:
3008    return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3009                       N->getOperand(2));
3010  case Intrinsic::loongarch_lsx_vfmadd_s:
3011  case Intrinsic::loongarch_lsx_vfmadd_d:
3012  case Intrinsic::loongarch_lasx_xvfmadd_s:
3013  case Intrinsic::loongarch_lasx_xvfmadd_d:
3014    return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3015                       N->getOperand(2), N->getOperand(3));
3016  case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3017    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3018                       N->getOperand(1), N->getOperand(2),
3019                       legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3020  case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3021  case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3022    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3023                       N->getOperand(1), N->getOperand(2),
3024                       legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3025  case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3026  case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3027    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3028                       N->getOperand(1), N->getOperand(2),
3029                       legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3030  case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3031    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3032                       N->getOperand(1), N->getOperand(2),
3033                       legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3034  case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3035  case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3036  case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3037  case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3038  case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3039  case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3040  case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3041  case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3042    EVT ResTy = N->getValueType(0);
3043    SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3044    return DAG.getBuildVector(ResTy, DL, Ops);
3045  }
3046  case Intrinsic::loongarch_lsx_vreplve_b:
3047  case Intrinsic::loongarch_lsx_vreplve_h:
3048  case Intrinsic::loongarch_lsx_vreplve_w:
3049  case Intrinsic::loongarch_lsx_vreplve_d:
3050  case Intrinsic::loongarch_lasx_xvreplve_b:
3051  case Intrinsic::loongarch_lasx_xvreplve_h:
3052  case Intrinsic::loongarch_lasx_xvreplve_w:
3053  case Intrinsic::loongarch_lasx_xvreplve_d:
3054    return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3055                       N->getOperand(1),
3056                       DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3057                                   N->getOperand(2)));
3058  }
3059  return SDValue();
3060}
3061
3062SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3063                                                   DAGCombinerInfo &DCI) const {
3064  SelectionDAG &DAG = DCI.DAG;
3065  switch (N->getOpcode()) {
3066  default:
3067    break;
3068  case ISD::AND:
3069    return performANDCombine(N, DAG, DCI, Subtarget);
3070  case ISD::OR:
3071    return performORCombine(N, DAG, DCI, Subtarget);
3072  case ISD::SRL:
3073    return performSRLCombine(N, DAG, DCI, Subtarget);
3074  case LoongArchISD::BITREV_W:
3075    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3076  case ISD::INTRINSIC_WO_CHAIN:
3077    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3078  }
3079  return SDValue();
3080}
3081
3082static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3083                                              MachineBasicBlock *MBB) {
3084  if (!ZeroDivCheck)
3085    return MBB;
3086
3087  // Build instructions:
3088  // MBB:
3089  //   div(or mod)   $dst, $dividend, $divisor
3090  //   bnez          $divisor, SinkMBB
3091  // BreakMBB:
3092  //   break         7 // BRK_DIVZERO
3093  // SinkMBB:
3094  //   fallthrough
3095  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3096  MachineFunction::iterator It = ++MBB->getIterator();
3097  MachineFunction *MF = MBB->getParent();
3098  auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3099  auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3100  MF->insert(It, BreakMBB);
3101  MF->insert(It, SinkMBB);
3102
3103  // Transfer the remainder of MBB and its successor edges to SinkMBB.
3104  SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3105  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3106
3107  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3108  DebugLoc DL = MI.getDebugLoc();
3109  MachineOperand &Divisor = MI.getOperand(2);
3110  Register DivisorReg = Divisor.getReg();
3111
3112  // MBB:
3113  BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3114      .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3115      .addMBB(SinkMBB);
3116  MBB->addSuccessor(BreakMBB);
3117  MBB->addSuccessor(SinkMBB);
3118
3119  // BreakMBB:
3120  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3121  // definition of BRK_DIVZERO.
3122  BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3123  BreakMBB->addSuccessor(SinkMBB);
3124
3125  // Clear Divisor's kill flag.
3126  Divisor.setIsKill(false);
3127
3128  return SinkMBB;
3129}
3130
3131static MachineBasicBlock *
3132emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3133                        const LoongArchSubtarget &Subtarget) {
3134  unsigned CondOpc;
3135  switch (MI.getOpcode()) {
3136  default:
3137    llvm_unreachable("Unexpected opcode");
3138  case LoongArch::PseudoVBZ:
3139    CondOpc = LoongArch::VSETEQZ_V;
3140    break;
3141  case LoongArch::PseudoVBZ_B:
3142    CondOpc = LoongArch::VSETANYEQZ_B;
3143    break;
3144  case LoongArch::PseudoVBZ_H:
3145    CondOpc = LoongArch::VSETANYEQZ_H;
3146    break;
3147  case LoongArch::PseudoVBZ_W:
3148    CondOpc = LoongArch::VSETANYEQZ_W;
3149    break;
3150  case LoongArch::PseudoVBZ_D:
3151    CondOpc = LoongArch::VSETANYEQZ_D;
3152    break;
3153  case LoongArch::PseudoVBNZ:
3154    CondOpc = LoongArch::VSETNEZ_V;
3155    break;
3156  case LoongArch::PseudoVBNZ_B:
3157    CondOpc = LoongArch::VSETALLNEZ_B;
3158    break;
3159  case LoongArch::PseudoVBNZ_H:
3160    CondOpc = LoongArch::VSETALLNEZ_H;
3161    break;
3162  case LoongArch::PseudoVBNZ_W:
3163    CondOpc = LoongArch::VSETALLNEZ_W;
3164    break;
3165  case LoongArch::PseudoVBNZ_D:
3166    CondOpc = LoongArch::VSETALLNEZ_D;
3167    break;
3168  case LoongArch::PseudoXVBZ:
3169    CondOpc = LoongArch::XVSETEQZ_V;
3170    break;
3171  case LoongArch::PseudoXVBZ_B:
3172    CondOpc = LoongArch::XVSETANYEQZ_B;
3173    break;
3174  case LoongArch::PseudoXVBZ_H:
3175    CondOpc = LoongArch::XVSETANYEQZ_H;
3176    break;
3177  case LoongArch::PseudoXVBZ_W:
3178    CondOpc = LoongArch::XVSETANYEQZ_W;
3179    break;
3180  case LoongArch::PseudoXVBZ_D:
3181    CondOpc = LoongArch::XVSETANYEQZ_D;
3182    break;
3183  case LoongArch::PseudoXVBNZ:
3184    CondOpc = LoongArch::XVSETNEZ_V;
3185    break;
3186  case LoongArch::PseudoXVBNZ_B:
3187    CondOpc = LoongArch::XVSETALLNEZ_B;
3188    break;
3189  case LoongArch::PseudoXVBNZ_H:
3190    CondOpc = LoongArch::XVSETALLNEZ_H;
3191    break;
3192  case LoongArch::PseudoXVBNZ_W:
3193    CondOpc = LoongArch::XVSETALLNEZ_W;
3194    break;
3195  case LoongArch::PseudoXVBNZ_D:
3196    CondOpc = LoongArch::XVSETALLNEZ_D;
3197    break;
3198  }
3199
3200  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3201  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3202  DebugLoc DL = MI.getDebugLoc();
3203  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3204  MachineFunction::iterator It = ++BB->getIterator();
3205
3206  MachineFunction *F = BB->getParent();
3207  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3208  MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3209  MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3210
3211  F->insert(It, FalseBB);
3212  F->insert(It, TrueBB);
3213  F->insert(It, SinkBB);
3214
3215  // Transfer the remainder of MBB and its successor edges to Sink.
3216  SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3217  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3218
3219  // Insert the real instruction to BB.
3220  Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3221  BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3222
3223  // Insert branch.
3224  BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3225  BB->addSuccessor(FalseBB);
3226  BB->addSuccessor(TrueBB);
3227
3228  // FalseBB.
3229  Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3230  BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3231      .addReg(LoongArch::R0)
3232      .addImm(0);
3233  BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3234  FalseBB->addSuccessor(SinkBB);
3235
3236  // TrueBB.
3237  Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3238  BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3239      .addReg(LoongArch::R0)
3240      .addImm(1);
3241  TrueBB->addSuccessor(SinkBB);
3242
3243  // SinkBB: merge the results.
3244  BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3245          MI.getOperand(0).getReg())
3246      .addReg(RD1)
3247      .addMBB(FalseBB)
3248      .addReg(RD2)
3249      .addMBB(TrueBB);
3250
3251  // The pseudo instruction is gone now.
3252  MI.eraseFromParent();
3253  return SinkBB;
3254}
3255
3256static MachineBasicBlock *
3257emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3258                     const LoongArchSubtarget &Subtarget) {
3259  unsigned InsOp;
3260  unsigned HalfSize;
3261  switch (MI.getOpcode()) {
3262  default:
3263    llvm_unreachable("Unexpected opcode");
3264  case LoongArch::PseudoXVINSGR2VR_B:
3265    HalfSize = 16;
3266    InsOp = LoongArch::VINSGR2VR_B;
3267    break;
3268  case LoongArch::PseudoXVINSGR2VR_H:
3269    HalfSize = 8;
3270    InsOp = LoongArch::VINSGR2VR_H;
3271    break;
3272  }
3273  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3274  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3275  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3276  DebugLoc DL = MI.getDebugLoc();
3277  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3278  // XDst = vector_insert XSrc, Elt, Idx
3279  Register XDst = MI.getOperand(0).getReg();
3280  Register XSrc = MI.getOperand(1).getReg();
3281  Register Elt = MI.getOperand(2).getReg();
3282  unsigned Idx = MI.getOperand(3).getImm();
3283
3284  Register ScratchReg1 = XSrc;
3285  if (Idx >= HalfSize) {
3286    ScratchReg1 = MRI.createVirtualRegister(RC);
3287    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3288        .addReg(XSrc)
3289        .addReg(XSrc)
3290        .addImm(1);
3291  }
3292
3293  Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3294  Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3295  BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3296      .addReg(ScratchReg1, 0, LoongArch::sub_128);
3297  BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3298      .addReg(ScratchSubReg1)
3299      .addReg(Elt)
3300      .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3301
3302  Register ScratchReg2 = XDst;
3303  if (Idx >= HalfSize)
3304    ScratchReg2 = MRI.createVirtualRegister(RC);
3305
3306  BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3307      .addImm(0)
3308      .addReg(ScratchSubReg2)
3309      .addImm(LoongArch::sub_128);
3310
3311  if (Idx >= HalfSize)
3312    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3313        .addReg(XSrc)
3314        .addReg(ScratchReg2)
3315        .addImm(2);
3316
3317  MI.eraseFromParent();
3318  return BB;
3319}
3320
3321MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3322    MachineInstr &MI, MachineBasicBlock *BB) const {
3323  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3324  DebugLoc DL = MI.getDebugLoc();
3325
3326  switch (MI.getOpcode()) {
3327  default:
3328    llvm_unreachable("Unexpected instr type to insert");
3329  case LoongArch::DIV_W:
3330  case LoongArch::DIV_WU:
3331  case LoongArch::MOD_W:
3332  case LoongArch::MOD_WU:
3333  case LoongArch::DIV_D:
3334  case LoongArch::DIV_DU:
3335  case LoongArch::MOD_D:
3336  case LoongArch::MOD_DU:
3337    return insertDivByZeroTrap(MI, BB);
3338    break;
3339  case LoongArch::WRFCSR: {
3340    BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3341            LoongArch::FCSR0 + MI.getOperand(0).getImm())
3342        .addReg(MI.getOperand(1).getReg());
3343    MI.eraseFromParent();
3344    return BB;
3345  }
3346  case LoongArch::RDFCSR: {
3347    MachineInstr *ReadFCSR =
3348        BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3349                MI.getOperand(0).getReg())
3350            .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3351    ReadFCSR->getOperand(1).setIsUndef();
3352    MI.eraseFromParent();
3353    return BB;
3354  }
3355  case LoongArch::PseudoVBZ:
3356  case LoongArch::PseudoVBZ_B:
3357  case LoongArch::PseudoVBZ_H:
3358  case LoongArch::PseudoVBZ_W:
3359  case LoongArch::PseudoVBZ_D:
3360  case LoongArch::PseudoVBNZ:
3361  case LoongArch::PseudoVBNZ_B:
3362  case LoongArch::PseudoVBNZ_H:
3363  case LoongArch::PseudoVBNZ_W:
3364  case LoongArch::PseudoVBNZ_D:
3365  case LoongArch::PseudoXVBZ:
3366  case LoongArch::PseudoXVBZ_B:
3367  case LoongArch::PseudoXVBZ_H:
3368  case LoongArch::PseudoXVBZ_W:
3369  case LoongArch::PseudoXVBZ_D:
3370  case LoongArch::PseudoXVBNZ:
3371  case LoongArch::PseudoXVBNZ_B:
3372  case LoongArch::PseudoXVBNZ_H:
3373  case LoongArch::PseudoXVBNZ_W:
3374  case LoongArch::PseudoXVBNZ_D:
3375    return emitVecCondBranchPseudo(MI, BB, Subtarget);
3376  case LoongArch::PseudoXVINSGR2VR_B:
3377  case LoongArch::PseudoXVINSGR2VR_H:
3378    return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3379  }
3380}
3381
3382bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3383    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3384    unsigned *Fast) const {
3385  if (!Subtarget.hasUAL())
3386    return false;
3387
3388  // TODO: set reasonable speed number.
3389  if (Fast)
3390    *Fast = 1;
3391  return true;
3392}
3393
3394const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3395  switch ((LoongArchISD::NodeType)Opcode) {
3396  case LoongArchISD::FIRST_NUMBER:
3397    break;
3398
3399#define NODE_NAME_CASE(node)                                                   \
3400  case LoongArchISD::node:                                                     \
3401    return "LoongArchISD::" #node;
3402
3403    // TODO: Add more target-dependent nodes later.
3404    NODE_NAME_CASE(CALL)
3405    NODE_NAME_CASE(CALL_MEDIUM)
3406    NODE_NAME_CASE(CALL_LARGE)
3407    NODE_NAME_CASE(RET)
3408    NODE_NAME_CASE(TAIL)
3409    NODE_NAME_CASE(TAIL_MEDIUM)
3410    NODE_NAME_CASE(TAIL_LARGE)
3411    NODE_NAME_CASE(SLL_W)
3412    NODE_NAME_CASE(SRA_W)
3413    NODE_NAME_CASE(SRL_W)
3414    NODE_NAME_CASE(BSTRINS)
3415    NODE_NAME_CASE(BSTRPICK)
3416    NODE_NAME_CASE(MOVGR2FR_W_LA64)
3417    NODE_NAME_CASE(MOVFR2GR_S_LA64)
3418    NODE_NAME_CASE(FTINT)
3419    NODE_NAME_CASE(REVB_2H)
3420    NODE_NAME_CASE(REVB_2W)
3421    NODE_NAME_CASE(BITREV_4B)
3422    NODE_NAME_CASE(BITREV_W)
3423    NODE_NAME_CASE(ROTR_W)
3424    NODE_NAME_CASE(ROTL_W)
3425    NODE_NAME_CASE(CLZ_W)
3426    NODE_NAME_CASE(CTZ_W)
3427    NODE_NAME_CASE(DBAR)
3428    NODE_NAME_CASE(IBAR)
3429    NODE_NAME_CASE(BREAK)
3430    NODE_NAME_CASE(SYSCALL)
3431    NODE_NAME_CASE(CRC_W_B_W)
3432    NODE_NAME_CASE(CRC_W_H_W)
3433    NODE_NAME_CASE(CRC_W_W_W)
3434    NODE_NAME_CASE(CRC_W_D_W)
3435    NODE_NAME_CASE(CRCC_W_B_W)
3436    NODE_NAME_CASE(CRCC_W_H_W)
3437    NODE_NAME_CASE(CRCC_W_W_W)
3438    NODE_NAME_CASE(CRCC_W_D_W)
3439    NODE_NAME_CASE(CSRRD)
3440    NODE_NAME_CASE(CSRWR)
3441    NODE_NAME_CASE(CSRXCHG)
3442    NODE_NAME_CASE(IOCSRRD_B)
3443    NODE_NAME_CASE(IOCSRRD_H)
3444    NODE_NAME_CASE(IOCSRRD_W)
3445    NODE_NAME_CASE(IOCSRRD_D)
3446    NODE_NAME_CASE(IOCSRWR_B)
3447    NODE_NAME_CASE(IOCSRWR_H)
3448    NODE_NAME_CASE(IOCSRWR_W)
3449    NODE_NAME_CASE(IOCSRWR_D)
3450    NODE_NAME_CASE(CPUCFG)
3451    NODE_NAME_CASE(MOVGR2FCSR)
3452    NODE_NAME_CASE(MOVFCSR2GR)
3453    NODE_NAME_CASE(CACOP_D)
3454    NODE_NAME_CASE(CACOP_W)
3455    NODE_NAME_CASE(VPICK_SEXT_ELT)
3456    NODE_NAME_CASE(VPICK_ZEXT_ELT)
3457    NODE_NAME_CASE(VREPLVE)
3458    NODE_NAME_CASE(VALL_ZERO)
3459    NODE_NAME_CASE(VANY_ZERO)
3460    NODE_NAME_CASE(VALL_NONZERO)
3461    NODE_NAME_CASE(VANY_NONZERO)
3462  }
3463#undef NODE_NAME_CASE
3464  return nullptr;
3465}
3466
3467//===----------------------------------------------------------------------===//
3468//                     Calling Convention Implementation
3469//===----------------------------------------------------------------------===//
3470
3471// Eight general-purpose registers a0-a7 used for passing integer arguments,
3472// with a0-a1 reused to return values. Generally, the GPRs are used to pass
3473// fixed-point arguments, and floating-point arguments when no FPR is available
3474// or with soft float ABI.
3475const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
3476                             LoongArch::R7,  LoongArch::R8, LoongArch::R9,
3477                             LoongArch::R10, LoongArch::R11};
3478// Eight floating-point registers fa0-fa7 used for passing floating-point
3479// arguments, and fa0-fa1 are also used to return values.
3480const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3481                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
3482                               LoongArch::F6, LoongArch::F7};
3483// FPR32 and FPR64 alias each other.
3484const MCPhysReg ArgFPR64s[] = {
3485    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3486    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3487
3488const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3489                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3490                            LoongArch::VR6, LoongArch::VR7};
3491
3492const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3493                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3494                            LoongArch::XR6, LoongArch::XR7};
3495
3496// Pass a 2*GRLen argument that has been split into two GRLen values through
3497// registers or the stack as necessary.
3498static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3499                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3500                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3501                                     ISD::ArgFlagsTy ArgFlags2) {
3502  unsigned GRLenInBytes = GRLen / 8;
3503  if (Register Reg = State.AllocateReg(ArgGPRs)) {
3504    // At least one half can be passed via register.
3505    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3506                                     VA1.getLocVT(), CCValAssign::Full));
3507  } else {
3508    // Both halves must be passed on the stack, with proper alignment.
3509    Align StackAlign =
3510        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3511    State.addLoc(
3512        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3513                            State.AllocateStack(GRLenInBytes, StackAlign),
3514                            VA1.getLocVT(), CCValAssign::Full));
3515    State.addLoc(CCValAssign::getMem(
3516        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3517        LocVT2, CCValAssign::Full));
3518    return false;
3519  }
3520  if (Register Reg = State.AllocateReg(ArgGPRs)) {
3521    // The second half can also be passed via register.
3522    State.addLoc(
3523        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3524  } else {
3525    // The second half is passed via the stack, without additional alignment.
3526    State.addLoc(CCValAssign::getMem(
3527        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3528        LocVT2, CCValAssign::Full));
3529  }
3530  return false;
3531}
3532
3533// Implements the LoongArch calling convention. Returns true upon failure.
3534static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3535                         unsigned ValNo, MVT ValVT,
3536                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3537                         CCState &State, bool IsFixed, bool IsRet,
3538                         Type *OrigTy) {
3539  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3540  assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
3541  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3542  MVT LocVT = ValVT;
3543
3544  // Any return value split into more than two values can't be returned
3545  // directly.
3546  if (IsRet && ValNo > 1)
3547    return true;
3548
3549  // If passing a variadic argument, or if no FPR is available.
3550  bool UseGPRForFloat = true;
3551
3552  switch (ABI) {
3553  default:
3554    llvm_unreachable("Unexpected ABI");
3555  case LoongArchABI::ABI_ILP32S:
3556  case LoongArchABI::ABI_ILP32F:
3557  case LoongArchABI::ABI_LP64F:
3558    report_fatal_error("Unimplemented ABI");
3559    break;
3560  case LoongArchABI::ABI_ILP32D:
3561  case LoongArchABI::ABI_LP64D:
3562    UseGPRForFloat = !IsFixed;
3563    break;
3564  case LoongArchABI::ABI_LP64S:
3565    break;
3566  }
3567
3568  // FPR32 and FPR64 alias each other.
3569  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3570    UseGPRForFloat = true;
3571
3572  if (UseGPRForFloat && ValVT == MVT::f32) {
3573    LocVT = GRLenVT;
3574    LocInfo = CCValAssign::BCvt;
3575  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3576    LocVT = MVT::i64;
3577    LocInfo = CCValAssign::BCvt;
3578  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3579    // TODO: Handle passing f64 on LA32 with D feature.
3580    report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3581  }
3582
3583  // If this is a variadic argument, the LoongArch calling convention requires
3584  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3585  // byte alignment. An aligned register should be used regardless of whether
3586  // the original argument was split during legalisation or not. The argument
3587  // will not be passed by registers if the original type is larger than
3588  // 2*GRLen, so the register alignment rule does not apply.
3589  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3590  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3591      DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3592    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3593    // Skip 'odd' register if necessary.
3594    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3595      State.AllocateReg(ArgGPRs);
3596  }
3597
3598  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3599  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3600      State.getPendingArgFlags();
3601
3602  assert(PendingLocs.size() == PendingArgFlags.size() &&
3603         "PendingLocs and PendingArgFlags out of sync");
3604
3605  // Split arguments might be passed indirectly, so keep track of the pending
3606  // values.
3607  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3608    LocVT = GRLenVT;
3609    LocInfo = CCValAssign::Indirect;
3610    PendingLocs.push_back(
3611        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3612    PendingArgFlags.push_back(ArgFlags);
3613    if (!ArgFlags.isSplitEnd()) {
3614      return false;
3615    }
3616  }
3617
3618  // If the split argument only had two elements, it should be passed directly
3619  // in registers or on the stack.
3620  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3621      PendingLocs.size() <= 2) {
3622    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3623    // Apply the normal calling convention rules to the first half of the
3624    // split argument.
3625    CCValAssign VA = PendingLocs[0];
3626    ISD::ArgFlagsTy AF = PendingArgFlags[0];
3627    PendingLocs.clear();
3628    PendingArgFlags.clear();
3629    return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3630                                    ArgFlags);
3631  }
3632
3633  // Allocate to a register if possible, or else a stack slot.
3634  Register Reg;
3635  unsigned StoreSizeBytes = GRLen / 8;
3636  Align StackAlign = Align(GRLen / 8);
3637
3638  if (ValVT == MVT::f32 && !UseGPRForFloat)
3639    Reg = State.AllocateReg(ArgFPR32s);
3640  else if (ValVT == MVT::f64 && !UseGPRForFloat)
3641    Reg = State.AllocateReg(ArgFPR64s);
3642  else if (ValVT.is128BitVector())
3643    Reg = State.AllocateReg(ArgVRs);
3644  else if (ValVT.is256BitVector())
3645    Reg = State.AllocateReg(ArgXRs);
3646  else
3647    Reg = State.AllocateReg(ArgGPRs);
3648
3649  unsigned StackOffset =
3650      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3651
3652  // If we reach this point and PendingLocs is non-empty, we must be at the
3653  // end of a split argument that must be passed indirectly.
3654  if (!PendingLocs.empty()) {
3655    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3656    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3657    for (auto &It : PendingLocs) {
3658      if (Reg)
3659        It.convertToReg(Reg);
3660      else
3661        It.convertToMem(StackOffset);
3662      State.addLoc(It);
3663    }
3664    PendingLocs.clear();
3665    PendingArgFlags.clear();
3666    return false;
3667  }
3668  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3669         "Expected an GRLenVT at this stage");
3670
3671  if (Reg) {
3672    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3673    return false;
3674  }
3675
3676  // When a floating-point value is passed on the stack, no bit-cast is needed.
3677  if (ValVT.isFloatingPoint()) {
3678    LocVT = ValVT;
3679    LocInfo = CCValAssign::Full;
3680  }
3681
3682  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3683  return false;
3684}
3685
3686void LoongArchTargetLowering::analyzeInputArgs(
3687    MachineFunction &MF, CCState &CCInfo,
3688    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3689    LoongArchCCAssignFn Fn) const {
3690  FunctionType *FType = MF.getFunction().getFunctionType();
3691  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3692    MVT ArgVT = Ins[i].VT;
3693    Type *ArgTy = nullptr;
3694    if (IsRet)
3695      ArgTy = FType->getReturnType();
3696    else if (Ins[i].isOrigArg())
3697      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3698    LoongArchABI::ABI ABI =
3699        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3700    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3701           CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3702      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3703                        << '\n');
3704      llvm_unreachable("");
3705    }
3706  }
3707}
3708
3709void LoongArchTargetLowering::analyzeOutputArgs(
3710    MachineFunction &MF, CCState &CCInfo,
3711    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3712    CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3713  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3714    MVT ArgVT = Outs[i].VT;
3715    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3716    LoongArchABI::ABI ABI =
3717        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3718    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3719           CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3720      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3721                        << "\n");
3722      llvm_unreachable("");
3723    }
3724  }
3725}
3726
3727// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3728// values.
3729static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3730                                   const CCValAssign &VA, const SDLoc &DL) {
3731  switch (VA.getLocInfo()) {
3732  default:
3733    llvm_unreachable("Unexpected CCValAssign::LocInfo");
3734  case CCValAssign::Full:
3735  case CCValAssign::Indirect:
3736    break;
3737  case CCValAssign::BCvt:
3738    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3739      Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3740    else
3741      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3742    break;
3743  }
3744  return Val;
3745}
3746
3747static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3748                                const CCValAssign &VA, const SDLoc &DL,
3749                                const LoongArchTargetLowering &TLI) {
3750  MachineFunction &MF = DAG.getMachineFunction();
3751  MachineRegisterInfo &RegInfo = MF.getRegInfo();
3752  EVT LocVT = VA.getLocVT();
3753  SDValue Val;
3754  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3755  Register VReg = RegInfo.createVirtualRegister(RC);
3756  RegInfo.addLiveIn(VA.getLocReg(), VReg);
3757  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3758
3759  return convertLocVTToValVT(DAG, Val, VA, DL);
3760}
3761
3762// The caller is responsible for loading the full value if the argument is
3763// passed with CCValAssign::Indirect.
3764static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3765                                const CCValAssign &VA, const SDLoc &DL) {
3766  MachineFunction &MF = DAG.getMachineFunction();
3767  MachineFrameInfo &MFI = MF.getFrameInfo();
3768  EVT ValVT = VA.getValVT();
3769  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3770                                 /*IsImmutable=*/true);
3771  SDValue FIN = DAG.getFrameIndex(
3772      FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3773
3774  ISD::LoadExtType ExtType;
3775  switch (VA.getLocInfo()) {
3776  default:
3777    llvm_unreachable("Unexpected CCValAssign::LocInfo");
3778  case CCValAssign::Full:
3779  case CCValAssign::Indirect:
3780  case CCValAssign::BCvt:
3781    ExtType = ISD::NON_EXTLOAD;
3782    break;
3783  }
3784  return DAG.getExtLoad(
3785      ExtType, DL, VA.getLocVT(), Chain, FIN,
3786      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3787}
3788
3789static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3790                                   const CCValAssign &VA, const SDLoc &DL) {
3791  EVT LocVT = VA.getLocVT();
3792
3793  switch (VA.getLocInfo()) {
3794  default:
3795    llvm_unreachable("Unexpected CCValAssign::LocInfo");
3796  case CCValAssign::Full:
3797    break;
3798  case CCValAssign::BCvt:
3799    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3800      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3801    else
3802      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3803    break;
3804  }
3805  return Val;
3806}
3807
3808static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3809                             CCValAssign::LocInfo LocInfo,
3810                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3811  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3812    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3813    //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
3814    static const MCPhysReg GPRList[] = {
3815        LoongArch::R23, LoongArch::R24, LoongArch::R25,
3816        LoongArch::R26, LoongArch::R27, LoongArch::R28,
3817        LoongArch::R29, LoongArch::R30, LoongArch::R31};
3818    if (unsigned Reg = State.AllocateReg(GPRList)) {
3819      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3820      return false;
3821    }
3822  }
3823
3824  if (LocVT == MVT::f32) {
3825    // Pass in STG registers: F1, F2, F3, F4
3826    //                        fs0,fs1,fs2,fs3
3827    static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3828                                          LoongArch::F26, LoongArch::F27};
3829    if (unsigned Reg = State.AllocateReg(FPR32List)) {
3830      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3831      return false;
3832    }
3833  }
3834
3835  if (LocVT == MVT::f64) {
3836    // Pass in STG registers: D1, D2, D3, D4
3837    //                        fs4,fs5,fs6,fs7
3838    static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3839                                          LoongArch::F30_64, LoongArch::F31_64};
3840    if (unsigned Reg = State.AllocateReg(FPR64List)) {
3841      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3842      return false;
3843    }
3844  }
3845
3846  report_fatal_error("No registers left in GHC calling convention");
3847  return true;
3848}
3849
3850// Transform physical registers into virtual registers.
3851SDValue LoongArchTargetLowering::LowerFormalArguments(
3852    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3853    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3854    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3855
3856  MachineFunction &MF = DAG.getMachineFunction();
3857
3858  switch (CallConv) {
3859  default:
3860    llvm_unreachable("Unsupported calling convention");
3861  case CallingConv::C:
3862  case CallingConv::Fast:
3863    break;
3864  case CallingConv::GHC:
3865    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3866        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3867      report_fatal_error(
3868          "GHC calling convention requires the F and D extensions");
3869  }
3870
3871  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3872  MVT GRLenVT = Subtarget.getGRLenVT();
3873  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3874  // Used with varargs to acumulate store chains.
3875  std::vector<SDValue> OutChains;
3876
3877  // Assign locations to all of the incoming arguments.
3878  SmallVector<CCValAssign> ArgLocs;
3879  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3880
3881  if (CallConv == CallingConv::GHC)
3882    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3883  else
3884    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3885
3886  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3887    CCValAssign &VA = ArgLocs[i];
3888    SDValue ArgValue;
3889    if (VA.isRegLoc())
3890      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3891    else
3892      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3893    if (VA.getLocInfo() == CCValAssign::Indirect) {
3894      // If the original argument was split and passed by reference, we need to
3895      // load all parts of it here (using the same address).
3896      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3897                                   MachinePointerInfo()));
3898      unsigned ArgIndex = Ins[i].OrigArgIndex;
3899      unsigned ArgPartOffset = Ins[i].PartOffset;
3900      assert(ArgPartOffset == 0);
3901      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3902        CCValAssign &PartVA = ArgLocs[i + 1];
3903        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3904        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3905        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3906        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3907                                     MachinePointerInfo()));
3908        ++i;
3909      }
3910      continue;
3911    }
3912    InVals.push_back(ArgValue);
3913  }
3914
3915  if (IsVarArg) {
3916    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3917    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3918    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3919    MachineFrameInfo &MFI = MF.getFrameInfo();
3920    MachineRegisterInfo &RegInfo = MF.getRegInfo();
3921    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3922
3923    // Offset of the first variable argument from stack pointer, and size of
3924    // the vararg save area. For now, the varargs save area is either zero or
3925    // large enough to hold a0-a7.
3926    int VaArgOffset, VarArgsSaveSize;
3927
3928    // If all registers are allocated, then all varargs must be passed on the
3929    // stack and we don't need to save any argregs.
3930    if (ArgRegs.size() == Idx) {
3931      VaArgOffset = CCInfo.getStackSize();
3932      VarArgsSaveSize = 0;
3933    } else {
3934      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3935      VaArgOffset = -VarArgsSaveSize;
3936    }
3937
3938    // Record the frame index of the first variable argument
3939    // which is a value necessary to VASTART.
3940    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3941    LoongArchFI->setVarArgsFrameIndex(FI);
3942
3943    // If saving an odd number of registers then create an extra stack slot to
3944    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3945    // offsets to even-numbered registered remain 2*GRLen-aligned.
3946    if (Idx % 2) {
3947      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3948                            true);
3949      VarArgsSaveSize += GRLenInBytes;
3950    }
3951
3952    // Copy the integer registers that may have been used for passing varargs
3953    // to the vararg save area.
3954    for (unsigned I = Idx; I < ArgRegs.size();
3955         ++I, VaArgOffset += GRLenInBytes) {
3956      const Register Reg = RegInfo.createVirtualRegister(RC);
3957      RegInfo.addLiveIn(ArgRegs[I], Reg);
3958      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3959      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3960      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3961      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3962                                   MachinePointerInfo::getFixedStack(MF, FI));
3963      cast<StoreSDNode>(Store.getNode())
3964          ->getMemOperand()
3965          ->setValue((Value *)nullptr);
3966      OutChains.push_back(Store);
3967    }
3968    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3969  }
3970
3971  // All stores are grouped in one node to allow the matching between
3972  // the size of Ins and InVals. This only happens for vararg functions.
3973  if (!OutChains.empty()) {
3974    OutChains.push_back(Chain);
3975    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3976  }
3977
3978  return Chain;
3979}
3980
3981bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3982  return CI->isTailCall();
3983}
3984
3985// Check if the return value is used as only a return value, as otherwise
3986// we can't perform a tail-call.
3987bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3988                                                 SDValue &Chain) const {
3989  if (N->getNumValues() != 1)
3990    return false;
3991  if (!N->hasNUsesOfValue(1, 0))
3992    return false;
3993
3994  SDNode *Copy = *N->use_begin();
3995  if (Copy->getOpcode() != ISD::CopyToReg)
3996    return false;
3997
3998  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3999  // isn't safe to perform a tail call.
4000  if (Copy->getGluedNode())
4001    return false;
4002
4003  // The copy must be used by a LoongArchISD::RET, and nothing else.
4004  bool HasRet = false;
4005  for (SDNode *Node : Copy->uses()) {
4006    if (Node->getOpcode() != LoongArchISD::RET)
4007      return false;
4008    HasRet = true;
4009  }
4010
4011  if (!HasRet)
4012    return false;
4013
4014  Chain = Copy->getOperand(0);
4015  return true;
4016}
4017
4018// Check whether the call is eligible for tail call optimization.
4019bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4020    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4021    const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4022
4023  auto CalleeCC = CLI.CallConv;
4024  auto &Outs = CLI.Outs;
4025  auto &Caller = MF.getFunction();
4026  auto CallerCC = Caller.getCallingConv();
4027
4028  // Do not tail call opt if the stack is used to pass parameters.
4029  if (CCInfo.getStackSize() != 0)
4030    return false;
4031
4032  // Do not tail call opt if any parameters need to be passed indirectly.
4033  for (auto &VA : ArgLocs)
4034    if (VA.getLocInfo() == CCValAssign::Indirect)
4035      return false;
4036
4037  // Do not tail call opt if either caller or callee uses struct return
4038  // semantics.
4039  auto IsCallerStructRet = Caller.hasStructRetAttr();
4040  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4041  if (IsCallerStructRet || IsCalleeStructRet)
4042    return false;
4043
4044  // Do not tail call opt if either the callee or caller has a byval argument.
4045  for (auto &Arg : Outs)
4046    if (Arg.Flags.isByVal())
4047      return false;
4048
4049  // The callee has to preserve all registers the caller needs to preserve.
4050  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4051  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4052  if (CalleeCC != CallerCC) {
4053    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4054    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4055      return false;
4056  }
4057  return true;
4058}
4059
4060static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4061  return DAG.getDataLayout().getPrefTypeAlign(
4062      VT.getTypeForEVT(*DAG.getContext()));
4063}
4064
4065// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4066// and output parameter nodes.
4067SDValue
4068LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4069                                   SmallVectorImpl<SDValue> &InVals) const {
4070  SelectionDAG &DAG = CLI.DAG;
4071  SDLoc &DL = CLI.DL;
4072  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4073  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4074  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4075  SDValue Chain = CLI.Chain;
4076  SDValue Callee = CLI.Callee;
4077  CallingConv::ID CallConv = CLI.CallConv;
4078  bool IsVarArg = CLI.IsVarArg;
4079  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4080  MVT GRLenVT = Subtarget.getGRLenVT();
4081  bool &IsTailCall = CLI.IsTailCall;
4082
4083  MachineFunction &MF = DAG.getMachineFunction();
4084
4085  // Analyze the operands of the call, assigning locations to each operand.
4086  SmallVector<CCValAssign> ArgLocs;
4087  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4088
4089  if (CallConv == CallingConv::GHC)
4090    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4091  else
4092    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4093
4094  // Check if it's really possible to do a tail call.
4095  if (IsTailCall)
4096    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4097
4098  if (IsTailCall)
4099    ++NumTailCalls;
4100  else if (CLI.CB && CLI.CB->isMustTailCall())
4101    report_fatal_error("failed to perform tail call elimination on a call "
4102                       "site marked musttail");
4103
4104  // Get a count of how many bytes are to be pushed on the stack.
4105  unsigned NumBytes = ArgCCInfo.getStackSize();
4106
4107  // Create local copies for byval args.
4108  SmallVector<SDValue> ByValArgs;
4109  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4110    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4111    if (!Flags.isByVal())
4112      continue;
4113
4114    SDValue Arg = OutVals[i];
4115    unsigned Size = Flags.getByValSize();
4116    Align Alignment = Flags.getNonZeroByValAlign();
4117
4118    int FI =
4119        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4120    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4121    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4122
4123    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4124                          /*IsVolatile=*/false,
4125                          /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4126                          MachinePointerInfo(), MachinePointerInfo());
4127    ByValArgs.push_back(FIPtr);
4128  }
4129
4130  if (!IsTailCall)
4131    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4132
4133  // Copy argument values to their designated locations.
4134  SmallVector<std::pair<Register, SDValue>> RegsToPass;
4135  SmallVector<SDValue> MemOpChains;
4136  SDValue StackPtr;
4137  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4138    CCValAssign &VA = ArgLocs[i];
4139    SDValue ArgValue = OutVals[i];
4140    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4141
4142    // Promote the value if needed.
4143    // For now, only handle fully promoted and indirect arguments.
4144    if (VA.getLocInfo() == CCValAssign::Indirect) {
4145      // Store the argument in a stack slot and pass its address.
4146      Align StackAlign =
4147          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4148                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
4149      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4150      // If the original argument was split and passed by reference, we need to
4151      // store the required parts of it here (and pass just one address).
4152      unsigned ArgIndex = Outs[i].OrigArgIndex;
4153      unsigned ArgPartOffset = Outs[i].PartOffset;
4154      assert(ArgPartOffset == 0);
4155      // Calculate the total size to store. We don't have access to what we're
4156      // actually storing other than performing the loop and collecting the
4157      // info.
4158      SmallVector<std::pair<SDValue, SDValue>> Parts;
4159      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4160        SDValue PartValue = OutVals[i + 1];
4161        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4162        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4163        EVT PartVT = PartValue.getValueType();
4164
4165        StoredSize += PartVT.getStoreSize();
4166        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4167        Parts.push_back(std::make_pair(PartValue, Offset));
4168        ++i;
4169      }
4170      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4171      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4172      MemOpChains.push_back(
4173          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4174                       MachinePointerInfo::getFixedStack(MF, FI)));
4175      for (const auto &Part : Parts) {
4176        SDValue PartValue = Part.first;
4177        SDValue PartOffset = Part.second;
4178        SDValue Address =
4179            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4180        MemOpChains.push_back(
4181            DAG.getStore(Chain, DL, PartValue, Address,
4182                         MachinePointerInfo::getFixedStack(MF, FI)));
4183      }
4184      ArgValue = SpillSlot;
4185    } else {
4186      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4187    }
4188
4189    // Use local copy if it is a byval arg.
4190    if (Flags.isByVal())
4191      ArgValue = ByValArgs[j++];
4192
4193    if (VA.isRegLoc()) {
4194      // Queue up the argument copies and emit them at the end.
4195      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4196    } else {
4197      assert(VA.isMemLoc() && "Argument not register or memory");
4198      assert(!IsTailCall && "Tail call not allowed if stack is used "
4199                            "for passing parameters");
4200
4201      // Work out the address of the stack slot.
4202      if (!StackPtr.getNode())
4203        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4204      SDValue Address =
4205          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4206                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4207
4208      // Emit the store.
4209      MemOpChains.push_back(
4210          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4211    }
4212  }
4213
4214  // Join the stores, which are independent of one another.
4215  if (!MemOpChains.empty())
4216    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4217
4218  SDValue Glue;
4219
4220  // Build a sequence of copy-to-reg nodes, chained and glued together.
4221  for (auto &Reg : RegsToPass) {
4222    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4223    Glue = Chain.getValue(1);
4224  }
4225
4226  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4227  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4228  // split it and then direct call can be matched by PseudoCALL.
4229  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4230    const GlobalValue *GV = S->getGlobal();
4231    unsigned OpFlags =
4232        getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
4233            ? LoongArchII::MO_CALL
4234            : LoongArchII::MO_CALL_PLT;
4235    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4236  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4237    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
4238                           *MF.getFunction().getParent(), nullptr)
4239                           ? LoongArchII::MO_CALL
4240                           : LoongArchII::MO_CALL_PLT;
4241    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4242  }
4243
4244  // The first call operand is the chain and the second is the target address.
4245  SmallVector<SDValue> Ops;
4246  Ops.push_back(Chain);
4247  Ops.push_back(Callee);
4248
4249  // Add argument registers to the end of the list so that they are
4250  // known live into the call.
4251  for (auto &Reg : RegsToPass)
4252    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4253
4254  if (!IsTailCall) {
4255    // Add a register mask operand representing the call-preserved registers.
4256    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4257    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4258    assert(Mask && "Missing call preserved mask for calling convention");
4259    Ops.push_back(DAG.getRegisterMask(Mask));
4260  }
4261
4262  // Glue the call to the argument copies, if any.
4263  if (Glue.getNode())
4264    Ops.push_back(Glue);
4265
4266  // Emit the call.
4267  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4268  unsigned Op;
4269  switch (DAG.getTarget().getCodeModel()) {
4270  default:
4271    report_fatal_error("Unsupported code model");
4272  case CodeModel::Small:
4273    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4274    break;
4275  case CodeModel::Medium:
4276    assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4277    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4278    break;
4279  case CodeModel::Large:
4280    assert(Subtarget.is64Bit() && "Large code model requires LA64");
4281    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4282    break;
4283  }
4284
4285  if (IsTailCall) {
4286    MF.getFrameInfo().setHasTailCall();
4287    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4288    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4289    return Ret;
4290  }
4291
4292  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4293  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4294  Glue = Chain.getValue(1);
4295
4296  // Mark the end of the call, which is glued to the call itself.
4297  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4298  Glue = Chain.getValue(1);
4299
4300  // Assign locations to each value returned by this call.
4301  SmallVector<CCValAssign> RVLocs;
4302  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4303  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4304
4305  // Copy all of the result registers out of their specified physreg.
4306  for (auto &VA : RVLocs) {
4307    // Copy the value out.
4308    SDValue RetValue =
4309        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4310    // Glue the RetValue to the end of the call sequence.
4311    Chain = RetValue.getValue(1);
4312    Glue = RetValue.getValue(2);
4313
4314    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4315
4316    InVals.push_back(RetValue);
4317  }
4318
4319  return Chain;
4320}
4321
4322bool LoongArchTargetLowering::CanLowerReturn(
4323    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4324    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4325  SmallVector<CCValAssign> RVLocs;
4326  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4327
4328  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4329    LoongArchABI::ABI ABI =
4330        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4331    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4332                     Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4333                     nullptr))
4334      return false;
4335  }
4336  return true;
4337}
4338
4339SDValue LoongArchTargetLowering::LowerReturn(
4340    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4341    const SmallVectorImpl<ISD::OutputArg> &Outs,
4342    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4343    SelectionDAG &DAG) const {
4344  // Stores the assignment of the return value to a location.
4345  SmallVector<CCValAssign> RVLocs;
4346
4347  // Info about the registers and stack slot.
4348  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4349                 *DAG.getContext());
4350
4351  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4352                    nullptr, CC_LoongArch);
4353  if (CallConv == CallingConv::GHC && !RVLocs.empty())
4354    report_fatal_error("GHC functions return void only");
4355  SDValue Glue;
4356  SmallVector<SDValue, 4> RetOps(1, Chain);
4357
4358  // Copy the result values into the output registers.
4359  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4360    CCValAssign &VA = RVLocs[i];
4361    assert(VA.isRegLoc() && "Can only return in registers!");
4362
4363    // Handle a 'normal' return.
4364    SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4365    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4366
4367    // Guarantee that all emitted copies are stuck together.
4368    Glue = Chain.getValue(1);
4369    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4370  }
4371
4372  RetOps[0] = Chain; // Update chain.
4373
4374  // Add the glue node if we have it.
4375  if (Glue.getNode())
4376    RetOps.push_back(Glue);
4377
4378  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4379}
4380
4381bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4382                                           bool ForCodeSize) const {
4383  // TODO: Maybe need more checks here after vector extension is supported.
4384  if (VT == MVT::f32 && !Subtarget.hasBasicF())
4385    return false;
4386  if (VT == MVT::f64 && !Subtarget.hasBasicD())
4387    return false;
4388  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4389}
4390
4391bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4392  return true;
4393}
4394
4395bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4396  return true;
4397}
4398
4399bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4400    const Instruction *I) const {
4401  if (!Subtarget.is64Bit())
4402    return isa<LoadInst>(I) || isa<StoreInst>(I);
4403
4404  if (isa<LoadInst>(I))
4405    return true;
4406
4407  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4408  // require fences beacuse we can use amswap_db.[w/d].
4409  if (isa<StoreInst>(I)) {
4410    unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4411    return (Size == 8 || Size == 16);
4412  }
4413
4414  return false;
4415}
4416
4417EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4418                                                LLVMContext &Context,
4419                                                EVT VT) const {
4420  if (!VT.isVector())
4421    return getPointerTy(DL);
4422  return VT.changeVectorElementTypeToInteger();
4423}
4424
4425bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4426  // TODO: Support vectors.
4427  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4428}
4429
4430bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4431                                                 const CallInst &I,
4432                                                 MachineFunction &MF,
4433                                                 unsigned Intrinsic) const {
4434  switch (Intrinsic) {
4435  default:
4436    return false;
4437  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4438  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4439  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4440  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4441    Info.opc = ISD::INTRINSIC_W_CHAIN;
4442    Info.memVT = MVT::i32;
4443    Info.ptrVal = I.getArgOperand(0);
4444    Info.offset = 0;
4445    Info.align = Align(4);
4446    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4447                 MachineMemOperand::MOVolatile;
4448    return true;
4449    // TODO: Add more Intrinsics later.
4450  }
4451}
4452
4453TargetLowering::AtomicExpansionKind
4454LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4455  // TODO: Add more AtomicRMWInst that needs to be extended.
4456
4457  // Since floating-point operation requires a non-trivial set of data
4458  // operations, use CmpXChg to expand.
4459  if (AI->isFloatingPointOperation() ||
4460      AI->getOperation() == AtomicRMWInst::UIncWrap ||
4461      AI->getOperation() == AtomicRMWInst::UDecWrap)
4462    return AtomicExpansionKind::CmpXChg;
4463
4464  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4465  if (Size == 8 || Size == 16)
4466    return AtomicExpansionKind::MaskedIntrinsic;
4467  return AtomicExpansionKind::None;
4468}
4469
4470static Intrinsic::ID
4471getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4472                                    AtomicRMWInst::BinOp BinOp) {
4473  if (GRLen == 64) {
4474    switch (BinOp) {
4475    default:
4476      llvm_unreachable("Unexpected AtomicRMW BinOp");
4477    case AtomicRMWInst::Xchg:
4478      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4479    case AtomicRMWInst::Add:
4480      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4481    case AtomicRMWInst::Sub:
4482      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4483    case AtomicRMWInst::Nand:
4484      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4485    case AtomicRMWInst::UMax:
4486      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4487    case AtomicRMWInst::UMin:
4488      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4489    case AtomicRMWInst::Max:
4490      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4491    case AtomicRMWInst::Min:
4492      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4493      // TODO: support other AtomicRMWInst.
4494    }
4495  }
4496
4497  if (GRLen == 32) {
4498    switch (BinOp) {
4499    default:
4500      llvm_unreachable("Unexpected AtomicRMW BinOp");
4501    case AtomicRMWInst::Xchg:
4502      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4503    case AtomicRMWInst::Add:
4504      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4505    case AtomicRMWInst::Sub:
4506      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4507    case AtomicRMWInst::Nand:
4508      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4509      // TODO: support other AtomicRMWInst.
4510    }
4511  }
4512
4513  llvm_unreachable("Unexpected GRLen\n");
4514}
4515
4516TargetLowering::AtomicExpansionKind
4517LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4518    AtomicCmpXchgInst *CI) const {
4519  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4520  if (Size == 8 || Size == 16)
4521    return AtomicExpansionKind::MaskedIntrinsic;
4522  return AtomicExpansionKind::None;
4523}
4524
4525Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4526    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4527    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4528  AtomicOrdering FailOrd = CI->getFailureOrdering();
4529  Value *FailureOrdering =
4530      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4531
4532  // TODO: Support cmpxchg on LA32.
4533  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4534  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4535  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4536  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4537  Type *Tys[] = {AlignedAddr->getType()};
4538  Function *MaskedCmpXchg =
4539      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4540  Value *Result = Builder.CreateCall(
4541      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4542  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4543  return Result;
4544}
4545
4546Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4547    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4548    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4549  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4550  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4551  // mask, as this produces better code than the LL/SC loop emitted by
4552  // int_loongarch_masked_atomicrmw_xchg.
4553  if (AI->getOperation() == AtomicRMWInst::Xchg &&
4554      isa<ConstantInt>(AI->getValOperand())) {
4555    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4556    if (CVal->isZero())
4557      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4558                                     Builder.CreateNot(Mask, "Inv_Mask"),
4559                                     AI->getAlign(), Ord);
4560    if (CVal->isMinusOne())
4561      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4562                                     AI->getAlign(), Ord);
4563  }
4564
4565  unsigned GRLen = Subtarget.getGRLen();
4566  Value *Ordering =
4567      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4568  Type *Tys[] = {AlignedAddr->getType()};
4569  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4570      AI->getModule(),
4571      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4572
4573  if (GRLen == 64) {
4574    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4575    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4576    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4577  }
4578
4579  Value *Result;
4580
4581  // Must pass the shift amount needed to sign extend the loaded value prior
4582  // to performing a signed comparison for min/max. ShiftAmt is the number of
4583  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4584  // is the number of bits to left+right shift the value in order to
4585  // sign-extend.
4586  if (AI->getOperation() == AtomicRMWInst::Min ||
4587      AI->getOperation() == AtomicRMWInst::Max) {
4588    const DataLayout &DL = AI->getModule()->getDataLayout();
4589    unsigned ValWidth =
4590        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4591    Value *SextShamt =
4592        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4593    Result = Builder.CreateCall(LlwOpScwLoop,
4594                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4595  } else {
4596    Result =
4597        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4598  }
4599
4600  if (GRLen == 64)
4601    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4602  return Result;
4603}
4604
4605bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4606    const MachineFunction &MF, EVT VT) const {
4607  VT = VT.getScalarType();
4608
4609  if (!VT.isSimple())
4610    return false;
4611
4612  switch (VT.getSimpleVT().SimpleTy) {
4613  case MVT::f32:
4614  case MVT::f64:
4615    return true;
4616  default:
4617    break;
4618  }
4619
4620  return false;
4621}
4622
4623Register LoongArchTargetLowering::getExceptionPointerRegister(
4624    const Constant *PersonalityFn) const {
4625  return LoongArch::R4;
4626}
4627
4628Register LoongArchTargetLowering::getExceptionSelectorRegister(
4629    const Constant *PersonalityFn) const {
4630  return LoongArch::R5;
4631}
4632
4633//===----------------------------------------------------------------------===//
4634//                           LoongArch Inline Assembly Support
4635//===----------------------------------------------------------------------===//
4636
4637LoongArchTargetLowering::ConstraintType
4638LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4639  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4640  //
4641  // 'f':  A floating-point register (if available).
4642  // 'k':  A memory operand whose address is formed by a base register and
4643  //       (optionally scaled) index register.
4644  // 'l':  A signed 16-bit constant.
4645  // 'm':  A memory operand whose address is formed by a base register and
4646  //       offset that is suitable for use in instructions with the same
4647  //       addressing mode as st.w and ld.w.
4648  // 'I':  A signed 12-bit constant (for arithmetic instructions).
4649  // 'J':  Integer zero.
4650  // 'K':  An unsigned 12-bit constant (for logic instructions).
4651  // "ZB": An address that is held in a general-purpose register. The offset is
4652  //       zero.
4653  // "ZC": A memory operand whose address is formed by a base register and
4654  //       offset that is suitable for use in instructions with the same
4655  //       addressing mode as ll.w and sc.w.
4656  if (Constraint.size() == 1) {
4657    switch (Constraint[0]) {
4658    default:
4659      break;
4660    case 'f':
4661      return C_RegisterClass;
4662    case 'l':
4663    case 'I':
4664    case 'J':
4665    case 'K':
4666      return C_Immediate;
4667    case 'k':
4668      return C_Memory;
4669    }
4670  }
4671
4672  if (Constraint == "ZC" || Constraint == "ZB")
4673    return C_Memory;
4674
4675  // 'm' is handled here.
4676  return TargetLowering::getConstraintType(Constraint);
4677}
4678
4679InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4680    StringRef ConstraintCode) const {
4681  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4682      .Case("k", InlineAsm::ConstraintCode::k)
4683      .Case("ZB", InlineAsm::ConstraintCode::ZB)
4684      .Case("ZC", InlineAsm::ConstraintCode::ZC)
4685      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4686}
4687
4688std::pair<unsigned, const TargetRegisterClass *>
4689LoongArchTargetLowering::getRegForInlineAsmConstraint(
4690    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4691  // First, see if this is a constraint that directly corresponds to a LoongArch
4692  // register class.
4693  if (Constraint.size() == 1) {
4694    switch (Constraint[0]) {
4695    case 'r':
4696      // TODO: Support fixed vectors up to GRLen?
4697      if (VT.isVector())
4698        break;
4699      return std::make_pair(0U, &LoongArch::GPRRegClass);
4700    case 'f':
4701      if (Subtarget.hasBasicF() && VT == MVT::f32)
4702        return std::make_pair(0U, &LoongArch::FPR32RegClass);
4703      if (Subtarget.hasBasicD() && VT == MVT::f64)
4704        return std::make_pair(0U, &LoongArch::FPR64RegClass);
4705      if (Subtarget.hasExtLSX() &&
4706          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4707        return std::make_pair(0U, &LoongArch::LSX128RegClass);
4708      if (Subtarget.hasExtLASX() &&
4709          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4710        return std::make_pair(0U, &LoongArch::LASX256RegClass);
4711      break;
4712    default:
4713      break;
4714    }
4715  }
4716
4717  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4718  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4719  // constraints while the official register name is prefixed with a '$'. So we
4720  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
4721  // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
4722  // case insensitive, so no need to convert the constraint to upper case here.
4723  //
4724  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4725  // decode the usage of register name aliases into their official names. And
4726  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4727  // official register names.
4728  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4729      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4730    bool IsFP = Constraint[2] == 'f';
4731    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4732    std::pair<unsigned, const TargetRegisterClass *> R;
4733    R = TargetLowering::getRegForInlineAsmConstraint(
4734        TRI, join_items("", Temp.first, Temp.second), VT);
4735    // Match those names to the widest floating point register type available.
4736    if (IsFP) {
4737      unsigned RegNo = R.first;
4738      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4739        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4740          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4741          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4742        }
4743      }
4744    }
4745    return R;
4746  }
4747
4748  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4749}
4750
4751void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4752    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4753    SelectionDAG &DAG) const {
4754  // Currently only support length 1 constraints.
4755  if (Constraint.size() == 1) {
4756    switch (Constraint[0]) {
4757    case 'l':
4758      // Validate & create a 16-bit signed immediate operand.
4759      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4760        uint64_t CVal = C->getSExtValue();
4761        if (isInt<16>(CVal))
4762          Ops.push_back(
4763              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4764      }
4765      return;
4766    case 'I':
4767      // Validate & create a 12-bit signed immediate operand.
4768      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4769        uint64_t CVal = C->getSExtValue();
4770        if (isInt<12>(CVal))
4771          Ops.push_back(
4772              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4773      }
4774      return;
4775    case 'J':
4776      // Validate & create an integer zero operand.
4777      if (auto *C = dyn_cast<ConstantSDNode>(Op))
4778        if (C->getZExtValue() == 0)
4779          Ops.push_back(
4780              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4781      return;
4782    case 'K':
4783      // Validate & create a 12-bit unsigned immediate operand.
4784      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4785        uint64_t CVal = C->getZExtValue();
4786        if (isUInt<12>(CVal))
4787          Ops.push_back(
4788              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4789      }
4790      return;
4791    default:
4792      break;
4793    }
4794  }
4795  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4796}
4797
4798#define GET_REGISTER_MATCHER
4799#include "LoongArchGenAsmMatcher.inc"
4800
4801Register
4802LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4803                                           const MachineFunction &MF) const {
4804  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4805  std::string NewRegName = Name.second.str();
4806  Register Reg = MatchRegisterAltName(NewRegName);
4807  if (Reg == LoongArch::NoRegister)
4808    Reg = MatchRegisterName(NewRegName);
4809  if (Reg == LoongArch::NoRegister)
4810    report_fatal_error(
4811        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4812  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4813  if (!ReservedRegs.test(Reg))
4814    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4815                             StringRef(RegName) + "\"."));
4816  return Reg;
4817}
4818
4819bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4820                                                     EVT VT, SDValue C) const {
4821  // TODO: Support vectors.
4822  if (!VT.isScalarInteger())
4823    return false;
4824
4825  // Omit the optimization if the data size exceeds GRLen.
4826  if (VT.getSizeInBits() > Subtarget.getGRLen())
4827    return false;
4828
4829  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4830    const APInt &Imm = ConstNode->getAPIntValue();
4831    // Break MUL into (SLLI + ADD/SUB) or ALSL.
4832    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4833        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4834      return true;
4835    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4836    if (ConstNode->hasOneUse() &&
4837        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4838         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4839      return true;
4840    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4841    // in which the immediate has two set bits. Or Break (MUL x, imm)
4842    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4843    // equals to (1 << s0) - (1 << s1).
4844    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4845      unsigned Shifts = Imm.countr_zero();
4846      // Reject immediates which can be composed via a single LUI.
4847      if (Shifts >= 12)
4848        return false;
4849      // Reject multiplications can be optimized to
4850      // (SLLI (ALSL x, x, 1/2/3/4), s).
4851      APInt ImmPop = Imm.ashr(Shifts);
4852      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4853        return false;
4854      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4855      // since it needs one more instruction than other 3 cases.
4856      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4857      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4858          (ImmSmall - Imm).isPowerOf2())
4859        return true;
4860    }
4861  }
4862
4863  return false;
4864}
4865
4866bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4867                                                    const AddrMode &AM,
4868                                                    Type *Ty, unsigned AS,
4869                                                    Instruction *I) const {
4870  // LoongArch has four basic addressing modes:
4871  //  1. reg
4872  //  2. reg + 12-bit signed offset
4873  //  3. reg + 14-bit signed offset left-shifted by 2
4874  //  4. reg1 + reg2
4875  // TODO: Add more checks after support vector extension.
4876
4877  // No global is ever allowed as a base.
4878  if (AM.BaseGV)
4879    return false;
4880
4881  // Require a 12 or 14 bit signed offset.
4882  if (!isInt<12>(AM.BaseOffs) || !isShiftedInt<14, 2>(AM.BaseOffs))
4883    return false;
4884
4885  switch (AM.Scale) {
4886  case 0:
4887    // "i" is not allowed.
4888    if (!AM.HasBaseReg)
4889      return false;
4890    // Otherwise we have "r+i".
4891    break;
4892  case 1:
4893    // "r+r+i" is not allowed.
4894    if (AM.HasBaseReg && AM.BaseOffs != 0)
4895      return false;
4896    // Otherwise we have "r+r" or "r+i".
4897    break;
4898  case 2:
4899    // "2*r+r" or "2*r+i" is not allowed.
4900    if (AM.HasBaseReg || AM.BaseOffs)
4901      return false;
4902    // Otherwise we have "r+r".
4903    break;
4904  default:
4905    return false;
4906  }
4907
4908  return true;
4909}
4910
4911bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4912  return isInt<12>(Imm);
4913}
4914
4915bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4916  return isInt<12>(Imm);
4917}
4918
4919bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4920  // Zexts are free if they can be combined with a load.
4921  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4922  // poorly with type legalization of compares preferring sext.
4923  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4924    EVT MemVT = LD->getMemoryVT();
4925    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4926        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4927         LD->getExtensionType() == ISD::ZEXTLOAD))
4928      return true;
4929  }
4930
4931  return TargetLowering::isZExtFree(Val, VT2);
4932}
4933
4934bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
4935  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4936}
4937
4938bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4939  // TODO: Support vectors.
4940  if (Y.getValueType().isVector())
4941    return false;
4942
4943  return !isa<ConstantSDNode>(Y);
4944}
4945
4946ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4947  // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4948  return ISD::SIGN_EXTEND;
4949}
4950