//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we custom-lower it.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Hoist bitcasts out of shuffles
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Custom lower condition codes for i64x2 vectors, which lack native
    // comparison instructions; the custom lowering expands them as needed.
    for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
      setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
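    // because SIMD's extract_lane_s instructions already sign-extend their
    // results; the custom lowering takes advantage of that.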
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
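    // (the SIMD proposal's load8x8, load16x4, and load32x2 instructions, in
    // signed and unsigned variants).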
    for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other WASM backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
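  // Everything else (e.g. Nand, Min, Max) is expanded to a cmpxchg loop,
  // which maps onto wasm's native atomic.rmw.cmpxchg instructions.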
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;
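  // (There is no legal integer type between i1 and i8, so sub-byte shift
  // amount widths round up to i8.)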

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
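  // For example, for i32 conversions, Limit is INT32_MIN = -2^31, so CmpVal
  // is 2^32 for unsigned inputs and 2^31 for signed ones; both bounds are
  // powers of two and thus exactly representable in f32 and f64.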
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);
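  // The resulting CFG is a diamond:
  //
  //             BB (range check)
  //            /                \
  //       TrueMBB             FalseMBB
  //   (use Substitute)    (do the conversion)
  //            \                /
  //              DoneMBB (phi)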

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
                                           DebugLoc DL, MachineBasicBlock *BB,
                                           const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect = CallParams.getOperand(0).isReg();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // Move the function pointer to the end of the arguments for indirect calls
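  // (call_indirect takes its callee, the table index, after the regular
  // arguments, matching the order in which operands are popped off the
  // wasm value stack).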
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.RemoveOperand(0);
    CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  // Add placeholders for the type index and immediate flags
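  // (the placeholder immediates are replaced with real values later, when
  // the instruction is lowered to MC).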
  if (IsIndirect) {
    MIB.addImm(0);
    MIB.addImm(0);
  }

  for (auto Use : CallParams.uses())
    MIB.add(Use);

  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  return BB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  case WebAssembly::CALL_RESULTS:
  case WebAssembly::RET_CALL_RESULTS:
    return LowerCallResults(MI, DL, BB, TII);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine if wrapping
  // could be happening, so we approximate this by accepting only non-negative
  // offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that may
  // be unaligned should be declared with the p2align attribute, and there may
  // be a performance impact. We tell LLVM they're "fast" because, for the
  // kinds of things LLVM uses this for (merging adjacent stores of constants,
  // etc.), WebAssembly implementations will either want the unaligned access
  // or they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT ExtT = ExtVal.getValueType();
  EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
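  // These are exactly the extending loads registered as Legal in the
  // constructor above; other combinations are not desirable to fold.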
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  // So far, all branch instructions in Wasm take an I32 condition.
  // The default TargetLowering::getSetCCResultType returns the pointer size,
  // which would be useful to reduce instruction counts when testing
  // against 64-bit pointers/values if at some point Wasm supports that.
  return EVT::getIntegerVT(C, 32);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // The atomic.notify instruction does not really load from the memory
    // specified by this argument, but a MachineMemOperand must be either a
    // load or a store, so we mark this as a load.
    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatile in the backend, so we should be
    // consistent. The same applies to the wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke ||
         CallConv == CallingConv::Swift;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These additional arguments are also added to the
  // callee signature; they are necessary to match the callee and caller
  // signatures for indirect calls.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
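    // There are no non-fixed arguments to store, so no buffer was allocated;
    // pass address zero, which the callee presumably never dereferences.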
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, since every direct
    // call is), turn it into a TargetGlobalAddress node so that
    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
    // calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These additional arguments are also added to the
  // callee signature; they are necessary to match the callee and caller
  // signatures for indirect calls.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
                      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs.
1057  assert(MFI->getParams().size() == Params.size() &&
1058         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1059                    Params.begin()));
1060
1061  return Chain;
1062}
1063
1064void WebAssemblyTargetLowering::ReplaceNodeResults(
1065    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1066  switch (N->getOpcode()) {
1067  case ISD::SIGN_EXTEND_INREG:
1068    // Do not add any results, signifying that N should not be custom lowered
1069    // after all. This happens because simd128 turns on custom lowering for
1070    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1071    // illegal type.
1072    break;
1073  default:
1074    llvm_unreachable(
1075        "ReplaceNodeResults not implemented for this op for WebAssembly!");
1076  }
1077}
1078
1079//===----------------------------------------------------------------------===//
1080//  Custom lowering hooks.
1081//===----------------------------------------------------------------------===//
1082
1083SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1084                                                  SelectionDAG &DAG) const {
1085  SDLoc DL(Op);
1086  switch (Op.getOpcode()) {
1087  default:
1088    llvm_unreachable("unimplemented operation lowering");
1089    return SDValue();
1090  case ISD::FrameIndex:
1091    return LowerFrameIndex(Op, DAG);
1092  case ISD::GlobalAddress:
1093    return LowerGlobalAddress(Op, DAG);
1094  case ISD::ExternalSymbol:
1095    return LowerExternalSymbol(Op, DAG);
1096  case ISD::JumpTable:
1097    return LowerJumpTable(Op, DAG);
1098  case ISD::BR_JT:
1099    return LowerBR_JT(Op, DAG);
1100  case ISD::VASTART:
1101    return LowerVASTART(Op, DAG);
1102  case ISD::BlockAddress:
1103  case ISD::BRIND:
1104    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1105    return SDValue();
1106  case ISD::RETURNADDR:
1107    return LowerRETURNADDR(Op, DAG);
1108  case ISD::FRAMEADDR:
1109    return LowerFRAMEADDR(Op, DAG);
1110  case ISD::CopyToReg:
1111    return LowerCopyToReg(Op, DAG);
1112  case ISD::EXTRACT_VECTOR_ELT:
1113  case ISD::INSERT_VECTOR_ELT:
1114    return LowerAccessVectorElement(Op, DAG);
1115  case ISD::INTRINSIC_VOID:
1116  case ISD::INTRINSIC_WO_CHAIN:
1117  case ISD::INTRINSIC_W_CHAIN:
1118    return LowerIntrinsic(Op, DAG);
1119  case ISD::SIGN_EXTEND_INREG:
1120    return LowerSIGN_EXTEND_INREG(Op, DAG);
1121  case ISD::BUILD_VECTOR:
1122    return LowerBUILD_VECTOR(Op, DAG);
1123  case ISD::VECTOR_SHUFFLE:
1124    return LowerVECTOR_SHUFFLE(Op, DAG);
1125  case ISD::SETCC:
1126    return LowerSETCC(Op, DAG);
1127  case ISD::SHL:
1128  case ISD::SRA:
1129  case ISD::SRL:
1130    return LowerShift(Op, DAG);
1131  }
1132}
1133
1134SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1135                                                  SelectionDAG &DAG) const {
1136  SDValue Src = Op.getOperand(2);
1137  if (isa<FrameIndexSDNode>(Src.getNode())) {
1138    // CopyToReg nodes don't support FrameIndex operands. Other targets select
1139    // the FI to some LEA-like instruction, but since we don't have that, we
1140    // need to insert some kind of instruction that can take an FI operand and
1141    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1142    // local.copy between Op and its FI operand.
1143    SDValue Chain = Op.getOperand(0);
1144    SDLoc DL(Op);
1145    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1146    EVT VT = Src.getValueType();
1147    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1148                                                   : WebAssembly::COPY_I64,
1149                                    DL, VT, Src),
1150                 0);
1151    return Op.getNode()->getNumValues() == 1
1152               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1153               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1154                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
1155                                                           : SDValue());
1156  }
1157  return SDValue();
1158}
1159
1160SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1161                                                   SelectionDAG &DAG) const {
1162  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1163  return DAG.getTargetFrameIndex(FI, Op.getValueType());
1164}
1165
1166SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1167                                                   SelectionDAG &DAG) const {
1168  SDLoc DL(Op);
1169
1170  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1171    fail(DL, DAG,
1172         "Non-Emscripten WebAssembly hasn't implemented "
1173         "__builtin_return_address");
1174    return SDValue();
1175  }
1176
1177  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1178    return SDValue();
1179
1180  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1181  MakeLibCallOptions CallOptions;
1182  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1183                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1184      .first;
1185}
1186
1187SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1188                                                  SelectionDAG &DAG) const {
1189  // Non-zero depths are not supported by WebAssembly currently. Use the
1190  // legalizer's default expansion, which is to return 0 (what this function is
1191  // documented to do).
1192  if (Op.getConstantOperandVal(0) > 0)
1193    return SDValue();
1194
1195  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1196  EVT VT = Op.getValueType();
1197  Register FP =
1198      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1199  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1200}
1201
1202SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1203                                                      SelectionDAG &DAG) const {
1204  SDLoc DL(Op);
1205  const auto *GA = cast<GlobalAddressSDNode>(Op);
1206  EVT VT = Op.getValueType();
1207  assert(GA->getTargetFlags() == 0 &&
1208         "Unexpected target flags on generic GlobalAddressSDNode");
1209  if (GA->getAddressSpace() != 0)
1210    fail(DL, DAG, "WebAssembly only expects the 0 address space");
1211
1212  unsigned OperandFlags = 0;
1213  if (isPositionIndependent()) {
1214    const GlobalValue *GV = GA->getGlobal();
1215    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1216      MachineFunction &MF = DAG.getMachineFunction();
1217      MVT PtrVT = getPointerTy(MF.getDataLayout());
1218      const char *BaseName;
1219      if (GV->getValueType()->isFunctionTy()) {
1220        BaseName = MF.createExternalSymbolName("__table_base");
1221        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1222      }
1223      else {
1224        BaseName = MF.createExternalSymbolName("__memory_base");
1225        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1226      }
1227      SDValue BaseAddr =
1228          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1229                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
1230
1231      SDValue SymAddr = DAG.getNode(
1232          WebAssemblyISD::WrapperPIC, DL, VT,
1233          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1234                                     OperandFlags));
1235
1236      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1237    } else {
1238      OperandFlags = WebAssemblyII::MO_GOT;
1239    }
1240  }
1241
1242  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1243                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1244                                                GA->getOffset(), OperandFlags));
1245}
1246
1247SDValue
1248WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1249                                               SelectionDAG &DAG) const {
1250  SDLoc DL(Op);
1251  const auto *ES = cast<ExternalSymbolSDNode>(Op);
1252  EVT VT = Op.getValueType();
1253  assert(ES->getTargetFlags() == 0 &&
1254         "Unexpected target flags on generic ExternalSymbolSDNode");
1255  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1256                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1257}
1258
1259SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1260                                                  SelectionDAG &DAG) const {
1261  // There's no need for a Wrapper node because we always incorporate a jump
1262  // table operand into a BR_TABLE instruction, rather than ever
1263  // materializing it in a register.
1264  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1265  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1266                                JT->getTargetFlags());
1267}
1268
1269SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1270                                              SelectionDAG &DAG) const {
1271  SDLoc DL(Op);
1272  SDValue Chain = Op.getOperand(0);
1273  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1274  SDValue Index = Op.getOperand(2);
1275  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1276
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // Add the first MBB as a dummy default target for now. This will be replaced
  // with the proper default target (and the preceding range check eliminated)
  // if possible by WebAssemblyFixBrTableDefaults.
  Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

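  // va_start stores the address of the vararg buffer, which was set up when
  // the formal arguments were lowered, to the pointer operand of va_start.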
  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV), 0);
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
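    // Return the address of this function's language-specific data area. The
    // exception handling support emits it under the GCC_except_table<function
    // number> label, so recreate that symbol here.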
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }

  case Intrinsic::wasm_shuffle: {
    // Drop the intrinsic ID operand and replace undef or out-of-range (>= 32)
    // mask indices with zero, but otherwise pass the shuffle through unchanged.
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      if (MaskIdx.isUndef() ||
          cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
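  // For example, a sext_inreg from i8 of lane 1 of a v4i32 vector becomes an
  // extract of lane 4 (1 * (16 / 4)) of the same vector bitcast to v16i8.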
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.
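  // For example, (build_vector 42, 42, 42, 7) would become an i32x4 splat of
  // 42 followed by a single replace_lane setting lane 3 to 7.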

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (!SwizzleCounts.empty())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over vector consts over splats
  if (NumSwizzleLanes >= NumSplatLanes &&
      (!Subtarget->hasUnimplementedSIMD128() ||
       NumSwizzleLanes >= NumConstantLanes)) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumConstantLanes >= NumSplatLanes &&
             Subtarget->hasUnimplementedSIMD128()) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        ConstLanes.push_back(Lane);
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  }
  if (!Result) {
    // Use a splat, but possibly a load_splat
    LoadSDNode *SplattedLoad;
    if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
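  // For example, mask index 1 in a v4i32 shuffle expands to the byte indices
  // 4, 5, 6, and 7.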
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because no comparison modes are supported. We could solve this by
  // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
  // (which return i64x2 results) as well. So instead we manually unroll i64x2
  // comparisons here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
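  // Each lane is compared with a select_cc that produces the all-ones or
  // all-zeros pattern the corresponding vector comparison would have produced.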
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;

  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
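  // For example, an i8 lane shift by 9 executes as a 32-bit shift by
  // 9 & 7 == 1, mirroring how the SIMD shift instructions take their shift
  // counts modulo the lane width.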
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

//===----------------------------------------------------------------------===//
//   Custom DAG combine hooks
//===----------------------------------------------------------------------===//
static SDValue
performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  auto Shuffle = cast<ShuffleVectorSDNode>(N);

  // Hoist vector bitcasts that don't change the number of lanes out of unary
  // shuffles, where they are less likely to get in the way of other combines.
  // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  SDValue Bitcast = N->getOperand(0);
  if (Bitcast.getOpcode() != ISD::BITCAST)
    return SDValue();
  if (!N->getOperand(1).isUndef())
    return SDValue();
  SDValue CastOp = Bitcast.getOperand(0);
  MVT SrcType = CastOp.getSimpleValueType();
  MVT DstType = Bitcast.getSimpleValueType();
  if (!SrcType.is128BitVector() ||
      SrcType.getVectorNumElements() != DstType.getVectorNumElements())
    return SDValue();
  SDValue NewShuffle = DAG.getVectorShuffle(
      SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  return DAG.getBitcast(DstType, NewShuffle);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  }
}