//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
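  // For example, SIMD comparisons such as i8x16.eq produce all-zeros or
  // all-ones lanes, which is what ZeroOrNegativeOneBooleanContent describes.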
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasUnimplementedSIMD128()) {
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
    if (Subtarget->hasUnimplementedSIMD128())
      setOperationAction(Op, MVT::v2i64, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Custom);
    }

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Custom);
    }

    // There is no i64x2.mul instruction
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Expand);
    }

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Expand);
    }

    // But we do have integer min and max operations
    if (Subtarget->hasUnimplementedSIMD128()) {
      for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
        for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
          setOperationAction(Op, T, Legal);
    }

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
      setOperationAction(Op, MVT::v4f32, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2f64, Expand);
    }

    // Expand operations not supported for i64x2 vectors
    if (Subtarget->hasUnimplementedSIMD128())
      for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
        setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // Expand additional SIMD ops that V8 hasn't implemented yet
    if (!Subtarget->hasUnimplementedSIMD128()) {
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
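  // For example, (sign_extend_inreg i32:$x, i8) sign-extends $x from bit 7;
  // with the sign-ext feature enabled it can be selected as i32.extend8_s.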
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - Truncating SIMD stores and most extending loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    if (Subtarget->hasUnimplementedSIMD128()) {
      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
      }
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other WASM backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
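  // For example, a C switch over cases {0, 1, 2} typically lowers to a single
  // br_table instruction rather than a chain of compares and branches.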
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
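  // NextPowerOf2(N - 1) rounds N up to a power of two, returning N unchanged
  // when it is already one (e.g., 64 -> 64, 48 -> 64).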
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
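// Schematically, the emitted control flow is:
//
//   BB:       test whether the input is in range (fabs(x) < limit for
//             signed; 0 <= x < limit for unsigned); br_if TrueMBB on failure
//   FalseMBB: result = trunc(x); br DoneMBB
//   TrueMBB:  result = substitute value (INT_MIN for signed, 0 for unsigned)
//   DoneMBB:  result = phi(FalseMBB, TrueMBB)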
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
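  // For example, for a signed f64 -> i32 conversion, Limit is INT32_MIN, so
  // CmpVal is 2^31 and the input is in range iff fabs(x) < 2^31; for the
  // unsigned case CmpVal is 2^32 and we additionally check x >= 0 below.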
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
    llvm_unreachable("Unexpected instruction to emit with custom inserter");
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode hook gives us no way to determine whether wrapping
  // could occur, so we approximate by accepting only non-negative offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though it should be declared
  // with the p2align attribute on loads and stores which do so, and there
  // may be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (!Subtarget->hasUnimplementedSIMD128())
    return false;
  MVT ExtT = ExtVal.getSimpleValueType();
  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return TargetLowering::getSetCCResultType(DL, C, VT);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // The atomic.notify instruction does not really load the memory specified
    // by this argument, but a MachineMemOperand must be either a load or a
    // store, so we mark this as a load.
    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatile in the backend, so we should be
    // consistent. The same applies to the wasm_atomic_wait intrinsics.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
    if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
      // Do not tail call unless caller and callee return types match
      const Function &F = MF.getFunction();
      const TargetMachine &TM = getTargetMachine();
      Type *RetTy = F.getReturnType();
      SmallVector<MVT, 4> CallerRetTys;
      SmallVector<MVT, 4> CalleeRetTys;
      computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
      computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
      bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                        std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                   CalleeRetTys.begin());
      if (!TypesMatch) {
        // musttail in this case would be an LLVM IR validation failure
        assert(!MustTail);
        CLI.IsTailCall = false;
      }
    } else {
      CLI.IsTailCall = false;
      if (MustTail) {
        if (CLI.IsVarArg) {
          // The return would pop the argument buffer
          fail(DL, DAG, "WebAssembly does not support varargs tail calls");
        } else {
          fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
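    // For example, in a call like printf("%d", x), the format string is a
    // fixed argument passed directly, while x is stored into this buffer and
    // reached through the buffer pointer passed as the final operand.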
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Align = std::max(Out.Flags.getOrigAlign(),
                                Layout.getABITypeAlignment(Ty));
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Align);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, as every direct
    // call is) turn it into a TargetGlobalAddress node so that
    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
    // calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                      DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs.
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
//  Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));
  // TODO: For now, we just pick something arbitrary for the default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV), 0);
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract. SIMD does not depend on sign extension operations, but
  // allowing sext_inreg in this context lets us have simple patterns to select
  // extract_lane_s instructions. Expanding sext_inreg everywhere would be
  // simpler in this file, but would necessitate large and brittle patterns to
  // undo the expansion and select extract_lane_s instructions.
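  // For example, (sign_extend_inreg (extract_vector_elt v16i8:$v, $i), i8) is
  // left intact here so ISel can select a single i8x16.extract_lane_s.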
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    const SDValue &Extract = Op.getOperand(0);
    MVT VecT = Extract.getOperand(0).getSimpleValueType();
    MVT ExtractedLaneT =
        cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
    MVT ExtractedVecT =
        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
    if (ExtractedVecT == VecT)
      return Op;
    // Bitcast vector to appropriate type to ensure ISel pattern coverage
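    // e.g. extracting lane 2 of a v4i32 and sign-extending from i8 becomes an
    // extract of lane 8 (= 2 * (16 / 4)) of the same vector bitcast to v16i8,
    // which on little-endian wasm is the low byte of the original lane.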
    const SDValue &Index = Extract.getOperand(1);
    unsigned IndexVal = cast<ConstantSDNode>(Index.getNode())->getZExtValue();
    unsigned Scale =
        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
    assert(Scale > 1);
    SDValue NewIndex =
        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
    SDValue NewExtract = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
                       NewExtract, Op.getOperand(1));
  }
  // Otherwise expand
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once, followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.
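  // For example, (build_vector x, x, y, x) would become a splat of x followed
  // by a single replace_lane for lane 2; the choice among swizzle, constant
  // vector, and splat as the initializing instruction is made below.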

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
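  // This is the DAG produced when output lane $i dynamically selects its
  // value from $src using the index stored in lane $i of $indices, which is
  // exactly the lanewise semantics of the SIMD swizzle instruction.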
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
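  // std::tie cannot bind the nested (src, indices) pair directly, so wrap the
  // inner tie in forward_as_tuple to unpack both the pair and its count in a
  // single assignment.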
  if (!SwizzleCounts.empty())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  if (Subtarget->hasUnimplementedSIMD128()) {
    // Prefer swizzles over vector consts over splats
    if (NumSwizzleLanes >= NumSplatLanes &&
        NumSwizzleLanes >= NumConstantLanes) {
      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                           SwizzleIndices);
      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
        return Swizzled == GetSwizzleSrcs(I, Lane);
      };
    } else if (NumConstantLanes >= NumSplatLanes) {
      SmallVector<SDValue, 16> ConstLanes;
      for (const SDValue &Lane : Op->op_values()) {
        if (IsConstant(Lane)) {
          ConstLanes.push_back(Lane);
        } else if (LaneT.isFloatingPoint()) {
          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
        } else {
          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
        }
      }
      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
        return IsConstant(Lane);
      };
    }
  }
  if (!Result) {
    // Use a splat, but possibly a load_splat
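    // A load of the lane type that is splatted to every lane can be selected
    // as a single SIMD load_splat instruction instead of a scalar load
    // followed by a splat.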
    LoadSDNode *SplattedLoad;
    if (Subtarget->hasUnimplementedSIMD128() &&
        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
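  // e.g. for a v4i32 shuffle with LaneBytes = 4, mask index 5 expands to byte
  // indices 20, 21, 22, 23 (lane 1 of the second input vector).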
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because no i64x2 comparison instructions are supported. We could
  // solve this by expanding all i64x2 SETCC nodes, but that seems to expand
  // f64x2 SETCC nodes (which return i64x2 results) as well. So instead we
  // manually unroll i64x2 comparisons here.
  assert(Subtarget->hasUnimplementedSIMD128());
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
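  // Each lane becomes a scalar i64 select_cc producing all-ones on true and
  // zero on false, matching the all-bits-set boolean convention of SIMD
  // comparison results.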
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
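  // Wasm's extract_lane and replace_lane take the lane index as an immediate,
  // so only constant indices can be selected directly; variable indices go
  // through the default expansion (typically via a stack temporary).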
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
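  // Masking the count to LaneT.getSizeInBits() - 1 gives the unrolled 32-bit
  // shifts the modulo-lane-width behavior of the wasm shift instructions,
  // e.g. an i8 lane count of 11 acts as a shift by 11 & 7 == 3.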
  SDLoc DL(Op);
  SDValue ShiftVal = Op.getOperand(1);
  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
  SDValue MaskedShiftVal = DAG.getNode(
      ISD::AND,                    // mask opcode
      DL, ShiftVal.getValueType(), // masked value type
      ShiftVal,                    // original shift value operand
      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
  );

  return DAG.UnrollVectorOp(
      DAG.getNode(Op.getOpcode(),        // original shift opcode
                  DL, Op.getValueType(), // original return type
                  Op.getOperand(0),      // original vector operand
                  MaskedShiftVal         // new masked shift value operand
                  )
          .getNode());
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  // Unroll non-splat vector shifts
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return unrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns
  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps
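  // Emitting the truncated constant directly as the i32 shift count avoids
  // materializing the i64 splat constant and wrapping it to i32 at run time.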
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
//                          WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
