FastISel.cpp revision 363496
1//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the FastISel class.
10//
11// "Fast" instruction selection is designed to emit very poor code quickly.
12// Also, it is not designed to be able to do much lowering, so most illegal
13// types (e.g. i64 on 32-bit targets) and operations are not supported.  It is
14// also not intended to be able to do much optimization, except in a few cases
15// where doing optimizations reduces overall compile time.  For example, folding
16// constants into immediate fields is often done, because it's cheap and it
17// reduces the number of instructions later phases have to examine.
18//
19// "Fast" instruction selection is able to fail gracefully and transfer
20// control to the SelectionDAG selector for operations that it doesn't
21// support.  In many cases, this allows us to avoid duplicating a lot of
22// the complicated lowering logic that SelectionDAG currently has.
23//
24// The intended use for "fast" instruction selection is "-O0" mode
25// compilation, where the quality of the generated code is irrelevant when
26// weighed against the speed at which the code can be generated.  Also,
27// at -O0, the LLVM optimizers are not running, and this makes the
28// compile time of codegen a much higher portion of the overall compile
29// time.  Despite its limitations, "fast" instruction selection is able to
30// handle enough code on its own to provide noticeable overall speedups
31// in -O0 compiles.
32//
33// Basic operations are supported in a target-independent way, by reading
34// the same instruction descriptions that the SelectionDAG selector reads,
35// and identifying simple arithmetic operations that can be directly selected
36// from simple operators.  More complicated operations currently require
37// target-specific code.
38//
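// As a rough sketch (hypothetical target name; the real hooks are declared in
// llvm/CodeGen/FastISel.h and implemented per target), a backend participates
// by subclassing FastISel and overriding the fast* hooks:
//
//   class MyTargetFastISel final : public FastISel {
//   public:
//     MyTargetFastISel(FunctionLoweringInfo &FuncInfo,
//                      const TargetLibraryInfo *LibInfo)
//         : FastISel(FuncInfo, LibInfo) {}
//     bool fastSelectInstruction(const Instruction *I) override;
//     unsigned fastMaterializeConstant(const Constant *C) override;
//   };
//
// SelectionDAGISel obtains such an object through
// TargetLowering::createFastISel and falls back to the DAG path for any
// instruction where fastSelectInstruction returns false.
//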
39//===----------------------------------------------------------------------===//
40
41#include "llvm/CodeGen/FastISel.h"
42#include "llvm/ADT/APFloat.h"
43#include "llvm/ADT/APSInt.h"
44#include "llvm/ADT/DenseMap.h"
45#include "llvm/ADT/Optional.h"
46#include "llvm/ADT/SmallPtrSet.h"
47#include "llvm/ADT/SmallString.h"
48#include "llvm/ADT/SmallVector.h"
49#include "llvm/ADT/Statistic.h"
50#include "llvm/Analysis/BranchProbabilityInfo.h"
51#include "llvm/Analysis/TargetLibraryInfo.h"
52#include "llvm/CodeGen/Analysis.h"
53#include "llvm/CodeGen/FunctionLoweringInfo.h"
54#include "llvm/CodeGen/ISDOpcodes.h"
55#include "llvm/CodeGen/MachineBasicBlock.h"
56#include "llvm/CodeGen/MachineFrameInfo.h"
57#include "llvm/CodeGen/MachineInstr.h"
58#include "llvm/CodeGen/MachineInstrBuilder.h"
59#include "llvm/CodeGen/MachineMemOperand.h"
60#include "llvm/CodeGen/MachineModuleInfo.h"
61#include "llvm/CodeGen/MachineOperand.h"
62#include "llvm/CodeGen/MachineRegisterInfo.h"
63#include "llvm/CodeGen/StackMaps.h"
64#include "llvm/CodeGen/TargetInstrInfo.h"
65#include "llvm/CodeGen/TargetLowering.h"
66#include "llvm/CodeGen/TargetSubtargetInfo.h"
67#include "llvm/CodeGen/ValueTypes.h"
68#include "llvm/IR/Argument.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CallSite.h"
72#include "llvm/IR/CallingConv.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
76#include "llvm/IR/DebugInfo.h"
77#include "llvm/IR/DebugLoc.h"
78#include "llvm/IR/DerivedTypes.h"
79#include "llvm/IR/Function.h"
80#include "llvm/IR/GetElementPtrTypeIterator.h"
81#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/InlineAsm.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
85#include "llvm/IR/Instructions.h"
86#include "llvm/IR/IntrinsicInst.h"
87#include "llvm/IR/LLVMContext.h"
88#include "llvm/IR/Mangler.h"
89#include "llvm/IR/Metadata.h"
90#include "llvm/IR/Operator.h"
91#include "llvm/IR/PatternMatch.h"
92#include "llvm/IR/Type.h"
93#include "llvm/IR/User.h"
94#include "llvm/IR/Value.h"
95#include "llvm/MC/MCContext.h"
96#include "llvm/MC/MCInstrDesc.h"
97#include "llvm/MC/MCRegisterInfo.h"
98#include "llvm/Support/Casting.h"
99#include "llvm/Support/Debug.h"
100#include "llvm/Support/ErrorHandling.h"
101#include "llvm/Support/MachineValueType.h"
102#include "llvm/Support/MathExtras.h"
103#include "llvm/Support/raw_ostream.h"
104#include "llvm/Target/TargetMachine.h"
105#include "llvm/Target/TargetOptions.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <iterator>
110#include <utility>
111
112using namespace llvm;
113using namespace PatternMatch;
114
115#define DEBUG_TYPE "isel"
116
117// FIXME: Remove this after the feature has proven reliable.
118static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
119                                     cl::init(true), cl::Hidden,
120                                     cl::desc("Sink local values in FastISel"));
121
122STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
123                                         "target-independent selector");
124STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
125                                    "target-specific selector");
126STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
127
128/// Set the current block to which generated machine instructions will be
129/// appended.
130void FastISel::startNewBlock() {
131  assert(LocalValueMap.empty() &&
132         "local values should be cleared after finishing a BB");
133
134  // Instructions are appended to FuncInfo.MBB. If the basic block already
135  // contains labels or copies, use the last instruction as the last local
136  // value.
137  EmitStartPt = nullptr;
138  if (!FuncInfo.MBB->empty())
139    EmitStartPt = &FuncInfo.MBB->back();
140  LastLocalValue = EmitStartPt;
141}
142
143/// Flush the local CSE map and sink anything we can.
144void FastISel::finishBasicBlock() { flushLocalValueMap(); }
145
146bool FastISel::lowerArguments() {
147  if (!FuncInfo.CanLowerReturn)
148    // Fallback to SDISel argument lowering code to deal with sret pointer
149    // parameter.
150    return false;
151
152  if (!fastLowerArguments())
153    return false;
154
155  // Enter arguments into ValueMap for uses in non-entry BBs.
156  for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
157                                    E = FuncInfo.Fn->arg_end();
158       I != E; ++I) {
159    DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
160    assert(VI != LocalValueMap.end() && "Missed an argument?");
161    FuncInfo.ValueMap[&*I] = VI->second;
162  }
163  return true;
164}
165
166/// Return the defined register if this instruction defines exactly one
167/// virtual register and uses no other virtual registers. Otherwise return 0.
168static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
169  unsigned RegDef = 0;
170  for (const MachineOperand &MO : MI.operands()) {
171    if (!MO.isReg())
172      continue;
173    if (MO.isDef()) {
174      if (RegDef)
175        return 0;
176      RegDef = MO.getReg();
177    } else if (Register::isVirtualRegister(MO.getReg())) {
178      // This is another use of a vreg. Don't try to sink it.
179      return 0;
180    }
181  }
182  return RegDef;
183}
184
185void FastISel::flushLocalValueMap() {
186  // Try to sink local values down to their first use so that we can give them a
187  // better debug location. This has the side effect of shrinking local value
188  // live ranges, which helps out fast regalloc.
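  //
  // As a purely illustrative sketch (hypothetical x86 MIR, not real output):
  // before sinking, a constant materialization sits in the local value area
  // at the top of the block, far from its use:
  //
  //     %1 = MOV32ri 42          ; local value area
  //     ...                      ; unrelated instructions
  //     %2 = ADD32rr %0, %1
  //
  // sinkLocalValueMaterialization moves the MOV32ri to just before the ADD
  // and copies the ADD's DebugLoc onto it, shrinking %1's live range and
  // giving the constant a sensible source location.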
189  if (SinkLocalValues && LastLocalValue != EmitStartPt) {
190    // Sink local value materialization instructions between EmitStartPt and
191    // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
192    // avoid inserting into the range that we're iterating over.
193    MachineBasicBlock::reverse_iterator RE =
194        EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
195                    : FuncInfo.MBB->rend();
196    MachineBasicBlock::reverse_iterator RI(LastLocalValue);
197
198    InstOrderMap OrderMap;
199    for (; RI != RE;) {
200      MachineInstr &LocalMI = *RI;
201      ++RI;
202      bool Store = true;
203      if (!LocalMI.isSafeToMove(nullptr, Store))
204        continue;
205      unsigned DefReg = findSinkableLocalRegDef(LocalMI);
206      if (DefReg == 0)
207        continue;
208
209      sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
210    }
211  }
212
213  LocalValueMap.clear();
214  LastLocalValue = EmitStartPt;
215  recomputeInsertPt();
216  SavedInsertPt = FuncInfo.InsertPt;
217  LastFlushPoint = FuncInfo.InsertPt;
218}
219
220static bool isRegUsedByPhiNodes(unsigned DefReg,
221                                FunctionLoweringInfo &FuncInfo) {
222  for (auto &P : FuncInfo.PHINodesToUpdate)
223    if (P.second == DefReg)
224      return true;
225  return false;
226}
227
228static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
229  // Ignore non-EH labels.
230  if (!MI.isEHLabel())
231    return false;
232
233  // Any EH label outside a landing pad must be for an invoke. Consider it a
234  // terminator.
235  if (!MBB->isEHPad())
236    return true;
237
238  // If this is a landingpad, the first non-phi instruction will be an EH_LABEL.
239  // Don't consider that label to be a terminator.
240  return MI.getIterator() != MBB->getFirstNonPHI();
241}
242
243/// Build a map of instruction orders. Record the first terminator and its
244/// order. Consider EH_LABEL instructions to be terminators as well, since local
245/// values for phis after invokes must be materialized before the call.
246void FastISel::InstOrderMap::initialize(
247    MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
248  unsigned Order = 0;
249  for (MachineInstr &I : *MBB) {
250    if (!FirstTerminator &&
251        (I.isTerminator() || isTerminatingEHLabel(MBB, I))) {
252      FirstTerminator = &I;
253      FirstTerminatorOrder = Order;
254    }
255    Orders[&I] = Order++;
256
257    // We don't need to order instructions past the last flush point.
258    if (I.getIterator() == LastFlushPoint)
259      break;
260  }
261}
262
263void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
264                                             unsigned DefReg,
265                                             InstOrderMap &OrderMap) {
266  // If this register is used by a register fixup, MRI will not contain all
267  // the uses until after register fixups, so don't attempt to sink or DCE
268  // this instruction. Register fixups typically come from no-op cast
269  // instructions, which replace the cast instruction vreg with the local
270  // value vreg.
271  if (FuncInfo.RegsWithFixups.count(DefReg))
272    return;
273
274  // We can DCE this instruction if there are no uses and it wasn't
275  // materialized for a successor PHI node.
276  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
277  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
278    if (EmitStartPt == &LocalMI)
279      EmitStartPt = EmitStartPt->getPrevNode();
280    LLVM_DEBUG(dbgs() << "removing dead local value materialization "
281                      << LocalMI);
282    OrderMap.Orders.erase(&LocalMI);
283    LocalMI.eraseFromParent();
284    return;
285  }
286
287  // Number the instructions if we haven't yet so we can efficiently find the
288  // earliest use.
289  if (OrderMap.Orders.empty())
290    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
291
292  // Find the first user in the BB.
293  MachineInstr *FirstUser = nullptr;
294  unsigned FirstOrder = std::numeric_limits<unsigned>::max();
295  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
296    auto I = OrderMap.Orders.find(&UseInst);
297    assert(I != OrderMap.Orders.end() &&
298           "local value used by instruction outside local region");
299    unsigned UseOrder = I->second;
300    if (UseOrder < FirstOrder) {
301      FirstOrder = UseOrder;
302      FirstUser = &UseInst;
303    }
304  }
305
306  // The insertion point will be the first terminator or the first user,
307  // whichever came first. If there was no terminator, this must be a
308  // fallthrough block and the insertion point is the end of the block.
309  MachineBasicBlock::instr_iterator SinkPos;
310  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
311    FirstOrder = OrderMap.FirstTerminatorOrder;
312    SinkPos = OrderMap.FirstTerminator->getIterator();
313  } else if (FirstUser) {
314    SinkPos = FirstUser->getIterator();
315  } else {
316    assert(UsedByPHI && "must be used by a phi if there are no other users");
317    SinkPos = FuncInfo.MBB->instr_end();
318  }
319
320  // Collect all DBG_VALUEs before the new insertion position so that we can
321  // sink them.
322  SmallVector<MachineInstr *, 1> DbgValues;
323  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
324    if (!DbgVal.isDebugValue())
325      continue;
326    unsigned UseOrder = OrderMap.Orders[&DbgVal];
327    if (UseOrder < FirstOrder)
328      DbgValues.push_back(&DbgVal);
329  }
330
331  // Sink LocalMI before SinkPos and assign it the same DebugLoc.
332  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
333  FuncInfo.MBB->remove(&LocalMI);
334  FuncInfo.MBB->insert(SinkPos, &LocalMI);
335  if (SinkPos != FuncInfo.MBB->end())
336    LocalMI.setDebugLoc(SinkPos->getDebugLoc());
337
338  // Sink any debug values that we've collected.
339  for (MachineInstr *DI : DbgValues) {
340    FuncInfo.MBB->remove(DI);
341    FuncInfo.MBB->insert(SinkPos, DI);
342  }
343}
344
345bool FastISel::hasTrivialKill(const Value *V) {
346  // Don't consider constants or arguments to have trivial kills.
347  const Instruction *I = dyn_cast<Instruction>(V);
348  if (!I)
349    return false;
350
351  // No-op casts are trivially coalesced by fast-isel.
352  if (const auto *Cast = dyn_cast<CastInst>(I))
353    if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
354      return false;
355
356  // Even though the value might have only one use in the LLVM IR, it is
357  // possible that FastISel folded the use into another instruction, so there
358  // may be more than one use at the machine instruction level.
359  unsigned Reg = lookUpRegForValue(V);
360  if (Reg && !MRI.use_empty(Reg))
361    return false;
362
363  // GEPs with all zero indices are trivially coalesced by fast-isel.
364  if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
365    if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
366      return false;
367
368  // Only instructions with a single use in the same basic block are considered
369  // to have trivial kills.
370  return I->hasOneUse() &&
371         !(I->getOpcode() == Instruction::BitCast ||
372           I->getOpcode() == Instruction::PtrToInt ||
373           I->getOpcode() == Instruction::IntToPtr) &&
374         cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
375}
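
// A minimal illustration of the rule above (hypothetical IR, for exposition
// only):
//
//   %x = add i32 %a, %b
//   %y = mul i32 %x, %c     ; the only use of %x, in the same basic block
//
// Here %x has a trivial kill at the mul, so its vreg can be marked killed
// there. If %x had a second use, or its single use lived in another block,
// hasTrivialKill would return false.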
376
377unsigned FastISel::getRegForValue(const Value *V) {
378  EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
379  // Don't handle non-simple values in FastISel.
380  if (!RealVT.isSimple())
381    return 0;
382
383  // Ignore illegal types. We must do this before looking up the value
384  // in ValueMap because Arguments are given virtual registers regardless
385  // of whether FastISel can handle them.
386  MVT VT = RealVT.getSimpleVT();
387  if (!TLI.isTypeLegal(VT)) {
388    // Handle integer promotions, though, because they're common and easy.
389    if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
390      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
391    else
392      return 0;
393  }
394
395  // Look up the value to see if we already have a register for it.
396  unsigned Reg = lookUpRegForValue(V);
397  if (Reg)
398    return Reg;
399
400  // In bottom-up mode, just create the virtual register which will be used
401  // to hold the value. It will be materialized later.
402  if (isa<Instruction>(V) &&
403      (!isa<AllocaInst>(V) ||
404       !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
405    return FuncInfo.InitializeRegForValue(V);
406
407  SavePoint SaveInsertPt = enterLocalValueArea();
408
409  // Materialize the value in a register. Emit any instructions in the
410  // local value area.
411  Reg = materializeRegForValue(V, VT);
412
413  leaveLocalValueArea(SaveInsertPt);
414
415  return Reg;
416}
417
418unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
419  unsigned Reg = 0;
420  if (const auto *CI = dyn_cast<ConstantInt>(V)) {
421    if (CI->getValue().getActiveBits() <= 64)
422      Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
423  } else if (isa<AllocaInst>(V))
424    Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
425  else if (isa<ConstantPointerNull>(V))
426    // Translate this as an integer zero so that it can be
427    // local-CSE'd with actual integer zeros.
428    Reg =
429        getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
430  else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
431    if (CF->isNullValue())
432      Reg = fastMaterializeFloatZero(CF);
433    else
434      // Try to emit the constant directly.
435      Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
436
437    if (!Reg) {
438      // Try to emit the constant by using an integer constant with a cast.
439      const APFloat &Flt = CF->getValueAPF();
440      EVT IntVT = TLI.getPointerTy(DL);
441      uint32_t IntBitWidth = IntVT.getSizeInBits();
442      APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
443      bool isExact;
444      (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
445      if (isExact) {
446        unsigned IntegerReg =
447            getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
448        if (IntegerReg != 0)
449          Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
450                           /*Kill=*/false);
451      }
452    }
453  } else if (const auto *Op = dyn_cast<Operator>(V)) {
454    if (!selectOperator(Op, Op->getOpcode()))
455      if (!isa<Instruction>(Op) ||
456          !fastSelectInstruction(cast<Instruction>(Op)))
457        return 0;
458    Reg = lookUpRegForValue(Op);
459  } else if (isa<UndefValue>(V)) {
460    Reg = createResultReg(TLI.getRegClassFor(VT));
461    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
462            TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
463  }
464  return Reg;
465}
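
// For example (an illustrative sketch, not tied to any particular target): a
// ConstantFP such as 'double 42.0' with no direct materialization can still
// be handled above, because convertToInteger yields 42 exactly; the code then
// materializes the integer constant and converts it back with ISD::SINT_TO_FP
// via fastEmit_r. A value such as 0.5 is not exact, so Reg stays zero and
// selection of the using instruction falls back to SelectionDAG.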
466
467/// Helper for getRegForValue. This function is called when the value isn't
468/// already available in a register and must be materialized with new
469/// instructions.
470unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
471  unsigned Reg = 0;
472  // Give the target-specific code a try first.
473  if (isa<Constant>(V))
474    Reg = fastMaterializeConstant(cast<Constant>(V));
475
476  // If target-specific code couldn't or didn't want to handle the value, then
477  // give target-independent code a try.
478  if (!Reg)
479    Reg = materializeConstant(V, VT);
480
481  // Don't cache constant materializations in the general ValueMap.
482  // To do so would require tracking what uses they dominate.
483  if (Reg) {
484    LocalValueMap[V] = Reg;
485    LastLocalValue = MRI.getVRegDef(Reg);
486  }
487  return Reg;
488}
489
490unsigned FastISel::lookUpRegForValue(const Value *V) {
491  // Look up the value to see if we already have a register for it. We
492  // cache values defined by Instructions across blocks, and other values
493  // only locally. This is because Instructions already have the SSA
494  // def-dominates-use requirement enforced.
495  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
496  if (I != FuncInfo.ValueMap.end())
497    return I->second;
498  return LocalValueMap[V];
499}
500
501void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
502  if (!isa<Instruction>(I)) {
503    LocalValueMap[I] = Reg;
504    return;
505  }
506
507  unsigned &AssignedReg = FuncInfo.ValueMap[I];
508  if (AssignedReg == 0)
509    // Use the new register.
510    AssignedReg = Reg;
511  else if (Reg != AssignedReg) {
512    // Arrange for uses of AssignedReg to be replaced by uses of Reg.
513    for (unsigned i = 0; i < NumRegs; i++) {
514      FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
515      FuncInfo.RegsWithFixups.insert(Reg + i);
516    }
517
518    AssignedReg = Reg;
519  }
520}
521
522std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
523  unsigned IdxN = getRegForValue(Idx);
524  if (IdxN == 0)
525    // Unhandled operand. Halt "fast" selection and bail.
526    return std::pair<unsigned, bool>(0, false);
527
528  bool IdxNIsKill = hasTrivialKill(Idx);
529
530  // If the index is smaller or larger than intptr_t, truncate or extend it.
531  MVT PtrVT = TLI.getPointerTy(DL);
532  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
533  if (IdxVT.bitsLT(PtrVT)) {
534    IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
535                      IdxNIsKill);
536    IdxNIsKill = true;
537  } else if (IdxVT.bitsGT(PtrVT)) {
538    IdxN =
539        fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
540    IdxNIsKill = true;
541  }
542  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
543}
544
545void FastISel::recomputeInsertPt() {
546  if (getLastLocalValue()) {
547    FuncInfo.InsertPt = getLastLocalValue();
548    FuncInfo.MBB = FuncInfo.InsertPt->getParent();
549    ++FuncInfo.InsertPt;
550  } else
551    FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
552
553  // Now skip past any EH_LABELs, which must remain at the beginning.
554  while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
555         FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
556    ++FuncInfo.InsertPt;
557}
558
559void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
560                              MachineBasicBlock::iterator E) {
561  assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
562         "Invalid iterator!");
563  while (I != E) {
564    if (LastFlushPoint == I)
565      LastFlushPoint = E;
566    if (SavedInsertPt == I)
567      SavedInsertPt = E;
568    if (EmitStartPt == I)
569      EmitStartPt = E.isValid() ? &*E : nullptr;
570    if (LastLocalValue == I)
571      LastLocalValue = E.isValid() ? &*E : nullptr;
572
573    MachineInstr *Dead = &*I;
574    ++I;
575    Dead->eraseFromParent();
576    ++NumFastIselDead;
577  }
578  recomputeInsertPt();
579}
580
581FastISel::SavePoint FastISel::enterLocalValueArea() {
582  MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
583  DebugLoc OldDL = DbgLoc;
584  recomputeInsertPt();
585  DbgLoc = DebugLoc();
586  SavePoint SP = {OldInsertPt, OldDL};
587  return SP;
588}
589
590void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
591  if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
592    LastLocalValue = &*std::prev(FuncInfo.InsertPt);
593
594  // Restore the previous insert position.
595  FuncInfo.InsertPt = OldInsertPt.InsertPt;
596  DbgLoc = OldInsertPt.DL;
597}
598
599bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
600  EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
601  if (VT == MVT::Other || !VT.isSimple())
602    // Unhandled type. Halt "fast" selection and bail.
603    return false;
604
605  // We only handle legal types. For example, on x86-32 the instruction
606  // selector contains all of the 64-bit instructions from x86-64,
607  // under the assumption that i64 won't be used if the target doesn't
608  // support it.
609  if (!TLI.isTypeLegal(VT)) {
610    // MVT::i1 is special. Allow AND, OR, or XOR because they
611    // don't require additional zeroing, which makes them easy.
612    if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
613                          ISDOpcode == ISD::XOR))
614      VT = TLI.getTypeToTransformTo(I->getContext(), VT);
615    else
616      return false;
617  }
618
619  // Check if the first operand is a constant, and handle it as "ri".  At -O0,
620  // we don't have anything that canonicalizes operand order.
621  if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
622    if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
623      unsigned Op1 = getRegForValue(I->getOperand(1));
624      if (!Op1)
625        return false;
626      bool Op1IsKill = hasTrivialKill(I->getOperand(1));
627
628      unsigned ResultReg =
629          fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
630                       CI->getZExtValue(), VT.getSimpleVT());
631      if (!ResultReg)
632        return false;
633
634      // We successfully emitted code for the given LLVM Instruction.
635      updateValueMap(I, ResultReg);
636      return true;
637    }
638
639  unsigned Op0 = getRegForValue(I->getOperand(0));
640  if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
641    return false;
642  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
643
644  // Check if the second operand is a constant and handle it appropriately.
645  if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
646    uint64_t Imm = CI->getSExtValue();
647
648    // Transform "sdiv exact X, 8" -> "sra X, 3".
649    if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
650        cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
651      Imm = Log2_64(Imm);
652      ISDOpcode = ISD::SRA;
653    }
654
655    // Transform "urem x, pow2" -> "and x, pow2-1".
656    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
657        isPowerOf2_64(Imm)) {
658      --Imm;
659      ISDOpcode = ISD::AND;
660    }
661
662    unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
663                                      Op0IsKill, Imm, VT.getSimpleVT());
664    if (!ResultReg)
665      return false;
666
667    // We successfully emitted code for the given LLVM Instruction.
668    updateValueMap(I, ResultReg);
669    return true;
670  }
671
672  unsigned Op1 = getRegForValue(I->getOperand(1));
673  if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
674    return false;
675  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
676
677  // Now we have both operands in registers. Emit the instruction.
678  unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
679                                   ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
680  if (!ResultReg)
681    // Target-specific code wasn't able to find a machine opcode for
682    // the given ISD opcode and type. Halt "fast" selection and bail.
683    return false;
684
685  // We successfully emitted code for the given LLVM Instruction.
686  updateValueMap(I, ResultReg);
687  return true;
688}
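
// Concrete instances of the constant-RHS rewrites above (hypothetical IR):
//
//   %q = sdiv exact i32 %x, 8   ; emitted as an arithmetic shift right by 3
//   %r = urem i32 %x, 16        ; emitted as an AND with 15
//
// Both rewrites rely on the divisor/modulus being a power of two, and the
// sdiv case additionally relies on the 'exact' flag guaranteeing a zero
// remainder.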
689
690bool FastISel::selectGetElementPtr(const User *I) {
691  unsigned N = getRegForValue(I->getOperand(0));
692  if (!N) // Unhandled operand. Halt "fast" selection and bail.
693    return false;
694  bool NIsKill = hasTrivialKill(I->getOperand(0));
695
696  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
697  // into a single N = N + TotalOffset.
698  uint64_t TotalOffs = 0;
699  // FIXME: What's a good SWAG number for MaxOffs?
700  uint64_t MaxOffs = 2048;
701  MVT VT = TLI.getPointerTy(DL);
702  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
703       GTI != E; ++GTI) {
704    const Value *Idx = GTI.getOperand();
705    if (StructType *StTy = GTI.getStructTypeOrNull()) {
706      uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
707      if (Field) {
708        // N = N + Offset
709        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
710        if (TotalOffs >= MaxOffs) {
711          N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
712          if (!N) // Unhandled operand. Halt "fast" selection and bail.
713            return false;
714          NIsKill = true;
715          TotalOffs = 0;
716        }
717      }
718    } else {
719      Type *Ty = GTI.getIndexedType();
720
721      // If this is a constant subscript, handle it quickly.
722      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
723        if (CI->isZero())
724          continue;
725        // N = N + Offset
726        uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
727        TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
728        if (TotalOffs >= MaxOffs) {
729          N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
730          if (!N) // Unhandled operand. Halt "fast" selection and bail.
731            return false;
732          NIsKill = true;
733          TotalOffs = 0;
734        }
735        continue;
736      }
737      if (TotalOffs) {
738        N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
739        if (!N) // Unhandled operand. Halt "fast" selection and bail.
740          return false;
741        NIsKill = true;
742        TotalOffs = 0;
743      }
744
745      // N = N + Idx * ElementSize;
746      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
747      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
748      unsigned IdxN = Pair.first;
749      bool IdxNIsKill = Pair.second;
750      if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
751        return false;
752
753      if (ElementSize != 1) {
754        IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
755        if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
756          return false;
757        IdxNIsKill = true;
758      }
759      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
760      if (!N) // Unhandled operand. Halt "fast" selection and bail.
761        return false;
762    }
763  }
764  if (TotalOffs) {
765    N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
766    if (!N) // Unhandled operand. Halt "fast" selection and bail.
767      return false;
768  }
769
770  // We successfully emitted code for the given LLVM Instruction.
771  updateValueMap(I, N);
772  return true;
773}
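
// For illustration (a hypothetical GEP; the actual offsets depend on the
// DataLayout):
//
//   %p = getelementptr {i32, i32}, {i32, i32}* %s, i64 2, i32 1
//
// is selected by accumulating 2 * sizeof({i32, i32}) plus the offset of field
// 1 into TotalOffs and emitting a single ADD at the end, instead of one ADD
// per index. Only a non-constant index forces an intermediate MUL/ADD pair.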
774
775bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
776                                   const CallInst *CI, unsigned StartIdx) {
777  for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
778    Value *Val = CI->getArgOperand(i);
779    // Check for constants and encode them with a StackMaps::ConstantOp prefix.
780    if (const auto *C = dyn_cast<ConstantInt>(Val)) {
781      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
782      Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
783    } else if (isa<ConstantPointerNull>(Val)) {
784      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
785      Ops.push_back(MachineOperand::CreateImm(0));
786    } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
787      // Values coming from a stack location also require a special encoding,
788      // but that is added later on by the target-specific frame index
789      // elimination implementation.
790      auto SI = FuncInfo.StaticAllocaMap.find(AI);
791      if (SI != FuncInfo.StaticAllocaMap.end())
792        Ops.push_back(MachineOperand::CreateFI(SI->second));
793      else
794        return false;
795    } else {
796      unsigned Reg = getRegForValue(Val);
797      if (!Reg)
798        return false;
799      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
800    }
801  }
802  return true;
803}
804
805bool FastISel::selectStackmap(const CallInst *I) {
806  // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
807  //                                  [live variables...])
808  assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
809         "Stackmap cannot return a value.");
810
811  // The stackmap intrinsic only records the live variables (the arguments
812  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
813  // intrinsic, this won't be lowered to a function call. This means we don't
814  // have to worry about calling conventions and target-specific lowering code.
815  // Instead we perform the call lowering right here.
816  //
817  // CALLSEQ_START(0, 0...)
818  // STACKMAP(id, nbytes, ...)
819  // CALLSEQ_END(0, 0)
820  //
821  SmallVector<MachineOperand, 32> Ops;
822
823  // Add the <id> and <numBytes> constants.
824  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
825         "Expected a constant integer.");
826  const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
827  Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
828
829  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
830         "Expected a constant integer.");
831  const auto *NumBytes =
832      cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
833  Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
834
835  // Push live variables for the stack map (skipping the first two arguments
836  // <id> and <numBytes>).
837  if (!addStackMapLiveVars(Ops, I, 2))
838    return false;
839
840  // We are not adding any register mask info here, because the stackmap doesn't
841  // clobber anything.
842
843  // Add scratch registers as implicit def and early clobber.
844  CallingConv::ID CC = I->getCallingConv();
845  const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
846  for (unsigned i = 0; ScratchRegs[i]; ++i)
847    Ops.push_back(MachineOperand::CreateReg(
848        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
849        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
850
851  // Issue CALLSEQ_START
852  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
853  auto Builder =
854      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
855  const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
856  for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
857    Builder.addImm(0);
858
859  // Issue STACKMAP.
860  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
861                                    TII.get(TargetOpcode::STACKMAP));
862  for (auto const &MO : Ops)
863    MIB.add(MO);
864
865  // Issue CALLSEQ_END
866  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
867  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
868      .addImm(0)
869      .addImm(0);
870
871  // Inform the Frame Information that we have a stackmap in this function.
872  FuncInfo.MF->getFrameInfo().setHasStackMap();
873
874  return true;
875}
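
// A minimal example of the intrinsic handled above (illustrative only):
//
//   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 7, i32 0,
//                                                         i32 %live)
//
// records the location of %live in the emitted stack map under ID 7, requests
// no shadow bytes, and generates no actual call.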
876
877/// Lower an argument list according to the target calling convention.
878///
879/// This is a helper for lowering intrinsics that follow a target calling
880/// convention or require stack pointer adjustment. Only a subset of the
881/// intrinsic's operands need to participate in the calling convention.
882bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
883                                 unsigned NumArgs, const Value *Callee,
884                                 bool ForceRetVoidTy, CallLoweringInfo &CLI) {
885  ArgListTy Args;
886  Args.reserve(NumArgs);
887
888  // Populate the argument list.
889  ImmutableCallSite CS(CI);
890  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
891    Value *V = CI->getOperand(ArgI);
892
893    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
894
895    ArgListEntry Entry;
896    Entry.Val = V;
897    Entry.Ty = V->getType();
898    Entry.setAttributes(&CS, ArgI);
899    Args.push_back(Entry);
900  }
901
902  Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
903                               : CI->getType();
904  CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
905
906  return lowerCallTo(CLI);
907}
908
909FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
910    const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
911    StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
912  SmallString<32> MangledName;
913  Mangler::getNameWithPrefix(MangledName, Target, DL);
914  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
915  return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
916}
917
918bool FastISel::selectPatchpoint(const CallInst *I) {
919  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
920  //                                                 i32 <numBytes>,
921  //                                                 i8* <target>,
922  //                                                 i32 <numArgs>,
923  //                                                 [Args...],
924  //                                                 [live variables...])
925  CallingConv::ID CC = I->getCallingConv();
926  bool IsAnyRegCC = CC == CallingConv::AnyReg;
927  bool HasDef = !I->getType()->isVoidTy();
928  Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
929
930  // Get the real number of arguments participating in the call <numArgs>
931  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
932         "Expected a constant integer.");
933  const auto *NumArgsVal =
934      cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
935  unsigned NumArgs = NumArgsVal->getZExtValue();
936
937  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
938  // This includes all meta-operands up to but not including CC.
939  unsigned NumMetaOpers = PatchPointOpers::CCPos;
940  assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
941         "Not enough arguments provided to the patchpoint intrinsic");
942
943  // For AnyRegCC the arguments are lowered later on manually.
944  unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
945  CallLoweringInfo CLI;
946  CLI.setIsPatchPoint();
947  if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
948    return false;
949
950  assert(CLI.Call && "No call instruction specified.");
951
952  SmallVector<MachineOperand, 32> Ops;
953
954  // Add an explicit result reg if we use the anyreg calling convention.
955  if (IsAnyRegCC && HasDef) {
956    assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
957    CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
958    CLI.NumResultRegs = 1;
959    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
960  }
961
962  // Add the <id> and <numBytes> constants.
963  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
964         "Expected a constant integer.");
965  const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
966  Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
967
968  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
969         "Expected a constant integer.");
970  const auto *NumBytes =
971      cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
972  Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
973
974  // Add the call target.
975  if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
976    uint64_t CalleeConstAddr =
977      cast<ConstantInt>(C->getOperand(0))->getZExtValue();
978    Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
979  } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
980    if (C->getOpcode() == Instruction::IntToPtr) {
981      uint64_t CalleeConstAddr =
982        cast<ConstantInt>(C->getOperand(0))->getZExtValue();
983      Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
984    } else
985      llvm_unreachable("Unsupported ConstantExpr.");
986  } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
987    Ops.push_back(MachineOperand::CreateGA(GV, 0));
988  } else if (isa<ConstantPointerNull>(Callee))
989    Ops.push_back(MachineOperand::CreateImm(0));
990  else
991    llvm_unreachable("Unsupported callee address.");
992
993  // Adjust <numArgs> to account for any arguments that have been passed on
994  // the stack instead.
995  unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
996  Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
997
998  // Add the calling convention
999  Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
1000
1001  // Add the arguments we omitted previously. The register allocator should
1002  // place these in any free register.
1003  if (IsAnyRegCC) {
1004    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
1005      unsigned Reg = getRegForValue(I->getArgOperand(i));
1006      if (!Reg)
1007        return false;
1008      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1009    }
1010  }
1011
1012  // Push the arguments from the call instruction.
1013  for (auto Reg : CLI.OutRegs)
1014    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1015
1016  // Push live variables for the stack map.
1017  if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
1018    return false;
1019
1020  // Push the register mask info.
1021  Ops.push_back(MachineOperand::CreateRegMask(
1022      TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
1023
1024  // Add scratch registers as implicit def and early clobber.
1025  const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
1026  for (unsigned i = 0; ScratchRegs[i]; ++i)
1027    Ops.push_back(MachineOperand::CreateReg(
1028        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
1029        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
1030
1031  // Add implicit defs (return values).
1032  for (auto Reg : CLI.InRegs)
1033    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
1034                                            /*isImp=*/true));
1035
1036  // Insert the patchpoint instruction before the call generated by the target.
1037  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
1038                                    TII.get(TargetOpcode::PATCHPOINT));
1039
1040  for (auto &MO : Ops)
1041    MIB.add(MO);
1042
1043  MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
1044
1045  // Delete the original call instruction.
1046  CLI.Call->eraseFromParent();
1047
1048  // Inform the Frame Information that we have a patchpoint in this function.
1049  FuncInfo.MF->getFrameInfo().setHasPatchPoint();
1050
1051  if (CLI.NumResultRegs)
1052    updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
1053  return true;
1054}
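
// A minimal example of the form documented above (illustrative only):
//
//   call void (i64, i32, i8*, i32, ...)
//       @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* null, i32 0)
//
// reserves 15 patchable bytes under ID 3; with a null target nothing is
// called, and any trailing operands would be recorded as live values in the
// stack map, exactly as for llvm.experimental.stackmap.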
1055
1056bool FastISel::selectXRayCustomEvent(const CallInst *I) {
1057  const auto &Triple = TM.getTargetTriple();
1058  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
1059    return true; // don't do anything to this instruction.
1060  SmallVector<MachineOperand, 8> Ops;
1061  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
1062                                          /*isDef=*/false));
1063  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
1064                                          /*isDef=*/false));
1065  MachineInstrBuilder MIB =
1066      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1067              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
1068  for (auto &MO : Ops)
1069    MIB.add(MO);
1070
1071  // The patchable event call emitted above is lowered properly later.
1072  return true;
1073}
1074
1075bool FastISel::selectXRayTypedEvent(const CallInst *I) {
1076  const auto &Triple = TM.getTargetTriple();
1077  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
1078    return true; // don't do anything to this instruction.
1079  SmallVector<MachineOperand, 8> Ops;
1080  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
1081                                          /*isDef=*/false));
1082  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
1083                                          /*isDef=*/false));
1084  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
1085                                          /*isDef=*/false));
1086  MachineInstrBuilder MIB =
1087      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1088              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
1089  for (auto &MO : Ops)
1090    MIB.add(MO);
1091
1092  // The patchable typed event call emitted above is lowered properly later.
1093  return true;
1094}
1095
1096/// Returns an AttributeList representing the attributes applied to the return
1097/// value of the given call.
1098static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
1099  SmallVector<Attribute::AttrKind, 2> Attrs;
1100  if (CLI.RetSExt)
1101    Attrs.push_back(Attribute::SExt);
1102  if (CLI.RetZExt)
1103    Attrs.push_back(Attribute::ZExt);
1104  if (CLI.IsInReg)
1105    Attrs.push_back(Attribute::InReg);
1106
1107  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
1108                            Attrs);
1109}
1110
1111bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
1112                           unsigned NumArgs) {
1113  MCContext &Ctx = MF->getContext();
1114  SmallString<32> MangledName;
1115  Mangler::getNameWithPrefix(MangledName, SymName, DL);
1116  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
1117  return lowerCallTo(CI, Sym, NumArgs);
1118}
1119
1120bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
1121                           unsigned NumArgs) {
1122  ImmutableCallSite CS(CI);
1123
1124  FunctionType *FTy = CS.getFunctionType();
1125  Type *RetTy = CS.getType();
1126
1127  ArgListTy Args;
1128  Args.reserve(NumArgs);
1129
1130  // Populate the argument list.
1131  // Attributes for args start at offset 1, after the return attribute.
1132  for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
1133    Value *V = CI->getOperand(ArgI);
1134
1135    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
1136
1137    ArgListEntry Entry;
1138    Entry.Val = V;
1139    Entry.Ty = V->getType();
1140    Entry.setAttributes(&CS, ArgI);
1141    Args.push_back(Entry);
1142  }
1143  TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
1144
1145  CallLoweringInfo CLI;
1146  CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
1147
1148  return lowerCallTo(CLI);
1149}
1150
1151bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
1152  // Handle the incoming return values from the call.
1153  CLI.clearIns();
1154  SmallVector<EVT, 4> RetTys;
1155  ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
1156
1157  SmallVector<ISD::OutputArg, 4> Outs;
1158  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
1159
1160  bool CanLowerReturn = TLI.CanLowerReturn(
1161      CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
1162
1163  // FIXME: sret demotion isn't supported yet - bail out.
1164  if (!CanLowerReturn)
1165    return false;
1166
1167  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
1168    EVT VT = RetTys[I];
1169    MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
1170    unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
1171    for (unsigned i = 0; i != NumRegs; ++i) {
1172      ISD::InputArg MyFlags;
1173      MyFlags.VT = RegisterVT;
1174      MyFlags.ArgVT = VT;
1175      MyFlags.Used = CLI.IsReturnValueUsed;
1176      if (CLI.RetSExt)
1177        MyFlags.Flags.setSExt();
1178      if (CLI.RetZExt)
1179        MyFlags.Flags.setZExt();
1180      if (CLI.IsInReg)
1181        MyFlags.Flags.setInReg();
1182      CLI.Ins.push_back(MyFlags);
1183    }
1184  }
1185
1186  // Handle all of the outgoing arguments.
1187  CLI.clearOuts();
1188  for (auto &Arg : CLI.getArgs()) {
1189    Type *FinalType = Arg.Ty;
1190    if (Arg.IsByVal)
1191      FinalType = cast<PointerType>(Arg.Ty)->getElementType();
1192    bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
1193        FinalType, CLI.CallConv, CLI.IsVarArg);
1194
1195    ISD::ArgFlagsTy Flags;
1196    if (Arg.IsZExt)
1197      Flags.setZExt();
1198    if (Arg.IsSExt)
1199      Flags.setSExt();
1200    if (Arg.IsInReg)
1201      Flags.setInReg();
1202    if (Arg.IsSRet)
1203      Flags.setSRet();
1204    if (Arg.IsSwiftSelf)
1205      Flags.setSwiftSelf();
1206    if (Arg.IsSwiftError)
1207      Flags.setSwiftError();
1208    if (Arg.IsCFGuardTarget)
1209      Flags.setCFGuardTarget();
1210    if (Arg.IsByVal)
1211      Flags.setByVal();
1212    if (Arg.IsInAlloca) {
1213      Flags.setInAlloca();
1214      // Set the byval flag for CCAssignFn callbacks that don't know about
1215      // inalloca. This way we can know how many bytes we should've allocated
1216      // and how many bytes a callee cleanup function will pop.  If we port
1217      // inalloca to more targets, we'll have to add custom inalloca handling in
1218      // the various CC lowering callbacks.
1219      Flags.setByVal();
1220    }
1221    if (Arg.IsByVal || Arg.IsInAlloca) {
1222      PointerType *Ty = cast<PointerType>(Arg.Ty);
1223      Type *ElementTy = Ty->getElementType();
1224      unsigned FrameSize =
1225          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
1226
1227      // For ByVal, alignment should come from the frontend; the backend will
1228      // guess if this info is not there, but there are cases it cannot get right.
1229      unsigned FrameAlign = Arg.Alignment;
1230      if (!FrameAlign)
1231        FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
1232      Flags.setByValSize(FrameSize);
1233      Flags.setByValAlign(Align(FrameAlign));
1234    }
1235    if (Arg.IsNest)
1236      Flags.setNest();
1237    if (NeedsRegBlock)
1238      Flags.setInConsecutiveRegs();
1239    Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
1240
1241    CLI.OutVals.push_back(Arg.Val);
1242    CLI.OutFlags.push_back(Flags);
1243  }
1244
1245  if (!fastLowerCall(CLI))
1246    return false;
1247
1248  // Set all unused physreg defs as dead.
1249  assert(CLI.Call && "No call instruction specified.");
1250  CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
1251
1252  if (CLI.NumResultRegs && CLI.CS)
1253    updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
1254
1255  // Set labels for heapallocsite call.
1256  if (CLI.CS)
1257    if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"))
1258      CLI.Call->setHeapAllocMarker(*MF, MD);
1259
1260  return true;
1261}
1262
1263bool FastISel::lowerCall(const CallInst *CI) {
1264  ImmutableCallSite CS(CI);
1265
1266  FunctionType *FuncTy = CS.getFunctionType();
1267  Type *RetTy = CS.getType();
1268
1269  ArgListTy Args;
1270  ArgListEntry Entry;
1271  Args.reserve(CS.arg_size());
1272
1273  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1274       i != e; ++i) {
1275    Value *V = *i;
1276
1277    // Skip empty types
1278    if (V->getType()->isEmptyTy())
1279      continue;
1280
1281    Entry.Val = V;
1282    Entry.Ty = V->getType();
1283
1284    // Skip the first return-type Attribute to get to params.
1285    Entry.setAttributes(&CS, i - CS.arg_begin());
1286    Args.push_back(Entry);
1287  }
1288
1289  // Check if target-independent constraints permit a tail call here.
1290  // Target-dependent constraints are checked within fastLowerCall.
1291  bool IsTailCall = CI->isTailCall();
1292  if (IsTailCall && !isInTailCallPosition(CS, TM))
1293    IsTailCall = false;
1294  if (IsTailCall && MF->getFunction()
1295                            .getFnAttribute("disable-tail-calls")
1296                            .getValueAsString() == "true")
1297    IsTailCall = false;
1298
1299  CallLoweringInfo CLI;
1300  CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
1301      .setTailCall(IsTailCall);
1302
1303  return lowerCallTo(CLI);
1304}
1305
1306bool FastISel::selectCall(const User *I) {
1307  const CallInst *Call = cast<CallInst>(I);
1308
1309  // Handle simple inline asms.
1310  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
1311    // If the inline asm has side effects, then make sure that no local value
1312    // lives across by flushing the local value map.
1313    if (IA->hasSideEffects())
1314      flushLocalValueMap();
1315
1316    // Don't attempt to handle constraints.
1317    if (!IA->getConstraintString().empty())
1318      return false;
1319
1320    unsigned ExtraInfo = 0;
1321    if (IA->hasSideEffects())
1322      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
1323    if (IA->isAlignStack())
1324      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
1325    ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
1326
1327    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1328            TII.get(TargetOpcode::INLINEASM))
1329        .addExternalSymbol(IA->getAsmString().c_str())
1330        .addImm(ExtraInfo);
1331    return true;
1332  }
1333
1334  // Handle intrinsic function calls.
1335  if (const auto *II = dyn_cast<IntrinsicInst>(Call))
1336    return selectIntrinsicCall(II);
1337
1338  // Usually, it does not make sense to initialize a value,
1339  // make an unrelated function call and use the value, because
1340  // it tends to be spilled on the stack. So, we move the pointer
1341  // to the last local value to the beginning of the block, so that
1342  // all the values which have already been materialized
1343  // appear after the call. It also makes sense to skip intrinsics
1344  // since they tend to be inlined.
1345  flushLocalValueMap();
1346
1347  return lowerCall(Call);
1348}
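
// The inline-asm path above only covers the degenerate case with no
// constraints to satisfy, e.g. (hypothetical IR):
//
//   call void asm sideeffect "nop", ""()
//
// Anything with operands or a non-empty constraint string is rejected here so
// that the SelectionDAG path can handle it instead.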
1349
1350bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
1351  switch (II->getIntrinsicID()) {
1352  default:
1353    break;
1354  // At -O0 we don't care about the lifetime intrinsics.
1355  case Intrinsic::lifetime_start:
1356  case Intrinsic::lifetime_end:
1357  // The donothing intrinsic does, well, nothing.
1358  case Intrinsic::donothing:
1359  // Neither does the sideeffect intrinsic.
1360  case Intrinsic::sideeffect:
1361  // Neither does the assume intrinsic; it's also OK not to codegen its operand.
1362  case Intrinsic::assume:
1363    return true;
1364  case Intrinsic::dbg_declare: {
1365    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
1366    assert(DI->getVariable() && "Missing variable");
1367    if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
1368      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1369      return true;
1370    }
1371
1372    const Value *Address = DI->getAddress();
1373    if (!Address || isa<UndefValue>(Address)) {
1374      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1375      return true;
1376    }
1377
1378    // Byval arguments with frame indices were already handled after argument
1379    // lowering and before isel.
1380    const auto *Arg =
1381        dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
1382    if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
1383      return true;
1384
1385    Optional<MachineOperand> Op;
1386    if (unsigned Reg = lookUpRegForValue(Address))
1387      Op = MachineOperand::CreateReg(Reg, false);
1388
1389    // If we have a VLA that has a "use" in a metadata node that's then used
1390    // here but it has no other uses, then we have a problem. E.g.,
1391    //
1392    //   int foo (const int *x) {
1393    //     char a[*x];
1394    //     return 0;
1395    //   }
1396    //
1397    // If we assign 'a' a vreg and fast isel later on has to use the selection
1398    // DAG isel, it will want to copy the value to the vreg. However, there are
1399    // no uses, which goes counter to what selection DAG isel expects.
1400    if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
1401        (!isa<AllocaInst>(Address) ||
1402         !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
1403      Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
1404                                     false);
1405
1406    if (Op) {
1407      assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
1408             "Expected inlined-at fields to agree");
1409      // A dbg.declare describes the address of a source variable, so lower it
1410      // into an indirect DBG_VALUE.
1411      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1412              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
1413              *Op, DI->getVariable(), DI->getExpression());
1414    } else {
1415      // We can't yet handle anything else here because it would require
1416      // generating code, thus altering codegen because of debug info.
1417      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1418    }
1419    return true;
1420  }
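  // Illustrative sketch of the result (example names are hypothetical): for
  // "call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !var,
  // metadata !DIExpression())" where %x.addr already has a vreg, the code
  // above emits roughly
  //   DBG_VALUE %vreg, 0, !var, !DIExpression()
  // i.e. an indirect DBG_VALUE whose register operand holds the variable's
  // address rather than its value.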
1421  case Intrinsic::dbg_value: {
1422    // This form of DBG_VALUE is target-independent.
1423    const DbgValueInst *DI = cast<DbgValueInst>(II);
1424    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1425    const Value *V = DI->getValue();
1426    assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
1427           "Expected inlined-at fields to agree");
1428    if (!V) {
1429      // Currently the optimizer can produce this; insert an undef to
1430      // help debugging.  Probably the optimizer should not do this.
1431      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
1432              DI->getVariable(), DI->getExpression());
1433    } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
1434      if (CI->getBitWidth() > 64)
1435        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1436            .addCImm(CI)
1437            .addImm(0U)
1438            .addMetadata(DI->getVariable())
1439            .addMetadata(DI->getExpression());
1440      else
1441        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1442            .addImm(CI->getZExtValue())
1443            .addImm(0U)
1444            .addMetadata(DI->getVariable())
1445            .addMetadata(DI->getExpression());
1446    } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
1447      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1448          .addFPImm(CF)
1449          .addImm(0U)
1450          .addMetadata(DI->getVariable())
1451          .addMetadata(DI->getExpression());
1452    } else if (unsigned Reg = lookUpRegForValue(V)) {
1453      // FIXME: This does not handle register-indirect values at offset 0.
1454      bool IsIndirect = false;
1455      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
1456              DI->getVariable(), DI->getExpression());
1457    } else {
1458      // We can't yet handle anything else here because it would require
1459      // generating code, thus altering codegen because of debug info.
1460      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1461    }
1462    return true;
1463  }
1464  case Intrinsic::dbg_label: {
1465    const DbgLabelInst *DI = cast<DbgLabelInst>(II);
1466    assert(DI->getLabel() && "Missing label");
1467    if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
1468      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1469      return true;
1470    }
1471
1472    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1473            TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
1474    return true;
1475  }
1476  case Intrinsic::objectsize:
1477    llvm_unreachable("llvm.objectsize.* should have been lowered already");
1478
1479  case Intrinsic::is_constant:
1480    llvm_unreachable("llvm.is.constant.* should have been lowered already");
1481
1482  case Intrinsic::launder_invariant_group:
1483  case Intrinsic::strip_invariant_group:
1484  case Intrinsic::expect: {
1485    unsigned ResultReg = getRegForValue(II->getArgOperand(0));
1486    if (!ResultReg)
1487      return false;
1488    updateValueMap(II, ResultReg);
1489    return true;
1490  }
1491  case Intrinsic::experimental_stackmap:
1492    return selectStackmap(II);
1493  case Intrinsic::experimental_patchpoint_void:
1494  case Intrinsic::experimental_patchpoint_i64:
1495    return selectPatchpoint(II);
1496
1497  case Intrinsic::xray_customevent:
1498    return selectXRayCustomEvent(II);
1499  case Intrinsic::xray_typedevent:
1500    return selectXRayTypedEvent(II);
1501  }
1502
1503  return fastLowerIntrinsicCall(II);
1504}
1505
1506bool FastISel::selectCast(const User *I, unsigned Opcode) {
1507  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
1508  EVT DstVT = TLI.getValueType(DL, I->getType());
1509
1510  if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
1511      !DstVT.isSimple())
1512    // Unhandled type. Halt "fast" selection and bail.
1513    return false;
1514
1515  // Check if the destination type is legal.
1516  if (!TLI.isTypeLegal(DstVT))
1517    return false;
1518
1519  // Check if the source operand is legal.
1520  if (!TLI.isTypeLegal(SrcVT))
1521    return false;
1522
1523  unsigned InputReg = getRegForValue(I->getOperand(0));
1524  if (!InputReg)
1525    // Unhandled operand.  Halt "fast" selection and bail.
1526    return false;
1527
1528  bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
1529
1530  unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
1531                                  Opcode, InputReg, InputRegIsKill);
1532  if (!ResultReg)
1533    return false;
1534
1535  updateValueMap(I, ResultReg);
1536  return true;
1537}
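// For example (illustrative, assuming i8 and i32 are both legal on the
// target): for "%t = zext i8 %x to i32", selectCast is invoked with
// Opcode == ISD::ZERO_EXTEND and forwards to
//   fastEmit_r(MVT::i8, MVT::i32, ISD::ZERO_EXTEND, InputReg, InputRegIsKill)
// which the target's generated fast-isel code may match to a single
// zero-extending move.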
1538
1539bool FastISel::selectBitCast(const User *I) {
1540  // If the bitcast doesn't change the type, just use the operand value.
1541  if (I->getType() == I->getOperand(0)->getType()) {
1542    unsigned Reg = getRegForValue(I->getOperand(0));
1543    if (!Reg)
1544      return false;
1545    updateValueMap(I, Reg);
1546    return true;
1547  }
1548
1549  // Bitcasts of other values become reg-reg copies or BITCAST operators.
1550  EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
1551  EVT DstEVT = TLI.getValueType(DL, I->getType());
1552  if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
1553      !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
1554    // Unhandled type. Halt "fast" selection and bail.
1555    return false;
1556
1557  MVT SrcVT = SrcEVT.getSimpleVT();
1558  MVT DstVT = DstEVT.getSimpleVT();
1559  unsigned Op0 = getRegForValue(I->getOperand(0));
1560  if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
1561    return false;
1562  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
1563
1564  // First, try to perform the bitcast by inserting a reg-reg copy.
1565  unsigned ResultReg = 0;
1566  if (SrcVT == DstVT) {
1567    const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
1568    const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
1569    // Don't attempt a cross-class copy. It will likely fail.
1570    if (SrcClass == DstClass) {
1571      ResultReg = createResultReg(DstClass);
1572      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1573              TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
1574    }
1575  }
1576
1577  // If the reg-reg copy failed, select a BITCAST opcode.
1578  if (!ResultReg)
1579    ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
1580
1581  if (!ResultReg)
1582    return false;
1583
1584  updateValueMap(I, ResultReg);
1585  return true;
1586}
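// For example (illustrative): "bitcast i8* %p to i32*" changes the IR type
// but both sides map to the same pointer MVT, so the SrcVT == DstVT path
// above emits a plain COPY; "bitcast float %f to i32" has distinct MVTs
// (f32 vs. i32) and instead goes through fastEmit_r(..., ISD::BITCAST, ...).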
1587
1588// Remove local value instructions, starting from the instruction after
1589// SavedLastLocalValue and up to the current function insert point.
1590void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
1591{
1592  MachineInstr *CurLastLocalValue = getLastLocalValue();
1593  if (CurLastLocalValue != SavedLastLocalValue) {
1594    // Find the first local value instruction to be deleted.
1595    // This is the instruction after SavedLastLocalValue if it is non-NULL.
1596    // Otherwise it's the first instruction in the block.
1597    MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
1598    if (SavedLastLocalValue)
1599      ++FirstDeadInst;
1600    else
1601      FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
1602    setLastLocalValue(SavedLastLocalValue);
1603    removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
1604  }
1605}
1606
1607bool FastISel::selectInstruction(const Instruction *I) {
1608  MachineInstr *SavedLastLocalValue = getLastLocalValue();
1609  // Just before the terminator instruction, insert instructions to
1610  // feed PHI nodes in successor blocks.
1611  if (I->isTerminator()) {
1612    if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
1613      // PHI node handling may have generated local value instructions,
1614      // even though it failed to handle all PHI nodes.
1615      // We remove these instructions because SelectionDAGISel will generate
1616      // them again.
1617      removeDeadLocalValueCode(SavedLastLocalValue);
1618      return false;
1619    }
1620  }
1621
1622  // FastISel does not handle any operand bundles except OB_funclet.
1623  if (ImmutableCallSite CS = ImmutableCallSite(I))
1624    for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
1625      if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
1626        return false;
1627
1628  DbgLoc = I->getDebugLoc();
1629
1630  SavedInsertPt = FuncInfo.InsertPt;
1631
1632  if (const auto *Call = dyn_cast<CallInst>(I)) {
1633    const Function *F = Call->getCalledFunction();
1634    LibFunc Func;
1635
1636    // As a special case, don't handle calls to builtin library functions that
1637    // may be translated directly to target instructions.
1638    if (F && !F->hasLocalLinkage() && F->hasName() &&
1639        LibInfo->getLibFunc(F->getName(), Func) &&
1640        LibInfo->hasOptimizedCodeGen(Func))
1641      return false;
1642
1643    // Don't handle Intrinsic::trap if a trap function is specified.
1644    if (F && F->getIntrinsicID() == Intrinsic::trap &&
1645        Call->hasFnAttr("trap-func-name"))
1646      return false;
1647  }
1648
1649  // First, try doing target-independent selection.
1650  if (!SkipTargetIndependentISel) {
1651    if (selectOperator(I, I->getOpcode())) {
1652      ++NumFastIselSuccessIndependent;
1653      DbgLoc = DebugLoc();
1654      return true;
1655    }
1656    // Remove dead code.
1657    recomputeInsertPt();
1658    if (SavedInsertPt != FuncInfo.InsertPt)
1659      removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
1660    SavedInsertPt = FuncInfo.InsertPt;
1661  }
1662  // Next, try calling the target to attempt to handle the instruction.
1663  if (fastSelectInstruction(I)) {
1664    ++NumFastIselSuccessTarget;
1665    DbgLoc = DebugLoc();
1666    return true;
1667  }
1668  // Remove dead code.
1669  recomputeInsertPt();
1670  if (SavedInsertPt != FuncInfo.InsertPt)
1671    removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
1672
1673  DbgLoc = DebugLoc();
1674  // Undo phi node updates, because they will be added again by SelectionDAG.
1675  if (I->isTerminator()) {
1676    // PHI node handling may have generated local value instructions.
1677    // We remove them because SelectionDAGISel will generate them again.
1678    removeDeadLocalValueCode(SavedLastLocalValue);
1679    FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
1680  }
1681  return false;
1682}
1683
1684/// Emit an unconditional branch to the given block, unless it is the immediate
1685/// (fall-through) successor, and update the CFG.
1686void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
1687                              const DebugLoc &DbgLoc) {
1688  if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
1689      FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
1690    // If this were the only non-debug instruction in the block we would
1691    // still emit it, for more accurate line information. Otherwise this is
1692    // the unconditional fall-through case, which needs no instructions.
1693  } else {
1694    // The unconditional branch case.
1695    TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
1696                     SmallVector<MachineOperand, 0>(), DbgLoc);
1697  }
1698  if (FuncInfo.BPI) {
1699    auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
1700        FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
1701    FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
1702  } else
1703    FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
1704}
1705
1706void FastISel::finishCondBranch(const BasicBlock *BranchBB,
1707                                MachineBasicBlock *TrueMBB,
1708                                MachineBasicBlock *FalseMBB) {
1709  // Add TrueMBB as a successor unless it is equal to FalseMBB: this can
1710  // happen in degenerate IR, and MachineIR forbids having a block appear
1711  // twice in the successor/predecessor lists.
1712  if (TrueMBB != FalseMBB) {
1713    if (FuncInfo.BPI) {
1714      auto BranchProbability =
1715          FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
1716      FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
1717    } else
1718      FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
1719  }
1720
1721  fastEmitBranch(FalseMBB, DbgLoc);
1722}
1723
1724/// Emit an FNeg operation.
1725bool FastISel::selectFNeg(const User *I, const Value *In) {
1726  unsigned OpReg = getRegForValue(In);
1727  if (!OpReg)
1728    return false;
1729  bool OpRegIsKill = hasTrivialKill(In);
1730
1731  // If the target has ISD::FNEG, use it.
1732  EVT VT = TLI.getValueType(DL, I->getType());
1733  unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
1734                                  OpReg, OpRegIsKill);
1735  if (ResultReg) {
1736    updateValueMap(I, ResultReg);
1737    return true;
1738  }
1739
1740  // Bitcast the value to integer, twiddle the sign bit with xor,
1741  // and then bitcast it back to floating-point.
1742  if (VT.getSizeInBits() > 64)
1743    return false;
1744  EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
1745  if (!TLI.isTypeLegal(IntVT))
1746    return false;
1747
1748  unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
1749                               ISD::BITCAST, OpReg, OpRegIsKill);
1750  if (!IntReg)
1751    return false;
1752
1753  unsigned IntResultReg = fastEmit_ri_(
1754      IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
1755      UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
1756  if (!IntResultReg)
1757    return false;
1758
1759  ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
1760                         IntResultReg, /*IsKill=*/true);
1761  if (!ResultReg)
1762    return false;
1763
1764  updateValueMap(I, ResultReg);
1765  return true;
1766}
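// Worked example (illustrative): negating a double when the target lacks
// ISD::FNEG. With VT == f64 the sign-bit mask is UINT64_C(1) << 63, so the
// fallback above emits, in effect:
//   %i   = bitcast f64 %x to i64
//   %neg = xor i64 %i, 0x8000000000000000
//   %r   = bitcast i64 %neg to f64
// which flips only the sign bit of the original value.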
1767
1768bool FastISel::selectExtractValue(const User *U) {
1769  const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
1770  if (!EVI)
1771    return false;
1772
1773  // Make sure we only try to handle extracts with a legal result.  But also
1774  // allow i1 because it's easy.
1775  EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true);
1776  if (!RealVT.isSimple())
1777    return false;
1778  MVT VT = RealVT.getSimpleVT();
1779  if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
1780    return false;
1781
1782  const Value *Op0 = EVI->getOperand(0);
1783  Type *AggTy = Op0->getType();
1784
1785  // Get the base result register.
1786  unsigned ResultReg;
1787  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
1788  if (I != FuncInfo.ValueMap.end())
1789    ResultReg = I->second;
1790  else if (isa<Instruction>(Op0))
1791    ResultReg = FuncInfo.InitializeRegForValue(Op0);
1792  else
1793    return false; // fast-isel can't handle aggregate constants at the moment
1794
1795  // Get the actual result register, which is an offset from the base register.
1796  unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
1797
1798  SmallVector<EVT, 4> AggValueVTs;
1799  ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
1800
1801  for (unsigned i = 0; i < VTIndex; i++)
1802    ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
1803
1804  updateValueMap(EVI, ResultReg);
1805  return true;
1806}
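// For example (illustrative, assuming i32 and i64 each occupy a single
// register on the target): for "%v = extractvalue {i32, i64} %agg, 1",
// ComputeLinearIndex yields VTIndex == 1, AggValueVTs is {i32, i64}, and the
// loop above advances ResultReg by one past %agg's base register, i.e. to the
// register that holds the i64 field.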
1807
1808bool FastISel::selectOperator(const User *I, unsigned Opcode) {
1809  switch (Opcode) {
1810  case Instruction::Add:
1811    return selectBinaryOp(I, ISD::ADD);
1812  case Instruction::FAdd:
1813    return selectBinaryOp(I, ISD::FADD);
1814  case Instruction::Sub:
1815    return selectBinaryOp(I, ISD::SUB);
1816  case Instruction::FSub: {
1817    // FNeg is currently represented in LLVM IR as a special case of FSub.
1818    Value *X;
1819    if (match(I, m_FNeg(m_Value(X))))
1820       return selectFNeg(I, X);
1821    return selectBinaryOp(I, ISD::FSUB);
1822  }
1823  case Instruction::Mul:
1824    return selectBinaryOp(I, ISD::MUL);
1825  case Instruction::FMul:
1826    return selectBinaryOp(I, ISD::FMUL);
1827  case Instruction::SDiv:
1828    return selectBinaryOp(I, ISD::SDIV);
1829  case Instruction::UDiv:
1830    return selectBinaryOp(I, ISD::UDIV);
1831  case Instruction::FDiv:
1832    return selectBinaryOp(I, ISD::FDIV);
1833  case Instruction::SRem:
1834    return selectBinaryOp(I, ISD::SREM);
1835  case Instruction::URem:
1836    return selectBinaryOp(I, ISD::UREM);
1837  case Instruction::FRem:
1838    return selectBinaryOp(I, ISD::FREM);
1839  case Instruction::Shl:
1840    return selectBinaryOp(I, ISD::SHL);
1841  case Instruction::LShr:
1842    return selectBinaryOp(I, ISD::SRL);
1843  case Instruction::AShr:
1844    return selectBinaryOp(I, ISD::SRA);
1845  case Instruction::And:
1846    return selectBinaryOp(I, ISD::AND);
1847  case Instruction::Or:
1848    return selectBinaryOp(I, ISD::OR);
1849  case Instruction::Xor:
1850    return selectBinaryOp(I, ISD::XOR);
1851
1852  case Instruction::FNeg:
1853    return selectFNeg(I, I->getOperand(0));
1854
1855  case Instruction::GetElementPtr:
1856    return selectGetElementPtr(I);
1857
1858  case Instruction::Br: {
1859    const BranchInst *BI = cast<BranchInst>(I);
1860
1861    if (BI->isUnconditional()) {
1862      const BasicBlock *LLVMSucc = BI->getSuccessor(0);
1863      MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
1864      fastEmitBranch(MSucc, BI->getDebugLoc());
1865      return true;
1866    }
1867
1868    // Conditional branches are not handled yet.
1869    // Halt "fast" selection and bail.
1870    return false;
1871  }
1872
1873  case Instruction::Unreachable:
1874    if (TM.Options.TrapUnreachable)
1875      return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
1876    else
1877      return true;
1878
1879  case Instruction::Alloca:
1880    // FunctionLowering has the static-sized case covered.
1881    if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
1882      return true;
1883
1884    // Dynamic-sized alloca is not handled yet.
1885    return false;
1886
1887  case Instruction::Call:
1888    // On AIX, call lowering uses the DAG-ISEL path currently so that the
1889    // callee of the direct function call instruction will be mapped to the
1890    // symbol for the function's entry point, which is distinct from the
1891    // function descriptor symbol. The latter is the symbol whose XCOFF symbol
1892    // name is the C-linkage name of the source level function.
1893    if (TM.getTargetTriple().isOSAIX())
1894      return false;
1895    return selectCall(I);
1896
1897  case Instruction::BitCast:
1898    return selectBitCast(I);
1899
1900  case Instruction::FPToSI:
1901    return selectCast(I, ISD::FP_TO_SINT);
1902  case Instruction::ZExt:
1903    return selectCast(I, ISD::ZERO_EXTEND);
1904  case Instruction::SExt:
1905    return selectCast(I, ISD::SIGN_EXTEND);
1906  case Instruction::Trunc:
1907    return selectCast(I, ISD::TRUNCATE);
1908  case Instruction::SIToFP:
1909    return selectCast(I, ISD::SINT_TO_FP);
1910
1911  case Instruction::IntToPtr: // Deliberate fall-through.
1912  case Instruction::PtrToInt: {
1913    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
1914    EVT DstVT = TLI.getValueType(DL, I->getType());
1915    if (DstVT.bitsGT(SrcVT))
1916      return selectCast(I, ISD::ZERO_EXTEND);
1917    if (DstVT.bitsLT(SrcVT))
1918      return selectCast(I, ISD::TRUNCATE);
1919    unsigned Reg = getRegForValue(I->getOperand(0));
1920    if (!Reg)
1921      return false;
1922    updateValueMap(I, Reg);
1923    return true;
1924  }
1925
1926  case Instruction::ExtractValue:
1927    return selectExtractValue(I);
1928
1929  case Instruction::PHI:
1930    llvm_unreachable("FastISel shouldn't visit PHI nodes!");
1931
1932  default:
1933    // Unhandled instruction. Halt "fast" selection and bail.
1934    return false;
1935  }
1936}
1937
1938FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
1939                   const TargetLibraryInfo *LibInfo,
1940                   bool SkipTargetIndependentISel)
1941    : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
1942      MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
1943      TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
1944      TII(*MF->getSubtarget().getInstrInfo()),
1945      TLI(*MF->getSubtarget().getTargetLowering()),
1946      TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
1947      SkipTargetIndependentISel(SkipTargetIndependentISel),
1948      LastLocalValue(nullptr), EmitStartPt(nullptr) {}
1949
1950FastISel::~FastISel() = default;
1951
1952bool FastISel::fastLowerArguments() { return false; }
1953
1954bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
1955
1956bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
1957  return false;
1958}
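// The fastLower* hooks above and the fastEmit_* hooks below form the target
// interface: these default implementations simply report failure (false or a
// zero register), targets override the cases they can handle, and anything
// that still fails falls back to SelectionDAG instruction selection.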
1959
1960unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
1961
1962unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
1963                              bool /*Op0IsKill*/) {
1964  return 0;
1965}
1966
1967unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
1968                               bool /*Op0IsKill*/, unsigned /*Op1*/,
1969                               bool /*Op1IsKill*/) {
1970  return 0;
1971}
1972
1973unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
1974  return 0;
1975}
1976
1977unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
1978                              const ConstantFP * /*FPImm*/) {
1979  return 0;
1980}
1981
1982unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
1983                               bool /*Op0IsKill*/, uint64_t /*Imm*/) {
1984  return 0;
1985}
1986
1987/// This method is a wrapper around fastEmit_ri. It first tries to emit an
1988/// instruction with an immediate operand using fastEmit_ri.
1989/// If that fails, it materializes the immediate into a register and tries
1990/// fastEmit_rr instead.
1991unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
1992                                bool Op0IsKill, uint64_t Imm, MVT ImmType) {
1993  // If this is a multiply by a power of two, emit this as a shift left.
1994  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
1995    Opcode = ISD::SHL;
1996    Imm = Log2_64(Imm);
1997  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
1998    // div x, 8 -> srl x, 3
1999    Opcode = ISD::SRL;
2000    Imm = Log2_64(Imm);
2001  }
2002
2003  // Horrible hack (to be removed): check to make sure shift amounts are
2004  // in range.
2005  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
2006      Imm >= VT.getSizeInBits())
2007    return 0;
2008
2009  // First check if immediate type is legal. If not, we can't use the ri form.
2010  unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
2011  if (ResultReg)
2012    return ResultReg;
2013  unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
2014  bool IsImmKill = true;
2015  if (!MaterialReg) {
2016    // This is a bit ugly/slow, but failing here means falling out of
2017    // fast-isel, which would be very slow.
2018    IntegerType *ITy =
2019        IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
2020    MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
2021    if (!MaterialReg)
2022      return 0;
2023    // FIXME: If the materialized register here has no uses yet then this
2024    // will be the first use and we should be able to mark it as killed.
2025    // However, the local value area for materializing constant expressions
2026    // grows down, not up, so any constant expressions we generate later that
2027    // also use 'Imm' could end up after this instruction and therefore after
2028    // this kill.
2029    IsImmKill = false;
2030  }
2031  return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
2032}
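// For example (illustrative): fastEmit_ri_(MVT::i32, ISD::MUL, Op0, true,
// 8, MVT::i32) is first rewritten above into a shift, so it behaves like
// fastEmit_ri(MVT::i32, MVT::i32, ISD::SHL, Op0, true, 3); only if the ri
// form fails is the immediate materialized into a register and fastEmit_rr
// used instead.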
2033
2034unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
2035  return MRI.createVirtualRegister(RC);
2036}
2037
2038unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
2039                                            unsigned OpNum) {
2040  if (Register::isVirtualRegister(Op)) {
2041    const TargetRegisterClass *RegClass =
2042        TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
2043    if (!MRI.constrainRegClass(Op, RegClass)) {
2044      // If it's not legal to COPY between the register classes, something
2045      // has gone very wrong before we got here.
2046      unsigned NewOp = createResultReg(RegClass);
2047      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2048              TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
2049      return NewOp;
2050    }
2051  }
2052  return Op;
2053}
2054
2055unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
2056                                 const TargetRegisterClass *RC) {
2057  unsigned ResultReg = createResultReg(RC);
2058  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2059
2060  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
2061  return ResultReg;
2062}
2063
2064unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2065                                  const TargetRegisterClass *RC, unsigned Op0,
2066                                  bool Op0IsKill) {
2067  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2068
2069  unsigned ResultReg = createResultReg(RC);
2070  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2071
2072  if (II.getNumDefs() >= 1)
2073    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2074        .addReg(Op0, getKillRegState(Op0IsKill));
2075  else {
2076    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2077        .addReg(Op0, getKillRegState(Op0IsKill));
2078    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2079            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2080  }
2081
2082  return ResultReg;
2083}
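// Note on the pattern used here and in the fastEmitInst_* helpers below: when
// the MCInstrDesc declares an explicit def, ResultReg is passed to BuildMI as
// that def; otherwise the instruction is assumed to write its result to an
// implicit def (typically a fixed physical register), which is then copied
// into ResultReg with a COPY.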
2084
2085unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2086                                   const TargetRegisterClass *RC, unsigned Op0,
2087                                   bool Op0IsKill, unsigned Op1,
2088                                   bool Op1IsKill) {
2089  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2090
2091  unsigned ResultReg = createResultReg(RC);
2092  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2093  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
2094
2095  if (II.getNumDefs() >= 1)
2096    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2097        .addReg(Op0, getKillRegState(Op0IsKill))
2098        .addReg(Op1, getKillRegState(Op1IsKill));
2099  else {
2100    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2101        .addReg(Op0, getKillRegState(Op0IsKill))
2102        .addReg(Op1, getKillRegState(Op1IsKill));
2103    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2104            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2105  }
2106  return ResultReg;
2107}
2108
2109unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
2110                                    const TargetRegisterClass *RC, unsigned Op0,
2111                                    bool Op0IsKill, unsigned Op1,
2112                                    bool Op1IsKill, unsigned Op2,
2113                                    bool Op2IsKill) {
2114  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2115
2116  unsigned ResultReg = createResultReg(RC);
2117  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2118  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
2119  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
2120
2121  if (II.getNumDefs() >= 1)
2122    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2123        .addReg(Op0, getKillRegState(Op0IsKill))
2124        .addReg(Op1, getKillRegState(Op1IsKill))
2125        .addReg(Op2, getKillRegState(Op2IsKill));
2126  else {
2127    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2128        .addReg(Op0, getKillRegState(Op0IsKill))
2129        .addReg(Op1, getKillRegState(Op1IsKill))
2130        .addReg(Op2, getKillRegState(Op2IsKill));
2131    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2132            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2133  }
2134  return ResultReg;
2135}
2136
2137unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2138                                   const TargetRegisterClass *RC, unsigned Op0,
2139                                   bool Op0IsKill, uint64_t Imm) {
2140  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2141
2142  unsigned ResultReg = createResultReg(RC);
2143  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2144
2145  if (II.getNumDefs() >= 1)
2146    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2147        .addReg(Op0, getKillRegState(Op0IsKill))
2148        .addImm(Imm);
2149  else {
2150    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2151        .addReg(Op0, getKillRegState(Op0IsKill))
2152        .addImm(Imm);
2153    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2154            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2155  }
2156  return ResultReg;
2157}
2158
2159unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
2160                                    const TargetRegisterClass *RC, unsigned Op0,
2161                                    bool Op0IsKill, uint64_t Imm1,
2162                                    uint64_t Imm2) {
2163  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2164
2165  unsigned ResultReg = createResultReg(RC);
2166  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2167
2168  if (II.getNumDefs() >= 1)
2169    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2170        .addReg(Op0, getKillRegState(Op0IsKill))
2171        .addImm(Imm1)
2172        .addImm(Imm2);
2173  else {
2174    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2175        .addReg(Op0, getKillRegState(Op0IsKill))
2176        .addImm(Imm1)
2177        .addImm(Imm2);
2178    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2179            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2180  }
2181  return ResultReg;
2182}
2183
2184unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
2185                                  const TargetRegisterClass *RC,
2186                                  const ConstantFP *FPImm) {
2187  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2188
2189  unsigned ResultReg = createResultReg(RC);
2190
2191  if (II.getNumDefs() >= 1)
2192    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2193        .addFPImm(FPImm);
2194  else {
2195    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2196        .addFPImm(FPImm);
2197    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2198            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2199  }
2200  return ResultReg;
2201}
2202
2203unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
2204                                    const TargetRegisterClass *RC, unsigned Op0,
2205                                    bool Op0IsKill, unsigned Op1,
2206                                    bool Op1IsKill, uint64_t Imm) {
2207  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2208
2209  unsigned ResultReg = createResultReg(RC);
2210  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
2211  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
2212
2213  if (II.getNumDefs() >= 1)
2214    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2215        .addReg(Op0, getKillRegState(Op0IsKill))
2216        .addReg(Op1, getKillRegState(Op1IsKill))
2217        .addImm(Imm);
2218  else {
2219    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2220        .addReg(Op0, getKillRegState(Op0IsKill))
2221        .addReg(Op1, getKillRegState(Op1IsKill))
2222        .addImm(Imm);
2223    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2224            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2225  }
2226  return ResultReg;
2227}
2228
2229unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
2230                                  const TargetRegisterClass *RC, uint64_t Imm) {
2231  unsigned ResultReg = createResultReg(RC);
2232  const MCInstrDesc &II = TII.get(MachineInstOpcode);
2233
2234  if (II.getNumDefs() >= 1)
2235    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
2236        .addImm(Imm);
2237  else {
2238    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
2239    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2240            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
2241  }
2242  return ResultReg;
2243}
2244
2245unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
2246                                              bool Op0IsKill, uint32_t Idx) {
2247  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2248  assert(Register::isVirtualRegister(Op0) &&
2249         "Cannot yet extract from physregs");
2250  const TargetRegisterClass *RC = MRI.getRegClass(Op0);
2251  MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
2252  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
2253          ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
2254  return ResultReg;
2255}
2256
2257/// Emit MachineInstrs to compute the value of Op with all but the least
2258/// significant bit set to zero.
2259unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
2260  return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
2261}
2262
2263/// handlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
2264/// Emit code to ensure constants are copied into registers when needed.
2265/// Remember the virtual registers that need to be added to the Machine PHI
2266/// nodes as input.  We cannot just directly add them, because expansion
2267/// might result in multiple MBB's for one BB.  As such, the start of the
2268/// BB might correspond to a different MBB than the end.
2269bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
2270  const Instruction *TI = LLVMBB->getTerminator();
2271
2272  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
2273  FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
2274
2275  // Check successor nodes' PHI nodes that expect a constant to be available
2276  // from this block.
2277  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
2278    const BasicBlock *SuccBB = TI->getSuccessor(succ);
2279    if (!isa<PHINode>(SuccBB->begin()))
2280      continue;
2281    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
2282
2283    // If this terminator has multiple identical successors (common for
2284    // switches), only handle each succ once.
2285    if (!SuccsHandled.insert(SuccMBB).second)
2286      continue;
2287
2288    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
2289
2290    // At this point we know that there is a 1-1 correspondence between LLVM PHI
2291    // nodes and Machine PHI nodes, but the incoming operands have not been
2292    // emitted yet.
2293    for (const PHINode &PN : SuccBB->phis()) {
2294      // Ignore dead PHIs.
2295      if (PN.use_empty())
2296        continue;
2297
2298      // Only handle legal types. Two interesting things to note here. First,
2299      // by bailing out early, we may leave behind some dead instructions,
2300      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
2301      // own moves. Second, this check is necessary because FastISel doesn't
2302      // use CreateRegs to create registers, so it always creates
2303      // exactly one register for each non-void instruction.
2304      EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
2305      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
2306        // Handle integer promotions, though, because they're common and easy.
2307        if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
2308          FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
2309          return false;
2310        }
2311      }
2312
2313      const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
2314
2315      // Set the DebugLoc for the copy. Prefer the location of the operand
2316      // if there is one; use the location of the PHI otherwise.
2317      DbgLoc = PN.getDebugLoc();
2318      if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
2319        DbgLoc = Inst->getDebugLoc();
2320
2321      unsigned Reg = getRegForValue(PHIOp);
2322      if (!Reg) {
2323        FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
2324        return false;
2325      }
2326      FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
2327      DbgLoc = DebugLoc();
2328    }
2329  }
2330
2331  return true;
2332}
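// For example (illustrative): if the current block ends with "br label %exit"
// and %exit starts with "%r = phi i32 [ 7, %cur ], [ %v, %other ]", the
// constant 7 is materialized into a vreg in the current MBB by getRegForValue
// above, and the (MachinePHI, vreg) pair is pushed onto PHINodesToUpdate; the
// PHI's operands themselves are filled in later, once every block has been
// selected.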
2333
2334bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
2335  assert(LI->hasOneUse() &&
2336         "tryToFoldLoad expected a LoadInst with a single use");
2337  // We know that the load has a single use, but don't know what it is.  If it
2338  // isn't one of the folded instructions, then we can't succeed here.  Handle
2339  // this by scanning the single-use users of the load until we get to FoldInst.
2340  unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
2341
2342  const Instruction *TheUser = LI->user_back();
2343  while (TheUser != FoldInst && // Scan up until we find FoldInst.
2344         // Stay in the right block.
2345         TheUser->getParent() == FoldInst->getParent() &&
2346         --MaxUsers) { // Don't scan too far.
2347    // If there are multiple or no uses of this instruction, then bail out.
2348    if (!TheUser->hasOneUse())
2349      return false;
2350
2351    TheUser = TheUser->user_back();
2352  }
2353
2354  // If we didn't find the fold instruction, then we failed to collapse the
2355  // sequence.
2356  if (TheUser != FoldInst)
2357    return false;
2358
2359  // Don't try to fold volatile loads.  Target has to deal with alignment
2360  // constraints.
2361  if (LI->isVolatile())
2362    return false;
2363
2364  // Figure out which vreg this is going into.  If there is no assigned vreg yet
2365  // then there actually was no reference to it.  Perhaps the load is referenced
2366  // by a dead instruction.
2367  unsigned LoadReg = getRegForValue(LI);
2368  if (!LoadReg)
2369    return false;
2370
2371  // We can't fold if this vreg has no uses or more than one use.  Multiple uses
2372  // may mean that the instruction got lowered to multiple MIs, or the use of
2373  // the loaded value ended up being multiple operands of the result.
2374  if (!MRI.hasOneUse(LoadReg))
2375    return false;
2376
2377  MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
2378  MachineInstr *User = RI->getParent();
2379
2380  // Set the insertion point properly.  Folding the load can cause generation of
2381  // other random instructions (like sign extends) for addressing modes; make
2382  // sure they get inserted in a logical place before the new instruction.
2383  FuncInfo.InsertPt = User;
2384  FuncInfo.MBB = User->getParent();
2385
2386  // Ask the target to try folding the load.
2387  return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
2388}
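// For example (illustrative): given
//   %v = load i32, i32* %p
//   %s = add i32 %v, %x
// where %s is the load's only user and lives in the same block, the target's
// tryToFoldLoadIntoMI may fold the load into a memory operand of the add
// (e.g. a reg-mem add on x86) instead of keeping a separate load instruction.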
2389
2390bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
2391  // Must be an add.
2392  if (!isa<AddOperator>(Add))
2393    return false;
2394  // Type size needs to match.
2395  if (DL.getTypeSizeInBits(GEP->getType()) !=
2396      DL.getTypeSizeInBits(Add->getType()))
2397    return false;
2398  // Must be in the same basic block.
2399  if (isa<Instruction>(Add) &&
2400      FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
2401    return false;
2402  // Must have a constant operand.
2403  return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
2404}
2405
2406MachineMemOperand *
2407FastISel::createMachineMemOperandFor(const Instruction *I) const {
2408  const Value *Ptr;
2409  Type *ValTy;
2410  unsigned Alignment;
2411  MachineMemOperand::Flags Flags;
2412  bool IsVolatile;
2413
2414  if (const auto *LI = dyn_cast<LoadInst>(I)) {
2415    Alignment = LI->getAlignment();
2416    IsVolatile = LI->isVolatile();
2417    Flags = MachineMemOperand::MOLoad;
2418    Ptr = LI->getPointerOperand();
2419    ValTy = LI->getType();
2420  } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
2421    Alignment = SI->getAlignment();
2422    IsVolatile = SI->isVolatile();
2423    Flags = MachineMemOperand::MOStore;
2424    Ptr = SI->getPointerOperand();
2425    ValTy = SI->getValueOperand()->getType();
2426  } else
2427    return nullptr;
2428
2429  bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
2430  bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
2431  bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
2432  const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
2433
2434  AAMDNodes AAInfo;
2435  I->getAAMetadata(AAInfo);
2436
2437  if (Alignment == 0) // Ensure that codegen never sees alignment 0.
2438    Alignment = DL.getABITypeAlignment(ValTy);
2439
2440  unsigned Size = DL.getTypeStoreSize(ValTy);
2441
2442  if (IsVolatile)
2443    Flags |= MachineMemOperand::MOVolatile;
2444  if (IsNonTemporal)
2445    Flags |= MachineMemOperand::MONonTemporal;
2446  if (IsDereferenceable)
2447    Flags |= MachineMemOperand::MODereferenceable;
2448  if (IsInvariant)
2449    Flags |= MachineMemOperand::MOInvariant;
2450
2451  return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
2452                                           Alignment, AAInfo, Ranges);
2453}
2454
2455CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
2456  // If both operands are the same, then try to optimize or fold the cmp.
2457  CmpInst::Predicate Predicate = CI->getPredicate();
2458  if (CI->getOperand(0) != CI->getOperand(1))
2459    return Predicate;
2460
2461  switch (Predicate) {
2462  default: llvm_unreachable("Invalid predicate!");
2463  case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
2464  case CmpInst::FCMP_OEQ:   Predicate = CmpInst::FCMP_ORD;   break;
2465  case CmpInst::FCMP_OGT:   Predicate = CmpInst::FCMP_FALSE; break;
2466  case CmpInst::FCMP_OGE:   Predicate = CmpInst::FCMP_ORD;   break;
2467  case CmpInst::FCMP_OLT:   Predicate = CmpInst::FCMP_FALSE; break;
2468  case CmpInst::FCMP_OLE:   Predicate = CmpInst::FCMP_ORD;   break;
2469  case CmpInst::FCMP_ONE:   Predicate = CmpInst::FCMP_FALSE; break;
2470  case CmpInst::FCMP_ORD:   Predicate = CmpInst::FCMP_ORD;   break;
2471  case CmpInst::FCMP_UNO:   Predicate = CmpInst::FCMP_UNO;   break;
2472  case CmpInst::FCMP_UEQ:   Predicate = CmpInst::FCMP_TRUE;  break;
2473  case CmpInst::FCMP_UGT:   Predicate = CmpInst::FCMP_UNO;   break;
2474  case CmpInst::FCMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
2475  case CmpInst::FCMP_ULT:   Predicate = CmpInst::FCMP_UNO;   break;
2476  case CmpInst::FCMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
2477  case CmpInst::FCMP_UNE:   Predicate = CmpInst::FCMP_UNO;   break;
2478  case CmpInst::FCMP_TRUE:  Predicate = CmpInst::FCMP_TRUE;  break;
2479
2480  case CmpInst::ICMP_EQ:    Predicate = CmpInst::FCMP_TRUE;  break;
2481  case CmpInst::ICMP_NE:    Predicate = CmpInst::FCMP_FALSE; break;
2482  case CmpInst::ICMP_UGT:   Predicate = CmpInst::FCMP_FALSE; break;
2483  case CmpInst::ICMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
2484  case CmpInst::ICMP_ULT:   Predicate = CmpInst::FCMP_FALSE; break;
2485  case CmpInst::ICMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
2486  case CmpInst::ICMP_SGT:   Predicate = CmpInst::FCMP_FALSE; break;
2487  case CmpInst::ICMP_SGE:   Predicate = CmpInst::FCMP_TRUE;  break;
2488  case CmpInst::ICMP_SLT:   Predicate = CmpInst::FCMP_FALSE; break;
2489  case CmpInst::ICMP_SLE:   Predicate = CmpInst::FCMP_TRUE;  break;
2490  }
2491
2492  return Predicate;
2493}
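// For example (illustrative): "fcmp oeq %x, %x" is true exactly when %x is
// not NaN, so with identical operands it is folded to FCMP_ORD above, while
// "icmp eq %x, %x" always holds and becomes FCMP_TRUE.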
2494