1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetFrameLowering.h"
21#include "llvm/CodeGen/TargetInstrInfo.h"
22#include "llvm/CodeGen/TargetLowering.h"
23#include "llvm/CodeGen/TargetSubtargetInfo.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28#define DEBUG_TYPE "legalizer"
29
30using namespace llvm;
31using namespace LegalizeActions;
32
33/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34///
35/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36/// with any leftover piece as type \p LeftoverTy
37///
38/// Returns -1 in the first element of the pair if the breakdown is not
39/// satisfiable.
40static std::pair<int, int>
41getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42  assert(!LeftoverTy.isValid() && "this is an out argument");
43
44  unsigned Size = OrigTy.getSizeInBits();
45  unsigned NarrowSize = NarrowTy.getSizeInBits();
46  unsigned NumParts = Size / NarrowSize;
47  unsigned LeftoverSize = Size - NumParts * NarrowSize;
48  assert(Size > NarrowSize);
49
50  if (LeftoverSize == 0)
51    return {NumParts, 0};
52
53  if (NarrowTy.isVector()) {
54    unsigned EltSize = OrigTy.getScalarSizeInBits();
55    if (LeftoverSize % EltSize != 0)
56      return {-1, -1};
57    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58  } else {
59    LeftoverTy = LLT::scalar(LeftoverSize);
60  }
61
62  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63  return std::make_pair(NumParts, NumLeftover);
64}
65
// Construct a helper that queries the subtarget's default LegalizerInfo.
// The shared builder is (re)pointed at \p MF and wired to \p Observer so
// every instruction created during legalization is reported to the observer.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
74
// Construct a helper with an explicitly supplied LegalizerInfo, for callers
// that don't want the subtarget's default rules. Otherwise identical to the
// three-argument constructor: point the builder at \p MF and attach the
// observer.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
// Perform a single legalization step on \p MI: ask the target's rules for
// one action and dispatch to the matching helper. Returns AlreadyLegal,
// Legalized, or UnableToLegalize accordingly.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  // Intrinsics bypass the generic action table and are handed straight to
  // the target's LegalizerInfo hook.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
                                                     : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    // Any action we have no handler for (e.g. target-specific ones) is
    // reported as a failure rather than silently ignored.
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
122
123void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
124                                   SmallVectorImpl<Register> &VRegs) {
125  for (int i = 0; i < NumParts; ++i)
126    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
127  MIRBuilder.buildUnmerge(VRegs, Reg);
128}
129
// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit,
// plus leftover pieces covering the remaining bits. Main-piece registers
// are appended to \p VRegs and leftover registers to \p LeftoverRegs;
// \p LeftoverTy (an out argument) is set to the leftover piece type.
// Returns false if no valid leftover type exists for the remainder.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    // Vector leftovers must be a whole number of elements.
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the leftover pieces after the main parts, one G_EXTRACT per
  // LeftoverSize-bit chunk.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
174
175static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
176  if (OrigTy.isVector() && TargetTy.isVector()) {
177    assert(OrigTy.getElementType() == TargetTy.getElementType());
178    int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
179                                    TargetTy.getNumElements());
180    return LLT::scalarOrVector(GCD, OrigTy.getElementType());
181  }
182
183  if (OrigTy.isVector() && !TargetTy.isVector()) {
184    assert(OrigTy.getElementType() == TargetTy);
185    return TargetTy;
186  }
187
188  assert(!OrigTy.isVector() && !TargetTy.isVector());
189
190  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
191                                  TargetTy.getSizeInBits());
192  return LLT::scalar(GCD);
193}
194
// Reassemble \p DstReg (of type \p ResultTy) from \p PartRegs (each of type
// \p PartTy) plus optional \p LeftoverRegs (each of type \p LeftoverTy).
// This is the inverse of the leftover-aware extractParts breakdown.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // Exact breakdown: a single merge-like instruction suffices. Pick the
    // opcode based on whether the result and the parts are vectors.
    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Irregular breakdown: start from an undef value and insert each piece at
  // its bit offset, threading the partial result through the chain.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}
239
240static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
241  switch (Opcode) {
242  case TargetOpcode::G_SDIV:
243    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
244    switch (Size) {
245    case 32:
246      return RTLIB::SDIV_I32;
247    case 64:
248      return RTLIB::SDIV_I64;
249    case 128:
250      return RTLIB::SDIV_I128;
251    default:
252      llvm_unreachable("unexpected size");
253    }
254  case TargetOpcode::G_UDIV:
255    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
256    switch (Size) {
257    case 32:
258      return RTLIB::UDIV_I32;
259    case 64:
260      return RTLIB::UDIV_I64;
261    case 128:
262      return RTLIB::UDIV_I128;
263    default:
264      llvm_unreachable("unexpected size");
265    }
266  case TargetOpcode::G_SREM:
267    assert((Size == 32 || Size == 64) && "Unsupported size");
268    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
269  case TargetOpcode::G_UREM:
270    assert((Size == 32 || Size == 64) && "Unsupported size");
271    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
272  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
273    assert(Size == 32 && "Unsupported size");
274    return RTLIB::CTLZ_I32;
275  case TargetOpcode::G_FADD:
276    assert((Size == 32 || Size == 64) && "Unsupported size");
277    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
278  case TargetOpcode::G_FSUB:
279    assert((Size == 32 || Size == 64) && "Unsupported size");
280    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
281  case TargetOpcode::G_FMUL:
282    assert((Size == 32 || Size == 64) && "Unsupported size");
283    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
284  case TargetOpcode::G_FDIV:
285    assert((Size == 32 || Size == 64) && "Unsupported size");
286    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
287  case TargetOpcode::G_FEXP:
288    assert((Size == 32 || Size == 64) && "Unsupported size");
289    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
290  case TargetOpcode::G_FEXP2:
291    assert((Size == 32 || Size == 64) && "Unsupported size");
292    return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
293  case TargetOpcode::G_FREM:
294    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
295  case TargetOpcode::G_FPOW:
296    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
297  case TargetOpcode::G_FMA:
298    assert((Size == 32 || Size == 64) && "Unsupported size");
299    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
300  case TargetOpcode::G_FSIN:
301    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
302    return Size == 128 ? RTLIB::SIN_F128
303                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
304  case TargetOpcode::G_FCOS:
305    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
306    return Size == 128 ? RTLIB::COS_F128
307                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
308  case TargetOpcode::G_FLOG10:
309    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
310    return Size == 128 ? RTLIB::LOG10_F128
311                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
312  case TargetOpcode::G_FLOG:
313    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
314    return Size == 128 ? RTLIB::LOG_F128
315                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
316  case TargetOpcode::G_FLOG2:
317    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
318    return Size == 128 ? RTLIB::LOG2_F128
319                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
320  case TargetOpcode::G_FCEIL:
321    assert((Size == 32 || Size == 64) && "Unsupported size");
322    return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
323  case TargetOpcode::G_FFLOOR:
324    assert((Size == 32 || Size == 64) && "Unsupported size");
325    return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
326  }
327  llvm_unreachable("Unknown libcall function");
328}
329
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI) {
  const Function &F = MI.getParent()->getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return; also
  // bail out if the next instruction is itself already a tail call.
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  MachineInstr *Next = MI.getNextNode();
  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
358
359LegalizerHelper::LegalizeResult
360llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
361                    const CallLowering::ArgInfo &Result,
362                    ArrayRef<CallLowering::ArgInfo> Args) {
363  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
364  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
365  const char *Name = TLI.getLibcallName(Libcall);
366
367  CallLowering::CallLoweringInfo Info;
368  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
369  Info.Callee = MachineOperand::CreateES(Name);
370  Info.OrigRet = Result;
371  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
372  if (!CLI.lowerCall(MIRBuilder, Info))
373    return LegalizerHelper::UnableToLegalize;
374
375  return LegalizerHelper::Legalized;
376}
377
378// Useful for libcalls where all operands have the same type.
379static LegalizerHelper::LegalizeResult
380simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
381              Type *OpType) {
382  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
383
384  SmallVector<CallLowering::ArgInfo, 3> Args;
385  for (unsigned i = 1; i < MI.getNumOperands(); i++)
386    Args.push_back({MI.getOperand(i).getReg(), OpType});
387  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
388                       Args);
389}
390
// Lower a G_INTRINSIC_W_SIDE_EFFECTS wrapping memcpy/memmove/memset into a
// call to the corresponding runtime library function, possibly as a tail
// call. \p MI is left in place; callers are responsible for erasing it.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering: pointers become i8*,
    // everything else an integer of the same bit width.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  // NOTE(review): getLibcallName can return null for unsupported libcalls;
  // this path assumes mem* libcalls are always available — confirm.
  const char *Name = TLI.getLibcallName(RTLibcall);

  MIRBuilder.setInstr(MI);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // mem* routines' return values are ignored here, so lower as void.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  // Tail-call only when the intrinsic's trailing imm requests it AND the
  // call actually sits in tail position.
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
                    isLibCallInTailPosition(MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call to get past
    // isLibCallInTailPosition.
    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
           "Expected instr following MI to be a return?");

    // We lowered a tail call, so the call is now the return from the block.
    // Delete the old return.
    MI.getNextNode()->eraseFromParent();
  }

  return LegalizerHelper::Legalized;
}
458
// Map a conversion opcode plus its source/destination IR types to the
// corresponding runtime library call, via the RTLIB helpers keyed on MVTs.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}
480
481static LegalizerHelper::LegalizeResult
482conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
483                  Type *FromType) {
484  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
485  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
486                       {{MI.getOperand(1).getReg(), FromType}});
487}
488
// Replace \p MI with an equivalent runtime library call and erase it on
// success. Returns UnableToLegalize when the opcode or the operand sizes
// have no supported libcall mapping here.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  // Size of the destination, used to pick the concrete libcall variant.
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: every operand shares one integer type of width Size.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR: {
    // FP ops: only f32/f64 are handled on this path.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The libcall was emitted successfully; drop the original instruction.
  MI.eraseFromParent();
  return Legalized;
}
596
597LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
598                                                              unsigned TypeIdx,
599                                                              LLT NarrowTy) {
600  MIRBuilder.setInstr(MI);
601
602  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
603  uint64_t NarrowSize = NarrowTy.getSizeInBits();
604
605  switch (MI.getOpcode()) {
606  default:
607    return UnableToLegalize;
608  case TargetOpcode::G_IMPLICIT_DEF: {
609    // FIXME: add support for when SizeOp0 isn't an exact multiple of
610    // NarrowSize.
611    if (SizeOp0 % NarrowSize != 0)
612      return UnableToLegalize;
613    int NumParts = SizeOp0 / NarrowSize;
614
615    SmallVector<Register, 2> DstRegs;
616    for (int i = 0; i < NumParts; ++i)
617      DstRegs.push_back(
618          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
619
620    Register DstReg = MI.getOperand(0).getReg();
621    if(MRI.getType(DstReg).isVector())
622      MIRBuilder.buildBuildVector(DstReg, DstRegs);
623    else
624      MIRBuilder.buildMerge(DstReg, DstRegs);
625    MI.eraseFromParent();
626    return Legalized;
627  }
628  case TargetOpcode::G_CONSTANT: {
629    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
630    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
631    unsigned TotalSize = Ty.getSizeInBits();
632    unsigned NarrowSize = NarrowTy.getSizeInBits();
633    int NumParts = TotalSize / NarrowSize;
634
635    SmallVector<Register, 4> PartRegs;
636    for (int I = 0; I != NumParts; ++I) {
637      unsigned Offset = I * NarrowSize;
638      auto K = MIRBuilder.buildConstant(NarrowTy,
639                                        Val.lshr(Offset).trunc(NarrowSize));
640      PartRegs.push_back(K.getReg(0));
641    }
642
643    LLT LeftoverTy;
644    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
645    SmallVector<Register, 1> LeftoverRegs;
646    if (LeftoverBits != 0) {
647      LeftoverTy = LLT::scalar(LeftoverBits);
648      auto K = MIRBuilder.buildConstant(
649        LeftoverTy,
650        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
651      LeftoverRegs.push_back(K.getReg(0));
652    }
653
654    insertParts(MI.getOperand(0).getReg(),
655                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
656
657    MI.eraseFromParent();
658    return Legalized;
659  }
660  case TargetOpcode::G_SEXT: {
661    if (TypeIdx != 0)
662      return UnableToLegalize;
663
664    Register SrcReg = MI.getOperand(1).getReg();
665    LLT SrcTy = MRI.getType(SrcReg);
666
667    // FIXME: support the general case where the requested NarrowTy may not be
668    // the same as the source type. E.g. s128 = sext(s32)
669    if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
670        SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
671      LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
672      return UnableToLegalize;
673    }
674
675    // Shift the sign bit of the low register through the high register.
676    auto ShiftAmt =
677        MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
678    auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
679    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
680    MI.eraseFromParent();
681    return Legalized;
682  }
683  case TargetOpcode::G_ZEXT: {
684    if (TypeIdx != 0)
685      return UnableToLegalize;
686
687    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
688    uint64_t SizeOp1 = SrcTy.getSizeInBits();
689    if (SizeOp0 % SizeOp1 != 0)
690      return UnableToLegalize;
691
692    // Generate a merge where the bottom bits are taken from the source, and
693    // zero everything else.
694    Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
695    unsigned NumParts = SizeOp0 / SizeOp1;
696    SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
697    for (unsigned Part = 1; Part < NumParts; ++Part)
698      Srcs.push_back(ZeroReg);
699    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
700    MI.eraseFromParent();
701    return Legalized;
702  }
703  case TargetOpcode::G_TRUNC: {
704    if (TypeIdx != 1)
705      return UnableToLegalize;
706
707    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
708    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
709      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
710      return UnableToLegalize;
711    }
712
713    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
714    MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
715    MI.eraseFromParent();
716    return Legalized;
717  }
718
719  case TargetOpcode::G_ADD: {
720    // FIXME: add support for when SizeOp0 isn't an exact multiple of
721    // NarrowSize.
722    if (SizeOp0 % NarrowSize != 0)
723      return UnableToLegalize;
724    // Expand in terms of carry-setting/consuming G_ADDE instructions.
725    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
726
727    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
728    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
729    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
730
731    Register CarryIn;
732    for (int i = 0; i < NumParts; ++i) {
733      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
734      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
735
736      if (i == 0)
737        MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
738      else {
739        MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
740                              Src2Regs[i], CarryIn);
741      }
742
743      DstRegs.push_back(DstReg);
744      CarryIn = CarryOut;
745    }
746    Register DstReg = MI.getOperand(0).getReg();
747    if(MRI.getType(DstReg).isVector())
748      MIRBuilder.buildBuildVector(DstReg, DstRegs);
749    else
750      MIRBuilder.buildMerge(DstReg, DstRegs);
751    MI.eraseFromParent();
752    return Legalized;
753  }
754  case TargetOpcode::G_SUB: {
755    // FIXME: add support for when SizeOp0 isn't an exact multiple of
756    // NarrowSize.
757    if (SizeOp0 % NarrowSize != 0)
758      return UnableToLegalize;
759
760    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
761
762    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
763    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
764    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
765
766    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
767    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
768    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
769                          {Src1Regs[0], Src2Regs[0]});
770    DstRegs.push_back(DstReg);
771    Register BorrowIn = BorrowOut;
772    for (int i = 1; i < NumParts; ++i) {
773      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
774      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
775
776      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
777                            {Src1Regs[i], Src2Regs[i], BorrowIn});
778
779      DstRegs.push_back(DstReg);
780      BorrowIn = BorrowOut;
781    }
782    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
783    MI.eraseFromParent();
784    return Legalized;
785  }
786  case TargetOpcode::G_MUL:
787  case TargetOpcode::G_UMULH:
788    return narrowScalarMul(MI, NarrowTy);
789  case TargetOpcode::G_EXTRACT:
790    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
791  case TargetOpcode::G_INSERT:
792    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
793  case TargetOpcode::G_LOAD: {
794    const auto &MMO = **MI.memoperands_begin();
795    Register DstReg = MI.getOperand(0).getReg();
796    LLT DstTy = MRI.getType(DstReg);
797    if (DstTy.isVector())
798      return UnableToLegalize;
799
800    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
801      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
802      auto &MMO = **MI.memoperands_begin();
803      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
804      MIRBuilder.buildAnyExt(DstReg, TmpReg);
805      MI.eraseFromParent();
806      return Legalized;
807    }
808
809    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
810  }
811  case TargetOpcode::G_ZEXTLOAD:
812  case TargetOpcode::G_SEXTLOAD: {
813    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
814    Register DstReg = MI.getOperand(0).getReg();
815    Register PtrReg = MI.getOperand(1).getReg();
816
817    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
818    auto &MMO = **MI.memoperands_begin();
819    if (MMO.getSizeInBits() == NarrowSize) {
820      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
821    } else {
822      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
823        : TargetOpcode::G_SEXTLOAD;
824      MIRBuilder.buildInstr(ExtLoad)
825        .addDef(TmpReg)
826        .addUse(PtrReg)
827        .addMemOperand(&MMO);
828    }
829
830    if (ZExt)
831      MIRBuilder.buildZExt(DstReg, TmpReg);
832    else
833      MIRBuilder.buildSExt(DstReg, TmpReg);
834
835    MI.eraseFromParent();
836    return Legalized;
837  }
838  case TargetOpcode::G_STORE: {
839    const auto &MMO = **MI.memoperands_begin();
840
841    Register SrcReg = MI.getOperand(0).getReg();
842    LLT SrcTy = MRI.getType(SrcReg);
843    if (SrcTy.isVector())
844      return UnableToLegalize;
845
846    int NumParts = SizeOp0 / NarrowSize;
847    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
848    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
849    if (SrcTy.isVector() && LeftoverBits != 0)
850      return UnableToLegalize;
851
852    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
853      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
854      auto &MMO = **MI.memoperands_begin();
855      MIRBuilder.buildTrunc(TmpReg, SrcReg);
856      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
857      MI.eraseFromParent();
858      return Legalized;
859    }
860
861    return reduceLoadStoreWidth(MI, 0, NarrowTy);
862  }
863  case TargetOpcode::G_SELECT:
864    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
865  case TargetOpcode::G_AND:
866  case TargetOpcode::G_OR:
867  case TargetOpcode::G_XOR: {
868    // Legalize bitwise operation:
869    // A = BinOp<Ty> B, C
870    // into:
871    // B1, ..., BN = G_UNMERGE_VALUES B
872    // C1, ..., CN = G_UNMERGE_VALUES C
873    // A1 = BinOp<Ty/N> B1, C2
874    // ...
875    // AN = BinOp<Ty/N> BN, CN
876    // A = G_MERGE_VALUES A1, ..., AN
877    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
878  }
879  case TargetOpcode::G_SHL:
880  case TargetOpcode::G_LSHR:
881  case TargetOpcode::G_ASHR:
882    return narrowScalarShift(MI, TypeIdx, NarrowTy);
883  case TargetOpcode::G_CTLZ:
884  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
885  case TargetOpcode::G_CTTZ:
886  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
887  case TargetOpcode::G_CTPOP:
888    if (TypeIdx != 0)
889      return UnableToLegalize; // TODO
890
891    Observer.changingInstr(MI);
892    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
893    Observer.changedInstr(MI);
894    return Legalized;
895  case TargetOpcode::G_INTTOPTR:
896    if (TypeIdx != 1)
897      return UnableToLegalize;
898
899    Observer.changingInstr(MI);
900    narrowScalarSrc(MI, NarrowTy, 1);
901    Observer.changedInstr(MI);
902    return Legalized;
903  case TargetOpcode::G_PTRTOINT:
904    if (TypeIdx != 0)
905      return UnableToLegalize;
906
907    Observer.changingInstr(MI);
908    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
909    Observer.changedInstr(MI);
910    return Legalized;
911  case TargetOpcode::G_PHI: {
912    unsigned NumParts = SizeOp0 / NarrowSize;
913    SmallVector<Register, 2> DstRegs;
914    SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
915    DstRegs.resize(NumParts);
916    SrcRegs.resize(MI.getNumOperands() / 2);
917    Observer.changingInstr(MI);
918    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
919      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
920      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
921      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
922                   SrcRegs[i / 2]);
923    }
924    MachineBasicBlock &MBB = *MI.getParent();
925    MIRBuilder.setInsertPt(MBB, MI);
926    for (unsigned i = 0; i < NumParts; ++i) {
927      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
928      MachineInstrBuilder MIB =
929          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
930      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
931        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
932    }
933    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
934    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
935    Observer.changedInstr(MI);
936    MI.eraseFromParent();
937    return Legalized;
938  }
939  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
940  case TargetOpcode::G_INSERT_VECTOR_ELT: {
941    if (TypeIdx != 2)
942      return UnableToLegalize;
943
944    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
945    Observer.changingInstr(MI);
946    narrowScalarSrc(MI, NarrowTy, OpIdx);
947    Observer.changedInstr(MI);
948    return Legalized;
949  }
950  case TargetOpcode::G_ICMP: {
951    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
952    if (NarrowSize * 2 != SrcSize)
953      return UnableToLegalize;
954
955    Observer.changingInstr(MI);
956    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
957    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
958    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
959
960    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
961    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
962    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
963
964    CmpInst::Predicate Pred =
965        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
966    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
967
968    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
969      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
970      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
971      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
972      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
973      MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
974    } else {
975      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
976      MachineInstrBuilder CmpHEQ =
977          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
978      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
979          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
980      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
981    }
982    Observer.changedInstr(MI);
983    MI.eraseFromParent();
984    return Legalized;
985  }
986  case TargetOpcode::G_SEXT_INREG: {
987    if (TypeIdx != 0)
988      return UnableToLegalize;
989
990    if (!MI.getOperand(2).isImm())
991      return UnableToLegalize;
992    int64_t SizeInBits = MI.getOperand(2).getImm();
993
994    // So long as the new type has more bits than the bits we're extending we
995    // don't need to break it apart.
996    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
997      Observer.changingInstr(MI);
998      // We don't lose any non-extension bits by truncating the src and
999      // sign-extending the dst.
1000      MachineOperand &MO1 = MI.getOperand(1);
1001      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
1002      MO1.setReg(TruncMIB->getOperand(0).getReg());
1003
1004      MachineOperand &MO2 = MI.getOperand(0);
1005      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1006      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1007      MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
1008      MO2.setReg(DstExt);
1009      Observer.changedInstr(MI);
1010      return Legalized;
1011    }
1012
1013    // Break it apart. Components below the extension point are unmodified. The
1014    // component containing the extension point becomes a narrower SEXT_INREG.
1015    // Components above it are ashr'd from the component containing the
1016    // extension point.
1017    if (SizeOp0 % NarrowSize != 0)
1018      return UnableToLegalize;
1019    int NumParts = SizeOp0 / NarrowSize;
1020
1021    // List the registers where the destination will be scattered.
1022    SmallVector<Register, 2> DstRegs;
1023    // List the registers where the source will be split.
1024    SmallVector<Register, 2> SrcRegs;
1025
1026    // Create all the temporary registers.
1027    for (int i = 0; i < NumParts; ++i) {
1028      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1029
1030      SrcRegs.push_back(SrcReg);
1031    }
1032
1033    // Explode the big arguments into smaller chunks.
1034    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
1035
1036    Register AshrCstReg =
1037        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1038            ->getOperand(0)
1039            .getReg();
1040    Register FullExtensionReg = 0;
1041    Register PartialExtensionReg = 0;
1042
1043    // Do the operation on each small part.
1044    for (int i = 0; i < NumParts; ++i) {
1045      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1046        DstRegs.push_back(SrcRegs[i]);
1047      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1048        assert(PartialExtensionReg &&
1049               "Expected to visit partial extension before full");
1050        if (FullExtensionReg) {
1051          DstRegs.push_back(FullExtensionReg);
1052          continue;
1053        }
1054        DstRegs.push_back(MIRBuilder
1055                              .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
1056                                          {PartialExtensionReg, AshrCstReg})
1057                              ->getOperand(0)
1058                              .getReg());
1059        FullExtensionReg = DstRegs.back();
1060      } else {
1061        DstRegs.push_back(
1062            MIRBuilder
1063                .buildInstr(
1064                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
1065                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1066                ->getOperand(0)
1067                .getReg());
1068        PartialExtensionReg = DstRegs.back();
1069      }
1070    }
1071
1072    // Gather the destination registers into the final destination.
1073    Register DstReg = MI.getOperand(0).getReg();
1074    MIRBuilder.buildMerge(DstReg, DstRegs);
1075    MI.eraseFromParent();
1076    return Legalized;
1077  }
1078  case TargetOpcode::G_BSWAP:
1079  case TargetOpcode::G_BITREVERSE: {
1080    if (SizeOp0 % NarrowSize != 0)
1081      return UnableToLegalize;
1082
1083    Observer.changingInstr(MI);
1084    SmallVector<Register, 2> SrcRegs, DstRegs;
1085    unsigned NumParts = SizeOp0 / NarrowSize;
1086    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1087
1088    for (unsigned i = 0; i < NumParts; ++i) {
1089      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1090                                           {SrcRegs[NumParts - 1 - i]});
1091      DstRegs.push_back(DstPart.getReg(0));
1092    }
1093
1094    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
1095
1096    Observer.changedInstr(MI);
1097    MI.eraseFromParent();
1098    return Legalized;
1099  }
1100  }
1101}
1102
1103void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1104                                     unsigned OpIdx, unsigned ExtOpcode) {
1105  MachineOperand &MO = MI.getOperand(OpIdx);
1106  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
1107  MO.setReg(ExtB->getOperand(0).getReg());
1108}
1109
1110void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1111                                      unsigned OpIdx) {
1112  MachineOperand &MO = MI.getOperand(OpIdx);
1113  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
1114                                    {MO.getReg()});
1115  MO.setReg(ExtB->getOperand(0).getReg());
1116}
1117
1118void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1119                                     unsigned OpIdx, unsigned TruncOpcode) {
1120  MachineOperand &MO = MI.getOperand(OpIdx);
1121  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1122  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1123  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
1124  MO.setReg(DstExt);
1125}
1126
1127void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1128                                      unsigned OpIdx, unsigned ExtOpcode) {
1129  MachineOperand &MO = MI.getOperand(OpIdx);
1130  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1131  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1132  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
1133  MO.setReg(DstTrunc);
1134}
1135
1136void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1137                                            unsigned OpIdx) {
1138  MachineOperand &MO = MI.getOperand(OpIdx);
1139  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1140  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1141  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
1142  MO.setReg(DstExt);
1143}
1144
1145void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1146                                            unsigned OpIdx) {
1147  MachineOperand &MO = MI.getOperand(OpIdx);
1148
1149  LLT OldTy = MRI.getType(MO.getReg());
1150  unsigned OldElts = OldTy.getNumElements();
1151  unsigned NewElts = MoreTy.getNumElements();
1152
1153  unsigned NumParts = NewElts / OldElts;
1154
1155  // Use concat_vectors if the result is a multiple of the number of elements.
1156  if (NumParts * OldElts == NewElts) {
1157    SmallVector<Register, 8> Parts;
1158    Parts.push_back(MO.getReg());
1159
1160    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1161    for (unsigned I = 1; I != NumParts; ++I)
1162      Parts.push_back(ImpDef);
1163
1164    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1165    MO.setReg(Concat.getReg(0));
1166    return;
1167  }
1168
1169  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1170  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1171  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1172  MO.setReg(MoreReg);
1173}
1174
1175LegalizerHelper::LegalizeResult
1176LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1177                                        LLT WideTy) {
1178  if (TypeIdx != 1)
1179    return UnableToLegalize;
1180
1181  Register DstReg = MI.getOperand(0).getReg();
1182  LLT DstTy = MRI.getType(DstReg);
1183  if (DstTy.isVector())
1184    return UnableToLegalize;
1185
1186  Register Src1 = MI.getOperand(1).getReg();
1187  LLT SrcTy = MRI.getType(Src1);
1188  const int DstSize = DstTy.getSizeInBits();
1189  const int SrcSize = SrcTy.getSizeInBits();
1190  const int WideSize = WideTy.getSizeInBits();
1191  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1192
1193  unsigned NumOps = MI.getNumOperands();
1194  unsigned NumSrc = MI.getNumOperands() - 1;
1195  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1196
1197  if (WideSize >= DstSize) {
1198    // Directly pack the bits in the target type.
1199    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1200
1201    for (unsigned I = 2; I != NumOps; ++I) {
1202      const unsigned Offset = (I - 1) * PartSize;
1203
1204      Register SrcReg = MI.getOperand(I).getReg();
1205      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1206
1207      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1208
1209      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1210        MRI.createGenericVirtualRegister(WideTy);
1211
1212      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1213      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1214      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1215      ResultReg = NextResult;
1216    }
1217
1218    if (WideSize > DstSize)
1219      MIRBuilder.buildTrunc(DstReg, ResultReg);
1220    else if (DstTy.isPointer())
1221      MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1222
1223    MI.eraseFromParent();
1224    return Legalized;
1225  }
1226
1227  // Unmerge the original values to the GCD type, and recombine to the next
1228  // multiple greater than the original type.
1229  //
1230  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1231  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1232  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1233  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1234  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1235  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1236  // %12:_(s12) = G_MERGE_VALUES %10, %11
1237  //
1238  // Padding with undef if necessary:
1239  //
1240  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1241  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1242  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1243  // %7:_(s2) = G_IMPLICIT_DEF
1244  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1245  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1246  // %10:_(s12) = G_MERGE_VALUES %8, %9
1247
1248  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1249  LLT GCDTy = LLT::scalar(GCD);
1250
1251  SmallVector<Register, 8> Parts;
1252  SmallVector<Register, 8> NewMergeRegs;
1253  SmallVector<Register, 8> Unmerges;
1254  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1255
1256  // Decompose the original operands if they don't evenly divide.
1257  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1258    Register SrcReg = MI.getOperand(I).getReg();
1259    if (GCD == SrcSize) {
1260      Unmerges.push_back(SrcReg);
1261    } else {
1262      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1263      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1264        Unmerges.push_back(Unmerge.getReg(J));
1265    }
1266  }
1267
1268  // Pad with undef to the next size that is a multiple of the requested size.
1269  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1270    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1271    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1272      Unmerges.push_back(UndefReg);
1273  }
1274
1275  const int PartsPerGCD = WideSize / GCD;
1276
1277  // Build merges of each piece.
1278  ArrayRef<Register> Slicer(Unmerges);
1279  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1280    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1281    NewMergeRegs.push_back(Merge.getReg(0));
1282  }
1283
1284  // A truncate may be necessary if the requested type doesn't evenly divide the
1285  // original result type.
1286  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1287    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1288  } else {
1289    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1290    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1291  }
1292
1293  MI.eraseFromParent();
1294  return Legalized;
1295}
1296
// Widen the destination pieces (TypeIdx 0) of a scalar G_UNMERGE_VALUES to
// \p WideTy. The source is zero-extended into a scalar large enough to hold
// NumDst WideTy pieces, the payload is shifted/or'd so each WideTy chunk
// carries its piece, and the instruction is rewritten in place to unmerge
// the new wide source (each widened def gets a trailing truncate via
// widenScalarDst).
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // The source register is the last operand of G_UNMERGE_VALUES.
  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // New source must cover NumDst pieces of WideTy.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  // Per-piece gap between the old (DstTy) and new (WideTy) piece widths.
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // Shift piece I (at bit I*DstSize) up by I*SizeDiff so it lands at bit
  // I*WideSize of the new source.
  // NOTE(review): each iteration shifts the *accumulated* value rather than
  // the original zext, so earlier terms are also re-shifted and OR'd in;
  // it looks like bits above DstTy within each WideTy chunk can be polluted
  // (harmless only if consumers use just the low DstTy bits) — TODO confirm.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Point the unmerge at the repacked wide source and widen every def;
  // widenScalarDst inserts the truncates back to DstTy after MI.
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1336
1337LegalizerHelper::LegalizeResult
1338LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1339                                    LLT WideTy) {
1340  Register DstReg = MI.getOperand(0).getReg();
1341  Register SrcReg = MI.getOperand(1).getReg();
1342  LLT SrcTy = MRI.getType(SrcReg);
1343
1344  LLT DstTy = MRI.getType(DstReg);
1345  unsigned Offset = MI.getOperand(2).getImm();
1346
1347  if (TypeIdx == 0) {
1348    if (SrcTy.isVector() || DstTy.isVector())
1349      return UnableToLegalize;
1350
1351    SrcOp Src(SrcReg);
1352    if (SrcTy.isPointer()) {
1353      // Extracts from pointers can be handled only if they are really just
1354      // simple integers.
1355      const DataLayout &DL = MIRBuilder.getDataLayout();
1356      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1357        return UnableToLegalize;
1358
1359      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1360      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1361      SrcTy = SrcAsIntTy;
1362    }
1363
1364    if (DstTy.isPointer())
1365      return UnableToLegalize;
1366
1367    if (Offset == 0) {
1368      // Avoid a shift in the degenerate case.
1369      MIRBuilder.buildTrunc(DstReg,
1370                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1371      MI.eraseFromParent();
1372      return Legalized;
1373    }
1374
1375    // Do a shift in the source type.
1376    LLT ShiftTy = SrcTy;
1377    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1378      Src = MIRBuilder.buildAnyExt(WideTy, Src);
1379      ShiftTy = WideTy;
1380    } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1381      return UnableToLegalize;
1382
1383    auto LShr = MIRBuilder.buildLShr(
1384      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1385    MIRBuilder.buildTrunc(DstReg, LShr);
1386    MI.eraseFromParent();
1387    return Legalized;
1388  }
1389
1390  if (SrcTy.isScalar()) {
1391    Observer.changingInstr(MI);
1392    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1393    Observer.changedInstr(MI);
1394    return Legalized;
1395  }
1396
1397  if (!SrcTy.isVector())
1398    return UnableToLegalize;
1399
1400  if (DstTy != SrcTy.getElementType())
1401    return UnableToLegalize;
1402
1403  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1404    return UnableToLegalize;
1405
1406  Observer.changingInstr(MI);
1407  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1408
1409  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1410                          Offset);
1411  widenScalarDst(MI, WideTy.getScalarType(), 0);
1412  Observer.changedInstr(MI);
1413  return Legalized;
1414}
1415
1416LegalizerHelper::LegalizeResult
1417LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1418                                   LLT WideTy) {
1419  if (TypeIdx != 0)
1420    return UnableToLegalize;
1421  Observer.changingInstr(MI);
1422  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1423  widenScalarDst(MI, WideTy);
1424  Observer.changedInstr(MI);
1425  return Legalized;
1426}
1427
1428LegalizerHelper::LegalizeResult
1429LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1430  MIRBuilder.setInstr(MI);
1431
1432  switch (MI.getOpcode()) {
1433  default:
1434    return UnableToLegalize;
1435  case TargetOpcode::G_EXTRACT:
1436    return widenScalarExtract(MI, TypeIdx, WideTy);
1437  case TargetOpcode::G_INSERT:
1438    return widenScalarInsert(MI, TypeIdx, WideTy);
1439  case TargetOpcode::G_MERGE_VALUES:
1440    return widenScalarMergeValues(MI, TypeIdx, WideTy);
1441  case TargetOpcode::G_UNMERGE_VALUES:
1442    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1443  case TargetOpcode::G_UADDO:
1444  case TargetOpcode::G_USUBO: {
1445    if (TypeIdx == 1)
1446      return UnableToLegalize; // TODO
1447    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1448                                         {MI.getOperand(2).getReg()});
1449    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1450                                         {MI.getOperand(3).getReg()});
1451    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1452                          ? TargetOpcode::G_ADD
1453                          : TargetOpcode::G_SUB;
1454    // Do the arithmetic in the larger type.
1455    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1456    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1457    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
1458    auto AndOp = MIRBuilder.buildInstr(
1459        TargetOpcode::G_AND, {WideTy},
1460        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1461    // There is no overflow if the AndOp is the same as NewOp.
1462    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
1463                         AndOp);
1464    // Now trunc the NewOp to the original result.
1465    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1466    MI.eraseFromParent();
1467    return Legalized;
1468  }
1469  case TargetOpcode::G_CTTZ:
1470  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1471  case TargetOpcode::G_CTLZ:
1472  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1473  case TargetOpcode::G_CTPOP: {
1474    if (TypeIdx == 0) {
1475      Observer.changingInstr(MI);
1476      widenScalarDst(MI, WideTy, 0);
1477      Observer.changedInstr(MI);
1478      return Legalized;
1479    }
1480
1481    Register SrcReg = MI.getOperand(1).getReg();
1482
1483    // First ZEXT the input.
1484    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1485    LLT CurTy = MRI.getType(SrcReg);
1486    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1487      // The count is the same in the larger type except if the original
1488      // value was zero.  This can be handled by setting the bit just off
1489      // the top of the original type.
1490      auto TopBit =
1491          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1492      MIBSrc = MIRBuilder.buildOr(
1493        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1494    }
1495
1496    // Perform the operation at the larger size.
1497    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1498    // This is already the correct result for CTPOP and CTTZs
1499    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1500        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1501      // The correct result is NewOp - (Difference in widety and current ty).
1502      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1503      MIBNewOp = MIRBuilder.buildInstr(
1504          TargetOpcode::G_SUB, {WideTy},
1505          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1506    }
1507
1508    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1509    MI.eraseFromParent();
1510    return Legalized;
1511  }
1512  case TargetOpcode::G_BSWAP: {
1513    Observer.changingInstr(MI);
1514    Register DstReg = MI.getOperand(0).getReg();
1515
1516    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1517    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1518    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1519    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1520
1521    MI.getOperand(0).setReg(DstExt);
1522
1523    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1524
1525    LLT Ty = MRI.getType(DstReg);
1526    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1527    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1528    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1529      .addDef(ShrReg)
1530      .addUse(DstExt)
1531      .addUse(ShiftAmtReg);
1532
1533    MIRBuilder.buildTrunc(DstReg, ShrReg);
1534    Observer.changedInstr(MI);
1535    return Legalized;
1536  }
1537  case TargetOpcode::G_BITREVERSE: {
1538    Observer.changingInstr(MI);
1539
1540    Register DstReg = MI.getOperand(0).getReg();
1541    LLT Ty = MRI.getType(DstReg);
1542    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1543
1544    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1545    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1546    MI.getOperand(0).setReg(DstExt);
1547    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1548
1549    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1550    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1551    MIRBuilder.buildTrunc(DstReg, Shift);
1552    Observer.changedInstr(MI);
1553    return Legalized;
1554  }
1555  case TargetOpcode::G_ADD:
1556  case TargetOpcode::G_AND:
1557  case TargetOpcode::G_MUL:
1558  case TargetOpcode::G_OR:
1559  case TargetOpcode::G_XOR:
1560  case TargetOpcode::G_SUB:
1561    // Perform operation at larger width (any extension is fines here, high bits
1562    // don't affect the result) and then truncate the result back to the
1563    // original type.
1564    Observer.changingInstr(MI);
1565    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1566    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1567    widenScalarDst(MI, WideTy);
1568    Observer.changedInstr(MI);
1569    return Legalized;
1570
1571  case TargetOpcode::G_SHL:
1572    Observer.changingInstr(MI);
1573
1574    if (TypeIdx == 0) {
1575      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1576      widenScalarDst(MI, WideTy);
1577    } else {
1578      assert(TypeIdx == 1);
1579      // The "number of bits to shift" operand must preserve its value as an
1580      // unsigned integer:
1581      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1582    }
1583
1584    Observer.changedInstr(MI);
1585    return Legalized;
1586
1587  case TargetOpcode::G_SDIV:
1588  case TargetOpcode::G_SREM:
1589  case TargetOpcode::G_SMIN:
1590  case TargetOpcode::G_SMAX:
1591    Observer.changingInstr(MI);
1592    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1593    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1594    widenScalarDst(MI, WideTy);
1595    Observer.changedInstr(MI);
1596    return Legalized;
1597
1598  case TargetOpcode::G_ASHR:
1599  case TargetOpcode::G_LSHR:
1600    Observer.changingInstr(MI);
1601
1602    if (TypeIdx == 0) {
1603      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1604        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1605
1606      widenScalarSrc(MI, WideTy, 1, CvtOp);
1607      widenScalarDst(MI, WideTy);
1608    } else {
1609      assert(TypeIdx == 1);
1610      // The "number of bits to shift" operand must preserve its value as an
1611      // unsigned integer:
1612      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1613    }
1614
1615    Observer.changedInstr(MI);
1616    return Legalized;
1617  case TargetOpcode::G_UDIV:
1618  case TargetOpcode::G_UREM:
1619  case TargetOpcode::G_UMIN:
1620  case TargetOpcode::G_UMAX:
1621    Observer.changingInstr(MI);
1622    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1623    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1624    widenScalarDst(MI, WideTy);
1625    Observer.changedInstr(MI);
1626    return Legalized;
1627
1628  case TargetOpcode::G_SELECT:
1629    Observer.changingInstr(MI);
1630    if (TypeIdx == 0) {
1631      // Perform operation at larger width (any extension is fine here, high
1632      // bits don't affect the result) and then truncate the result back to the
1633      // original type.
1634      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1635      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1636      widenScalarDst(MI, WideTy);
1637    } else {
1638      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1639      // Explicit extension is required here since high bits affect the result.
1640      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1641    }
1642    Observer.changedInstr(MI);
1643    return Legalized;
1644
1645  case TargetOpcode::G_FPTOSI:
1646  case TargetOpcode::G_FPTOUI:
1647    Observer.changingInstr(MI);
1648
1649    if (TypeIdx == 0)
1650      widenScalarDst(MI, WideTy);
1651    else
1652      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1653
1654    Observer.changedInstr(MI);
1655    return Legalized;
1656  case TargetOpcode::G_SITOFP:
1657    if (TypeIdx != 1)
1658      return UnableToLegalize;
1659    Observer.changingInstr(MI);
1660    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1661    Observer.changedInstr(MI);
1662    return Legalized;
1663
1664  case TargetOpcode::G_UITOFP:
1665    if (TypeIdx != 1)
1666      return UnableToLegalize;
1667    Observer.changingInstr(MI);
1668    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1669    Observer.changedInstr(MI);
1670    return Legalized;
1671
1672  case TargetOpcode::G_LOAD:
1673  case TargetOpcode::G_SEXTLOAD:
1674  case TargetOpcode::G_ZEXTLOAD:
1675    Observer.changingInstr(MI);
1676    widenScalarDst(MI, WideTy);
1677    Observer.changedInstr(MI);
1678    return Legalized;
1679
1680  case TargetOpcode::G_STORE: {
1681    if (TypeIdx != 0)
1682      return UnableToLegalize;
1683
1684    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1685    if (!isPowerOf2_32(Ty.getSizeInBits()))
1686      return UnableToLegalize;
1687
1688    Observer.changingInstr(MI);
1689
1690    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1691      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1692    widenScalarSrc(MI, WideTy, 0, ExtType);
1693
1694    Observer.changedInstr(MI);
1695    return Legalized;
1696  }
1697  case TargetOpcode::G_CONSTANT: {
1698    MachineOperand &SrcMO = MI.getOperand(1);
1699    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1700    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1701        MRI.getType(MI.getOperand(0).getReg()));
1702    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1703            ExtOpc == TargetOpcode::G_ANYEXT) &&
1704           "Illegal Extend");
1705    const APInt &SrcVal = SrcMO.getCImm()->getValue();
1706    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1707                           ? SrcVal.sext(WideTy.getSizeInBits())
1708                           : SrcVal.zext(WideTy.getSizeInBits());
1709    Observer.changingInstr(MI);
1710    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1711
1712    widenScalarDst(MI, WideTy);
1713    Observer.changedInstr(MI);
1714    return Legalized;
1715  }
1716  case TargetOpcode::G_FCONSTANT: {
1717    MachineOperand &SrcMO = MI.getOperand(1);
1718    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1719    APFloat Val = SrcMO.getFPImm()->getValueAPF();
1720    bool LosesInfo;
1721    switch (WideTy.getSizeInBits()) {
1722    case 32:
1723      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1724                  &LosesInfo);
1725      break;
1726    case 64:
1727      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1728                  &LosesInfo);
1729      break;
1730    default:
1731      return UnableToLegalize;
1732    }
1733
1734    assert(!LosesInfo && "extend should always be lossless");
1735
1736    Observer.changingInstr(MI);
1737    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1738
1739    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1740    Observer.changedInstr(MI);
1741    return Legalized;
1742  }
1743  case TargetOpcode::G_IMPLICIT_DEF: {
1744    Observer.changingInstr(MI);
1745    widenScalarDst(MI, WideTy);
1746    Observer.changedInstr(MI);
1747    return Legalized;
1748  }
1749  case TargetOpcode::G_BRCOND:
1750    Observer.changingInstr(MI);
1751    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1752    Observer.changedInstr(MI);
1753    return Legalized;
1754
1755  case TargetOpcode::G_FCMP:
1756    Observer.changingInstr(MI);
1757    if (TypeIdx == 0)
1758      widenScalarDst(MI, WideTy);
1759    else {
1760      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1761      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1762    }
1763    Observer.changedInstr(MI);
1764    return Legalized;
1765
1766  case TargetOpcode::G_ICMP:
1767    Observer.changingInstr(MI);
1768    if (TypeIdx == 0)
1769      widenScalarDst(MI, WideTy);
1770    else {
1771      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1772                               MI.getOperand(1).getPredicate()))
1773                               ? TargetOpcode::G_SEXT
1774                               : TargetOpcode::G_ZEXT;
1775      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1776      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1777    }
1778    Observer.changedInstr(MI);
1779    return Legalized;
1780
1781  case TargetOpcode::G_PTR_ADD:
1782    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
1783    Observer.changingInstr(MI);
1784    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1785    Observer.changedInstr(MI);
1786    return Legalized;
1787
1788  case TargetOpcode::G_PHI: {
1789    assert(TypeIdx == 0 && "Expecting only Idx 0");
1790
1791    Observer.changingInstr(MI);
1792    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1793      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1794      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1795      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1796    }
1797
1798    MachineBasicBlock &MBB = *MI.getParent();
1799    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1800    widenScalarDst(MI, WideTy);
1801    Observer.changedInstr(MI);
1802    return Legalized;
1803  }
1804  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1805    if (TypeIdx == 0) {
1806      Register VecReg = MI.getOperand(1).getReg();
1807      LLT VecTy = MRI.getType(VecReg);
1808      Observer.changingInstr(MI);
1809
1810      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1811                                     WideTy.getSizeInBits()),
1812                     1, TargetOpcode::G_SEXT);
1813
1814      widenScalarDst(MI, WideTy, 0);
1815      Observer.changedInstr(MI);
1816      return Legalized;
1817    }
1818
1819    if (TypeIdx != 2)
1820      return UnableToLegalize;
1821    Observer.changingInstr(MI);
1822    // TODO: Probably should be zext
1823    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1824    Observer.changedInstr(MI);
1825    return Legalized;
1826  }
1827  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1828    if (TypeIdx == 1) {
1829      Observer.changingInstr(MI);
1830
1831      Register VecReg = MI.getOperand(1).getReg();
1832      LLT VecTy = MRI.getType(VecReg);
1833      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
1834
1835      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
1836      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1837      widenScalarDst(MI, WideVecTy, 0);
1838      Observer.changedInstr(MI);
1839      return Legalized;
1840    }
1841
1842    if (TypeIdx == 2) {
1843      Observer.changingInstr(MI);
1844      // TODO: Probably should be zext
1845      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
1846      Observer.changedInstr(MI);
1847    }
1848
1849    return Legalized;
1850  }
1851  case TargetOpcode::G_FADD:
1852  case TargetOpcode::G_FMUL:
1853  case TargetOpcode::G_FSUB:
1854  case TargetOpcode::G_FMA:
1855  case TargetOpcode::G_FMAD:
1856  case TargetOpcode::G_FNEG:
1857  case TargetOpcode::G_FABS:
1858  case TargetOpcode::G_FCANONICALIZE:
1859  case TargetOpcode::G_FMINNUM:
1860  case TargetOpcode::G_FMAXNUM:
1861  case TargetOpcode::G_FMINNUM_IEEE:
1862  case TargetOpcode::G_FMAXNUM_IEEE:
1863  case TargetOpcode::G_FMINIMUM:
1864  case TargetOpcode::G_FMAXIMUM:
1865  case TargetOpcode::G_FDIV:
1866  case TargetOpcode::G_FREM:
1867  case TargetOpcode::G_FCEIL:
1868  case TargetOpcode::G_FFLOOR:
1869  case TargetOpcode::G_FCOS:
1870  case TargetOpcode::G_FSIN:
1871  case TargetOpcode::G_FLOG10:
1872  case TargetOpcode::G_FLOG:
1873  case TargetOpcode::G_FLOG2:
1874  case TargetOpcode::G_FRINT:
1875  case TargetOpcode::G_FNEARBYINT:
1876  case TargetOpcode::G_FSQRT:
1877  case TargetOpcode::G_FEXP:
1878  case TargetOpcode::G_FEXP2:
1879  case TargetOpcode::G_FPOW:
1880  case TargetOpcode::G_INTRINSIC_TRUNC:
1881  case TargetOpcode::G_INTRINSIC_ROUND:
1882    assert(TypeIdx == 0);
1883    Observer.changingInstr(MI);
1884
1885    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1886      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1887
1888    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1889    Observer.changedInstr(MI);
1890    return Legalized;
1891  case TargetOpcode::G_INTTOPTR:
1892    if (TypeIdx != 1)
1893      return UnableToLegalize;
1894
1895    Observer.changingInstr(MI);
1896    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1897    Observer.changedInstr(MI);
1898    return Legalized;
1899  case TargetOpcode::G_PTRTOINT:
1900    if (TypeIdx != 0)
1901      return UnableToLegalize;
1902
1903    Observer.changingInstr(MI);
1904    widenScalarDst(MI, WideTy, 0);
1905    Observer.changedInstr(MI);
1906    return Legalized;
1907  case TargetOpcode::G_BUILD_VECTOR: {
1908    Observer.changingInstr(MI);
1909
1910    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1911    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1912      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1913
1914    // Avoid changing the result vector type if the source element type was
1915    // requested.
1916    if (TypeIdx == 1) {
1917      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1918      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1919    } else {
1920      widenScalarDst(MI, WideTy, 0);
1921    }
1922
1923    Observer.changedInstr(MI);
1924    return Legalized;
1925  }
1926  case TargetOpcode::G_SEXT_INREG:
1927    if (TypeIdx != 0)
1928      return UnableToLegalize;
1929
1930    Observer.changingInstr(MI);
1931    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1932    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
1933    Observer.changedInstr(MI);
1934    return Legalized;
1935  }
1936}
1937
1938LegalizerHelper::LegalizeResult
1939LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1940  using namespace TargetOpcode;
1941  MIRBuilder.setInstr(MI);
1942
1943  switch(MI.getOpcode()) {
1944  default:
1945    return UnableToLegalize;
1946  case TargetOpcode::G_SREM:
1947  case TargetOpcode::G_UREM: {
1948    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1949    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1950        .addDef(QuotReg)
1951        .addUse(MI.getOperand(1).getReg())
1952        .addUse(MI.getOperand(2).getReg());
1953
1954    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1955    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1956    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1957                        ProdReg);
1958    MI.eraseFromParent();
1959    return Legalized;
1960  }
1961  case TargetOpcode::G_SADDO:
1962  case TargetOpcode::G_SSUBO:
1963    return lowerSADDO_SSUBO(MI);
1964  case TargetOpcode::G_SMULO:
1965  case TargetOpcode::G_UMULO: {
1966    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1967    // result.
1968    Register Res = MI.getOperand(0).getReg();
1969    Register Overflow = MI.getOperand(1).getReg();
1970    Register LHS = MI.getOperand(2).getReg();
1971    Register RHS = MI.getOperand(3).getReg();
1972
1973    MIRBuilder.buildMul(Res, LHS, RHS);
1974
1975    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1976                          ? TargetOpcode::G_SMULH
1977                          : TargetOpcode::G_UMULH;
1978
1979    Register HiPart = MRI.createGenericVirtualRegister(Ty);
1980    MIRBuilder.buildInstr(Opcode)
1981      .addDef(HiPart)
1982      .addUse(LHS)
1983      .addUse(RHS);
1984
1985    Register Zero = MRI.createGenericVirtualRegister(Ty);
1986    MIRBuilder.buildConstant(Zero, 0);
1987
1988    // For *signed* multiply, overflow is detected by checking:
1989    // (hi != (lo >> bitwidth-1))
1990    if (Opcode == TargetOpcode::G_SMULH) {
1991      Register Shifted = MRI.createGenericVirtualRegister(Ty);
1992      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1993      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1994      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1995        .addDef(Shifted)
1996        .addUse(Res)
1997        .addUse(ShiftAmt);
1998      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1999    } else {
2000      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2001    }
2002    MI.eraseFromParent();
2003    return Legalized;
2004  }
2005  case TargetOpcode::G_FNEG: {
2006    // TODO: Handle vector types once we are able to
2007    // represent them.
2008    if (Ty.isVector())
2009      return UnableToLegalize;
2010    Register Res = MI.getOperand(0).getReg();
2011    Type *ZeroTy;
2012    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2013    switch (Ty.getSizeInBits()) {
2014    case 16:
2015      ZeroTy = Type::getHalfTy(Ctx);
2016      break;
2017    case 32:
2018      ZeroTy = Type::getFloatTy(Ctx);
2019      break;
2020    case 64:
2021      ZeroTy = Type::getDoubleTy(Ctx);
2022      break;
2023    case 128:
2024      ZeroTy = Type::getFP128Ty(Ctx);
2025      break;
2026    default:
2027      llvm_unreachable("unexpected floating-point type");
2028    }
2029    ConstantFP &ZeroForNegation =
2030        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2031    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2032    Register SubByReg = MI.getOperand(1).getReg();
2033    Register ZeroReg = Zero->getOperand(0).getReg();
2034    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
2035                          MI.getFlags());
2036    MI.eraseFromParent();
2037    return Legalized;
2038  }
2039  case TargetOpcode::G_FSUB: {
2040    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2041    // First, check if G_FNEG is marked as Lower. If so, we may
2042    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2043    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2044      return UnableToLegalize;
2045    Register Res = MI.getOperand(0).getReg();
2046    Register LHS = MI.getOperand(1).getReg();
2047    Register RHS = MI.getOperand(2).getReg();
2048    Register Neg = MRI.createGenericVirtualRegister(Ty);
2049    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
2050    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
2051    MI.eraseFromParent();
2052    return Legalized;
2053  }
2054  case TargetOpcode::G_FMAD:
2055    return lowerFMad(MI);
2056  case TargetOpcode::G_INTRINSIC_ROUND:
2057    return lowerIntrinsicRound(MI);
2058  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2059    Register OldValRes = MI.getOperand(0).getReg();
2060    Register SuccessRes = MI.getOperand(1).getReg();
2061    Register Addr = MI.getOperand(2).getReg();
2062    Register CmpVal = MI.getOperand(3).getReg();
2063    Register NewVal = MI.getOperand(4).getReg();
2064    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2065                                  **MI.memoperands_begin());
2066    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2067    MI.eraseFromParent();
2068    return Legalized;
2069  }
2070  case TargetOpcode::G_LOAD:
2071  case TargetOpcode::G_SEXTLOAD:
2072  case TargetOpcode::G_ZEXTLOAD: {
2073    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2074    Register DstReg = MI.getOperand(0).getReg();
2075    Register PtrReg = MI.getOperand(1).getReg();
2076    LLT DstTy = MRI.getType(DstReg);
2077    auto &MMO = **MI.memoperands_begin();
2078
2079    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2080      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2081        // This load needs splitting into power of 2 sized loads.
2082        if (DstTy.isVector())
2083          return UnableToLegalize;
2084        if (isPowerOf2_32(DstTy.getSizeInBits()))
2085          return UnableToLegalize; // Don't know what we're being asked to do.
2086
2087        // Our strategy here is to generate anyextending loads for the smaller
2088        // types up to next power-2 result type, and then combine the two larger
2089        // result values together, before truncating back down to the non-pow-2
2090        // type.
2091        // E.g. v1 = i24 load =>
2092        // v2 = i32 load (2 byte)
2093        // v3 = i32 load (1 byte)
2094        // v4 = i32 shl v3, 16
2095        // v5 = i32 or v4, v2
2096        // v1 = i24 trunc v5
2097        // By doing this we generate the correct truncate which should get
2098        // combined away as an artifact with a matching extend.
2099        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2100        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2101
2102        MachineFunction &MF = MIRBuilder.getMF();
2103        MachineMemOperand *LargeMMO =
2104            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2105        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2106            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2107
2108        LLT PtrTy = MRI.getType(PtrReg);
2109        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2110        LLT AnyExtTy = LLT::scalar(AnyExtSize);
2111        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2112        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2113        auto LargeLoad =
2114            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
2115
2116        auto OffsetCst =
2117            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2118        Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2119        auto SmallPtr =
2120            MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2121        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2122                                              *SmallMMO);
2123
2124        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2125        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2126        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2127        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2128        MI.eraseFromParent();
2129        return Legalized;
2130      }
2131      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2132      MI.eraseFromParent();
2133      return Legalized;
2134    }
2135
2136    if (DstTy.isScalar()) {
2137      Register TmpReg =
2138          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2139      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2140      switch (MI.getOpcode()) {
2141      default:
2142        llvm_unreachable("Unexpected opcode");
2143      case TargetOpcode::G_LOAD:
2144        MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2145        break;
2146      case TargetOpcode::G_SEXTLOAD:
2147        MIRBuilder.buildSExt(DstReg, TmpReg);
2148        break;
2149      case TargetOpcode::G_ZEXTLOAD:
2150        MIRBuilder.buildZExt(DstReg, TmpReg);
2151        break;
2152      }
2153      MI.eraseFromParent();
2154      return Legalized;
2155    }
2156
2157    return UnableToLegalize;
2158  }
2159  case TargetOpcode::G_STORE: {
2160    // Lower a non-power of 2 store into multiple pow-2 stores.
2161    // E.g. split an i24 store into an i16 store + i8 store.
2162    // We do this by first extending the stored value to the next largest power
2163    // of 2 type, and then using truncating stores to store the components.
2164    // By doing this, likewise with G_LOAD, generate an extend that can be
2165    // artifact-combined away instead of leaving behind extracts.
2166    Register SrcReg = MI.getOperand(0).getReg();
2167    Register PtrReg = MI.getOperand(1).getReg();
2168    LLT SrcTy = MRI.getType(SrcReg);
2169    MachineMemOperand &MMO = **MI.memoperands_begin();
2170    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2171      return UnableToLegalize;
2172    if (SrcTy.isVector())
2173      return UnableToLegalize;
2174    if (isPowerOf2_32(SrcTy.getSizeInBits()))
2175      return UnableToLegalize; // Don't know what we're being asked to do.
2176
2177    // Extend to the next pow-2.
2178    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2179    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2180
2181    // Obtain the smaller value by shifting away the larger value.
2182    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2183    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2184    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2185    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2186
2187    // Generate the PtrAdd and truncating stores.
2188    LLT PtrTy = MRI.getType(PtrReg);
2189    auto OffsetCst =
2190        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2191    Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2192    auto SmallPtr =
2193        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2194
2195    MachineFunction &MF = MIRBuilder.getMF();
2196    MachineMemOperand *LargeMMO =
2197        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2198    MachineMemOperand *SmallMMO =
2199        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2200    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2201    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2202    MI.eraseFromParent();
2203    return Legalized;
2204  }
2205  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2206  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2207  case TargetOpcode::G_CTLZ:
2208  case TargetOpcode::G_CTTZ:
2209  case TargetOpcode::G_CTPOP:
2210    return lowerBitCount(MI, TypeIdx, Ty);
2211  case G_UADDO: {
2212    Register Res = MI.getOperand(0).getReg();
2213    Register CarryOut = MI.getOperand(1).getReg();
2214    Register LHS = MI.getOperand(2).getReg();
2215    Register RHS = MI.getOperand(3).getReg();
2216
2217    MIRBuilder.buildAdd(Res, LHS, RHS);
2218    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2219
2220    MI.eraseFromParent();
2221    return Legalized;
2222  }
2223  case G_UADDE: {
2224    Register Res = MI.getOperand(0).getReg();
2225    Register CarryOut = MI.getOperand(1).getReg();
2226    Register LHS = MI.getOperand(2).getReg();
2227    Register RHS = MI.getOperand(3).getReg();
2228    Register CarryIn = MI.getOperand(4).getReg();
2229
2230    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2231    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2232
2233    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2234    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2235    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2236    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2237
2238    MI.eraseFromParent();
2239    return Legalized;
2240  }
2241  case G_USUBO: {
2242    Register Res = MI.getOperand(0).getReg();
2243    Register BorrowOut = MI.getOperand(1).getReg();
2244    Register LHS = MI.getOperand(2).getReg();
2245    Register RHS = MI.getOperand(3).getReg();
2246
2247    MIRBuilder.buildSub(Res, LHS, RHS);
2248    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2249
2250    MI.eraseFromParent();
2251    return Legalized;
2252  }
2253  case G_USUBE: {
2254    Register Res = MI.getOperand(0).getReg();
2255    Register BorrowOut = MI.getOperand(1).getReg();
2256    Register LHS = MI.getOperand(2).getReg();
2257    Register RHS = MI.getOperand(3).getReg();
2258    Register BorrowIn = MI.getOperand(4).getReg();
2259
2260    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2261    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2262    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2263    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2264
2265    MIRBuilder.buildSub(TmpRes, LHS, RHS);
2266    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2267    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2268    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2269    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
2270    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2271
2272    MI.eraseFromParent();
2273    return Legalized;
2274  }
2275  case G_UITOFP:
2276    return lowerUITOFP(MI, TypeIdx, Ty);
2277  case G_SITOFP:
2278    return lowerSITOFP(MI, TypeIdx, Ty);
2279  case G_FPTOUI:
2280    return lowerFPTOUI(MI, TypeIdx, Ty);
2281  case G_SMIN:
2282  case G_SMAX:
2283  case G_UMIN:
2284  case G_UMAX:
2285    return lowerMinMax(MI, TypeIdx, Ty);
2286  case G_FCOPYSIGN:
2287    return lowerFCopySign(MI, TypeIdx, Ty);
2288  case G_FMINNUM:
2289  case G_FMAXNUM:
2290    return lowerFMinNumMaxNum(MI);
2291  case G_UNMERGE_VALUES:
2292    return lowerUnmergeValues(MI);
2293  case TargetOpcode::G_SEXT_INREG: {
2294    assert(MI.getOperand(2).isImm() && "Expected immediate");
2295    int64_t SizeInBits = MI.getOperand(2).getImm();
2296
2297    Register DstReg = MI.getOperand(0).getReg();
2298    Register SrcReg = MI.getOperand(1).getReg();
2299    LLT DstTy = MRI.getType(DstReg);
2300    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2301
2302    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2303    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
2304    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
2305    MI.eraseFromParent();
2306    return Legalized;
2307  }
2308  case G_SHUFFLE_VECTOR:
2309    return lowerShuffleVector(MI);
2310  case G_DYN_STACKALLOC:
2311    return lowerDynStackAlloc(MI);
2312  case G_EXTRACT:
2313    return lowerExtract(MI);
2314  case G_INSERT:
2315    return lowerInsert(MI);
2316  case G_BSWAP:
2317    return lowerBswap(MI);
2318  case G_BITREVERSE:
2319    return lowerBitreverse(MI);
2320  case G_READ_REGISTER:
2321    return lowerReadRegister(MI);
2322  }
2323}
2324
2325LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2326    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2327  SmallVector<Register, 2> DstRegs;
2328
2329  unsigned NarrowSize = NarrowTy.getSizeInBits();
2330  Register DstReg = MI.getOperand(0).getReg();
2331  unsigned Size = MRI.getType(DstReg).getSizeInBits();
2332  int NumParts = Size / NarrowSize;
2333  // FIXME: Don't know how to handle the situation where the small vectors
2334  // aren't all the same size yet.
2335  if (Size % NarrowSize != 0)
2336    return UnableToLegalize;
2337
2338  for (int i = 0; i < NumParts; ++i) {
2339    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2340    MIRBuilder.buildUndef(TmpReg);
2341    DstRegs.push_back(TmpReg);
2342  }
2343
2344  if (NarrowTy.isVector())
2345    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2346  else
2347    MIRBuilder.buildBuildVector(DstReg, DstRegs);
2348
2349  MI.eraseFromParent();
2350  return Legalized;
2351}
2352
// Split a vector operation with 1-3 source operands (all of the destination
// type) into NarrowTy-sized pieces, performing the operation piecewise and
// reassembling the result. Handles an uneven breakdown only when the leftover
// is exactly one element; that case is lowered with an extract/op/insert
// sequence instead of a concat.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  // All operands after the single def are sources.
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Uneven breakdown: accumulate the result by repeatedly inserting each
    // computed piece into an undef-initialized value of the full type.
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    // The final insert writes directly into DstReg, completing the value.
    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: unmerge each source into NumParts pieces, apply the
  // operation per piece, and concatenate the partial results.
  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  // Reassemble: pieces may themselves be vectors (concat) or scalars
  // (build_vector).
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2445
2446// Handle splitting vector operations which need to have the same number of
2447// elements in each type index, but each type index may have a different element
2448// type.
2449//
2450// e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2451//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2452//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2453//
// Also handles some irregular breakdown cases, e.g.
//       <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2456//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2457//             s64 = G_SHL s64, s32
2458LegalizerHelper::LegalizeResult
2459LegalizerHelper::fewerElementsVectorMultiEltType(
2460  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2461  if (TypeIdx != 0)
2462    return UnableToLegalize;
2463
2464  const LLT NarrowTy0 = NarrowTyArg;
2465  const unsigned NewNumElts =
2466      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2467
2468  const Register DstReg = MI.getOperand(0).getReg();
2469  LLT DstTy = MRI.getType(DstReg);
2470  LLT LeftoverTy0;
2471
2472  // All of the operands need to have the same number of elements, so if we can
2473  // determine a type breakdown for the result type, we can for all of the
2474  // source types.
2475  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2476  if (NumParts < 0)
2477    return UnableToLegalize;
2478
2479  SmallVector<MachineInstrBuilder, 4> NewInsts;
2480
2481  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2482  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2483
2484  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2485    LLT LeftoverTy;
2486    Register SrcReg = MI.getOperand(I).getReg();
2487    LLT SrcTyI = MRI.getType(SrcReg);
2488    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2489    LLT LeftoverTyI;
2490
2491    // Split this operand into the requested typed registers, and any leftover
2492    // required to reproduce the original type.
2493    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2494                      LeftoverRegs))
2495      return UnableToLegalize;
2496
2497    if (I == 1) {
2498      // For the first operand, create an instruction for each part and setup
2499      // the result.
2500      for (Register PartReg : PartRegs) {
2501        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2502        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2503                               .addDef(PartDstReg)
2504                               .addUse(PartReg));
2505        DstRegs.push_back(PartDstReg);
2506      }
2507
2508      for (Register LeftoverReg : LeftoverRegs) {
2509        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2510        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2511                               .addDef(PartDstReg)
2512                               .addUse(LeftoverReg));
2513        LeftoverDstRegs.push_back(PartDstReg);
2514      }
2515    } else {
2516      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2517
2518      // Add the newly created operand splits to the existing instructions. The
2519      // odd-sized pieces are ordered after the requested NarrowTyArg sized
2520      // pieces.
2521      unsigned InstCount = 0;
2522      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2523        NewInsts[InstCount++].addUse(PartRegs[J]);
2524      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2525        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2526    }
2527
2528    PartRegs.clear();
2529    LeftoverRegs.clear();
2530  }
2531
2532  // Insert the newly built operations and rebuild the result register.
2533  for (auto &MIB : NewInsts)
2534    MIRBuilder.insertInstr(MIB);
2535
2536  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2537
2538  MI.eraseFromParent();
2539  return Legalized;
2540}
2541
2542LegalizerHelper::LegalizeResult
2543LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2544                                          LLT NarrowTy) {
2545  if (TypeIdx != 0)
2546    return UnableToLegalize;
2547
2548  Register DstReg = MI.getOperand(0).getReg();
2549  Register SrcReg = MI.getOperand(1).getReg();
2550  LLT DstTy = MRI.getType(DstReg);
2551  LLT SrcTy = MRI.getType(SrcReg);
2552
2553  LLT NarrowTy0 = NarrowTy;
2554  LLT NarrowTy1;
2555  unsigned NumParts;
2556
2557  if (NarrowTy.isVector()) {
2558    // Uneven breakdown not handled.
2559    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2560    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2561      return UnableToLegalize;
2562
2563    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2564  } else {
2565    NumParts = DstTy.getNumElements();
2566    NarrowTy1 = SrcTy.getElementType();
2567  }
2568
2569  SmallVector<Register, 4> SrcRegs, DstRegs;
2570  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2571
2572  for (unsigned I = 0; I < NumParts; ++I) {
2573    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2574    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2575      .addDef(DstReg)
2576      .addUse(SrcRegs[I]);
2577
2578    NewInst->setFlags(MI.getFlags());
2579    DstRegs.push_back(DstReg);
2580  }
2581
2582  if (NarrowTy.isVector())
2583    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2584  else
2585    MIRBuilder.buildBuildVector(DstReg, DstRegs);
2586
2587  MI.eraseFromParent();
2588  return Legalized;
2589}
2590
2591LegalizerHelper::LegalizeResult
2592LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
2593                                        LLT NarrowTy) {
2594  Register DstReg = MI.getOperand(0).getReg();
2595  Register Src0Reg = MI.getOperand(2).getReg();
2596  LLT DstTy = MRI.getType(DstReg);
2597  LLT SrcTy = MRI.getType(Src0Reg);
2598
2599  unsigned NumParts;
2600  LLT NarrowTy0, NarrowTy1;
2601
2602  if (TypeIdx == 0) {
2603    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2604    unsigned OldElts = DstTy.getNumElements();
2605
2606    NarrowTy0 = NarrowTy;
2607    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2608    NarrowTy1 = NarrowTy.isVector() ?
2609      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2610      SrcTy.getElementType();
2611
2612  } else {
2613    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2614    unsigned OldElts = SrcTy.getNumElements();
2615
2616    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2617      NarrowTy.getNumElements();
2618    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2619                            DstTy.getScalarSizeInBits());
2620    NarrowTy1 = NarrowTy;
2621  }
2622
2623  // FIXME: Don't know how to handle the situation where the small vectors
2624  // aren't all the same size yet.
2625  if (NarrowTy1.isVector() &&
2626      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2627    return UnableToLegalize;
2628
2629  CmpInst::Predicate Pred
2630    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2631
2632  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2633  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2634  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2635
2636  for (unsigned I = 0; I < NumParts; ++I) {
2637    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2638    DstRegs.push_back(DstReg);
2639
2640    if (MI.getOpcode() == TargetOpcode::G_ICMP)
2641      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2642    else {
2643      MachineInstr *NewCmp
2644        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2645      NewCmp->setFlags(MI.getFlags());
2646    }
2647  }
2648
2649  if (NarrowTy1.isVector())
2650    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2651  else
2652    MIRBuilder.buildBuildVector(DstReg, DstRegs);
2653
2654  MI.eraseFromParent();
2655  return Legalized;
2656}
2657
2658LegalizerHelper::LegalizeResult
2659LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2660                                           LLT NarrowTy) {
2661  Register DstReg = MI.getOperand(0).getReg();
2662  Register CondReg = MI.getOperand(1).getReg();
2663
2664  unsigned NumParts = 0;
2665  LLT NarrowTy0, NarrowTy1;
2666
2667  LLT DstTy = MRI.getType(DstReg);
2668  LLT CondTy = MRI.getType(CondReg);
2669  unsigned Size = DstTy.getSizeInBits();
2670
2671  assert(TypeIdx == 0 || CondTy.isVector());
2672
2673  if (TypeIdx == 0) {
2674    NarrowTy0 = NarrowTy;
2675    NarrowTy1 = CondTy;
2676
2677    unsigned NarrowSize = NarrowTy0.getSizeInBits();
2678    // FIXME: Don't know how to handle the situation where the small vectors
2679    // aren't all the same size yet.
2680    if (Size % NarrowSize != 0)
2681      return UnableToLegalize;
2682
2683    NumParts = Size / NarrowSize;
2684
2685    // Need to break down the condition type
2686    if (CondTy.isVector()) {
2687      if (CondTy.getNumElements() == NumParts)
2688        NarrowTy1 = CondTy.getElementType();
2689      else
2690        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2691                                CondTy.getScalarSizeInBits());
2692    }
2693  } else {
2694    NumParts = CondTy.getNumElements();
2695    if (NarrowTy.isVector()) {
2696      // TODO: Handle uneven breakdown.
2697      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2698        return UnableToLegalize;
2699
2700      return UnableToLegalize;
2701    } else {
2702      NarrowTy0 = DstTy.getElementType();
2703      NarrowTy1 = NarrowTy;
2704    }
2705  }
2706
2707  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2708  if (CondTy.isVector())
2709    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2710
2711  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2712  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2713
2714  for (unsigned i = 0; i < NumParts; ++i) {
2715    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2716    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2717                           Src1Regs[i], Src2Regs[i]);
2718    DstRegs.push_back(DstReg);
2719  }
2720
2721  if (NarrowTy0.isVector())
2722    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2723  else
2724    MIRBuilder.buildBuildVector(DstReg, DstRegs);
2725
2726  MI.eraseFromParent();
2727  return Legalized;
2728}
2729
// Break a vector G_PHI into NumParts + NumLeftover smaller phis. The narrow
// phis are created in the phi's own block first; the extraction code for each
// incoming value is then inserted at the end of the matching predecessor
// block, so the pieces dominate their uses.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first. The first NumParts phis
  // are NarrowTy; any remainder uses LeftoverTy.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Rebuild the original wide value from the narrow phi results, after the
  // phi group (at the first non-phi position).
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  // Phi operands come in (value, block) pairs starting at operand 1.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The extracts must be placed before the predecessor's terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized
    // pieces, matching the phi creation order above.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2796
// Narrow a G_UNMERGE_VALUES (TypeIdx 1 = source type) by first unmerging the
// source into GCD-typed pieces, then unmerging each piece into a group of the
// original destination registers.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  // Operands are NumDst defs followed by the single source.
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  // First level: unmerge the wide source into GCD-typed pieces.
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  // NOTE(review): assumes NumDst is a multiple of NumUnmerge; otherwise
  // trailing destination defs would be dropped below -- confirm the GCD type
  // computation guarantees this.
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // Second level: each GCD piece feeds PartsPerUnmerge of the original
  // destination registers.
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
2836
// Break a G_BUILD_VECTOR into NarrowTy sized sub-build-vectors, concatenated
// back together (with undef padding and a final G_EXTRACT when the element
// count does not divide evenly).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
                                                unsigned TypeIdx,
                                                LLT NarrowTy) {
  assert(TypeIdx == 0 && "not a vector type index");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  // Element type of the result, and therefore of the scalar sources.
  LLT SrcTy = DstTy.getElementType();

  int DstNumElts = DstTy.getNumElements();
  int NarrowNumElts = NarrowTy.getNumElements();
  // Number of NarrowTy pieces needed to cover DstTy, rounded up.
  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);

  SmallVector<Register, 8> ConcatOps;
  SmallVector<Register, 8> SubBuildVector;

  // Only needed when rounding up introduces padding elements.
  Register UndefReg;
  if (WidenedDstTy != DstTy)
    UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);

  // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
  // necessary.
  //
  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
  //   -> <2 x s16>
  //
  // %4:_(s16) = G_IMPLICIT_DEF
  // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
  // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
  // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
  // %3:_(<3 x s16>) = G_EXTRACT %7, 0
  for (int I = 0; I != NumConcat; ++I) {
    for (int J = 0; J != NarrowNumElts; ++J) {
      int SrcIdx = NarrowNumElts * I + J;

      if (SrcIdx < DstNumElts) {
        // Source scalars are operands 1..DstNumElts.
        Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
        SubBuildVector.push_back(SrcReg);
      } else
        SubBuildVector.push_back(UndefReg);
    }

    auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
    ConcatOps.push_back(BuildVec.getReg(0));
    SubBuildVector.clear();
  }

  // If padding was added, extract the original width back out of the wider
  // concatenated result.
  if (DstTy == WidenedDstTy)
    MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
  else {
    auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
    MIRBuilder.buildExtract(DstReg, Concat, 0);
  }

  MI.eraseFromParent();
  return Legalized;
}
2895
// Break a wide G_LOAD/G_STORE into NarrowTy sized memory accesses (plus
// leftover pieces for an uneven breakdown), advancing the pointer for each
// piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load we only need the breakdown counts; the narrow result
    // registers are created inside splitTypePieces below.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the value to be stored up front.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts remains -1 if the breakdown failed.
  if (NumParts == -1)
    return UnableToLegalize;

  // Offsets are materialized in the pointer's index-sized integer type.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      // NOTE(review): assumes each piece is a whole number of bytes -- confirm
      // callers never request sub-byte pieces.
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Slice a memory operand for this sub-access out of the original one.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For loads, reassemble the original wide value from the narrow pieces.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
2982
// Top-level dispatcher for the FewerElements (vector narrowing) legalize
// action: route each opcode to the breakdown strategy that matches its
// operand structure.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Operations where the result and every source share a single type index.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have different element types but need the
  // same element count per type index (e.g. shifts with a narrower amount).
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: single source, possibly of a different element type.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
3083
// Expand a scalar shift with a compile-time-constant amount \p Amt into
// operations on two HalfTy halves, using the standard double-width shift
// expansion specialized on how Amt compares to the half width.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: the result is just the reassembled input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  // NOTE(review): Amt == VTBits takes the ugt(NVTBits) branches below; the
  // original full-width shift is already out of range in that case, so any
  // result is acceptable -- confirm this intentionally differs from a uge()
  // check.
  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // The low half shifts entirely into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General case: Lo = InL << Amt,
      // Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // The high half shifts entirely into the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)),
      // Hi = InH >> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but the vacated high bits are filled with the
    // sign bit of the high half.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
3171
// TODO: Optimize if constant shift amount.
// Narrow a scalar G_SHL/G_LSHR/G_ASHR to operations on two halves of the
// destination width. TypeIdx 1 (the amount) is narrowed by truncation;
// TypeIdx 0 uses the double-width shift expansion, selecting between the
// "short" (Amt < half width) and "long" (Amt >= half width) forms at runtime.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // The shift amount operand can be narrowed independently of the value.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount gets the cheaper specialized expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // AmtExcess = Amt - NewBitSize (used on the long path),
  // AmtLack = NewBitSize - Amt (used on the short path).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // The IsZero select covers Amt == 0, where AmtLack == NewBitSize would
    // make the short path's cross-half shift out of range.
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    // The IsZero select covers Amt == 0, where AmtLack == NewBitSize would
    // make the short path's cross-half shift out of range.
    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
3282
3283LegalizerHelper::LegalizeResult
3284LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3285                                       LLT MoreTy) {
3286  assert(TypeIdx == 0 && "Expecting only Idx 0");
3287
3288  Observer.changingInstr(MI);
3289  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3290    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3291    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3292    moreElementsVectorSrc(MI, MoreTy, I);
3293  }
3294
3295  MachineBasicBlock &MBB = *MI.getParent();
3296  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3297  moreElementsVectorDst(MI, MoreTy, 0);
3298  Observer.changedInstr(MI);
3299  return Legalized;
3300}
3301
3302LegalizerHelper::LegalizeResult
3303LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3304                                    LLT MoreTy) {
3305  MIRBuilder.setInstr(MI);
3306  unsigned Opc = MI.getOpcode();
3307  switch (Opc) {
3308  case TargetOpcode::G_IMPLICIT_DEF:
3309  case TargetOpcode::G_LOAD: {
3310    if (TypeIdx != 0)
3311      return UnableToLegalize;
3312    Observer.changingInstr(MI);
3313    moreElementsVectorDst(MI, MoreTy, 0);
3314    Observer.changedInstr(MI);
3315    return Legalized;
3316  }
3317  case TargetOpcode::G_STORE:
3318    if (TypeIdx != 0)
3319      return UnableToLegalize;
3320    Observer.changingInstr(MI);
3321    moreElementsVectorSrc(MI, MoreTy, 0);
3322    Observer.changedInstr(MI);
3323    return Legalized;
3324  case TargetOpcode::G_AND:
3325  case TargetOpcode::G_OR:
3326  case TargetOpcode::G_XOR:
3327  case TargetOpcode::G_SMIN:
3328  case TargetOpcode::G_SMAX:
3329  case TargetOpcode::G_UMIN:
3330  case TargetOpcode::G_UMAX:
3331  case TargetOpcode::G_FMINNUM:
3332  case TargetOpcode::G_FMAXNUM:
3333  case TargetOpcode::G_FMINNUM_IEEE:
3334  case TargetOpcode::G_FMAXNUM_IEEE:
3335  case TargetOpcode::G_FMINIMUM:
3336  case TargetOpcode::G_FMAXIMUM: {
3337    Observer.changingInstr(MI);
3338    moreElementsVectorSrc(MI, MoreTy, 1);
3339    moreElementsVectorSrc(MI, MoreTy, 2);
3340    moreElementsVectorDst(MI, MoreTy, 0);
3341    Observer.changedInstr(MI);
3342    return Legalized;
3343  }
3344  case TargetOpcode::G_EXTRACT:
3345    if (TypeIdx != 1)
3346      return UnableToLegalize;
3347    Observer.changingInstr(MI);
3348    moreElementsVectorSrc(MI, MoreTy, 1);
3349    Observer.changedInstr(MI);
3350    return Legalized;
3351  case TargetOpcode::G_INSERT:
3352    if (TypeIdx != 0)
3353      return UnableToLegalize;
3354    Observer.changingInstr(MI);
3355    moreElementsVectorSrc(MI, MoreTy, 1);
3356    moreElementsVectorDst(MI, MoreTy, 0);
3357    Observer.changedInstr(MI);
3358    return Legalized;
3359  case TargetOpcode::G_SELECT:
3360    if (TypeIdx != 0)
3361      return UnableToLegalize;
3362    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3363      return UnableToLegalize;
3364
3365    Observer.changingInstr(MI);
3366    moreElementsVectorSrc(MI, MoreTy, 2);
3367    moreElementsVectorSrc(MI, MoreTy, 3);
3368    moreElementsVectorDst(MI, MoreTy, 0);
3369    Observer.changedInstr(MI);
3370    return Legalized;
3371  case TargetOpcode::G_UNMERGE_VALUES: {
3372    if (TypeIdx != 1)
3373      return UnableToLegalize;
3374
3375    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3376    int NumDst = MI.getNumOperands() - 1;
3377    moreElementsVectorSrc(MI, MoreTy, NumDst);
3378
3379    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3380    for (int I = 0; I != NumDst; ++I)
3381      MIB.addDef(MI.getOperand(I).getReg());
3382
3383    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3384    for (int I = NumDst; I != NewNumDst; ++I)
3385      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3386
3387    MIB.addUse(MI.getOperand(NumDst).getReg());
3388    MI.eraseFromParent();
3389    return Legalized;
3390  }
3391  case TargetOpcode::G_PHI:
3392    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3393  default:
3394    return UnableToLegalize;
3395  }
3396}
3397
/// Emit a NarrowTy-part schoolbook multiplication of the multi-part integers
/// in \p Src1Regs and \p Src2Regs, writing the low DstRegs.size() parts of
/// the product into \p DstRegs (parts are in little-endian order: index 0 is
/// the least significant part).
///
/// Assumes \p Src1Regs and \p Src2Regs hold the same number of parts (only
/// Src1Regs.size() is consulted for both). Sizing DstRegs to twice the
/// source part count yields the full double-width product (used by the
/// multiply-high lowering).
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    // Each product Src1[DstIdx - i] * Src2[i] contributes its low half to
    // this part; the loop bounds clamp i so both indices stay in range.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    // The high half of Src1[DstIdx - 1 - i] * Src2[i] carries up into this
    // part, obtained via G_UMULH.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Sum with G_UADDO so each carry-out can be zero-extended and summed;
      // the accumulated carry count feeds the next (more significant) part.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      // Plain adds suffice; any carry out of the top part is discarded.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    // CarrySum stays unset on the final iteration; the copy below is then
    // never read again because the loop terminates.
    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
3460
3461LegalizerHelper::LegalizeResult
3462LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3463  Register DstReg = MI.getOperand(0).getReg();
3464  Register Src1 = MI.getOperand(1).getReg();
3465  Register Src2 = MI.getOperand(2).getReg();
3466
3467  LLT Ty = MRI.getType(DstReg);
3468  if (Ty.isVector())
3469    return UnableToLegalize;
3470
3471  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3472  unsigned DstSize = Ty.getSizeInBits();
3473  unsigned NarrowSize = NarrowTy.getSizeInBits();
3474  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3475    return UnableToLegalize;
3476
3477  unsigned NumDstParts = DstSize / NarrowSize;
3478  unsigned NumSrcParts = SrcSize / NarrowSize;
3479  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3480  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3481
3482  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3483  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3484  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3485  DstTmpRegs.resize(DstTmpParts);
3486  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3487
3488  // Take only high half of registers if this is high mul.
3489  ArrayRef<Register> DstRegs(
3490      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3491  MIRBuilder.buildMerge(DstReg, DstRegs);
3492  MI.eraseFromParent();
3493  return Legalized;
3494}
3495
3496LegalizerHelper::LegalizeResult
3497LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3498                                     LLT NarrowTy) {
3499  if (TypeIdx != 1)
3500    return UnableToLegalize;
3501
3502  uint64_t NarrowSize = NarrowTy.getSizeInBits();
3503
3504  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3505  // FIXME: add support for when SizeOp1 isn't an exact multiple of
3506  // NarrowSize.
3507  if (SizeOp1 % NarrowSize != 0)
3508    return UnableToLegalize;
3509  int NumParts = SizeOp1 / NarrowSize;
3510
3511  SmallVector<Register, 2> SrcRegs, DstRegs;
3512  SmallVector<uint64_t, 2> Indexes;
3513  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3514
3515  Register OpReg = MI.getOperand(0).getReg();
3516  uint64_t OpStart = MI.getOperand(2).getImm();
3517  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3518  for (int i = 0; i < NumParts; ++i) {
3519    unsigned SrcStart = i * NarrowSize;
3520
3521    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
3522      // No part of the extract uses this subregister, ignore it.
3523      continue;
3524    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3525      // The entire subregister is extracted, forward the value.
3526      DstRegs.push_back(SrcRegs[i]);
3527      continue;
3528    }
3529
3530    // OpSegStart is where this destination segment would start in OpReg if it
3531    // extended infinitely in both directions.
3532    int64_t ExtractOffset;
3533    uint64_t SegSize;
3534    if (OpStart < SrcStart) {
3535      ExtractOffset = 0;
3536      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
3537    } else {
3538      ExtractOffset = OpStart - SrcStart;
3539      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
3540    }
3541
3542    Register SegReg = SrcRegs[i];
3543    if (ExtractOffset != 0 || SegSize != NarrowSize) {
3544      // A genuine extract is needed.
3545      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3546      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
3547    }
3548
3549    DstRegs.push_back(SegReg);
3550  }
3551
3552  Register DstReg = MI.getOperand(0).getReg();
3553  if(MRI.getType(DstReg).isVector())
3554    MIRBuilder.buildBuildVector(DstReg, DstRegs);
3555  else
3556    MIRBuilder.buildMerge(DstReg, DstRegs);
3557  MI.eraseFromParent();
3558  return Legalized;
3559}
3560
3561LegalizerHelper::LegalizeResult
3562LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
3563                                    LLT NarrowTy) {
3564  // FIXME: Don't know how to handle secondary types yet.
3565  if (TypeIdx != 0)
3566    return UnableToLegalize;
3567
3568  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3569  uint64_t NarrowSize = NarrowTy.getSizeInBits();
3570
3571  // FIXME: add support for when SizeOp0 isn't an exact multiple of
3572  // NarrowSize.
3573  if (SizeOp0 % NarrowSize != 0)
3574    return UnableToLegalize;
3575
3576  int NumParts = SizeOp0 / NarrowSize;
3577
3578  SmallVector<Register, 2> SrcRegs, DstRegs;
3579  SmallVector<uint64_t, 2> Indexes;
3580  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3581
3582  Register OpReg = MI.getOperand(2).getReg();
3583  uint64_t OpStart = MI.getOperand(3).getImm();
3584  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3585  for (int i = 0; i < NumParts; ++i) {
3586    unsigned DstStart = i * NarrowSize;
3587
3588    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
3589      // No part of the insert affects this subregister, forward the original.
3590      DstRegs.push_back(SrcRegs[i]);
3591      continue;
3592    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3593      // The entire subregister is defined by this insert, forward the new
3594      // value.
3595      DstRegs.push_back(OpReg);
3596      continue;
3597    }
3598
3599    // OpSegStart is where this destination segment would start in OpReg if it
3600    // extended infinitely in both directions.
3601    int64_t ExtractOffset, InsertOffset;
3602    uint64_t SegSize;
3603    if (OpStart < DstStart) {
3604      InsertOffset = 0;
3605      ExtractOffset = DstStart - OpStart;
3606      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3607    } else {
3608      InsertOffset = OpStart - DstStart;
3609      ExtractOffset = 0;
3610      SegSize =
3611        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3612    }
3613
3614    Register SegReg = OpReg;
3615    if (ExtractOffset != 0 || SegSize != OpSize) {
3616      // A genuine extract is needed.
3617      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3618      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3619    }
3620
3621    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3622    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3623    DstRegs.push_back(DstReg);
3624  }
3625
3626  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
3627  Register DstReg = MI.getOperand(0).getReg();
3628  if(MRI.getType(DstReg).isVector())
3629    MIRBuilder.buildBuildVector(DstReg, DstRegs);
3630  else
3631    MIRBuilder.buildMerge(DstReg, DstRegs);
3632  MI.eraseFromParent();
3633  return Legalized;
3634}
3635
3636LegalizerHelper::LegalizeResult
3637LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3638                                   LLT NarrowTy) {
3639  Register DstReg = MI.getOperand(0).getReg();
3640  LLT DstTy = MRI.getType(DstReg);
3641
3642  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3643
3644  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3645  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3646  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3647  LLT LeftoverTy;
3648  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3649                    Src0Regs, Src0LeftoverRegs))
3650    return UnableToLegalize;
3651
3652  LLT Unused;
3653  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3654                    Src1Regs, Src1LeftoverRegs))
3655    llvm_unreachable("inconsistent extractParts result");
3656
3657  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3658    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3659                                        {Src0Regs[I], Src1Regs[I]});
3660    DstRegs.push_back(Inst->getOperand(0).getReg());
3661  }
3662
3663  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3664    auto Inst = MIRBuilder.buildInstr(
3665      MI.getOpcode(),
3666      {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3667    DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3668  }
3669
3670  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3671              LeftoverTy, DstLeftoverRegs);
3672
3673  MI.eraseFromParent();
3674  return Legalized;
3675}
3676
3677LegalizerHelper::LegalizeResult
3678LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
3679                                    LLT NarrowTy) {
3680  if (TypeIdx != 0)
3681    return UnableToLegalize;
3682
3683  Register CondReg = MI.getOperand(1).getReg();
3684  LLT CondTy = MRI.getType(CondReg);
3685  if (CondTy.isVector()) // TODO: Handle vselect
3686    return UnableToLegalize;
3687
3688  Register DstReg = MI.getOperand(0).getReg();
3689  LLT DstTy = MRI.getType(DstReg);
3690
3691  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3692  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3693  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3694  LLT LeftoverTy;
3695  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3696                    Src1Regs, Src1LeftoverRegs))
3697    return UnableToLegalize;
3698
3699  LLT Unused;
3700  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3701                    Src2Regs, Src2LeftoverRegs))
3702    llvm_unreachable("inconsistent extractParts result");
3703
3704  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3705    auto Select = MIRBuilder.buildSelect(NarrowTy,
3706                                         CondReg, Src1Regs[I], Src2Regs[I]);
3707    DstRegs.push_back(Select->getOperand(0).getReg());
3708  }
3709
3710  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3711    auto Select = MIRBuilder.buildSelect(
3712      LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3713    DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3714  }
3715
3716  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3717              LeftoverTy, DstLeftoverRegs);
3718
3719  MI.eraseFromParent();
3720  return Legalized;
3721}
3722
/// Lower bit-count instructions (G_CTLZ/G_CTTZ and their ZERO_UNDEF forms)
/// into operations the target can handle.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // A query counts as supported if the target handles it directly (Legal),
  // via a libcall, or with custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      // A zero input produces the bit width; anything else uses the
      // ZERO_UNDEF result.
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the leading set bit rightward, doubling the shift each round.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    // Every bit at or below the leading one is now set, so the leading-zero
    // count is Len minus the popcount of the smeared value.
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Rewrite the CTTZ in place as a CTPOP of the below-lowest-bit mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}
3835
3836// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3837// representation.
3838LegalizerHelper::LegalizeResult
3839LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3840  Register Dst = MI.getOperand(0).getReg();
3841  Register Src = MI.getOperand(1).getReg();
3842  const LLT S64 = LLT::scalar(64);
3843  const LLT S32 = LLT::scalar(32);
3844  const LLT S1 = LLT::scalar(1);
3845
3846  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3847
3848  // unsigned cul2f(ulong u) {
3849  //   uint lz = clz(u);
3850  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
3851  //   u = (u << lz) & 0x7fffffffffffffffUL;
3852  //   ulong t = u & 0xffffffffffUL;
3853  //   uint v = (e << 23) | (uint)(u >> 40);
3854  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3855  //   return as_float(v + r);
3856  // }
3857
3858  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3859  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3860
3861  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3862
3863  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3864  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3865
3866  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3867  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3868
3869  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3870  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3871
3872  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3873
3874  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3875  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3876
3877  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3878  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3879  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3880
3881  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3882  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3883  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3884  auto One = MIRBuilder.buildConstant(S32, 1);
3885
3886  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3887  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3888  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3889  MIRBuilder.buildAdd(Dst, V, R);
3890
3891  return Legalized;
3892}
3893
3894LegalizerHelper::LegalizeResult
3895LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3896  Register Dst = MI.getOperand(0).getReg();
3897  Register Src = MI.getOperand(1).getReg();
3898  LLT DstTy = MRI.getType(Dst);
3899  LLT SrcTy = MRI.getType(Src);
3900
3901  if (SrcTy == LLT::scalar(1)) {
3902    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
3903    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3904    MIRBuilder.buildSelect(Dst, Src, True, False);
3905    MI.eraseFromParent();
3906    return Legalized;
3907  }
3908
3909  if (SrcTy != LLT::scalar(64))
3910    return UnableToLegalize;
3911
3912  if (DstTy == LLT::scalar(32)) {
3913    // TODO: SelectionDAG has several alternative expansions to port which may
3914    // be more reasonble depending on the available instructions. If a target
3915    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3916    // intermediate type, this is probably worse.
3917    return lowerU64ToF32BitOps(MI);
3918  }
3919
3920  return UnableToLegalize;
3921}
3922
3923LegalizerHelper::LegalizeResult
3924LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3925  Register Dst = MI.getOperand(0).getReg();
3926  Register Src = MI.getOperand(1).getReg();
3927  LLT DstTy = MRI.getType(Dst);
3928  LLT SrcTy = MRI.getType(Src);
3929
3930  const LLT S64 = LLT::scalar(64);
3931  const LLT S32 = LLT::scalar(32);
3932  const LLT S1 = LLT::scalar(1);
3933
3934  if (SrcTy == S1) {
3935    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
3936    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3937    MIRBuilder.buildSelect(Dst, Src, True, False);
3938    MI.eraseFromParent();
3939    return Legalized;
3940  }
3941
3942  if (SrcTy != S64)
3943    return UnableToLegalize;
3944
3945  if (DstTy == S32) {
3946    // signed cl2f(long l) {
3947    //   long s = l >> 63;
3948    //   float r = cul2f((l + s) ^ s);
3949    //   return s ? -r : r;
3950    // }
3951    Register L = Src;
3952    auto SignBit = MIRBuilder.buildConstant(S64, 63);
3953    auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3954
3955    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3956    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3957    auto R = MIRBuilder.buildUITOFP(S32, Xor);
3958
3959    auto RNeg = MIRBuilder.buildFNeg(S32, R);
3960    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3961                                            MIRBuilder.buildConstant(S64, 0));
3962    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3963    return Legalized;
3964  }
3965
3966  return UnableToLegalize;
3967}
3968
3969LegalizerHelper::LegalizeResult
3970LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3971  Register Dst = MI.getOperand(0).getReg();
3972  Register Src = MI.getOperand(1).getReg();
3973  LLT DstTy = MRI.getType(Dst);
3974  LLT SrcTy = MRI.getType(Src);
3975  const LLT S64 = LLT::scalar(64);
3976  const LLT S32 = LLT::scalar(32);
3977
3978  if (SrcTy != S64 && SrcTy != S32)
3979    return UnableToLegalize;
3980  if (DstTy != S32 && DstTy != S64)
3981    return UnableToLegalize;
3982
3983  // FPTOSI gives same result as FPTOUI for positive signed integers.
3984  // FPTOUI needs to deal with fp values that convert to unsigned integers
3985  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
3986
3987  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
3988  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
3989                                                : APFloat::IEEEdouble(),
3990                    APInt::getNullValue(SrcTy.getSizeInBits()));
3991  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
3992
3993  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
3994
3995  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
3996  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
3997  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
3998  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
3999  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
4000  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
4001  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
4002
4003  const LLT S1 = LLT::scalar(1);
4004
4005  MachineInstrBuilder FCMP =
4006      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
4007  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
4008
4009  MI.eraseFromParent();
4010  return Legalized;
4011}
4012
4013static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4014  switch (Opc) {
4015  case TargetOpcode::G_SMIN:
4016    return CmpInst::ICMP_SLT;
4017  case TargetOpcode::G_SMAX:
4018    return CmpInst::ICMP_SGT;
4019  case TargetOpcode::G_UMIN:
4020    return CmpInst::ICMP_ULT;
4021  case TargetOpcode::G_UMAX:
4022    return CmpInst::ICMP_UGT;
4023  default:
4024    llvm_unreachable("not in integer min/max");
4025  }
4026}
4027
4028LegalizerHelper::LegalizeResult
4029LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4030  Register Dst = MI.getOperand(0).getReg();
4031  Register Src0 = MI.getOperand(1).getReg();
4032  Register Src1 = MI.getOperand(2).getReg();
4033
4034  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4035  LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4036
4037  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4038  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4039
4040  MI.eraseFromParent();
4041  return Legalized;
4042}
4043
4044LegalizerHelper::LegalizeResult
4045LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4046  Register Dst = MI.getOperand(0).getReg();
4047  Register Src0 = MI.getOperand(1).getReg();
4048  Register Src1 = MI.getOperand(2).getReg();
4049
4050  const LLT Src0Ty = MRI.getType(Src0);
4051  const LLT Src1Ty = MRI.getType(Src1);
4052
4053  const int Src0Size = Src0Ty.getScalarSizeInBits();
4054  const int Src1Size = Src1Ty.getScalarSizeInBits();
4055
4056  auto SignBitMask = MIRBuilder.buildConstant(
4057    Src0Ty, APInt::getSignMask(Src0Size));
4058
4059  auto NotSignBitMask = MIRBuilder.buildConstant(
4060    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4061
4062  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4063  MachineInstr *Or;
4064
4065  if (Src0Ty == Src1Ty) {
4066    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
4067    Or = MIRBuilder.buildOr(Dst, And0, And1);
4068  } else if (Src0Size > Src1Size) {
4069    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4070    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4071    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4072    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4073    Or = MIRBuilder.buildOr(Dst, And0, And1);
4074  } else {
4075    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4076    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4077    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4078    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4079    Or = MIRBuilder.buildOr(Dst, And0, And1);
4080  }
4081
4082  // Be careful about setting nsz/nnan/ninf on every instruction, since the
4083  // constants are a nan and -0.0, but the final result should preserve
4084  // everything.
4085  if (unsigned Flags = MI.getFlags())
4086    Or->setFlags(Flags);
4087
4088  MI.eraseFromParent();
4089  return Legalized;
4090}
4091
4092LegalizerHelper::LegalizeResult
4093LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
4094  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4095    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4096
4097  Register Dst = MI.getOperand(0).getReg();
4098  Register Src0 = MI.getOperand(1).getReg();
4099  Register Src1 = MI.getOperand(2).getReg();
4100  LLT Ty = MRI.getType(Dst);
4101
4102  if (!MI.getFlag(MachineInstr::FmNoNans)) {
4103    // Insert canonicalizes if it's possible we need to quiet to get correct
4104    // sNaN behavior.
4105
4106    // Note this must be done here, and not as an optimization combine in the
4107    // absence of a dedicate quiet-snan instruction as we're using an
4108    // omni-purpose G_FCANONICALIZE.
4109    if (!isKnownNeverSNaN(Src0, MRI))
4110      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4111
4112    if (!isKnownNeverSNaN(Src1, MRI))
4113      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4114  }
4115
4116  // If there are no nans, it's safe to simply replace this with the non-IEEE
4117  // version.
4118  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4119  MI.eraseFromParent();
4120  return Legalized;
4121}
4122
4123LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4124  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4125  Register DstReg = MI.getOperand(0).getReg();
4126  LLT Ty = MRI.getType(DstReg);
4127  unsigned Flags = MI.getFlags();
4128
4129  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4130                                  Flags);
4131  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4132  MI.eraseFromParent();
4133  return Legalized;
4134}
4135
4136LegalizerHelper::LegalizeResult
4137LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4138  Register DstReg = MI.getOperand(0).getReg();
4139  Register SrcReg = MI.getOperand(1).getReg();
4140  unsigned Flags = MI.getFlags();
4141  LLT Ty = MRI.getType(DstReg);
4142  const LLT CondTy = Ty.changeElementSize(1);
4143
4144  // result = trunc(src);
4145  // if (src < 0.0 && src != result)
4146  //   result += -1.0.
4147
4148  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4149  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4150
4151  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4152                                  SrcReg, Zero, Flags);
4153  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4154                                      SrcReg, Trunc, Flags);
4155  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
4156  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
4157
4158  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
4159  MI.eraseFromParent();
4160  return Legalized;
4161}
4162
4163LegalizerHelper::LegalizeResult
4164LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
4165  const unsigned NumDst = MI.getNumOperands() - 1;
4166  const Register SrcReg = MI.getOperand(NumDst).getReg();
4167  LLT SrcTy = MRI.getType(SrcReg);
4168
4169  Register Dst0Reg = MI.getOperand(0).getReg();
4170  LLT DstTy = MRI.getType(Dst0Reg);
4171
4172
4173  // Expand scalarizing unmerge as bitcast to integer and shift.
4174  if (!DstTy.isVector() && SrcTy.isVector() &&
4175      SrcTy.getElementType() == DstTy) {
4176    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
4177    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
4178
4179    MIRBuilder.buildTrunc(Dst0Reg, Cast);
4180
4181    const unsigned DstSize = DstTy.getSizeInBits();
4182    unsigned Offset = DstSize;
4183    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
4184      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
4185      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
4186      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
4187    }
4188
4189    MI.eraseFromParent();
4190    return Legalized;
4191  }
4192
4193  return UnableToLegalize;
4194}
4195
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  // Lower G_SHUFFLE_VECTOR by scalarizing: extract every selected source
  // element and rebuild the result with G_BUILD_VECTOR. A scalar result
  // (single-element shuffle) degenerates to a copy of one source.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32); // Index type for G_EXTRACT_VECTOR_ELT.

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    // Scalar result from vector sources is not handled on this path.
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      // Out-of-range/negative mask element means the result is undefined.
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Negative mask entries are "don't care"; create one undef value and
      // reuse it for all of them.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: mask index 0 selects Src0, anything else Src1.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts address the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
4251
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  // Lower G_DYN_STACKALLOC by manipulating the stack pointer directly: read
  // SP, subtract the allocation size, align the result, then write it back
  // as both the new SP and the returned pointer.
  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  unsigned Align = MI.getOperand(2).getImm(); // 0 => no extra alignment.

  const auto &MF = *MI.getMF();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  LLT PtrTy = MRI.getType(Dst);
  // Integer type of pointer width, used for the SP arithmetic below.
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Align) {
    // Clear the low bits with an AND against -Align, rounding the new SP
    // down to the requested alignment (the stack grows downward).
    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // The aligned value serves as both the updated SP and the allocation's
  // address.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
4286
4287LegalizerHelper::LegalizeResult
4288LegalizerHelper::lowerExtract(MachineInstr &MI) {
4289  Register Dst = MI.getOperand(0).getReg();
4290  Register Src = MI.getOperand(1).getReg();
4291  unsigned Offset = MI.getOperand(2).getImm();
4292
4293  LLT DstTy = MRI.getType(Dst);
4294  LLT SrcTy = MRI.getType(Src);
4295
4296  if (DstTy.isScalar() &&
4297      (SrcTy.isScalar() ||
4298       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
4299    LLT SrcIntTy = SrcTy;
4300    if (!SrcTy.isScalar()) {
4301      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
4302      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
4303    }
4304
4305    if (Offset == 0)
4306      MIRBuilder.buildTrunc(Dst, Src);
4307    else {
4308      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
4309      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
4310      MIRBuilder.buildTrunc(Dst, Shr);
4311    }
4312
4313    MI.eraseFromParent();
4314    return Legalized;
4315  }
4316
4317  return UnableToLegalize;
4318}
4319
4320LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
4321  Register Dst = MI.getOperand(0).getReg();
4322  Register Src = MI.getOperand(1).getReg();
4323  Register InsertSrc = MI.getOperand(2).getReg();
4324  uint64_t Offset = MI.getOperand(3).getImm();
4325
4326  LLT DstTy = MRI.getType(Src);
4327  LLT InsertTy = MRI.getType(InsertSrc);
4328
4329  if (InsertTy.isScalar() &&
4330      (DstTy.isScalar() ||
4331       (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
4332    LLT IntDstTy = DstTy;
4333    if (!DstTy.isScalar()) {
4334      IntDstTy = LLT::scalar(DstTy.getSizeInBits());
4335      Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
4336    }
4337
4338    Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
4339    if (Offset != 0) {
4340      auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
4341      ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
4342    }
4343
4344    APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
4345                                       InsertTy.getSizeInBits());
4346
4347    auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
4348    auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
4349    auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
4350
4351    MIRBuilder.buildBitcast(Dst, Or);
4352    MI.eraseFromParent();
4353    return Legalized;
4354  }
4355
4356  return UnableToLegalize;
4357}
4358
4359LegalizerHelper::LegalizeResult
4360LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
4361  Register Dst0 = MI.getOperand(0).getReg();
4362  Register Dst1 = MI.getOperand(1).getReg();
4363  Register LHS = MI.getOperand(2).getReg();
4364  Register RHS = MI.getOperand(3).getReg();
4365  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
4366
4367  LLT Ty = MRI.getType(Dst0);
4368  LLT BoolTy = MRI.getType(Dst1);
4369
4370  if (IsAdd)
4371    MIRBuilder.buildAdd(Dst0, LHS, RHS);
4372  else
4373    MIRBuilder.buildSub(Dst0, LHS, RHS);
4374
4375  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4376
4377  auto Zero = MIRBuilder.buildConstant(Ty, 0);
4378
4379  // For an addition, the result should be less than one of the operands (LHS)
4380  // if and only if the other operand (RHS) is negative, otherwise there will
4381  // be overflow.
4382  // For a subtraction, the result should be less than one of the operands
4383  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4384  // otherwise there will be overflow.
4385  auto ResultLowerThanLHS =
4386      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
4387  auto ConditionRHS = MIRBuilder.buildICmp(
4388      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
4389
4390  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
4391  MI.eraseFromParent();
4392  return Legalized;
4393}
4394
4395LegalizerHelper::LegalizeResult
4396LegalizerHelper::lowerBswap(MachineInstr &MI) {
4397  Register Dst = MI.getOperand(0).getReg();
4398  Register Src = MI.getOperand(1).getReg();
4399  const LLT Ty = MRI.getType(Src);
4400  unsigned SizeInBytes = Ty.getSizeInBytes();
4401  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
4402
4403  // Swap most and least significant byte, set remaining bytes in Res to zero.
4404  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
4405  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
4406  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4407  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
4408
4409  // Set i-th high/low byte in Res to i-th low/high byte from Src.
4410  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
4411    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
4412    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
4413    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
4414    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
4415    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
4416    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
4417    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
4418    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
4419    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
4420    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4421    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
4422    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
4423  }
4424  Res.getInstr()->getOperand(0).setReg(Dst);
4425
4426  MI.eraseFromParent();
4427  return Legalized;
4428}
4429
4430//{ (Src & Mask) >> N } | { (Src << N) & Mask }
4431static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
4432                                 MachineInstrBuilder Src, APInt Mask) {
4433  const LLT Ty = Dst.getLLTTy(*B.getMRI());
4434  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
4435  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
4436  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
4437  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
4438  return B.buildOr(Dst, LHS, RHS);
4439}
4440
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  // Lower G_BITREVERSE as a byte swap followed by intra-byte swaps of
  // successively smaller bit groups (nibbles, pairs, single bits).
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  // Reverse the byte order first; only the bits within each byte remain to
  // be swapped afterwards.
  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}
4471
4472LegalizerHelper::LegalizeResult
4473LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
4474  Register Dst = MI.getOperand(0).getReg();
4475  const LLT Ty = MRI.getType(Dst);
4476  const MDString *RegStr = cast<MDString>(
4477    cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
4478
4479  MachineFunction &MF = MIRBuilder.getMF();
4480  const TargetSubtargetInfo &STI = MF.getSubtarget();
4481  const TargetLowering *TLI = STI.getTargetLowering();
4482  Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
4483  if (!Reg.isValid())
4484    return UnableToLegalize;
4485
4486  MIRBuilder.buildCopy(Dst, Reg);
4487  MI.eraseFromParent();
4488  return Legalized;
4489}
4490