1311116Sdim//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2311116Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6311116Sdim//
7311116Sdim//===----------------------------------------------------------------------===//
8311116Sdim/// \file
9311116Sdim/// This file implements the targeting of the Machinelegalizer class for
10311116Sdim/// AArch64.
11311116Sdim/// \todo This should be generated by TableGen.
12311116Sdim//===----------------------------------------------------------------------===//
13311116Sdim
14311116Sdim#include "AArch64LegalizerInfo.h"
15327952Sdim#include "AArch64Subtarget.h"
16360784Sdim#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17321369Sdim#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18360784Sdim#include "llvm/CodeGen/GlobalISel/Utils.h"
19321369Sdim#include "llvm/CodeGen/MachineInstr.h"
20321369Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
21327952Sdim#include "llvm/CodeGen/TargetOpcodes.h"
22311116Sdim#include "llvm/CodeGen/ValueTypes.h"
23321369Sdim#include "llvm/IR/DerivedTypes.h"
24311116Sdim#include "llvm/IR/Type.h"
25311116Sdim
26353358Sdim#define DEBUG_TYPE "aarch64-legalinfo"
27353358Sdim
28311116Sdimusing namespace llvm;
29341825Sdimusing namespace LegalizeActions;
30353358Sdimusing namespace LegalizeMutations;
31341825Sdimusing namespace LegalityPredicates;
32311116Sdim
33327952SdimAArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
34311116Sdim  using namespace TargetOpcode;
35311116Sdim  const LLT p0 = LLT::pointer(0, 64);
36311116Sdim  const LLT s1 = LLT::scalar(1);
37311116Sdim  const LLT s8 = LLT::scalar(8);
38311116Sdim  const LLT s16 = LLT::scalar(16);
39311116Sdim  const LLT s32 = LLT::scalar(32);
40311116Sdim  const LLT s64 = LLT::scalar(64);
41327952Sdim  const LLT s128 = LLT::scalar(128);
42341825Sdim  const LLT s256 = LLT::scalar(256);
43341825Sdim  const LLT s512 = LLT::scalar(512);
44341825Sdim  const LLT v16s8 = LLT::vector(16, 8);
45341825Sdim  const LLT v8s8 = LLT::vector(8, 8);
46341825Sdim  const LLT v4s8 = LLT::vector(4, 8);
47341825Sdim  const LLT v8s16 = LLT::vector(8, 16);
48341825Sdim  const LLT v4s16 = LLT::vector(4, 16);
49341825Sdim  const LLT v2s16 = LLT::vector(2, 16);
50311116Sdim  const LLT v2s32 = LLT::vector(2, 32);
51311116Sdim  const LLT v4s32 = LLT::vector(4, 32);
52311116Sdim  const LLT v2s64 = LLT::vector(2, 64);
53353358Sdim  const LLT v2p0 = LLT::vector(2, p0);
54311116Sdim
55360784Sdim  // FIXME: support subtargets which have neon/fp-armv8 disabled.
56360784Sdim  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
57360784Sdim    computeTables();
58360784Sdim    return;
59360784Sdim  }
60360784Sdim
61341825Sdim  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
62360784Sdim    .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
63344779Sdim    .clampScalar(0, s1, s64)
64344779Sdim    .widenScalarToNextPow2(0, 8)
65344779Sdim    .fewerElementsIf(
66344779Sdim      [=](const LegalityQuery &Query) {
67344779Sdim        return Query.Types[0].isVector() &&
68344779Sdim          (Query.Types[0].getElementType() != s64 ||
69344779Sdim           Query.Types[0].getNumElements() != 2);
70344779Sdim      },
71344779Sdim      [=](const LegalityQuery &Query) {
72344779Sdim        LLT EltTy = Query.Types[0].getElementType();
73344779Sdim        if (EltTy == s64)
74344779Sdim          return std::make_pair(0, LLT::vector(2, 64));
75344779Sdim        return std::make_pair(0, EltTy);
76344779Sdim      });
77321369Sdim
78341825Sdim  getActionDefinitionsBuilder(G_PHI)
79353358Sdim      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
80341825Sdim      .clampScalar(0, s16, s64)
81341825Sdim      .widenScalarToNextPow2(0);
82327952Sdim
83341825Sdim  getActionDefinitionsBuilder(G_BSWAP)
84353358Sdim      .legalFor({s32, s64, v4s32, v2s32, v2s64})
85360784Sdim      .clampScalar(0, s32, s64)
86341825Sdim      .widenScalarToNextPow2(0);
87327952Sdim
88353358Sdim  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
89353358Sdim      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
90341825Sdim      .clampScalar(0, s32, s64)
91341825Sdim      .widenScalarToNextPow2(0)
92341825Sdim      .clampNumElements(0, v2s32, v4s32)
93341825Sdim      .clampNumElements(0, v2s64, v2s64)
94341825Sdim      .moreElementsToNextPow2(0);
95327952Sdim
96353358Sdim  getActionDefinitionsBuilder(G_SHL)
97353358Sdim    .legalFor({{s32, s32}, {s64, s64},
98353358Sdim               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
99353358Sdim    .clampScalar(1, s32, s64)
100353358Sdim    .clampScalar(0, s32, s64)
101353358Sdim    .widenScalarToNextPow2(0)
102353358Sdim    .clampNumElements(0, v2s32, v4s32)
103353358Sdim    .clampNumElements(0, v2s64, v2s64)
104353358Sdim    .moreElementsToNextPow2(0)
105353358Sdim    .minScalarSameAs(1, 0);
106353358Sdim
107360784Sdim  getActionDefinitionsBuilder(G_PTR_ADD)
108341825Sdim      .legalFor({{p0, s64}})
109341825Sdim      .clampScalar(1, s64, s64);
110321369Sdim
111341825Sdim  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
112311116Sdim
113353358Sdim  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
114341825Sdim      .legalFor({s32, s64})
115360784Sdim      .libcallFor({s128})
116341825Sdim      .clampScalar(0, s32, s64)
117353358Sdim      .widenScalarToNextPow2(0)
118353358Sdim      .scalarize(0);
119311116Sdim
120353358Sdim  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
121353358Sdim      .customIf([=](const LegalityQuery &Query) {
122353358Sdim        const auto &SrcTy = Query.Types[0];
123353358Sdim        const auto &AmtTy = Query.Types[1];
124353358Sdim        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
125353358Sdim               AmtTy.getSizeInBits() == 32;
126353358Sdim      })
127360784Sdim      .legalFor({{s32, s32},
128360784Sdim                 {s32, s64},
129360784Sdim                 {s64, s64},
130360784Sdim                 {v2s32, v2s32},
131360784Sdim                 {v4s32, v4s32},
132360784Sdim                 {v2s64, v2s64}})
133353358Sdim      .clampScalar(1, s32, s64)
134353358Sdim      .clampScalar(0, s32, s64)
135353358Sdim      .minScalarSameAs(1, 0);
136353358Sdim
137341825Sdim  getActionDefinitionsBuilder({G_SREM, G_UREM})
138341825Sdim      .lowerFor({s1, s8, s16, s32, s64});
139311116Sdim
140341825Sdim  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
141341825Sdim      .lowerFor({{s64, s1}});
142321369Sdim
143341825Sdim  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
144311116Sdim
145353358Sdim  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
146360784Sdim      .legalFor({{s32, s1}, {s64, s1}})
147360784Sdim      .minScalar(0, s32);
148311116Sdim
149353358Sdim  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
150353358Sdim    .legalFor({s32, s64, v2s64, v4s32, v2s32});
151311116Sdim
152353358Sdim  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
153321369Sdim
154353358Sdim  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
155353358Sdim                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
156353358Sdim                               G_FNEARBYINT})
157353358Sdim      // If we don't have full FP16 support, then scalarize the elements of
158353358Sdim      // vectors containing fp16 types.
159353358Sdim      .fewerElementsIf(
160353358Sdim          [=, &ST](const LegalityQuery &Query) {
161353358Sdim            const auto &Ty = Query.Types[0];
162353358Sdim            return Ty.isVector() && Ty.getElementType() == s16 &&
163353358Sdim                   !ST.hasFullFP16();
164353358Sdim          },
165353358Sdim          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
166344779Sdim      // If we don't have full FP16 support, then widen s16 to s32 if we
167344779Sdim      // encounter it.
168344779Sdim      .widenScalarIf(
169344779Sdim          [=, &ST](const LegalityQuery &Query) {
170344779Sdim            return Query.Types[0] == s16 && !ST.hasFullFP16();
171344779Sdim          },
172344779Sdim          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
173353358Sdim      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
174344779Sdim
175353358Sdim  getActionDefinitionsBuilder(
176353358Sdim      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
177353358Sdim      // We need a call for these, so we always need to scalarize.
178353358Sdim      .scalarize(0)
179353358Sdim      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
180353358Sdim      .minScalar(0, s32)
181353358Sdim      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
182353358Sdim
183341825Sdim  getActionDefinitionsBuilder(G_INSERT)
184341825Sdim      .unsupportedIf([=](const LegalityQuery &Query) {
185341825Sdim        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
186341825Sdim      })
187341825Sdim      .legalIf([=](const LegalityQuery &Query) {
188341825Sdim        const LLT &Ty0 = Query.Types[0];
189341825Sdim        const LLT &Ty1 = Query.Types[1];
190341825Sdim        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
191341825Sdim          return false;
192341825Sdim        return isPowerOf2_32(Ty1.getSizeInBits()) &&
193341825Sdim               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
194341825Sdim      })
195341825Sdim      .clampScalar(0, s32, s64)
196341825Sdim      .widenScalarToNextPow2(0)
197341825Sdim      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
198341825Sdim      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
199341825Sdim      .widenScalarToNextPow2(1);
200311116Sdim
201341825Sdim  getActionDefinitionsBuilder(G_EXTRACT)
202341825Sdim      .unsupportedIf([=](const LegalityQuery &Query) {
203341825Sdim        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
204341825Sdim      })
205341825Sdim      .legalIf([=](const LegalityQuery &Query) {
206341825Sdim        const LLT &Ty0 = Query.Types[0];
207341825Sdim        const LLT &Ty1 = Query.Types[1];
208360784Sdim        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
209341825Sdim          return false;
210341825Sdim        if (Ty1 == p0)
211341825Sdim          return true;
212341825Sdim        return isPowerOf2_32(Ty0.getSizeInBits()) &&
213341825Sdim               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
214341825Sdim      })
215360784Sdim      .clampScalar(1, s32, s128)
216341825Sdim      .widenScalarToNextPow2(1)
217341825Sdim      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
218341825Sdim      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
219341825Sdim      .widenScalarToNextPow2(0);
220311116Sdim
221341825Sdim  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
222353358Sdim      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
223353358Sdim                                 {s32, p0, 16, 8},
224353358Sdim                                 {s32, p0, 32, 8},
225353358Sdim                                 {s64, p0, 8, 2},
226353358Sdim                                 {s64, p0, 16, 2},
227353358Sdim                                 {s64, p0, 32, 4},
228353358Sdim                                 {s64, p0, 64, 8},
229353358Sdim                                 {p0, p0, 64, 8},
230353358Sdim                                 {v2s32, p0, 64, 8}})
231341825Sdim      .clampScalar(0, s32, s64)
232341825Sdim      .widenScalarToNextPow2(0)
233341825Sdim      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
234341825Sdim      //       how to do that yet.
235341825Sdim      .unsupportedIfMemSizeNotPow2()
236341825Sdim      // Lower anything left over into G_*EXT and G_LOAD
237341825Sdim      .lower();
238311116Sdim
239353358Sdim  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
240353358Sdim    const LLT &ValTy = Query.Types[0];
241353358Sdim    if (!ValTy.isVector())
242353358Sdim      return false;
243353358Sdim    const LLT EltTy = ValTy.getElementType();
244353358Sdim    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
245353358Sdim  };
246353358Sdim
247341825Sdim  getActionDefinitionsBuilder(G_LOAD)
248353358Sdim      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
249353358Sdim                                 {s16, p0, 16, 8},
250353358Sdim                                 {s32, p0, 32, 8},
251353358Sdim                                 {s64, p0, 64, 8},
252353358Sdim                                 {p0, p0, 64, 8},
253360784Sdim                                 {s128, p0, 128, 8},
254353358Sdim                                 {v8s8, p0, 64, 8},
255353358Sdim                                 {v16s8, p0, 128, 8},
256353358Sdim                                 {v4s16, p0, 64, 8},
257353358Sdim                                 {v8s16, p0, 128, 8},
258353358Sdim                                 {v2s32, p0, 64, 8},
259353358Sdim                                 {v4s32, p0, 128, 8},
260353358Sdim                                 {v2s64, p0, 128, 8}})
261341825Sdim      // These extends are also legal
262353358Sdim      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
263353358Sdim                                 {s32, p0, 16, 8}})
264341825Sdim      .clampScalar(0, s8, s64)
265360784Sdim      .lowerIfMemSizeNotPow2()
266341825Sdim      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
267341825Sdim      .lowerIf([=](const LegalityQuery &Query) {
268344779Sdim        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
269341825Sdim      })
270360784Sdim      .widenScalarToNextPow2(0)
271353358Sdim      .clampMaxNumElements(0, s32, 2)
272353358Sdim      .clampMaxNumElements(0, s64, 1)
273353358Sdim      .customIf(IsPtrVecPred);
274311116Sdim
275341825Sdim  getActionDefinitionsBuilder(G_STORE)
276353358Sdim      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
277353358Sdim                                 {s16, p0, 16, 8},
278360784Sdim                                 {s32, p0, 8, 8},
279360784Sdim                                 {s32, p0, 16, 8},
280353358Sdim                                 {s32, p0, 32, 8},
281353358Sdim                                 {s64, p0, 64, 8},
282353358Sdim                                 {p0, p0, 64, 8},
283360784Sdim                                 {s128, p0, 128, 8},
284353358Sdim                                 {v16s8, p0, 128, 8},
285353358Sdim                                 {v4s16, p0, 64, 8},
286353358Sdim                                 {v8s16, p0, 128, 8},
287353358Sdim                                 {v2s32, p0, 64, 8},
288353358Sdim                                 {v4s32, p0, 128, 8},
289353358Sdim                                 {v2s64, p0, 128, 8}})
290341825Sdim      .clampScalar(0, s8, s64)
291360784Sdim      .lowerIfMemSizeNotPow2()
292341825Sdim      .lowerIf([=](const LegalityQuery &Query) {
293341825Sdim        return Query.Types[0].isScalar() &&
294344779Sdim               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
295341825Sdim      })
296353358Sdim      .clampMaxNumElements(0, s32, 2)
297353358Sdim      .clampMaxNumElements(0, s64, 1)
298353358Sdim      .customIf(IsPtrVecPred);
299321369Sdim
300311116Sdim  // Constants
301341825Sdim  getActionDefinitionsBuilder(G_CONSTANT)
302353358Sdim    .legalFor({p0, s8, s16, s32, s64})
303353358Sdim      .clampScalar(0, s8, s64)
304341825Sdim      .widenScalarToNextPow2(0);
305341825Sdim  getActionDefinitionsBuilder(G_FCONSTANT)
306341825Sdim      .legalFor({s32, s64})
307341825Sdim      .clampScalar(0, s32, s64);
308311116Sdim
309341825Sdim  getActionDefinitionsBuilder(G_ICMP)
310353358Sdim      .legalFor({{s32, s32},
311353358Sdim                 {s32, s64},
312353358Sdim                 {s32, p0},
313353358Sdim                 {v4s32, v4s32},
314353358Sdim                 {v2s32, v2s32},
315353358Sdim                 {v2s64, v2s64},
316353358Sdim                 {v2s64, v2p0},
317353358Sdim                 {v4s16, v4s16},
318353358Sdim                 {v8s16, v8s16},
319353358Sdim                 {v8s8, v8s8},
320353358Sdim                 {v16s8, v16s8}})
321360784Sdim      .clampScalar(1, s32, s64)
322341825Sdim      .clampScalar(0, s32, s32)
323353358Sdim      .minScalarEltSameAsIf(
324353358Sdim          [=](const LegalityQuery &Query) {
325353358Sdim            const LLT &Ty = Query.Types[0];
326353358Sdim            const LLT &SrcTy = Query.Types[1];
327353358Sdim            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
328353358Sdim                   Ty.getElementType() != SrcTy.getElementType();
329353358Sdim          },
330353358Sdim          0, 1)
331353358Sdim      .minScalarOrEltIf(
332353358Sdim          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
333353358Sdim          1, s32)
334353358Sdim      .minScalarOrEltIf(
335353358Sdim          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
336353358Sdim          s64)
337353358Sdim      .widenScalarOrEltToNextPow2(1);
338311116Sdim
339341825Sdim  getActionDefinitionsBuilder(G_FCMP)
340341825Sdim      .legalFor({{s32, s32}, {s32, s64}})
341341825Sdim      .clampScalar(0, s32, s32)
342341825Sdim      .clampScalar(1, s32, s64)
343341825Sdim      .widenScalarToNextPow2(1);
344311116Sdim
345311116Sdim  // Extensions
346360784Sdim  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
347360784Sdim    unsigned DstSize = Query.Types[0].getSizeInBits();
348311116Sdim
349360784Sdim    if (DstSize == 128 && !Query.Types[0].isVector())
350360784Sdim      return false; // Extending to a scalar s128 needs narrowing.
351360784Sdim
352360784Sdim    // Make sure that we have something that will fit in a register, and
353360784Sdim    // make sure it's a power of 2.
354360784Sdim    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
355360784Sdim      return false;
356353358Sdim
357360784Sdim    const LLT &SrcTy = Query.Types[1];
358353358Sdim
359360784Sdim    // Special case for s1.
360360784Sdim    if (SrcTy == s1)
361360784Sdim      return true;
362353358Sdim
363360784Sdim    // Make sure we fit in a register otherwise. Don't bother checking that
364360784Sdim    // the source type is below 128 bits. We shouldn't be allowing anything
365360784Sdim    // through which is wider than the destination in the first place.
366360784Sdim    unsigned SrcSize = SrcTy.getSizeInBits();
367360784Sdim    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
368360784Sdim      return false;
369353358Sdim
370360784Sdim    return true;
371360784Sdim  };
372360784Sdim  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
373360784Sdim      .legalIf(ExtLegalFunc)
374360784Sdim      .clampScalar(0, s64, s64); // Just for s128, others are handled above.
375353358Sdim
376353358Sdim  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
377353358Sdim
378360784Sdim  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
379360784Sdim
380327952Sdim  // FP conversions
381341825Sdim  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
382353358Sdim      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
383341825Sdim  getActionDefinitionsBuilder(G_FPEXT).legalFor(
384353358Sdim      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
385311116Sdim
386311116Sdim  // Conversions
387341825Sdim  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
388353358Sdim      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
389341825Sdim      .clampScalar(0, s32, s64)
390341825Sdim      .widenScalarToNextPow2(0)
391341825Sdim      .clampScalar(1, s32, s64)
392341825Sdim      .widenScalarToNextPow2(1);
393311116Sdim
394341825Sdim  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
395353358Sdim      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
396341825Sdim      .clampScalar(1, s32, s64)
397341825Sdim      .widenScalarToNextPow2(1)
398341825Sdim      .clampScalar(0, s32, s64)
399341825Sdim      .widenScalarToNextPow2(0);
400311116Sdim
401311116Sdim  // Control-flow
402341825Sdim  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
403341825Sdim  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
404311116Sdim
405311116Sdim  // Select
406353358Sdim  // FIXME: We can probably do a bit better than just scalarizing vector
407353358Sdim  // selects.
408341825Sdim  getActionDefinitionsBuilder(G_SELECT)
409341825Sdim      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
410341825Sdim      .clampScalar(0, s32, s64)
411353358Sdim      .widenScalarToNextPow2(0)
412353358Sdim      .scalarize(0);
413321369Sdim
414311116Sdim  // Pointer-handling
415341825Sdim  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
416341825Sdim  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
417311116Sdim
418341825Sdim  getActionDefinitionsBuilder(G_PTRTOINT)
419341825Sdim      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
420341825Sdim      .maxScalar(0, s64)
421341825Sdim      .widenScalarToNextPow2(0, /*Min*/ 8);
422311116Sdim
423341825Sdim  getActionDefinitionsBuilder(G_INTTOPTR)
424341825Sdim      .unsupportedIf([&](const LegalityQuery &Query) {
425341825Sdim        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
426341825Sdim      })
427341825Sdim      .legalFor({{p0, s64}});
428311116Sdim
429311116Sdim  // Casts for 32 and 64-bit width type are just copies.
430327952Sdim  // Same for 128-bit width type, except they are on the FPR bank.
431341825Sdim  getActionDefinitionsBuilder(G_BITCAST)
432341825Sdim      // FIXME: This is wrong since G_BITCAST is not allowed to change the
433341825Sdim      // number of bits but it's what the previous code described and fixing
434341825Sdim      // it breaks tests.
435341825Sdim      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
436353358Sdim                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
437353358Sdim                                 v2p0});
438311116Sdim
439341825Sdim  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
440311116Sdim
441321369Sdim  // va_list must be a pointer, but most sized types are pretty easy to handle
442321369Sdim  // as the destination.
443341825Sdim  getActionDefinitionsBuilder(G_VAARG)
444341825Sdim      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
445341825Sdim      .clampScalar(0, s8, s64)
446341825Sdim      .widenScalarToNextPow2(0, /*Min*/ 8);
447321369Sdim
448327952Sdim  if (ST.hasLSE()) {
449341825Sdim    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
450341825Sdim        .lowerIf(all(
451341825Sdim            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
452341825Sdim            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
453327952Sdim
454341825Sdim    getActionDefinitionsBuilder(
455341825Sdim        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
456341825Sdim         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
457341825Sdim         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
458341825Sdim        .legalIf(all(
459341825Sdim            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
460341825Sdim            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
461327952Sdim  }
462327952Sdim
463341825Sdim  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
464341825Sdim
465327952Sdim  // Merge/Unmerge
466341825Sdim  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
467341825Sdim    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
468341825Sdim    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
469341825Sdim
470341825Sdim    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
471341825Sdim      const LLT &Ty = Query.Types[TypeIdx];
472341825Sdim      if (Ty.isVector()) {
473341825Sdim        const LLT &EltTy = Ty.getElementType();
474341825Sdim        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
475341825Sdim          return true;
476341825Sdim        if (!isPowerOf2_32(EltTy.getSizeInBits()))
477341825Sdim          return true;
478327952Sdim      }
479341825Sdim      return false;
480341825Sdim    };
481327952Sdim
482341825Sdim    // FIXME: This rule is horrible, but specifies the same as what we had
483341825Sdim    // before with the particularly strange definitions removed (e.g.
484341825Sdim    // s8 = G_MERGE_VALUES s32, s32).
485341825Sdim    // Part of the complexity comes from these ops being extremely flexible. For
486341825Sdim    // example, you can build/decompose vectors with it, concatenate vectors,
487341825Sdim    // etc. and in addition to this you can also bitcast with it at the same
488341825Sdim    // time. We've been considering breaking it up into multiple ops to make it
489341825Sdim    // more manageable throughout the backend.
490341825Sdim    getActionDefinitionsBuilder(Op)
491341825Sdim        // Break up vectors with weird elements into scalars
492341825Sdim        .fewerElementsIf(
493341825Sdim            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
494353358Sdim            scalarize(0))
495341825Sdim        .fewerElementsIf(
496341825Sdim            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
497353358Sdim            scalarize(1))
498341825Sdim        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
499341825Sdim        // or 384.
500341825Sdim        .clampScalar(BigTyIdx, s8, s512)
501341825Sdim        .widenScalarIf(
502341825Sdim            [=](const LegalityQuery &Query) {
503341825Sdim              const LLT &Ty = Query.Types[BigTyIdx];
504341825Sdim              return !isPowerOf2_32(Ty.getSizeInBits()) &&
505341825Sdim                     Ty.getSizeInBits() % 64 != 0;
506341825Sdim            },
507341825Sdim            [=](const LegalityQuery &Query) {
508341825Sdim              // Pick the next power of 2, or a multiple of 64 over 128.
509341825Sdim              // Whichever is smaller.
510341825Sdim              const LLT &Ty = Query.Types[BigTyIdx];
511341825Sdim              unsigned NewSizeInBits = 1
512341825Sdim                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
513341825Sdim              if (NewSizeInBits >= 256) {
514341825Sdim                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
515341825Sdim                if (RoundedTo < NewSizeInBits)
516341825Sdim                  NewSizeInBits = RoundedTo;
517341825Sdim              }
518341825Sdim              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
519341825Sdim            })
520341825Sdim        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
521341825Sdim        // worth considering the multiples of 64 since 2*192 and 2*384 are not
522341825Sdim        // valid.
523341825Sdim        .clampScalar(LitTyIdx, s8, s256)
524341825Sdim        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
525341825Sdim        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
526341825Sdim        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
527341825Sdim        // At this point it's simple enough to accept the legal types.
528341825Sdim        .legalIf([=](const LegalityQuery &Query) {
529341825Sdim          const LLT &BigTy = Query.Types[BigTyIdx];
530341825Sdim          const LLT &LitTy = Query.Types[LitTyIdx];
531341825Sdim          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
532341825Sdim            return false;
533341825Sdim          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
534341825Sdim            return false;
535341825Sdim          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
536341825Sdim        })
537341825Sdim        // Any vectors left are the wrong size. Scalarize them.
538353358Sdim      .scalarize(0)
539353358Sdim      .scalarize(1);
540341825Sdim  }
541341825Sdim
542344779Sdim  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
543344779Sdim      .unsupportedIf([=](const LegalityQuery &Query) {
544344779Sdim        const LLT &EltTy = Query.Types[1].getElementType();
545344779Sdim        return Query.Types[0] != EltTy;
546344779Sdim      })
547344779Sdim      .minScalar(2, s64)
548344779Sdim      .legalIf([=](const LegalityQuery &Query) {
549344779Sdim        const LLT &VecTy = Query.Types[1];
550353358Sdim        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
551353358Sdim               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
552344779Sdim      });
553344779Sdim
554353358Sdim  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
555353358Sdim      .legalIf([=](const LegalityQuery &Query) {
556353358Sdim        const LLT &VecTy = Query.Types[0];
557353358Sdim        // TODO: Support s8 and s16
558353358Sdim        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
559353358Sdim      });
560353358Sdim
561344779Sdim  getActionDefinitionsBuilder(G_BUILD_VECTOR)
562353358Sdim      .legalFor({{v4s16, s16},
563353358Sdim                 {v8s16, s16},
564353358Sdim                 {v2s32, s32},
565353358Sdim                 {v4s32, s32},
566353358Sdim                 {v2p0, p0},
567353358Sdim                 {v2s64, s64}})
568344779Sdim      .clampNumElements(0, v4s32, v4s32)
569344779Sdim      .clampNumElements(0, v2s64, v2s64)
570344779Sdim
571344779Sdim      // Deal with larger scalar types, which will be implicitly truncated.
572344779Sdim      .legalIf([=](const LegalityQuery &Query) {
573344779Sdim        return Query.Types[0].getScalarSizeInBits() <
574344779Sdim               Query.Types[1].getSizeInBits();
575344779Sdim      })
576344779Sdim      .minScalarSameAs(1, 0);
577344779Sdim
578353358Sdim  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
579353358Sdim      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
580353358Sdim      .scalarize(1);
581353358Sdim
582353358Sdim  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
583353358Sdim      .legalIf([=](const LegalityQuery &Query) {
584353358Sdim        const LLT &DstTy = Query.Types[0];
585353358Sdim        const LLT &SrcTy = Query.Types[1];
586353358Sdim        // For now just support the TBL2 variant which needs the source vectors
587353358Sdim        // to be the same size as the dest.
588353358Sdim        if (DstTy != SrcTy)
589353358Sdim          return false;
590353358Sdim        for (auto &Ty : {v2s32, v4s32, v2s64}) {
591353358Sdim          if (DstTy == Ty)
592353358Sdim            return true;
593353358Sdim        }
594353358Sdim        return false;
595353358Sdim      })
596353358Sdim      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
597353358Sdim      // just want those lowered into G_BUILD_VECTOR
598353358Sdim      .lowerIf([=](const LegalityQuery &Query) {
599353358Sdim        return !Query.Types[1].isVector();
600353358Sdim      })
601353358Sdim      .clampNumElements(0, v4s32, v4s32)
602353358Sdim      .clampNumElements(0, v2s64, v2s64);
603353358Sdim
604353358Sdim  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
605353358Sdim      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
606353358Sdim
607353358Sdim  getActionDefinitionsBuilder(G_JUMP_TABLE)
608353358Sdim    .legalFor({{p0}, {s64}});
609353358Sdim
610353358Sdim  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
611353358Sdim    return Query.Types[0] == p0 && Query.Types[1] == s64;
612353358Sdim  });
613353358Sdim
614360784Sdim  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
615360784Sdim
616311116Sdim  computeTables();
617341825Sdim  verify(*ST.getInstrInfo());
618311116Sdim}
619321369Sdim
620321369Sdimbool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
621321369Sdim                                          MachineRegisterInfo &MRI,
622344779Sdim                                          MachineIRBuilder &MIRBuilder,
623344779Sdim                                          GISelChangeObserver &Observer) const {
624321369Sdim  switch (MI.getOpcode()) {
625321369Sdim  default:
626321369Sdim    // No idea what to do.
627321369Sdim    return false;
628321369Sdim  case TargetOpcode::G_VAARG:
629321369Sdim    return legalizeVaArg(MI, MRI, MIRBuilder);
630353358Sdim  case TargetOpcode::G_LOAD:
631353358Sdim  case TargetOpcode::G_STORE:
632353358Sdim    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
633353358Sdim  case TargetOpcode::G_SHL:
634353358Sdim  case TargetOpcode::G_ASHR:
635353358Sdim  case TargetOpcode::G_LSHR:
636353358Sdim    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
637321369Sdim  }
638321369Sdim
639321369Sdim  llvm_unreachable("expected switch to return");
640321369Sdim}
641321369Sdim
642360784Sdimbool AArch64LegalizerInfo::legalizeIntrinsic(
643360784Sdim    MachineInstr &MI, MachineRegisterInfo &MRI,
644360784Sdim    MachineIRBuilder &MIRBuilder) const {
645360784Sdim  switch (MI.getIntrinsicID()) {
646360784Sdim  case Intrinsic::memcpy:
647360784Sdim  case Intrinsic::memset:
648360784Sdim  case Intrinsic::memmove:
649360784Sdim    if (createMemLibcall(MIRBuilder, MRI, MI) ==
650360784Sdim        LegalizerHelper::UnableToLegalize)
651360784Sdim      return false;
652360784Sdim    MI.eraseFromParent();
653360784Sdim    return true;
654360784Sdim  default:
655360784Sdim    break;
656360784Sdim  }
657360784Sdim  return true;
658360784Sdim}
659360784Sdim
660353358Sdimbool AArch64LegalizerInfo::legalizeShlAshrLshr(
661353358Sdim    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
662353358Sdim    GISelChangeObserver &Observer) const {
663353358Sdim  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
664353358Sdim         MI.getOpcode() == TargetOpcode::G_LSHR ||
665353358Sdim         MI.getOpcode() == TargetOpcode::G_SHL);
666353358Sdim  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
667353358Sdim  // imported patterns can select it later. Either way, it will be legal.
668353358Sdim  Register AmtReg = MI.getOperand(2).getReg();
669353358Sdim  auto *CstMI = MRI.getVRegDef(AmtReg);
670353358Sdim  assert(CstMI && "expected to find a vreg def");
671353358Sdim  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
672353358Sdim    return true;
673353358Sdim  // Check the shift amount is in range for an immediate form.
674353358Sdim  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
675353358Sdim  if (Amount > 31)
676353358Sdim    return true; // This will have to remain a register variant.
677353358Sdim  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
678353358Sdim  MIRBuilder.setInstr(MI);
679353358Sdim  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
680353358Sdim  MI.getOperand(2).setReg(ExtCst.getReg(0));
681353358Sdim  return true;
682353358Sdim}
683353358Sdim
684353358Sdimbool AArch64LegalizerInfo::legalizeLoadStore(
685353358Sdim    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
686353358Sdim    GISelChangeObserver &Observer) const {
687353358Sdim  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
688353358Sdim         MI.getOpcode() == TargetOpcode::G_LOAD);
689353358Sdim  // Here we just try to handle vector loads/stores where our value type might
690353358Sdim  // have pointer elements, which the SelectionDAG importer can't handle. To
691353358Sdim  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
692353358Sdim  // the value to use s64 types.
693353358Sdim
694353358Sdim  // Custom legalization requires the instruction, if not deleted, must be fully
695353358Sdim  // legalized. In order to allow further legalization of the inst, we create
696353358Sdim  // a new instruction and erase the existing one.
697353358Sdim
698360784Sdim  Register ValReg = MI.getOperand(0).getReg();
699353358Sdim  const LLT ValTy = MRI.getType(ValReg);
700353358Sdim
701353358Sdim  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
702353358Sdim      ValTy.getElementType().getAddressSpace() != 0) {
703353358Sdim    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
704353358Sdim    return false;
705353358Sdim  }
706353358Sdim
707353358Sdim  MIRBuilder.setInstr(MI);
708353358Sdim  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
709353358Sdim  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
710353358Sdim  auto &MMO = **MI.memoperands_begin();
711353358Sdim  if (MI.getOpcode() == TargetOpcode::G_STORE) {
712353358Sdim    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
713353358Sdim    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
714353358Sdim  } else {
715360784Sdim    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
716353358Sdim    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
717353358Sdim    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
718353358Sdim  }
719353358Sdim  MI.eraseFromParent();
720353358Sdim  return true;
721353358Sdim}
722353358Sdim
723321369Sdimbool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
724321369Sdim                                         MachineRegisterInfo &MRI,
725321369Sdim                                         MachineIRBuilder &MIRBuilder) const {
726321369Sdim  MIRBuilder.setInstr(MI);
727321369Sdim  MachineFunction &MF = MIRBuilder.getMF();
728321369Sdim  unsigned Align = MI.getOperand(2).getImm();
729353358Sdim  Register Dst = MI.getOperand(0).getReg();
730353358Sdim  Register ListPtr = MI.getOperand(1).getReg();
731321369Sdim
732321369Sdim  LLT PtrTy = MRI.getType(ListPtr);
733321369Sdim  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
734321369Sdim
735321369Sdim  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
736353358Sdim  Register List = MRI.createGenericVirtualRegister(PtrTy);
737321369Sdim  MIRBuilder.buildLoad(
738321369Sdim      List, ListPtr,
739321369Sdim      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
740321369Sdim                               PtrSize, /* Align = */ PtrSize));
741321369Sdim
742353358Sdim  Register DstPtr;
743321369Sdim  if (Align > PtrSize) {
744321369Sdim    // Realign the list to the actual required alignment.
745321369Sdim    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
746321369Sdim
747360784Sdim    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
748321369Sdim
749321369Sdim    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
750321369Sdim    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
751321369Sdim  } else
752321369Sdim    DstPtr = List;
753321369Sdim
754321369Sdim  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
755321369Sdim  MIRBuilder.buildLoad(
756321369Sdim      Dst, DstPtr,
757321369Sdim      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
758321369Sdim                               ValSize, std::max(Align, PtrSize)));
759321369Sdim
760353358Sdim  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
761321369Sdim
762360784Sdim  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
763321369Sdim
764321369Sdim  MIRBuilder.buildStore(
765321369Sdim      NewList, ListPtr,
766321369Sdim      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
767321369Sdim                               PtrSize, /* Align = */ PtrSize));
768321369Sdim
769321369Sdim  MI.eraseFromParent();
770321369Sdim  return true;
771321369Sdim}
772