AArch64LegalizerInfo.cpp revision 353358
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64Subtarget.h"
16#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
17#include "llvm/CodeGen/MachineInstr.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19#include "llvm/CodeGen/TargetOpcodes.h"
20#include "llvm/CodeGen/ValueTypes.h"
21#include "llvm/IR/DerivedTypes.h"
22#include "llvm/IR/Type.h"
23
24#define DEBUG_TYPE "aarch64-legalinfo"
25
26using namespace llvm;
27using namespace LegalizeActions;
28using namespace LegalizeMutations;
29using namespace LegalityPredicates;
30
/// Populate the legalization rules for the generic opcodes handled on
/// AArch64. Each getActionDefinitionsBuilder() call below describes, for one
/// opcode (or a group of opcodes), which type combinations are accepted as-is
/// and which action (widen, clamp, split, scalarize, lower, libcall, custom)
/// is requested for the remaining ones.
///
/// \param ST Subtarget queried for optional features (hasFullFP16(),
///           hasLSE()) and for the instruction info handed to verify().
31AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
32  using namespace TargetOpcode;
  // Shorthand low-level types used by the rule definitions below.
33  const LLT p0 = LLT::pointer(0, 64);
34  const LLT s1 = LLT::scalar(1);
35  const LLT s8 = LLT::scalar(8);
36  const LLT s16 = LLT::scalar(16);
37  const LLT s32 = LLT::scalar(32);
38  const LLT s64 = LLT::scalar(64);
39  const LLT s128 = LLT::scalar(128);
40  const LLT s256 = LLT::scalar(256);
41  const LLT s512 = LLT::scalar(512);
42  const LLT v16s8 = LLT::vector(16, 8);
43  const LLT v8s8 = LLT::vector(8, 8);
44  const LLT v4s8 = LLT::vector(4, 8);
45  const LLT v8s16 = LLT::vector(8, 16);
46  const LLT v4s16 = LLT::vector(4, 16);
47  const LLT v2s16 = LLT::vector(2, 16);
48  const LLT v2s32 = LLT::vector(2, 32);
49  const LLT v4s32 = LLT::vector(4, 32);
50  const LLT v2s64 = LLT::vector(2, 64);
51  const LLT v2p0 = LLT::vector(2, p0);
52
  // The fewerElementsIf predicate below matches every vector type except
  // <2 x s64>; s64-element vectors are requested as <2 x s64>, all other
  // vectors as their scalar element type.
53  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
54    .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
55    .clampScalar(0, s1, s64)
56    .widenScalarToNextPow2(0, 8)
57    .fewerElementsIf(
58      [=](const LegalityQuery &Query) {
59        return Query.Types[0].isVector() &&
60          (Query.Types[0].getElementType() != s64 ||
61           Query.Types[0].getNumElements() != 2);
62      },
63      [=](const LegalityQuery &Query) {
64        LLT EltTy = Query.Types[0].getElementType();
65        if (EltTy == s64)
66          return std::make_pair(0, LLT::vector(2, 64));
67        return std::make_pair(0, EltTy);
68      });
69
70  getActionDefinitionsBuilder(G_PHI)
71      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
72      .clampScalar(0, s16, s64)
73      .widenScalarToNextPow2(0);
74
75  getActionDefinitionsBuilder(G_BSWAP)
76      .legalFor({s32, s64, v4s32, v2s32, v2s64})
77      .clampScalar(0, s16, s64)
78      .widenScalarToNextPow2(0);
79
  // Integer arithmetic and bitwise logic share a single rule set.
80  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
81      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
82      .clampScalar(0, s32, s64)
83      .widenScalarToNextPow2(0)
84      .clampNumElements(0, v2s32, v4s32)
85      .clampNumElements(0, v2s64, v2s64)
86      .moreElementsToNextPow2(0);
87
  // NOTE(review): legalizeCustom() dispatches G_SHL, but no custom rule is
  // registered for G_SHL here -- confirm whether that path is reachable.
88  getActionDefinitionsBuilder(G_SHL)
89    .legalFor({{s32, s32}, {s64, s64},
90               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
91    .clampScalar(1, s32, s64)
92    .clampScalar(0, s32, s64)
93    .widenScalarToNextPow2(0)
94    .clampNumElements(0, v2s32, v4s32)
95    .clampNumElements(0, v2s64, v2s64)
96    .moreElementsToNextPow2(0)
97    .minScalarSameAs(1, 0);
98
99  getActionDefinitionsBuilder(G_GEP)
100      .legalFor({{p0, s64}})
101      .clampScalar(1, s64, s64);
102
103  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
104
105  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
106      .legalFor({s32, s64})
107      .clampScalar(0, s32, s64)
108      .widenScalarToNextPow2(0)
109      .scalarize(0);
110
  // Non-vector 32-bit right shifts whose amount is also 32 bits wide are
  // marked custom (handled by legalizeShlAshrLshr() via legalizeCustom());
  // this predicate is listed ahead of the legalFor() set.
111  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
112      .customIf([=](const LegalityQuery &Query) {
113        const auto &SrcTy = Query.Types[0];
114        const auto &AmtTy = Query.Types[1];
115        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
116               AmtTy.getSizeInBits() == 32;
117      })
118      .legalFor(
119          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
120      .clampScalar(1, s32, s64)
121      .clampScalar(0, s32, s64)
122      .minScalarSameAs(1, 0);
123
124  getActionDefinitionsBuilder({G_SREM, G_UREM})
125      .lowerFor({s1, s8, s16, s32, s64});
126
127  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
128      .lowerFor({{s64, s1}});
129
130  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
131
132  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
133      .legalFor({{s32, s1}, {s64, s1}});
134
135  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
136    .legalFor({s32, s64, v2s64, v4s32, v2s32});
137
138  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
139
140  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
141                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
142                               G_FNEARBYINT})
143      // If we don't have full FP16 support, then scalarize the elements of
144      // vectors containing fp16 types.
145      .fewerElementsIf(
146          [=, &ST](const LegalityQuery &Query) {
147            const auto &Ty = Query.Types[0];
148            return Ty.isVector() && Ty.getElementType() == s16 &&
149                   !ST.hasFullFP16();
150          },
151          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
152      // If we don't have full FP16 support, then widen s16 to s32 if we
153      // encounter it.
154      .widenScalarIf(
155          [=, &ST](const LegalityQuery &Query) {
156            return Query.Types[0] == s16 && !ST.hasFullFP16();
157          },
158          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
159      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
160
161  getActionDefinitionsBuilder(
162      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
163      // We need a call for these, so we always need to scalarize.
164      .scalarize(0)
165      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
166      .minScalar(0, s32)
167      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
168
  // G_INSERT: a destination (type 0) that is not strictly wider than the
  // inserted value (type 1) is rejected as unsupported.
169  getActionDefinitionsBuilder(G_INSERT)
170      .unsupportedIf([=](const LegalityQuery &Query) {
171        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
172      })
173      .legalIf([=](const LegalityQuery &Query) {
174        const LLT &Ty0 = Query.Types[0];
175        const LLT &Ty1 = Query.Types[1];
176        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
177          return false;
178        return isPowerOf2_32(Ty1.getSizeInBits()) &&
179               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
180      })
181      .clampScalar(0, s32, s64)
182      .widenScalarToNextPow2(0)
183      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
184      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
185      .widenScalarToNextPow2(1);
186
  // G_EXTRACT: a result (type 0) that is not strictly narrower than the
  // source (type 1) is rejected as unsupported.
187  getActionDefinitionsBuilder(G_EXTRACT)
188      .unsupportedIf([=](const LegalityQuery &Query) {
189        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
190      })
191      .legalIf([=](const LegalityQuery &Query) {
192        const LLT &Ty0 = Query.Types[0];
193        const LLT &Ty1 = Query.Types[1];
194        if (Ty1 != s32 && Ty1 != s64)
195          return false;
        // NOTE(review): this check appears unreachable -- any Ty1 that gets
        // past the test above equals s32 or s64. The G_INSERT rule instead
        // lists p0 in its first test; confirm which behavior is intended.
196        if (Ty1 == p0)
197          return true;
198        return isPowerOf2_32(Ty0.getSizeInBits()) &&
199               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
200      })
201      .clampScalar(1, s32, s64)
202      .widenScalarToNextPow2(1)
203      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
204      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
205      .widenScalarToNextPow2(0);
206
  // Entries are {value type, pointer type, memory size in bits, <fourth
  // field>}; the fourth field is presumably a minimum alignment -- TODO
  // confirm against legalForTypesWithMemDesc().
207  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
208      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
209                                 {s32, p0, 16, 8},
210                                 {s32, p0, 32, 8},
211                                 {s64, p0, 8, 2},
212                                 {s64, p0, 16, 2},
213                                 {s64, p0, 32, 4},
214                                 {s64, p0, 64, 8},
215                                 {p0, p0, 64, 8},
216                                 {v2s32, p0, 64, 8}})
217      .clampScalar(0, s32, s64)
218      .widenScalarToNextPow2(0)
219      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
220      //       how to do that yet.
221      .unsupportedIfMemSizeNotPow2()
222      // Lower anything left over into G_*EXT and G_LOAD
223      .lower();
224
  // True when type 0 is a vector whose elements are address-space-0 pointers.
225  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
226    const LLT &ValTy = Query.Types[0];
227    if (!ValTy.isVector())
228      return false;
229    const LLT EltTy = ValTy.getElementType();
230    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
231  };
232
233  getActionDefinitionsBuilder(G_LOAD)
234      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
235                                 {s16, p0, 16, 8},
236                                 {s32, p0, 32, 8},
237                                 {s64, p0, 64, 8},
238                                 {p0, p0, 64, 8},
239                                 {v8s8, p0, 64, 8},
240                                 {v16s8, p0, 128, 8},
241                                 {v4s16, p0, 64, 8},
242                                 {v8s16, p0, 128, 8},
243                                 {v2s32, p0, 64, 8},
244                                 {v4s32, p0, 128, 8},
245                                 {v2s64, p0, 128, 8}})
246      // These extends are also legal
247      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
248                                 {s32, p0, 16, 8}})
249      .clampScalar(0, s8, s64)
250      .widenScalarToNextPow2(0)
251      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
252      //       how to do that yet.
253      .unsupportedIfMemSizeNotPow2()
254      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
255      .lowerIf([=](const LegalityQuery &Query) {
256        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
257      })
258      .clampMaxNumElements(0, s32, 2)
259      .clampMaxNumElements(0, s64, 1)
      // Vectors of address-space-0 pointers are marked custom; legalizeCustom()
      // forwards G_LOAD/G_STORE to legalizeLoadStore().
260      .customIf(IsPtrVecPred);
261
  // NOTE(review): unlike the G_LOAD list above, this list has no
  // {v8s8, p0, 64, 8} entry -- confirm whether that is intentional.
262  getActionDefinitionsBuilder(G_STORE)
263      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
264                                 {s16, p0, 16, 8},
265                                 {s32, p0, 32, 8},
266                                 {s64, p0, 64, 8},
267                                 {p0, p0, 64, 8},
268                                 {v16s8, p0, 128, 8},
269                                 {v4s16, p0, 64, 8},
270                                 {v8s16, p0, 128, 8},
271                                 {v2s32, p0, 64, 8},
272                                 {v4s32, p0, 128, 8},
273                                 {v2s64, p0, 128, 8}})
274      .clampScalar(0, s8, s64)
275      .widenScalarToNextPow2(0)
276      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
277      //       how to do that yet.
278      .unsupportedIfMemSizeNotPow2()
      // Lower scalar stores whose value size differs from the memory size.
279      .lowerIf([=](const LegalityQuery &Query) {
280        return Query.Types[0].isScalar() &&
281               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
282      })
283      .clampMaxNumElements(0, s32, 2)
284      .clampMaxNumElements(0, s64, 1)
285      .customIf(IsPtrVecPred);
286
287  // Constants
288  getActionDefinitionsBuilder(G_CONSTANT)
289    .legalFor({p0, s8, s16, s32, s64})
290      .clampScalar(0, s8, s64)
291      .widenScalarToNextPow2(0);
292  getActionDefinitionsBuilder(G_FCONSTANT)
293      .legalFor({s32, s64})
294      .clampScalar(0, s32, s64);
295
296  getActionDefinitionsBuilder(G_ICMP)
297      .legalFor({{s32, s32},
298                 {s32, s64},
299                 {s32, p0},
300                 {v4s32, v4s32},
301                 {v2s32, v2s32},
302                 {v2s64, v2s64},
303                 {v2s64, v2p0},
304                 {v4s16, v4s16},
305                 {v8s16, v8s16},
306                 {v8s8, v8s8},
307                 {v16s8, v16s8}})
308      .clampScalar(0, s32, s32)
309      .clampScalar(1, s32, s64)
      // For vector compares of non-pointer elements, applies when the result
      // element type differs from the source element type.
310      .minScalarEltSameAsIf(
311          [=](const LegalityQuery &Query) {
312            const LLT &Ty = Query.Types[0];
313            const LLT &SrcTy = Query.Types[1];
314            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
315                   Ty.getElementType() != SrcTy.getElementType();
316          },
317          0, 1)
318      .minScalarOrEltIf(
319          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
320          1, s32)
321      .minScalarOrEltIf(
322          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
323          s64)
324      .widenScalarOrEltToNextPow2(1);
325
326  getActionDefinitionsBuilder(G_FCMP)
327      .legalFor({{s32, s32}, {s32, s64}})
328      .clampScalar(0, s32, s32)
329      .clampScalar(1, s32, s64)
330      .widenScalarToNextPow2(1);
331
332  // Extensions
333  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
334      .legalIf([=](const LegalityQuery &Query) {
335        unsigned DstSize = Query.Types[0].getSizeInBits();
336
337        // Make sure that we have something that will fit in a register, and
338        // make sure it's a power of 2.
339        if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
340          return false;
341
342        const LLT &SrcTy = Query.Types[1];
343
344        // Special case for s1.
345        if (SrcTy == s1)
346          return true;
347
348        // Make sure we fit in a register otherwise. Don't bother checking that
349        // the source type is below 128 bits. We shouldn't be allowing anything
350        // through which is wider than the destination in the first place.
351        unsigned SrcSize = SrcTy.getSizeInBits();
352        if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
353          return false;
354
355        return true;
356      });
357
358  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
359
360  // FP conversions
361  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
362      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
363  getActionDefinitionsBuilder(G_FPEXT).legalFor(
364      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
365
366  // Conversions
367  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
368      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
369      .clampScalar(0, s32, s64)
370      .widenScalarToNextPow2(0)
371      .clampScalar(1, s32, s64)
372      .widenScalarToNextPow2(1);
373
374  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
375      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
376      .clampScalar(1, s32, s64)
377      .widenScalarToNextPow2(1)
378      .clampScalar(0, s32, s64)
379      .widenScalarToNextPow2(0);
380
381  // Control-flow
382  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
383  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
384
385  // Select
386  // FIXME: We can probably do a bit better than just scalarizing vector
387  // selects.
388  getActionDefinitionsBuilder(G_SELECT)
389      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
390      .clampScalar(0, s32, s64)
391      .widenScalarToNextPow2(0)
392      .scalarize(0);
393
394  // Pointer-handling
395  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
396  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
397
398  getActionDefinitionsBuilder(G_PTRTOINT)
399      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
400      .maxScalar(0, s64)
401      .widenScalarToNextPow2(0, /*Min*/ 8);
402
  // Source and destination of G_INTTOPTR must have the same bit width.
403  getActionDefinitionsBuilder(G_INTTOPTR)
404      .unsupportedIf([&](const LegalityQuery &Query) {
405        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
406      })
407      .legalFor({{p0, s64}});
408
409  // Casts for 32 and 64-bit width type are just copies.
410  // Same for 128-bit width type, except they are on the FPR bank.
411  getActionDefinitionsBuilder(G_BITCAST)
412      // FIXME: This is wrong since G_BITCAST is not allowed to change the
413      // number of bits but it's what the previous code described and fixing
414      // it breaks tests.
415      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
416                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
417                                 v2p0});
418
419  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
420
421  // va_list must be a pointer, but most sized types are pretty easy to handle
422  // as the destination.
423  getActionDefinitionsBuilder(G_VAARG)
424      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
425      .clampScalar(0, s8, s64)
426      .widenScalarToNextPow2(0, /*Min*/ 8);
427
  // The atomic cmpxchg / read-modify-write rules below are only registered
  // when the subtarget reports LSE support.
428  if (ST.hasLSE()) {
429    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
430        .lowerIf(all(
431            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
432            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
433
434    getActionDefinitionsBuilder(
435        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
436         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
437         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
438        .legalIf(all(
439            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
440            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
441  }
442
443  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
444
445  // Merge/Unmerge
446  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    // For G_MERGE_VALUES the "big" type is type index 0 and the "little" type
    // is index 1; for G_UNMERGE_VALUES the two indices are swapped.
447    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
448    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
449
    // True when the type at TypeIdx is a vector whose element size is below 8
    // bits, above 64 bits, or not a power of two. Always false for
    // non-vector types.
450    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
451      const LLT &Ty = Query.Types[TypeIdx];
452      if (Ty.isVector()) {
453        const LLT &EltTy = Ty.getElementType();
454        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
455          return true;
456        if (!isPowerOf2_32(EltTy.getSizeInBits()))
457          return true;
458      }
459      return false;
460    };
461
462    // FIXME: This rule is horrible, but specifies the same as what we had
463    // before with the particularly strange definitions removed (e.g.
464    // s8 = G_MERGE_VALUES s32, s32).
465    // Part of the complexity comes from these ops being extremely flexible. For
466    // example, you can build/decompose vectors with it, concatenate vectors,
467    // etc. and in addition to this you can also bitcast with it at the same
468    // time. We've been considering breaking it up into multiple ops to make it
469    // more manageable throughout the backend.
470    getActionDefinitionsBuilder(Op)
471        // Break up vectors with weird elements into scalars
472        .fewerElementsIf(
473            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
474            scalarize(0))
475        .fewerElementsIf(
476            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
477            scalarize(1))
478        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
479        // or 384.
480        .clampScalar(BigTyIdx, s8, s512)
481        .widenScalarIf(
482            [=](const LegalityQuery &Query) {
483              const LLT &Ty = Query.Types[BigTyIdx];
484              return !isPowerOf2_32(Ty.getSizeInBits()) &&
485                     Ty.getSizeInBits() % 64 != 0;
486            },
487            [=](const LegalityQuery &Query) {
488              // Pick the next power of 2, or a multiple of 64 over 128.
489              // Whichever is smaller.
490              const LLT &Ty = Query.Types[BigTyIdx];
491              unsigned NewSizeInBits = 1
492                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
493              if (NewSizeInBits >= 256) {
494                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
495                if (RoundedTo < NewSizeInBits)
496                  NewSizeInBits = RoundedTo;
497              }
498              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
499            })
500        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
501        // worth considering the multiples of 64 since 2*192 and 2*384 are not
502        // valid.
503        .clampScalar(LitTyIdx, s8, s256)
504        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
505        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
506        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
507        // At this point it's simple enough to accept the legal types.
508        .legalIf([=](const LegalityQuery &Query) {
509          const LLT &BigTy = Query.Types[BigTyIdx];
510          const LLT &LitTy = Query.Types[LitTyIdx];
511          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
512            return false;
513          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
514            return false;
515          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
516        })
517        // Any vectors left are the wrong size. Scalarize them.
518      .scalarize(0)
519      .scalarize(1);
520  }
521
  // The extracted result type (index 0) must equal the element type of the
  // source vector (index 1); anything else is unsupported.
522  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
523      .unsupportedIf([=](const LegalityQuery &Query) {
524        const LLT &EltTy = Query.Types[1].getElementType();
525        return Query.Types[0] != EltTy;
526      })
527      .minScalar(2, s64)
528      .legalIf([=](const LegalityQuery &Query) {
529        const LLT &VecTy = Query.Types[1];
530        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
531               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
532      });
533
534  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
535      .legalIf([=](const LegalityQuery &Query) {
536        const LLT &VecTy = Query.Types[0];
537        // TODO: Support s8 and s16
538        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
539      });
540
541  getActionDefinitionsBuilder(G_BUILD_VECTOR)
542      .legalFor({{v4s16, s16},
543                 {v8s16, s16},
544                 {v2s32, s32},
545                 {v4s32, s32},
546                 {v2p0, p0},
547                 {v2s64, s64}})
548      .clampNumElements(0, v4s32, v4s32)
549      .clampNumElements(0, v2s64, v2s64)
550
551      // Deal with larger scalar types, which will be implicitly truncated.
552      .legalIf([=](const LegalityQuery &Query) {
553        return Query.Types[0].getScalarSizeInBits() <
554               Query.Types[1].getSizeInBits();
555      })
556      .minScalarSameAs(1, 0);
557
558  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
559      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
560      .scalarize(1);
561
562  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
563      .legalIf([=](const LegalityQuery &Query) {
564        const LLT &DstTy = Query.Types[0];
565        const LLT &SrcTy = Query.Types[1];
566        // For now just support the TBL2 variant which needs the source vectors
567        // to be the same size as the dest.
568        if (DstTy != SrcTy)
569          return false;
570        for (auto &Ty : {v2s32, v4s32, v2s64}) {
571          if (DstTy == Ty)
572            return true;
573        }
574        return false;
575      })
576      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
577      // just want those lowered into G_BUILD_VECTOR
578      .lowerIf([=](const LegalityQuery &Query) {
579        return !Query.Types[1].isVector();
580      })
581      .clampNumElements(0, v4s32, v4s32)
582      .clampNumElements(0, v2s64, v2s64);
583
584  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
585      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
586
587  getActionDefinitionsBuilder(G_JUMP_TABLE)
588    .legalFor({{p0}, {s64}});
589
590  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
591    return Query.Types[0] == p0 && Query.Types[1] == s64;
592  });
593
  // Called once, after every rule above has been registered.
594  computeTables();
595  verify(*ST.getInstrInfo());
596}
597
598bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
599                                          MachineRegisterInfo &MRI,
600                                          MachineIRBuilder &MIRBuilder,
601                                          GISelChangeObserver &Observer) const {
602  switch (MI.getOpcode()) {
603  default:
604    // No idea what to do.
605    return false;
606  case TargetOpcode::G_VAARG:
607    return legalizeVaArg(MI, MRI, MIRBuilder);
608  case TargetOpcode::G_LOAD:
609  case TargetOpcode::G_STORE:
610    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
611  case TargetOpcode::G_SHL:
612  case TargetOpcode::G_ASHR:
613  case TargetOpcode::G_LSHR:
614    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
615  }
616
617  llvm_unreachable("expected switch to return");
618}
619
620bool AArch64LegalizerInfo::legalizeShlAshrLshr(
621    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
622    GISelChangeObserver &Observer) const {
623  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
624         MI.getOpcode() == TargetOpcode::G_LSHR ||
625         MI.getOpcode() == TargetOpcode::G_SHL);
626  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
627  // imported patterns can select it later. Either way, it will be legal.
628  Register AmtReg = MI.getOperand(2).getReg();
629  auto *CstMI = MRI.getVRegDef(AmtReg);
630  assert(CstMI && "expected to find a vreg def");
631  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
632    return true;
633  // Check the shift amount is in range for an immediate form.
634  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
635  if (Amount > 31)
636    return true; // This will have to remain a register variant.
637  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
638  MIRBuilder.setInstr(MI);
639  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
640  MI.getOperand(2).setReg(ExtCst.getReg(0));
641  return true;
642}
643
644bool AArch64LegalizerInfo::legalizeLoadStore(
645    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
646    GISelChangeObserver &Observer) const {
647  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
648         MI.getOpcode() == TargetOpcode::G_LOAD);
649  // Here we just try to handle vector loads/stores where our value type might
650  // have pointer elements, which the SelectionDAG importer can't handle. To
651  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
652  // the value to use s64 types.
653
654  // Custom legalization requires the instruction, if not deleted, must be fully
655  // legalized. In order to allow further legalization of the inst, we create
656  // a new instruction and erase the existing one.
657
658  unsigned ValReg = MI.getOperand(0).getReg();
659  const LLT ValTy = MRI.getType(ValReg);
660
661  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
662      ValTy.getElementType().getAddressSpace() != 0) {
663    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
664    return false;
665  }
666
667  MIRBuilder.setInstr(MI);
668  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
669  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
670  auto &MMO = **MI.memoperands_begin();
671  if (MI.getOpcode() == TargetOpcode::G_STORE) {
672    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
673    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
674  } else {
675    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
676    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
677    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
678  }
679  MI.eraseFromParent();
680  return true;
681}
682
683bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
684                                         MachineRegisterInfo &MRI,
685                                         MachineIRBuilder &MIRBuilder) const {
686  MIRBuilder.setInstr(MI);
687  MachineFunction &MF = MIRBuilder.getMF();
688  unsigned Align = MI.getOperand(2).getImm();
689  Register Dst = MI.getOperand(0).getReg();
690  Register ListPtr = MI.getOperand(1).getReg();
691
692  LLT PtrTy = MRI.getType(ListPtr);
693  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
694
695  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
696  Register List = MRI.createGenericVirtualRegister(PtrTy);
697  MIRBuilder.buildLoad(
698      List, ListPtr,
699      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
700                               PtrSize, /* Align = */ PtrSize));
701
702  Register DstPtr;
703  if (Align > PtrSize) {
704    // Realign the list to the actual required alignment.
705    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
706
707    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));
708
709    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
710    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
711  } else
712    DstPtr = List;
713
714  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
715  MIRBuilder.buildLoad(
716      Dst, DstPtr,
717      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
718                               ValSize, std::max(Align, PtrSize)));
719
720  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
721
722  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));
723
724  MIRBuilder.buildStore(
725      NewList, ListPtr,
726      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
727                               PtrSize, /* Align = */ PtrSize));
728
729  MI.eraseFromParent();
730  return true;
731}
732