1//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Define several functions to decode x86 specific shuffle semantics using
10// constants from the constant pool.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ShuffleDecodeConstantPool.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/IR/Constants.h"
19
20//===----------------------------------------------------------------------===//
21//  Vector Mask Decoding
22//===----------------------------------------------------------------------===//
23
24namespace llvm {
25
26static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
27                                APInt &UndefElts,
28                                SmallVectorImpl<uint64_t> &RawMask) {
29  // It is not an error for shuffle masks to not be a vector of
30  // MaskEltSizeInBits because the constant pool uniques constants by their
31  // bit representation.
32  // e.g. the following take up the same space in the constant pool:
33  //   i128 -170141183420855150465331762880109871104
34  //
35  //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
36  //
37  //   <4 x i32> <i32 -2147483648, i32 -2147483648,
38  //              i32 -2147483648, i32 -2147483648>
39  auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
40  if (!CstTy)
41    return false;
42
43  Type *CstEltTy = CstTy->getElementType();
44  if (!CstEltTy->isIntegerTy())
45    return false;
46
47  unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
48  unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
49  unsigned NumCstElts = CstTy->getNumElements();
50
51  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
52         "Unaligned shuffle mask size");
53
54  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
55  UndefElts = APInt(NumMaskElts, 0);
56  RawMask.resize(NumMaskElts, 0);
57
58  // Fast path - if the constants match the mask size then copy direct.
59  if (MaskEltSizeInBits == CstEltSizeInBits) {
60    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
61    for (unsigned i = 0; i != NumMaskElts; ++i) {
62      Constant *COp = C->getAggregateElement(i);
63      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
64        return false;
65
66      if (isa<UndefValue>(COp)) {
67        UndefElts.setBit(i);
68        RawMask[i] = 0;
69        continue;
70      }
71
72      auto *Elt = cast<ConstantInt>(COp);
73      RawMask[i] = Elt->getValue().getZExtValue();
74    }
75    return true;
76  }
77
78  // Extract all the undef/constant element data and pack into single bitsets.
79  APInt UndefBits(CstSizeInBits, 0);
80  APInt MaskBits(CstSizeInBits, 0);
81  for (unsigned i = 0; i != NumCstElts; ++i) {
82    Constant *COp = C->getAggregateElement(i);
83    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
84      return false;
85
86    unsigned BitOffset = i * CstEltSizeInBits;
87
88    if (isa<UndefValue>(COp)) {
89      UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
90      continue;
91    }
92
93    MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
94  }
95
96  // Now extract the undef/constant bit data into the raw shuffle masks.
97  for (unsigned i = 0; i != NumMaskElts; ++i) {
98    unsigned BitOffset = i * MaskEltSizeInBits;
99    APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
100
101    // Only treat the element as UNDEF if all bits are UNDEF, otherwise
102    // treat it as zero.
103    if (EltUndef.isAllOnesValue()) {
104      UndefElts.setBit(i);
105      RawMask[i] = 0;
106      continue;
107    }
108
109    APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
110    RawMask[i] = EltBits.getZExtValue();
111  }
112
113  return true;
114}
115
116void DecodePSHUFBMask(const Constant *C, unsigned Width,
117                      SmallVectorImpl<int> &ShuffleMask) {
118  assert((Width == 128 || Width == 256 || Width == 512) &&
119         C->getType()->getPrimitiveSizeInBits() >= Width &&
120         "Unexpected vector size.");
121
122  // The shuffle mask requires a byte vector.
123  APInt UndefElts;
124  SmallVector<uint64_t, 64> RawMask;
125  if (!extractConstantMask(C, 8, UndefElts, RawMask))
126    return;
127
128  unsigned NumElts = Width / 8;
129  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
130         "Unexpected number of vector elements.");
131
132  for (unsigned i = 0; i != NumElts; ++i) {
133    if (UndefElts[i]) {
134      ShuffleMask.push_back(SM_SentinelUndef);
135      continue;
136    }
137
138    uint64_t Element = RawMask[i];
139    // If the high bit (7) of the byte is set, the element is zeroed.
140    if (Element & (1 << 7))
141      ShuffleMask.push_back(SM_SentinelZero);
142    else {
143      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
144      // lane of the vector we're inside.
145      unsigned Base = i & ~0xf;
146
147      // Only the least significant 4 bits of the byte are used.
148      int Index = Base + (Element & 0xf);
149      ShuffleMask.push_back(Index);
150    }
151  }
152}
153
154void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
155                        SmallVectorImpl<int> &ShuffleMask) {
156  assert((Width == 128 || Width == 256 || Width == 512) &&
157         C->getType()->getPrimitiveSizeInBits() >= Width &&
158         "Unexpected vector size.");
159  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
160
161  // The shuffle mask requires elements the same size as the target.
162  APInt UndefElts;
163  SmallVector<uint64_t, 16> RawMask;
164  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
165    return;
166
167  unsigned NumElts = Width / ElSize;
168  unsigned NumEltsPerLane = 128 / ElSize;
169  assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
170         "Unexpected number of vector elements.");
171
172  for (unsigned i = 0; i != NumElts; ++i) {
173    if (UndefElts[i]) {
174      ShuffleMask.push_back(SM_SentinelUndef);
175      continue;
176    }
177
178    int Index = i & ~(NumEltsPerLane - 1);
179    uint64_t Element = RawMask[i];
180    if (ElSize == 64)
181      Index += (Element >> 1) & 0x1;
182    else
183      Index += Element & 0x3;
184
185    ShuffleMask.push_back(Index);
186  }
187}
188
189void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
190                         unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
191  Type *MaskTy = C->getType();
192  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
193  (void)MaskTySize;
194  assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
195         "Unexpected vector size.");
196
197  // The shuffle mask requires elements the same size as the target.
198  APInt UndefElts;
199  SmallVector<uint64_t, 8> RawMask;
200  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
201    return;
202
203  unsigned NumElts = Width / ElSize;
204  unsigned NumEltsPerLane = 128 / ElSize;
205  assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
206         "Unexpected number of vector elements.");
207
208  for (unsigned i = 0; i != NumElts; ++i) {
209    if (UndefElts[i]) {
210      ShuffleMask.push_back(SM_SentinelUndef);
211      continue;
212    }
213
214    // VPERMIL2 Operation.
215    // Bits[3] - Match Bit.
216    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
217    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
218    uint64_t Selector = RawMask[i];
219    unsigned MatchBit = (Selector >> 3) & 0x1;
220
221    // M2Z[0:1]     MatchBit
222    //   0Xb           X        Source selected by Selector index.
223    //   10b           0        Source selected by Selector index.
224    //   10b           1        Zero.
225    //   11b           0        Zero.
226    //   11b           1        Source selected by Selector index.
227    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
228      ShuffleMask.push_back(SM_SentinelZero);
229      continue;
230    }
231
232    int Index = i & ~(NumEltsPerLane - 1);
233    if (ElSize == 64)
234      Index += (Selector >> 1) & 0x1;
235    else
236      Index += Selector & 0x3;
237
238    int Src = (Selector >> 2) & 0x1;
239    Index += Src * NumElts;
240    ShuffleMask.push_back(Index);
241  }
242}
243
244void DecodeVPPERMMask(const Constant *C, unsigned Width,
245                      SmallVectorImpl<int> &ShuffleMask) {
246  Type *MaskTy = C->getType();
247  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
248  (void)MaskTySize;
249  assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
250
251  // The shuffle mask requires a byte vector.
252  APInt UndefElts;
253  SmallVector<uint64_t, 16> RawMask;
254  if (!extractConstantMask(C, 8, UndefElts, RawMask))
255    return;
256
257  unsigned NumElts = Width / 8;
258  assert(NumElts == 16 && "Unexpected number of vector elements.");
259
260  for (unsigned i = 0; i != NumElts; ++i) {
261    if (UndefElts[i]) {
262      ShuffleMask.push_back(SM_SentinelUndef);
263      continue;
264    }
265
266    // VPPERM Operation
267    // Bits[4:0] - Byte Index (0 - 31)
268    // Bits[7:5] - Permute Operation
269    //
270    // Permute Operation:
271    // 0 - Source byte (no logical operation).
272    // 1 - Invert source byte.
273    // 2 - Bit reverse of source byte.
274    // 3 - Bit reverse of inverted source byte.
275    // 4 - 00h (zero - fill).
276    // 5 - FFh (ones - fill).
277    // 6 - Most significant bit of source byte replicated in all bit positions.
278    // 7 - Invert most significant bit of source byte and replicate in all bit
279    // positions.
280    uint64_t Element = RawMask[i];
281    uint64_t Index = Element & 0x1F;
282    uint64_t PermuteOp = (Element >> 5) & 0x7;
283
284    if (PermuteOp == 4) {
285      ShuffleMask.push_back(SM_SentinelZero);
286      continue;
287    }
288    if (PermuteOp != 0) {
289      ShuffleMask.clear();
290      return;
291    }
292    ShuffleMask.push_back((int)Index);
293  }
294}
295
296void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
297                      SmallVectorImpl<int> &ShuffleMask) {
298  assert((Width == 128 || Width == 256 || Width == 512) &&
299         C->getType()->getPrimitiveSizeInBits() >= Width &&
300         "Unexpected vector size.");
301  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
302         "Unexpected vector element size.");
303
304  // The shuffle mask requires elements the same size as the target.
305  APInt UndefElts;
306  SmallVector<uint64_t, 64> RawMask;
307  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
308    return;
309
310  unsigned NumElts = Width / ElSize;
311
312  for (unsigned i = 0; i != NumElts; ++i) {
313    if (UndefElts[i]) {
314      ShuffleMask.push_back(SM_SentinelUndef);
315      continue;
316    }
317    int Index = RawMask[i] & (NumElts - 1);
318    ShuffleMask.push_back(Index);
319  }
320}
321
322void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
323                       SmallVectorImpl<int> &ShuffleMask) {
324  assert((Width == 128 || Width == 256 || Width == 512) &&
325         C->getType()->getPrimitiveSizeInBits() >= Width &&
326         "Unexpected vector size.");
327  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
328         "Unexpected vector element size.");
329
330  // The shuffle mask requires elements the same size as the target.
331  APInt UndefElts;
332  SmallVector<uint64_t, 64> RawMask;
333  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
334    return;
335
336  unsigned NumElts = Width / ElSize;
337
338  for (unsigned i = 0; i != NumElts; ++i) {
339    if (UndefElts[i]) {
340      ShuffleMask.push_back(SM_SentinelUndef);
341      continue;
342    }
343    int Index = RawMask[i] & (NumElts*2 - 1);
344    ShuffleMask.push_back(Index);
345  }
346}
347} // llvm namespace
348