//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics using
// constants from the constant pool.
//
//===----------------------------------------------------------------------===//
13293248Sdim
14293248Sdim#include "Utils/X86ShuffleDecode.h"
15321369Sdim#include "llvm/ADT/APInt.h"
16293248Sdim#include "llvm/IR/Constants.h"
17293248Sdim
//===----------------------------------------------------------------------===//
//  Vector Mask Decoding
//===----------------------------------------------------------------------===//
21293248Sdim
22293248Sdimnamespace llvm {
23293248Sdim
24314564Sdimstatic bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
25321369Sdim                                APInt &UndefElts,
26314564Sdim                                SmallVectorImpl<uint64_t> &RawMask) {
27314564Sdim  // It is not an error for shuffle masks to not be a vector of
28314564Sdim  // MaskEltSizeInBits because the constant pool uniques constants by their
29314564Sdim  // bit representation.
30293248Sdim  // e.g. the following take up the same space in the constant pool:
31293248Sdim  //   i128 -170141183420855150465331762880109871104
32293248Sdim  //
33293248Sdim  //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
34293248Sdim  //
35293248Sdim  //   <4 x i32> <i32 -2147483648, i32 -2147483648,
36293248Sdim  //              i32 -2147483648, i32 -2147483648>
37314564Sdim  Type *CstTy = C->getType();
38314564Sdim  if (!CstTy->isVectorTy())
39314564Sdim    return false;
40293248Sdim
41314564Sdim  Type *CstEltTy = CstTy->getVectorElementType();
42314564Sdim  if (!CstEltTy->isIntegerTy())
43314564Sdim    return false;
44293248Sdim
45314564Sdim  unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
46314564Sdim  unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
47314564Sdim  unsigned NumCstElts = CstTy->getVectorNumElements();
48293248Sdim
49321369Sdim  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
50321369Sdim         "Unaligned shuffle mask size");
51321369Sdim
52321369Sdim  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
53321369Sdim  UndefElts = APInt(NumMaskElts, 0);
54321369Sdim  RawMask.resize(NumMaskElts, 0);
55321369Sdim
56321369Sdim  // Fast path - if the constants match the mask size then copy direct.
57321369Sdim  if (MaskEltSizeInBits == CstEltSizeInBits) {
58321369Sdim    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
59321369Sdim    for (unsigned i = 0; i != NumMaskElts; ++i) {
60321369Sdim      Constant *COp = C->getAggregateElement(i);
61321369Sdim      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
62321369Sdim        return false;
63321369Sdim
64321369Sdim      if (isa<UndefValue>(COp)) {
65321369Sdim        UndefElts.setBit(i);
66321369Sdim        RawMask[i] = 0;
67321369Sdim        continue;
68321369Sdim      }
69321369Sdim
70321369Sdim      auto *Elt = cast<ConstantInt>(COp);
71321369Sdim      RawMask[i] = Elt->getValue().getZExtValue();
72321369Sdim    }
73321369Sdim    return true;
74321369Sdim  }
75321369Sdim
76314564Sdim  // Extract all the undef/constant element data and pack into single bitsets.
77314564Sdim  APInt UndefBits(CstSizeInBits, 0);
78314564Sdim  APInt MaskBits(CstSizeInBits, 0);
79314564Sdim  for (unsigned i = 0; i != NumCstElts; ++i) {
80314564Sdim    Constant *COp = C->getAggregateElement(i);
81314564Sdim    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
82314564Sdim      return false;
83309124Sdim
84321369Sdim    unsigned BitOffset = i * CstEltSizeInBits;
85321369Sdim
86314564Sdim    if (isa<UndefValue>(COp)) {
87321369Sdim      UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
88314564Sdim      continue;
89314564Sdim    }
90314564Sdim
91321369Sdim    MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
92314564Sdim  }
93314564Sdim
94314564Sdim  // Now extract the undef/constant bit data into the raw shuffle masks.
95314564Sdim  for (unsigned i = 0; i != NumMaskElts; ++i) {
96321369Sdim    unsigned BitOffset = i * MaskEltSizeInBits;
97321369Sdim    APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
98314564Sdim
99314564Sdim    // Only treat the element as UNDEF if all bits are UNDEF, otherwise
100314564Sdim    // treat it as zero.
101314564Sdim    if (EltUndef.isAllOnesValue()) {
102321369Sdim      UndefElts.setBit(i);
103314564Sdim      RawMask[i] = 0;
104314564Sdim      continue;
105314564Sdim    }
106314564Sdim
107321369Sdim    APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
108314564Sdim    RawMask[i] = EltBits.getZExtValue();
109314564Sdim  }
110314564Sdim
111314564Sdim  return true;
112314564Sdim}
113314564Sdim
114344779Sdimvoid DecodePSHUFBMask(const Constant *C, unsigned Width,
115344779Sdim                      SmallVectorImpl<int> &ShuffleMask) {
116344779Sdim  assert((Width == 128 || Width == 256 || Width == 512) &&
117344779Sdim         C->getType()->getPrimitiveSizeInBits() >= Width &&
118314564Sdim         "Unexpected vector size.");
119314564Sdim
120314564Sdim  // The shuffle mask requires a byte vector.
121321369Sdim  APInt UndefElts;
122321369Sdim  SmallVector<uint64_t, 64> RawMask;
123314564Sdim  if (!extractConstantMask(C, 8, UndefElts, RawMask))
124309124Sdim    return;
125309124Sdim
126344779Sdim  unsigned NumElts = Width / 8;
127314564Sdim  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
128314564Sdim         "Unexpected number of vector elements.");
129309124Sdim
130314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
131314564Sdim    if (UndefElts[i]) {
132314564Sdim      ShuffleMask.push_back(SM_SentinelUndef);
133309124Sdim      continue;
134309124Sdim    }
135309124Sdim
136314564Sdim    uint64_t Element = RawMask[i];
137314564Sdim    // If the high bit (7) of the byte is set, the element is zeroed.
138314564Sdim    if (Element & (1 << 7))
139314564Sdim      ShuffleMask.push_back(SM_SentinelZero);
140314564Sdim    else {
141293248Sdim      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
142293248Sdim      // lane of the vector we're inside.
143314564Sdim      unsigned Base = i & ~0xf;
144309124Sdim
145314564Sdim      // Only the least significant 4 bits of the byte are used.
146314564Sdim      int Index = Base + (Element & 0xf);
147314564Sdim      ShuffleMask.push_back(Index);
148293248Sdim    }
149293248Sdim  }
150293248Sdim}
151293248Sdim
152344779Sdimvoid DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
153293248Sdim                        SmallVectorImpl<int> &ShuffleMask) {
154344779Sdim  assert((Width == 128 || Width == 256 || Width == 512) &&
155344779Sdim         C->getType()->getPrimitiveSizeInBits() >= Width &&
156314564Sdim         "Unexpected vector size.");
157314564Sdim  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
158293248Sdim
159314564Sdim  // The shuffle mask requires elements the same size as the target.
160321369Sdim  APInt UndefElts;
161321369Sdim  SmallVector<uint64_t, 16> RawMask;
162314564Sdim  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
163293248Sdim    return;
164293248Sdim
165344779Sdim  unsigned NumElts = Width / ElSize;
166314564Sdim  unsigned NumEltsPerLane = 128 / ElSize;
167314564Sdim  assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
168293248Sdim         "Unexpected number of vector elements.");
169293248Sdim
170314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
171314564Sdim    if (UndefElts[i]) {
172293248Sdim      ShuffleMask.push_back(SM_SentinelUndef);
173293248Sdim      continue;
174293248Sdim    }
175314564Sdim
176314564Sdim    int Index = i & ~(NumEltsPerLane - 1);
177314564Sdim    uint64_t Element = RawMask[i];
178293248Sdim    if (ElSize == 64)
179293248Sdim      Index += (Element >> 1) & 0x1;
180293248Sdim    else
181293248Sdim      Index += Element & 0x3;
182314564Sdim
183293248Sdim    ShuffleMask.push_back(Index);
184293248Sdim  }
185293248Sdim}
186293248Sdim
187309124Sdimvoid DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
188344779Sdim                         unsigned Width,
189309124Sdim                         SmallVectorImpl<int> &ShuffleMask) {
190309124Sdim  Type *MaskTy = C->getType();
191309124Sdim  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
192314564Sdim  (void)MaskTySize;
193344779Sdim  assert((MaskTySize == 128 || MaskTySize == 256) &&
194344779Sdim         Width >= MaskTySize && "Unexpected vector size.");
195309124Sdim
196314564Sdim  // The shuffle mask requires elements the same size as the target.
197321369Sdim  APInt UndefElts;
198314564Sdim  SmallVector<uint64_t, 8> RawMask;
199314564Sdim  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
200309124Sdim    return;
201309124Sdim
202344779Sdim  unsigned NumElts = Width / ElSize;
203314564Sdim  unsigned NumEltsPerLane = 128 / ElSize;
204314564Sdim  assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
205309124Sdim         "Unexpected number of vector elements.");
206309124Sdim
207314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
208314564Sdim    if (UndefElts[i]) {
209309124Sdim      ShuffleMask.push_back(SM_SentinelUndef);
210309124Sdim      continue;
211309124Sdim    }
212309124Sdim
213309124Sdim    // VPERMIL2 Operation.
214309124Sdim    // Bits[3] - Match Bit.
215309124Sdim    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
216309124Sdim    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
217314564Sdim    uint64_t Selector = RawMask[i];
218309124Sdim    unsigned MatchBit = (Selector >> 3) & 0x1;
219309124Sdim
220309124Sdim    // M2Z[0:1]     MatchBit
221309124Sdim    //   0Xb           X        Source selected by Selector index.
222309124Sdim    //   10b           0        Source selected by Selector index.
223309124Sdim    //   10b           1        Zero.
224309124Sdim    //   11b           0        Zero.
225309124Sdim    //   11b           1        Source selected by Selector index.
226309124Sdim    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
227309124Sdim      ShuffleMask.push_back(SM_SentinelZero);
228309124Sdim      continue;
229309124Sdim    }
230309124Sdim
231314564Sdim    int Index = i & ~(NumEltsPerLane - 1);
232309124Sdim    if (ElSize == 64)
233309124Sdim      Index += (Selector >> 1) & 0x1;
234309124Sdim    else
235309124Sdim      Index += Selector & 0x3;
236309124Sdim
237309124Sdim    int Src = (Selector >> 2) & 0x1;
238314564Sdim    Index += Src * NumElts;
239309124Sdim    ShuffleMask.push_back(Index);
240309124Sdim  }
241309124Sdim}
242309124Sdim
243344779Sdimvoid DecodeVPPERMMask(const Constant *C, unsigned Width,
244344779Sdim                      SmallVectorImpl<int> &ShuffleMask) {
245344779Sdim  Type *MaskTy = C->getType();
246344779Sdim  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
247344779Sdim  (void)MaskTySize;
248344779Sdim  assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
249309124Sdim
250314564Sdim  // The shuffle mask requires a byte vector.
251321369Sdim  APInt UndefElts;
252321369Sdim  SmallVector<uint64_t, 16> RawMask;
253314564Sdim  if (!extractConstantMask(C, 8, UndefElts, RawMask))
254309124Sdim    return;
255309124Sdim
256344779Sdim  unsigned NumElts = Width / 8;
257314564Sdim  assert(NumElts == 16 && "Unexpected number of vector elements.");
258309124Sdim
259314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
260314564Sdim    if (UndefElts[i]) {
261314564Sdim      ShuffleMask.push_back(SM_SentinelUndef);
262309124Sdim      continue;
263309124Sdim    }
264309124Sdim
265309124Sdim    // VPPERM Operation
266309124Sdim    // Bits[4:0] - Byte Index (0 - 31)
267309124Sdim    // Bits[7:5] - Permute Operation
268309124Sdim    //
269309124Sdim    // Permute Operation:
270309124Sdim    // 0 - Source byte (no logical operation).
271309124Sdim    // 1 - Invert source byte.
272309124Sdim    // 2 - Bit reverse of source byte.
273309124Sdim    // 3 - Bit reverse of inverted source byte.
274309124Sdim    // 4 - 00h (zero - fill).
275309124Sdim    // 5 - FFh (ones - fill).
276309124Sdim    // 6 - Most significant bit of source byte replicated in all bit positions.
277314564Sdim    // 7 - Invert most significant bit of source byte and replicate in all bit
278314564Sdim    // positions.
279314564Sdim    uint64_t Element = RawMask[i];
280314564Sdim    uint64_t Index = Element & 0x1F;
281314564Sdim    uint64_t PermuteOp = (Element >> 5) & 0x7;
282309124Sdim
283314564Sdim    if (PermuteOp == 4) {
284314564Sdim      ShuffleMask.push_back(SM_SentinelZero);
285314564Sdim      continue;
286309124Sdim    }
287314564Sdim    if (PermuteOp != 0) {
288314564Sdim      ShuffleMask.clear();
289314564Sdim      return;
290314564Sdim    }
291314564Sdim    ShuffleMask.push_back((int)Index);
292309124Sdim  }
293309124Sdim}
294309124Sdim
295344779Sdimvoid DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
296293248Sdim                      SmallVectorImpl<int> &ShuffleMask) {
297344779Sdim  assert((Width == 128 || Width == 256 || Width == 512) &&
298344779Sdim         C->getType()->getPrimitiveSizeInBits() >= Width &&
299314564Sdim         "Unexpected vector size.");
300314564Sdim  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
301314564Sdim         "Unexpected vector element size.");
302314564Sdim
303314564Sdim  // The shuffle mask requires elements the same size as the target.
304321369Sdim  APInt UndefElts;
305321369Sdim  SmallVector<uint64_t, 64> RawMask;
306314564Sdim  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
307314564Sdim    return;
308314564Sdim
309344779Sdim  unsigned NumElts = Width / ElSize;
310314564Sdim
311314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
312314564Sdim    if (UndefElts[i]) {
313314564Sdim      ShuffleMask.push_back(SM_SentinelUndef);
314314564Sdim      continue;
315293248Sdim    }
316314564Sdim    int Index = RawMask[i] & (NumElts - 1);
317314564Sdim    ShuffleMask.push_back(Index);
318293248Sdim  }
319293248Sdim}
320293248Sdim
321344779Sdimvoid DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
322293248Sdim                       SmallVectorImpl<int> &ShuffleMask) {
323344779Sdim  assert((Width == 128 || Width == 256 || Width == 512) &&
324344779Sdim         C->getType()->getPrimitiveSizeInBits() >= Width &&
325314564Sdim         "Unexpected vector size.");
326314564Sdim  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
327314564Sdim         "Unexpected vector element size.");
328314564Sdim
329314564Sdim  // The shuffle mask requires elements the same size as the target.
330321369Sdim  APInt UndefElts;
331321369Sdim  SmallVector<uint64_t, 64> RawMask;
332314564Sdim  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
333314564Sdim    return;
334314564Sdim
335344779Sdim  unsigned NumElts = Width / ElSize;
336314564Sdim
337314564Sdim  for (unsigned i = 0; i != NumElts; ++i) {
338314564Sdim    if (UndefElts[i]) {
339314564Sdim      ShuffleMask.push_back(SM_SentinelUndef);
340314564Sdim      continue;
341293248Sdim    }
342314564Sdim    int Index = RawMask[i] & (NumElts*2 - 1);
343314564Sdim    ShuffleMask.push_back(Index);
344293248Sdim  }
345293248Sdim}
346293248Sdim} // llvm namespace
347