1293248Sdim//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===// 2293248Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6293248Sdim// 7293248Sdim//===----------------------------------------------------------------------===// 8293248Sdim// 9293248Sdim// Define several functions to decode x86 specific shuffle semantics using 10293248Sdim// constants from the constant pool. 11293248Sdim// 12293248Sdim//===----------------------------------------------------------------------===// 13293248Sdim 14293248Sdim#include "Utils/X86ShuffleDecode.h" 15321369Sdim#include "llvm/ADT/APInt.h" 16293248Sdim#include "llvm/IR/Constants.h" 17293248Sdim 18293248Sdim//===----------------------------------------------------------------------===// 19293248Sdim// Vector Mask Decoding 20293248Sdim//===----------------------------------------------------------------------===// 21293248Sdim 22293248Sdimnamespace llvm { 23293248Sdim 24314564Sdimstatic bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits, 25321369Sdim APInt &UndefElts, 26314564Sdim SmallVectorImpl<uint64_t> &RawMask) { 27314564Sdim // It is not an error for shuffle masks to not be a vector of 28314564Sdim // MaskEltSizeInBits because the constant pool uniques constants by their 29314564Sdim // bit representation. 30293248Sdim // e.g. the following take up the same space in the constant pool: 31293248Sdim // i128 -170141183420855150465331762880109871104 32293248Sdim // 33293248Sdim // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> 34293248Sdim // 35293248Sdim // <4 x i32> <i32 -2147483648, i32 -2147483648, 36293248Sdim // i32 -2147483648, i32 -2147483648> 37314564Sdim Type *CstTy = C->getType(); 38314564Sdim if (!CstTy->isVectorTy()) 39314564Sdim return false; 40293248Sdim 41314564Sdim Type *CstEltTy = CstTy->getVectorElementType(); 42314564Sdim if (!CstEltTy->isIntegerTy()) 43314564Sdim return false; 44293248Sdim 45314564Sdim unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); 46314564Sdim unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); 47314564Sdim unsigned NumCstElts = CstTy->getVectorNumElements(); 48293248Sdim 49321369Sdim assert((CstSizeInBits % MaskEltSizeInBits) == 0 && 50321369Sdim "Unaligned shuffle mask size"); 51321369Sdim 52321369Sdim unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits; 53321369Sdim UndefElts = APInt(NumMaskElts, 0); 54321369Sdim RawMask.resize(NumMaskElts, 0); 55321369Sdim 56321369Sdim // Fast path - if the constants match the mask size then copy direct. 57321369Sdim if (MaskEltSizeInBits == CstEltSizeInBits) { 58321369Sdim assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size"); 59321369Sdim for (unsigned i = 0; i != NumMaskElts; ++i) { 60321369Sdim Constant *COp = C->getAggregateElement(i); 61321369Sdim if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 62321369Sdim return false; 63321369Sdim 64321369Sdim if (isa<UndefValue>(COp)) { 65321369Sdim UndefElts.setBit(i); 66321369Sdim RawMask[i] = 0; 67321369Sdim continue; 68321369Sdim } 69321369Sdim 70321369Sdim auto *Elt = cast<ConstantInt>(COp); 71321369Sdim RawMask[i] = Elt->getValue().getZExtValue(); 72321369Sdim } 73321369Sdim return true; 74321369Sdim } 75321369Sdim 76314564Sdim // Extract all the undef/constant element data and pack into single bitsets. 77314564Sdim APInt UndefBits(CstSizeInBits, 0); 78314564Sdim APInt MaskBits(CstSizeInBits, 0); 79314564Sdim for (unsigned i = 0; i != NumCstElts; ++i) { 80314564Sdim Constant *COp = C->getAggregateElement(i); 81314564Sdim if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 82314564Sdim return false; 83309124Sdim 84321369Sdim unsigned BitOffset = i * CstEltSizeInBits; 85321369Sdim 86314564Sdim if (isa<UndefValue>(COp)) { 87321369Sdim UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits); 88314564Sdim continue; 89314564Sdim } 90314564Sdim 91321369Sdim MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset); 92314564Sdim } 93314564Sdim 94314564Sdim // Now extract the undef/constant bit data into the raw shuffle masks. 95314564Sdim for (unsigned i = 0; i != NumMaskElts; ++i) { 96321369Sdim unsigned BitOffset = i * MaskEltSizeInBits; 97321369Sdim APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset); 98314564Sdim 99314564Sdim // Only treat the element as UNDEF if all bits are UNDEF, otherwise 100314564Sdim // treat it as zero. 101314564Sdim if (EltUndef.isAllOnesValue()) { 102321369Sdim UndefElts.setBit(i); 103314564Sdim RawMask[i] = 0; 104314564Sdim continue; 105314564Sdim } 106314564Sdim 107321369Sdim APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset); 108314564Sdim RawMask[i] = EltBits.getZExtValue(); 109314564Sdim } 110314564Sdim 111314564Sdim return true; 112314564Sdim} 113314564Sdim 114344779Sdimvoid DecodePSHUFBMask(const Constant *C, unsigned Width, 115344779Sdim SmallVectorImpl<int> &ShuffleMask) { 116344779Sdim assert((Width == 128 || Width == 256 || Width == 512) && 117344779Sdim C->getType()->getPrimitiveSizeInBits() >= Width && 118314564Sdim "Unexpected vector size."); 119314564Sdim 120314564Sdim // The shuffle mask requires a byte vector. 121321369Sdim APInt UndefElts; 122321369Sdim SmallVector<uint64_t, 64> RawMask; 123314564Sdim if (!extractConstantMask(C, 8, UndefElts, RawMask)) 124309124Sdim return; 125309124Sdim 126344779Sdim unsigned NumElts = Width / 8; 127314564Sdim assert((NumElts == 16 || NumElts == 32 || NumElts == 64) && 128314564Sdim "Unexpected number of vector elements."); 129309124Sdim 130314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 131314564Sdim if (UndefElts[i]) { 132314564Sdim ShuffleMask.push_back(SM_SentinelUndef); 133309124Sdim continue; 134309124Sdim } 135309124Sdim 136314564Sdim uint64_t Element = RawMask[i]; 137314564Sdim // If the high bit (7) of the byte is set, the element is zeroed. 138314564Sdim if (Element & (1 << 7)) 139314564Sdim ShuffleMask.push_back(SM_SentinelZero); 140314564Sdim else { 141293248Sdim // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte 142293248Sdim // lane of the vector we're inside. 143314564Sdim unsigned Base = i & ~0xf; 144309124Sdim 145314564Sdim // Only the least significant 4 bits of the byte are used. 146314564Sdim int Index = Base + (Element & 0xf); 147314564Sdim ShuffleMask.push_back(Index); 148293248Sdim } 149293248Sdim } 150293248Sdim} 151293248Sdim 152344779Sdimvoid DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width, 153293248Sdim SmallVectorImpl<int> &ShuffleMask) { 154344779Sdim assert((Width == 128 || Width == 256 || Width == 512) && 155344779Sdim C->getType()->getPrimitiveSizeInBits() >= Width && 156314564Sdim "Unexpected vector size."); 157314564Sdim assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size."); 158293248Sdim 159314564Sdim // The shuffle mask requires elements the same size as the target. 160321369Sdim APInt UndefElts; 161321369Sdim SmallVector<uint64_t, 16> RawMask; 162314564Sdim if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 163293248Sdim return; 164293248Sdim 165344779Sdim unsigned NumElts = Width / ElSize; 166314564Sdim unsigned NumEltsPerLane = 128 / ElSize; 167314564Sdim assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) && 168293248Sdim "Unexpected number of vector elements."); 169293248Sdim 170314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 171314564Sdim if (UndefElts[i]) { 172293248Sdim ShuffleMask.push_back(SM_SentinelUndef); 173293248Sdim continue; 174293248Sdim } 175314564Sdim 176314564Sdim int Index = i & ~(NumEltsPerLane - 1); 177314564Sdim uint64_t Element = RawMask[i]; 178293248Sdim if (ElSize == 64) 179293248Sdim Index += (Element >> 1) & 0x1; 180293248Sdim else 181293248Sdim Index += Element & 0x3; 182314564Sdim 183293248Sdim ShuffleMask.push_back(Index); 184293248Sdim } 185293248Sdim} 186293248Sdim 187309124Sdimvoid DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize, 188344779Sdim unsigned Width, 189309124Sdim SmallVectorImpl<int> &ShuffleMask) { 190309124Sdim Type *MaskTy = C->getType(); 191309124Sdim unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); 192314564Sdim (void)MaskTySize; 193344779Sdim assert((MaskTySize == 128 || MaskTySize == 256) && 194344779Sdim Width >= MaskTySize && "Unexpected vector size."); 195309124Sdim 196314564Sdim // The shuffle mask requires elements the same size as the target. 197321369Sdim APInt UndefElts; 198314564Sdim SmallVector<uint64_t, 8> RawMask; 199314564Sdim if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 200309124Sdim return; 201309124Sdim 202344779Sdim unsigned NumElts = Width / ElSize; 203314564Sdim unsigned NumEltsPerLane = 128 / ElSize; 204314564Sdim assert((NumElts == 2 || NumElts == 4 || NumElts == 8) && 205309124Sdim "Unexpected number of vector elements."); 206309124Sdim 207314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 208314564Sdim if (UndefElts[i]) { 209309124Sdim ShuffleMask.push_back(SM_SentinelUndef); 210309124Sdim continue; 211309124Sdim } 212309124Sdim 213309124Sdim // VPERMIL2 Operation. 214309124Sdim // Bits[3] - Match Bit. 215309124Sdim // Bits[2:1] - (Per Lane) PD Shuffle Mask. 216309124Sdim // Bits[2:0] - (Per Lane) PS Shuffle Mask. 217314564Sdim uint64_t Selector = RawMask[i]; 218309124Sdim unsigned MatchBit = (Selector >> 3) & 0x1; 219309124Sdim 220309124Sdim // M2Z[0:1] MatchBit 221309124Sdim // 0Xb X Source selected by Selector index. 222309124Sdim // 10b 0 Source selected by Selector index. 223309124Sdim // 10b 1 Zero. 224309124Sdim // 11b 0 Zero. 225309124Sdim // 11b 1 Source selected by Selector index. 226309124Sdim if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) { 227309124Sdim ShuffleMask.push_back(SM_SentinelZero); 228309124Sdim continue; 229309124Sdim } 230309124Sdim 231314564Sdim int Index = i & ~(NumEltsPerLane - 1); 232309124Sdim if (ElSize == 64) 233309124Sdim Index += (Selector >> 1) & 0x1; 234309124Sdim else 235309124Sdim Index += Selector & 0x3; 236309124Sdim 237309124Sdim int Src = (Selector >> 2) & 0x1; 238314564Sdim Index += Src * NumElts; 239309124Sdim ShuffleMask.push_back(Index); 240309124Sdim } 241309124Sdim} 242309124Sdim 243344779Sdimvoid DecodeVPPERMMask(const Constant *C, unsigned Width, 244344779Sdim SmallVectorImpl<int> &ShuffleMask) { 245344779Sdim Type *MaskTy = C->getType(); 246344779Sdim unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); 247344779Sdim (void)MaskTySize; 248344779Sdim assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size."); 249309124Sdim 250314564Sdim // The shuffle mask requires a byte vector. 251321369Sdim APInt UndefElts; 252321369Sdim SmallVector<uint64_t, 16> RawMask; 253314564Sdim if (!extractConstantMask(C, 8, UndefElts, RawMask)) 254309124Sdim return; 255309124Sdim 256344779Sdim unsigned NumElts = Width / 8; 257314564Sdim assert(NumElts == 16 && "Unexpected number of vector elements."); 258309124Sdim 259314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 260314564Sdim if (UndefElts[i]) { 261314564Sdim ShuffleMask.push_back(SM_SentinelUndef); 262309124Sdim continue; 263309124Sdim } 264309124Sdim 265309124Sdim // VPPERM Operation 266309124Sdim // Bits[4:0] - Byte Index (0 - 31) 267309124Sdim // Bits[7:5] - Permute Operation 268309124Sdim // 269309124Sdim // Permute Operation: 270309124Sdim // 0 - Source byte (no logical operation). 271309124Sdim // 1 - Invert source byte. 272309124Sdim // 2 - Bit reverse of source byte. 273309124Sdim // 3 - Bit reverse of inverted source byte. 274309124Sdim // 4 - 00h (zero - fill). 275309124Sdim // 5 - FFh (ones - fill). 276309124Sdim // 6 - Most significant bit of source byte replicated in all bit positions. 277314564Sdim // 7 - Invert most significant bit of source byte and replicate in all bit 278314564Sdim // positions. 279314564Sdim uint64_t Element = RawMask[i]; 280314564Sdim uint64_t Index = Element & 0x1F; 281314564Sdim uint64_t PermuteOp = (Element >> 5) & 0x7; 282309124Sdim 283314564Sdim if (PermuteOp == 4) { 284314564Sdim ShuffleMask.push_back(SM_SentinelZero); 285314564Sdim continue; 286309124Sdim } 287314564Sdim if (PermuteOp != 0) { 288314564Sdim ShuffleMask.clear(); 289314564Sdim return; 290314564Sdim } 291314564Sdim ShuffleMask.push_back((int)Index); 292309124Sdim } 293309124Sdim} 294309124Sdim 295344779Sdimvoid DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width, 296293248Sdim SmallVectorImpl<int> &ShuffleMask) { 297344779Sdim assert((Width == 128 || Width == 256 || Width == 512) && 298344779Sdim C->getType()->getPrimitiveSizeInBits() >= Width && 299314564Sdim "Unexpected vector size."); 300314564Sdim assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) && 301314564Sdim "Unexpected vector element size."); 302314564Sdim 303314564Sdim // The shuffle mask requires elements the same size as the target. 304321369Sdim APInt UndefElts; 305321369Sdim SmallVector<uint64_t, 64> RawMask; 306314564Sdim if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 307314564Sdim return; 308314564Sdim 309344779Sdim unsigned NumElts = Width / ElSize; 310314564Sdim 311314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 312314564Sdim if (UndefElts[i]) { 313314564Sdim ShuffleMask.push_back(SM_SentinelUndef); 314314564Sdim continue; 315293248Sdim } 316314564Sdim int Index = RawMask[i] & (NumElts - 1); 317314564Sdim ShuffleMask.push_back(Index); 318293248Sdim } 319293248Sdim} 320293248Sdim 321344779Sdimvoid DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width, 322293248Sdim SmallVectorImpl<int> &ShuffleMask) { 323344779Sdim assert((Width == 128 || Width == 256 || Width == 512) && 324344779Sdim C->getType()->getPrimitiveSizeInBits() >= Width && 325314564Sdim "Unexpected vector size."); 326314564Sdim assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) && 327314564Sdim "Unexpected vector element size."); 328314564Sdim 329314564Sdim // The shuffle mask requires elements the same size as the target. 330321369Sdim APInt UndefElts; 331321369Sdim SmallVector<uint64_t, 64> RawMask; 332314564Sdim if (!extractConstantMask(C, ElSize, UndefElts, RawMask)) 333314564Sdim return; 334314564Sdim 335344779Sdim unsigned NumElts = Width / ElSize; 336314564Sdim 337314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 338314564Sdim if (UndefElts[i]) { 339314564Sdim ShuffleMask.push_back(SM_SentinelUndef); 340314564Sdim continue; 341293248Sdim } 342314564Sdim int Index = RawMask[i] & (NumElts*2 - 1); 343314564Sdim ShuffleMask.push_back(Index); 344293248Sdim } 345293248Sdim} 346293248Sdim} // llvm namespace 347