1//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Define several functions to decode x86 specific shuffle semantics into a 10// generic vector mask. 11// 12//===----------------------------------------------------------------------===// 13 14#include "X86ShuffleDecode.h" 15#include "llvm/ADT/ArrayRef.h" 16 17//===----------------------------------------------------------------------===// 18// Vector Mask Decoding 19//===----------------------------------------------------------------------===// 20 21namespace llvm { 22 23void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4 + CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44} 45 46void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, 47 SmallVectorImpl<int> &ShuffleMask) { 48 assert((Idx + Len) <= NumElts && "Insertion out of range"); 49 50 for (unsigned i = 0; i != NumElts; ++i) 51 ShuffleMask.push_back(i); 52 for (unsigned i = 0; i != Len; ++i) 53 ShuffleMask[Idx + i] = NumElts + i; 54} 55 56// <3,1> or <6,7,2,3> 57void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 58 for (unsigned i = NElts / 2; i != NElts; ++i) 59 ShuffleMask.push_back(NElts + i); 60 61 for (unsigned i = NElts / 2; i != NElts; ++i) 62 ShuffleMask.push_back(i); 63} 64 65// <0,2> or <0,1,4,5> 66void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 67 for (unsigned i = 0; i != NElts / 2; ++i) 68 ShuffleMask.push_back(i); 69 70 for (unsigned i = 0; i != NElts / 2; ++i) 71 ShuffleMask.push_back(NElts + i); 72} 73 74void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 75 for (int i = 0, e = NumElts / 2; i < e; ++i) { 76 ShuffleMask.push_back(2 * i); 77 ShuffleMask.push_back(2 * i); 78 } 79} 80 81void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 82 for (int i = 0, e = NumElts / 2; i < e; ++i) { 83 ShuffleMask.push_back(2 * i + 1); 84 ShuffleMask.push_back(2 * i + 1); 85 } 86} 87 88void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 89 const unsigned NumLaneElts = 2; 90 91 for (unsigned l = 0; l < NumElts; l += NumLaneElts) 92 for (unsigned i = 0; i < NumLaneElts; ++i) 93 ShuffleMask.push_back(l); 94} 95 96void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, 97 SmallVectorImpl<int> &ShuffleMask) { 98 const unsigned NumLaneElts = 16; 99 100 for (unsigned l = 0; l < NumElts; l += NumLaneElts) 101 for (unsigned i = 0; i < NumLaneElts; ++i) { 102 int M = SM_SentinelZero; 103 if (i >= Imm) M = i - Imm + l; 104 ShuffleMask.push_back(M); 105 } 106} 107 108void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, 109 SmallVectorImpl<int> &ShuffleMask) { 110 const unsigned NumLaneElts = 16; 111 112 for (unsigned l = 0; l < NumElts; l += NumLaneElts) 113 for (unsigned i = 0; i < NumLaneElts; ++i) { 114 unsigned Base = i + Imm; 115 int M = Base + l; 116 if (Base >= NumLaneElts) M = SM_SentinelZero; 117 ShuffleMask.push_back(M); 118 } 119} 120 121void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, 122 SmallVectorImpl<int> &ShuffleMask) { 123 const unsigned NumLaneElts = 16; 124 125 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 126 for (unsigned i = 0; i != NumLaneElts; ++i) { 127 unsigned Base = i + Imm; 128 // if i+imm is out of this lane then we actually need the other source 129 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 130 ShuffleMask.push_back(Base + l); 131 } 132 } 133} 134 135void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, 136 SmallVectorImpl<int> &ShuffleMask) { 137 // Not all bits of the immediate are used so mask it. 138 assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2"); 139 Imm = Imm & (NumElts - 1); 140 for (unsigned i = 0; i != NumElts; ++i) 141 ShuffleMask.push_back(i + Imm); 142} 143 144/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. 145/// VT indicates the type of the vector allowing it to handle different 146/// datatypes and vector widths. 147void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, 148 SmallVectorImpl<int> &ShuffleMask) { 149 unsigned Size = NumElts * ScalarBits; 150 unsigned NumLanes = Size / 128; 151 if (NumLanes == 0) NumLanes = 1; // Handle MMX 152 unsigned NumLaneElts = NumElts / NumLanes; 153 154 uint32_t SplatImm = (Imm & 0xff) * 0x01010101; 155 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 156 for (unsigned i = 0; i != NumLaneElts; ++i) { 157 ShuffleMask.push_back(SplatImm % NumLaneElts + l); 158 SplatImm /= NumLaneElts; 159 } 160 } 161} 162 163void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, 164 SmallVectorImpl<int> &ShuffleMask) { 165 for (unsigned l = 0; l != NumElts; l += 8) { 166 unsigned NewImm = Imm; 167 for (unsigned i = 0, e = 4; i != e; ++i) { 168 ShuffleMask.push_back(l + i); 169 } 170 for (unsigned i = 4, e = 8; i != e; ++i) { 171 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 172 NewImm >>= 2; 173 } 174 } 175} 176 177void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, 178 SmallVectorImpl<int> &ShuffleMask) { 179 for (unsigned l = 0; l != NumElts; l += 8) { 180 unsigned NewImm = Imm; 181 for (unsigned i = 0, e = 4; i != e; ++i) { 182 ShuffleMask.push_back(l + (NewImm & 3)); 183 NewImm >>= 2; 184 } 185 for (unsigned i = 4, e = 8; i != e; ++i) { 186 ShuffleMask.push_back(l + i); 187 } 188 } 189} 190 191void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 192 unsigned NumHalfElts = NumElts / 2; 193 194 for (unsigned l = 0; l != NumHalfElts; ++l) 195 ShuffleMask.push_back(l + NumHalfElts); 196 for (unsigned h = 0; h != NumHalfElts; ++h) 197 ShuffleMask.push_back(h); 198} 199 200/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 201/// the type of the vector allowing it to handle different datatypes and vector 202/// widths. 203void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, 204 unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 205 unsigned NumLaneElts = 128 / ScalarBits; 206 207 unsigned NewImm = Imm; 208 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 209 // each half of a lane comes from different source 210 for (unsigned s = 0; s != NumElts * 2; s += NumElts) { 211 for (unsigned i = 0; i != NumLaneElts / 2; ++i) { 212 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 213 NewImm /= NumLaneElts; 214 } 215 } 216 if (NumLaneElts == 4) NewImm = Imm; // reload imm 217 } 218} 219 220/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 221/// and punpckh*. VT indicates the type of the vector allowing it to handle 222/// different datatypes and vector widths. 223void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, 224 SmallVectorImpl<int> &ShuffleMask) { 225 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 226 // independently on 128-bit lanes. 227 unsigned NumLanes = (NumElts * ScalarBits) / 128; 228 if (NumLanes == 0) NumLanes = 1; // Handle MMX 229 unsigned NumLaneElts = NumElts / NumLanes; 230 231 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 232 for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { 233 ShuffleMask.push_back(i); // Reads from dest/src1 234 ShuffleMask.push_back(i + NumElts); // Reads from src/src2 235 } 236 } 237} 238 239/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 240/// and punpckl*. VT indicates the type of the vector allowing it to handle 241/// different datatypes and vector widths. 242void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, 243 SmallVectorImpl<int> &ShuffleMask) { 244 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 245 // independently on 128-bit lanes. 246 unsigned NumLanes = (NumElts * ScalarBits) / 128; 247 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 248 unsigned NumLaneElts = NumElts / NumLanes; 249 250 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 251 for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { 252 ShuffleMask.push_back(i); // Reads from dest/src1 253 ShuffleMask.push_back(i + NumElts); // Reads from src/src2 254 } 255 } 256} 257 258/// Decodes a broadcast of the first element of a vector. 259void DecodeVectorBroadcast(unsigned NumElts, 260 SmallVectorImpl<int> &ShuffleMask) { 261 ShuffleMask.append(NumElts, 0); 262} 263 264/// Decodes a broadcast of a subvector to a larger vector type. 265void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, 266 SmallVectorImpl<int> &ShuffleMask) { 267 unsigned Scale = DstNumElts / SrcNumElts; 268 269 for (unsigned i = 0; i != Scale; ++i) 270 for (unsigned j = 0; j != SrcNumElts; ++j) 271 ShuffleMask.push_back(j); 272} 273 274/// Decode a shuffle packed values at 128-bit granularity 275/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) 276/// immediate mask into a shuffle mask. 277void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, 278 unsigned Imm, 279 SmallVectorImpl<int> &ShuffleMask) { 280 unsigned NumElementsInLane = 128 / ScalarSize; 281 unsigned NumLanes = NumElts / NumElementsInLane; 282 283 for (unsigned l = 0; l != NumElts; l += NumElementsInLane) { 284 unsigned Index = (Imm % NumLanes) * NumElementsInLane; 285 Imm /= NumLanes; // Discard the bits we just used. 286 // We actually need the other source. 287 if (l >= (NumElts / 2)) 288 Index += NumElts; 289 for (unsigned i = 0; i != NumElementsInLane; ++i) 290 ShuffleMask.push_back(Index + i); 291 } 292} 293 294void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, 295 SmallVectorImpl<int> &ShuffleMask) { 296 unsigned HalfSize = NumElts / 2; 297 298 for (unsigned l = 0; l != 2; ++l) { 299 unsigned HalfMask = Imm >> (l * 4); 300 unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; 301 for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) 302 ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i); 303 } 304} 305 306void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 307 SmallVectorImpl<int> &ShuffleMask) { 308 for (int i = 0, e = RawMask.size(); i < e; ++i) { 309 uint64_t M = RawMask[i]; 310 if (UndefElts[i]) { 311 ShuffleMask.push_back(SM_SentinelUndef); 312 continue; 313 } 314 // For 256/512-bit vectors the base of the shuffle is the 128-bit 315 // subvector we're inside. 316 int Base = (i / 16) * 16; 317 // If the high bit (7) of the byte is set, the element is zeroed. 318 if (M & (1 << 7)) 319 ShuffleMask.push_back(SM_SentinelZero); 320 else { 321 // Only the least significant 4 bits of the byte are used. 322 int Index = Base + (M & 0xf); 323 ShuffleMask.push_back(Index); 324 } 325 } 326} 327 328void DecodeBLENDMask(unsigned NumElts, unsigned Imm, 329 SmallVectorImpl<int> &ShuffleMask) { 330 for (unsigned i = 0; i < NumElts; ++i) { 331 // If there are more than 8 elements in the vector, then any immediate blend 332 // mask wraps around. 333 unsigned Bit = i % 8; 334 ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i); 335 } 336} 337 338void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 339 SmallVectorImpl<int> &ShuffleMask) { 340 assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); 341 342 // VPPERM Operation 343 // Bits[4:0] - Byte Index (0 - 31) 344 // Bits[7:5] - Permute Operation 345 // 346 // Permute Operation: 347 // 0 - Source byte (no logical operation). 348 // 1 - Invert source byte. 349 // 2 - Bit reverse of source byte. 350 // 3 - Bit reverse of inverted source byte. 351 // 4 - 00h (zero - fill). 352 // 5 - FFh (ones - fill). 353 // 6 - Most significant bit of source byte replicated in all bit positions. 354 // 7 - Invert most significant bit of source byte and replicate in all bit positions. 355 for (int i = 0, e = RawMask.size(); i < e; ++i) { 356 if (UndefElts[i]) { 357 ShuffleMask.push_back(SM_SentinelUndef); 358 continue; 359 } 360 361 uint64_t M = RawMask[i]; 362 uint64_t PermuteOp = (M >> 5) & 0x7; 363 if (PermuteOp == 4) { 364 ShuffleMask.push_back(SM_SentinelZero); 365 continue; 366 } 367 if (PermuteOp != 0) { 368 ShuffleMask.clear(); 369 return; 370 } 371 372 uint64_t Index = M & 0x1F; 373 ShuffleMask.push_back((int)Index); 374 } 375} 376 377/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 378void DecodeVPERMMask(unsigned NumElts, unsigned Imm, 379 SmallVectorImpl<int> &ShuffleMask) { 380 for (unsigned l = 0; l != NumElts; l += 4) 381 for (unsigned i = 0; i != 4; ++i) 382 ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); 383} 384 385void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, 386 unsigned NumDstElts, bool IsAnyExtend, 387 SmallVectorImpl<int> &Mask) { 388 unsigned Scale = DstScalarBits / SrcScalarBits; 389 assert(SrcScalarBits < DstScalarBits && 390 "Expected zero extension mask to increase scalar size"); 391 392 for (unsigned i = 0; i != NumDstElts; i++) { 393 Mask.push_back(i); 394 for (unsigned j = 1; j != Scale; j++) 395 Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero); 396 } 397} 398 399void DecodeZeroMoveLowMask(unsigned NumElts, 400 SmallVectorImpl<int> &ShuffleMask) { 401 ShuffleMask.push_back(0); 402 for (unsigned i = 1; i < NumElts; i++) 403 ShuffleMask.push_back(SM_SentinelZero); 404} 405 406void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, 407 SmallVectorImpl<int> &Mask) { 408 // First element comes from the first element of second source. 409 // Remaining elements: Load zero extends / Move copies from first source. 410 Mask.push_back(NumElts); 411 for (unsigned i = 1; i < NumElts; i++) 412 Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); 413} 414 415void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 416 SmallVectorImpl<int> &ShuffleMask) { 417 unsigned HalfElts = NumElts / 2; 418 419 // Only the bottom 6 bits are valid for each immediate. 420 Len &= 0x3F; 421 Idx &= 0x3F; 422 423 // We can only decode this bit extraction instruction as a shuffle if both the 424 // length and index work with whole elements. 425 if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 426 return; 427 428 // A length of zero is equivalent to a bit length of 64. 429 if (Len == 0) 430 Len = 64; 431 432 // If the length + index exceeds the bottom 64 bits the result is undefined. 433 if ((Len + Idx) > 64) { 434 ShuffleMask.append(NumElts, SM_SentinelUndef); 435 return; 436 } 437 438 // Convert index and index to work with elements. 439 Len /= EltSize; 440 Idx /= EltSize; 441 442 // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining 443 // elements of the lower 64-bits. The upper 64-bits are undefined. 444 for (int i = 0; i != Len; ++i) 445 ShuffleMask.push_back(i + Idx); 446 for (int i = Len; i != (int)HalfElts; ++i) 447 ShuffleMask.push_back(SM_SentinelZero); 448 for (int i = HalfElts; i != (int)NumElts; ++i) 449 ShuffleMask.push_back(SM_SentinelUndef); 450} 451 452void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 453 SmallVectorImpl<int> &ShuffleMask) { 454 unsigned HalfElts = NumElts / 2; 455 456 // Only the bottom 6 bits are valid for each immediate. 457 Len &= 0x3F; 458 Idx &= 0x3F; 459 460 // We can only decode this bit insertion instruction as a shuffle if both the 461 // length and index work with whole elements. 462 if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 463 return; 464 465 // A length of zero is equivalent to a bit length of 64. 466 if (Len == 0) 467 Len = 64; 468 469 // If the length + index exceeds the bottom 64 bits the result is undefined. 470 if ((Len + Idx) > 64) { 471 ShuffleMask.append(NumElts, SM_SentinelUndef); 472 return; 473 } 474 475 // Convert index and index to work with elements. 476 Len /= EltSize; 477 Idx /= EltSize; 478 479 // INSERTQ: Extract lowest Len elements from lower half of second source and 480 // insert over first source starting at Idx element. The upper 64-bits are 481 // undefined. 482 for (int i = 0; i != Idx; ++i) 483 ShuffleMask.push_back(i); 484 for (int i = 0; i != Len; ++i) 485 ShuffleMask.push_back(i + NumElts); 486 for (int i = Idx + Len; i != (int)HalfElts; ++i) 487 ShuffleMask.push_back(i); 488 for (int i = HalfElts; i != (int)NumElts; ++i) 489 ShuffleMask.push_back(SM_SentinelUndef); 490} 491 492void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, 493 ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 494 SmallVectorImpl<int> &ShuffleMask) { 495 unsigned VecSize = NumElts * ScalarBits; 496 unsigned NumLanes = VecSize / 128; 497 unsigned NumEltsPerLane = NumElts / NumLanes; 498 assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && 499 "Unexpected vector size"); 500 assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 501 502 for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 503 if (UndefElts[i]) { 504 ShuffleMask.push_back(SM_SentinelUndef); 505 continue; 506 } 507 uint64_t M = RawMask[i]; 508 M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); 509 unsigned LaneOffset = i & ~(NumEltsPerLane - 1); 510 ShuffleMask.push_back((int)(LaneOffset + M)); 511 } 512} 513 514void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, 515 ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 516 SmallVectorImpl<int> &ShuffleMask) { 517 unsigned VecSize = NumElts * ScalarBits; 518 unsigned NumLanes = VecSize / 128; 519 unsigned NumEltsPerLane = NumElts / NumLanes; 520 assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); 521 assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 522 assert((NumElts == RawMask.size()) && "Unexpected mask size"); 523 524 for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 525 if (UndefElts[i]) { 526 ShuffleMask.push_back(SM_SentinelUndef); 527 continue; 528 } 529 530 // VPERMIL2 Operation. 531 // Bits[3] - Match Bit. 532 // Bits[2:1] - (Per Lane) PD Shuffle Mask. 533 // Bits[2:0] - (Per Lane) PS Shuffle Mask. 534 uint64_t Selector = RawMask[i]; 535 unsigned MatchBit = (Selector >> 3) & 0x1; 536 537 // M2Z[0:1] MatchBit 538 // 0Xb X Source selected by Selector index. 539 // 10b 0 Source selected by Selector index. 540 // 10b 1 Zero. 541 // 11b 0 Zero. 542 // 11b 1 Source selected by Selector index. 543 if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { 544 ShuffleMask.push_back(SM_SentinelZero); 545 continue; 546 } 547 548 int Index = i & ~(NumEltsPerLane - 1); 549 if (ScalarBits == 64) 550 Index += (Selector >> 1) & 0x1; 551 else 552 Index += Selector & 0x3; 553 554 int Src = (Selector >> 2) & 0x1; 555 Index += Src * NumElts; 556 ShuffleMask.push_back(Index); 557 } 558} 559 560void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 561 SmallVectorImpl<int> &ShuffleMask) { 562 uint64_t EltMaskSize = RawMask.size() - 1; 563 for (int i = 0, e = RawMask.size(); i != e; ++i) { 564 if (UndefElts[i]) { 565 ShuffleMask.push_back(SM_SentinelUndef); 566 continue; 567 } 568 uint64_t M = RawMask[i]; 569 M &= EltMaskSize; 570 ShuffleMask.push_back((int)M); 571 } 572} 573 574void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 575 SmallVectorImpl<int> &ShuffleMask) { 576 uint64_t EltMaskSize = (RawMask.size() * 2) - 1; 577 for (int i = 0, e = RawMask.size(); i != e; ++i) { 578 if (UndefElts[i]) { 579 ShuffleMask.push_back(SM_SentinelUndef); 580 continue; 581 } 582 uint64_t M = RawMask[i]; 583 M &= EltMaskSize; 584 ShuffleMask.push_back((int)M); 585 } 586} 587 588} // llvm namespace 589