// X86ShuffleDecode.cpp revision 218885
1218893Sdim//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// Define several functions to decode x86 specific shuffle semantics into a 11193326Sed// generic vector mask. 12193326Sed// 13193326Sed//===----------------------------------------------------------------------===// 14199512Srdivacky 15193326Sed#include "X86ShuffleDecode.h" 16226633Sdim 17261991Sdim//===----------------------------------------------------------------------===// 18261991Sdim// Vector Mask Decoding 19193326Sed//===----------------------------------------------------------------------===// 20226633Sdim 21193326Sednamespace llvm { 22226633Sdim 23261991Sdimvoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { 24193326Sed // Defaults the copying the dest value. 25210299Sed ShuffleMask.push_back(0); 26210299Sed ShuffleMask.push_back(1); 27296417Sdim ShuffleMask.push_back(2); 28296417Sdim ShuffleMask.push_back(3); 29296417Sdim 30276479Sdim // Decode the immediate. 31193326Sed unsigned ZMask = Imm & 15; 32198092Srdivacky unsigned CountD = (Imm >> 4) & 3; 33210299Sed unsigned CountS = (Imm >> 6) & 3; 34193326Sed 35198092Srdivacky // CountS selects which input element to use. 36193326Sed unsigned InVal = 4+CountS; 37198092Srdivacky // CountD specifies which element of destination to update. 38198092Srdivacky ShuffleMask[CountD] = InVal; 39198092Srdivacky // ZMask zaps values, potentially overriding the CountD elt. 
40210299Sed if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41210299Sed if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42193326Sed if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43226633Sdim if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44226633Sdim} 45226633Sdim 46226633Sdim// <3,1> or <6,7,2,3> 47226633Sdimvoid DecodeMOVHLPSMask(unsigned NElts, 48226633Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 49193326Sed for (unsigned i = NElts/2; i != NElts; ++i) 50193326Sed ShuffleMask.push_back(NElts+i); 51198092Srdivacky 52198092Srdivacky for (unsigned i = NElts/2; i != NElts; ++i) 53193326Sed ShuffleMask.push_back(i); 54193326Sed} 55193326Sed 56198092Srdivacky// <0,2> or <0,1,4,5> 57210299Sedvoid DecodeMOVLHPSMask(unsigned NElts, 58210299Sed SmallVectorImpl<unsigned> &ShuffleMask) { 59210299Sed for (unsigned i = 0; i != NElts/2; ++i) 60210299Sed ShuffleMask.push_back(i); 61210299Sed 62193326Sed for (unsigned i = 0; i != NElts/2; ++i) 63193326Sed ShuffleMask.push_back(NElts+i); 64193326Sed} 65193326Sed 66261991Sdimvoid DecodePSHUFMask(unsigned NElts, unsigned Imm, 67261991Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 68261991Sdim for (unsigned i = 0; i != NElts; ++i) { 69261991Sdim ShuffleMask.push_back(Imm % NElts); 70261991Sdim Imm /= NElts; 71261991Sdim } 72193326Sed} 73261991Sdim 74261991Sdimvoid DecodePSHUFHWMask(unsigned Imm, 75261991Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 76243830Sdim ShuffleMask.push_back(0); 77249423Sdim ShuffleMask.push_back(1); 78249423Sdim ShuffleMask.push_back(2); 79249423Sdim ShuffleMask.push_back(3); 80261991Sdim for (unsigned i = 0; i != 4; ++i) { 81249423Sdim ShuffleMask.push_back(4+(Imm & 3)); 82249423Sdim Imm >>= 2; 83276479Sdim } 84261991Sdim} 85261991Sdim 86261991Sdimvoid DecodePSHUFLWMask(unsigned Imm, 87280031Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 88261991Sdim for (unsigned i = 0; i != 4; ++i) { 89261991Sdim ShuffleMask.push_back((Imm & 3)); 90261991Sdim Imm >>= 2; 91261991Sdim } 92249423Sdim 
ShuffleMask.push_back(4); 93249423Sdim ShuffleMask.push_back(5); 94249423Sdim ShuffleMask.push_back(6); 95249423Sdim ShuffleMask.push_back(7); 96198092Srdivacky} 97193326Sed 98193326Sedvoid DecodePUNPCKLMask(unsigned NElts, 99249423Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 100249423Sdim for (unsigned i = 0; i != NElts/2; ++i) { 101249423Sdim ShuffleMask.push_back(i); 102249423Sdim ShuffleMask.push_back(i+NElts); 103249423Sdim } 104193326Sed} 105249423Sdim 106249423Sdimvoid DecodePUNPCKHMask(unsigned NElts, 107249423Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 108249423Sdim for (unsigned i = 0; i != NElts/2; ++i) { 109249423Sdim ShuffleMask.push_back(i+NElts/2); 110193326Sed ShuffleMask.push_back(i+NElts+NElts/2); 111199990Srdivacky } 112249423Sdim} 113249423Sdim 114249423Sdimvoid DecodeSHUFPSMask(unsigned NElts, unsigned Imm, 115221345Sdim SmallVectorImpl<unsigned> &ShuffleMask) { 116249423Sdim // Part that reads from dest. 117193326Sed for (unsigned i = 0; i != NElts/2; ++i) { 118193326Sed ShuffleMask.push_back(Imm % NElts); 119193326Sed Imm /= NElts; 120193326Sed } 121195341Sed // Part that reads from src. 122195341Sed for (unsigned i = 0; i != NElts/2; ++i) { 123261991Sdim ShuffleMask.push_back(Imm % NElts + NElts); 124226633Sdim Imm /= NElts; 125226633Sdim } 126198092Srdivacky} 127205408Srdivacky 128205408Srdivackyvoid DecodeUNPCKHPMask(unsigned NElts, 129205408Srdivacky SmallVectorImpl<unsigned> &ShuffleMask) { 130280031Sdim for (unsigned i = 0; i != NElts/2; ++i) { 131280031Sdim ShuffleMask.push_back(i+NElts/2); // Reads from dest 132276479Sdim ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src 133276479Sdim } 134280031Sdim} 135205408Srdivacky 136280031Sdim 137205408Srdivacky/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd 138205408Srdivacky/// etc. NElts indicates the number of elements in the vector allowing it to 139205408Srdivacky/// handle different datatypes and vector widths. 
140205408Srdivackyvoid DecodeUNPCKLPMask(unsigned NElts, 141205408Srdivacky SmallVectorImpl<unsigned> &ShuffleMask) { 142205408Srdivacky for (unsigned i = 0; i != NElts/2; ++i) { 143205408Srdivacky ShuffleMask.push_back(i); // Reads from dest 144205408Srdivacky ShuffleMask.push_back(i+NElts); // Reads from src 145205408Srdivacky } 146261991Sdim} 147205408Srdivacky 148205408Srdivacky} // llvm namespace 149205408Srdivacky