X86ShuffleDecode.cpp revision 218885
//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//
14199512Srdivacky
15193326Sed#include "X86ShuffleDecode.h"
16226633Sdim
//===----------------------------------------------------------------------===//
//  Vector Mask Decoding
//===----------------------------------------------------------------------===//
20226633Sdim
21193326Sednamespace llvm {
22226633Sdim
23261991Sdimvoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
24193326Sed  // Defaults the copying the dest value.
25210299Sed  ShuffleMask.push_back(0);
26210299Sed  ShuffleMask.push_back(1);
27296417Sdim  ShuffleMask.push_back(2);
28296417Sdim  ShuffleMask.push_back(3);
29296417Sdim
30276479Sdim  // Decode the immediate.
31193326Sed  unsigned ZMask = Imm & 15;
32198092Srdivacky  unsigned CountD = (Imm >> 4) & 3;
33210299Sed  unsigned CountS = (Imm >> 6) & 3;
34193326Sed
35198092Srdivacky  // CountS selects which input element to use.
36193326Sed  unsigned InVal = 4+CountS;
37198092Srdivacky  // CountD specifies which element of destination to update.
38198092Srdivacky  ShuffleMask[CountD] = InVal;
39198092Srdivacky  // ZMask zaps values, potentially overriding the CountD elt.
40210299Sed  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
41210299Sed  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
42193326Sed  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
43226633Sdim  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
44226633Sdim}
45226633Sdim
46226633Sdim// <3,1> or <6,7,2,3>
47226633Sdimvoid DecodeMOVHLPSMask(unsigned NElts,
48226633Sdim                       SmallVectorImpl<unsigned> &ShuffleMask) {
49193326Sed  for (unsigned i = NElts/2; i != NElts; ++i)
50193326Sed    ShuffleMask.push_back(NElts+i);
51198092Srdivacky
52198092Srdivacky  for (unsigned i = NElts/2; i != NElts; ++i)
53193326Sed    ShuffleMask.push_back(i);
54193326Sed}
55193326Sed
56198092Srdivacky// <0,2> or <0,1,4,5>
57210299Sedvoid DecodeMOVLHPSMask(unsigned NElts,
58210299Sed                       SmallVectorImpl<unsigned> &ShuffleMask) {
59210299Sed  for (unsigned i = 0; i != NElts/2; ++i)
60210299Sed    ShuffleMask.push_back(i);
61210299Sed
62193326Sed  for (unsigned i = 0; i != NElts/2; ++i)
63193326Sed    ShuffleMask.push_back(NElts+i);
64193326Sed}
65193326Sed
66261991Sdimvoid DecodePSHUFMask(unsigned NElts, unsigned Imm,
67261991Sdim                     SmallVectorImpl<unsigned> &ShuffleMask) {
68261991Sdim  for (unsigned i = 0; i != NElts; ++i) {
69261991Sdim    ShuffleMask.push_back(Imm % NElts);
70261991Sdim    Imm /= NElts;
71261991Sdim  }
72193326Sed}
73261991Sdim
74261991Sdimvoid DecodePSHUFHWMask(unsigned Imm,
75261991Sdim                       SmallVectorImpl<unsigned> &ShuffleMask) {
76243830Sdim  ShuffleMask.push_back(0);
77249423Sdim  ShuffleMask.push_back(1);
78249423Sdim  ShuffleMask.push_back(2);
79249423Sdim  ShuffleMask.push_back(3);
80261991Sdim  for (unsigned i = 0; i != 4; ++i) {
81249423Sdim    ShuffleMask.push_back(4+(Imm & 3));
82249423Sdim    Imm >>= 2;
83276479Sdim  }
84261991Sdim}
85261991Sdim
86261991Sdimvoid DecodePSHUFLWMask(unsigned Imm,
87280031Sdim                       SmallVectorImpl<unsigned> &ShuffleMask) {
88261991Sdim  for (unsigned i = 0; i != 4; ++i) {
89261991Sdim    ShuffleMask.push_back((Imm & 3));
90261991Sdim    Imm >>= 2;
91261991Sdim  }
92249423Sdim  ShuffleMask.push_back(4);
93249423Sdim  ShuffleMask.push_back(5);
94249423Sdim  ShuffleMask.push_back(6);
95249423Sdim  ShuffleMask.push_back(7);
96198092Srdivacky}
97193326Sed
98193326Sedvoid DecodePUNPCKLMask(unsigned NElts,
99249423Sdim                       SmallVectorImpl<unsigned> &ShuffleMask) {
100249423Sdim  for (unsigned i = 0; i != NElts/2; ++i) {
101249423Sdim    ShuffleMask.push_back(i);
102249423Sdim    ShuffleMask.push_back(i+NElts);
103249423Sdim  }
104193326Sed}
105249423Sdim
106249423Sdimvoid DecodePUNPCKHMask(unsigned NElts,
107249423Sdim                       SmallVectorImpl<unsigned> &ShuffleMask) {
108249423Sdim  for (unsigned i = 0; i != NElts/2; ++i) {
109249423Sdim    ShuffleMask.push_back(i+NElts/2);
110193326Sed    ShuffleMask.push_back(i+NElts+NElts/2);
111199990Srdivacky  }
112249423Sdim}
113249423Sdim
114249423Sdimvoid DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
115221345Sdim                      SmallVectorImpl<unsigned> &ShuffleMask) {
116249423Sdim  // Part that reads from dest.
117193326Sed  for (unsigned i = 0; i != NElts/2; ++i) {
118193326Sed    ShuffleMask.push_back(Imm % NElts);
119193326Sed    Imm /= NElts;
120193326Sed  }
121195341Sed  // Part that reads from src.
122195341Sed  for (unsigned i = 0; i != NElts/2; ++i) {
123261991Sdim    ShuffleMask.push_back(Imm % NElts + NElts);
124226633Sdim    Imm /= NElts;
125226633Sdim  }
126198092Srdivacky}
127205408Srdivacky
128205408Srdivackyvoid DecodeUNPCKHPMask(unsigned NElts,
129205408Srdivacky                       SmallVectorImpl<unsigned> &ShuffleMask) {
130280031Sdim  for (unsigned i = 0; i != NElts/2; ++i) {
131280031Sdim    ShuffleMask.push_back(i+NElts/2);        // Reads from dest
132276479Sdim    ShuffleMask.push_back(i+NElts+NElts/2);  // Reads from src
133276479Sdim  }
134280031Sdim}
135205408Srdivacky
136280031Sdim
137205408Srdivacky/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
138205408Srdivacky/// etc.  NElts indicates the number of elements in the vector allowing it to
139205408Srdivacky/// handle different datatypes and vector widths.
140205408Srdivackyvoid DecodeUNPCKLPMask(unsigned NElts,
141205408Srdivacky                       SmallVectorImpl<unsigned> &ShuffleMask) {
142205408Srdivacky  for (unsigned i = 0; i != NElts/2; ++i) {
143205408Srdivacky    ShuffleMask.push_back(i);        // Reads from dest
144205408Srdivacky    ShuffleMask.push_back(i+NElts);  // Reads from src
145205408Srdivacky  }
146261991Sdim}
147205408Srdivacky
148205408Srdivacky} // llvm namespace
149205408Srdivacky